mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Merge pull request #49520 from IvanTakarlikov-st/it-grammar-fixes
Fix some grammar mistakes in documentation, code and tests
This commit is contained in:
commit
7a727bbbe3
@ -8,7 +8,7 @@
|
||||
|
||||
/*
|
||||
* (all numbers are written in big-endian manner: the least significant digit on the right)
|
||||
* (only bit representations are used - no hex or octal, leading zeroes are ommited)
|
||||
* (only bit representations are used - no hex or octal, leading zeroes are omitted)
|
||||
*
|
||||
* Consistent hashing scheme:
|
||||
*
|
||||
|
@ -148,7 +148,7 @@ Valid values:
|
||||
- `all` (default) - a universal rule, used when `rule_type` is omitted.
|
||||
- `plain` - a rule for plain metrics. The field `regexp` is processed as regular expression.
|
||||
- `tagged` - a rule for tagged metrics (metrics are stored in DB in the format of `someName?tag1=value1&tag2=value2&tag3=value3`). Regular expression must be sorted by tags' names, first tag must be `__name__` if exists. The field `regexp` is processed as regular expression.
|
||||
- `tag_list` - a rule for tagged matrics, a simple DSL for easier metric description in graphite format `someName;tag1=value1;tag2=value2`, `someName`, or `tag1=value1;tag2=value2`. The field `regexp` is translated into a `tagged` rule. The sorting by tags' names is unnecessary, ti will be done automatically. A tag's value (but not a name) can be set as a regular expression, e.g. `env=(dev|staging)`.
|
||||
- `tag_list` - a rule for tagged metrics, a simple DSL for easier metric description in graphite format `someName;tag1=value1;tag2=value2`, `someName`, or `tag1=value1;tag2=value2`. The field `regexp` is translated into a `tagged` rule. The sorting by tags' names is unnecessary, it will be done automatically. A tag's value (but not a name) can be set as a regular expression, e.g. `env=(dev|staging)`.
|
||||
- `regexp` – A pattern for the metric name (a regular or DSL).
|
||||
- `age` – The minimum age of the data in seconds.
|
||||
- `precision` – How precisely to define the age of the data in seconds. Should be a divisor for 86400 (seconds in a day).
|
||||
|
@ -727,7 +727,7 @@ TTL d + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(17)), d + INTERVAL 1 YEAR RECOMPR
|
||||
SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0;
|
||||
```
|
||||
|
||||
Creating a table, where expired rows are aggregated. In result rows `x` contains the maximum value accross the grouped rows, `y` — the minimum value, and `d` — any occasional value from grouped rows.
|
||||
Creating a table, where expired rows are aggregated. In the resulting rows, `x` contains the maximum value across the grouped rows, `y` — the minimum value, and `d` — any occasional value from grouped rows.
|
||||
|
||||
``` sql
|
||||
CREATE TABLE table_for_aggregation
|
||||
|
@ -242,7 +242,7 @@ When querying a `Distributed` table, `SELECT` queries are sent to all shards and
|
||||
|
||||
When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).
|
||||
|
||||
To learn more about how distibuted `in` and `global in` queries are processed, refer to [this](../../../sql-reference/operators/in.md#select-distributed-subqueries) documentation.
|
||||
To learn more about how distributed `in` and `global in` queries are processed, refer to [this](../../../sql-reference/operators/in.md#select-distributed-subqueries) documentation.
|
||||
|
||||
## Virtual Columns {#virtual-columns}
|
||||
|
||||
|
@ -120,7 +120,7 @@ Some comments about the `sentiment` table:
|
||||
- The `TabSeparated` format means our Python script needs to generate rows of raw data that contain tab-separated values
|
||||
- The query selects two columns from `hackernews`. The Python script will need to parse out those column values from the incoming rows
|
||||
|
||||
Here is the defintion of `sentiment.py`:
|
||||
Here is the definition of `sentiment.py`:
|
||||
|
||||
```python
|
||||
#!/usr/local/bin/python3.9
|
||||
|
@ -14,7 +14,7 @@ Syntax: `URL(URL [,Format] [,CompressionMethod])`
|
||||
|
||||
- The `Format` must be one that ClickHouse can use in `SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see [Formats](../../../interfaces/formats.md#formats).
|
||||
|
||||
If this argument is not specified, ClickHouse detectes the format automatically from the suffix of the `URL` parameter. If the suffix of `URL` parameter does not match any supported formats, it fails to create table. For example, for engine expression `URL('http://localhost/test.json')`, `JSON` format is applied.
|
||||
If this argument is not specified, ClickHouse detects the format automatically from the suffix of the `URL` parameter. If the suffix of `URL` parameter does not match any supported formats, it fails to create table. For example, for engine expression `URL('http://localhost/test.json')`, `JSON` format is applied.
|
||||
|
||||
- `CompressionMethod` indicates whether the HTTP body should be compressed. If the compression is enabled, the HTTP packets sent by the URL engine contain the 'Content-Encoding' header to indicate which compression method is used.
|
||||
|
||||
|
@ -308,7 +308,7 @@ To build a Superset dashboard using the OpenCelliD dataset you should:
|
||||
![Choose clickhouse connect as database type](@site/docs/en/getting-started/example-datasets/images/superset-choose-a-database.png)
|
||||
|
||||
:::note
|
||||
If **ClickHouse Connect** is not one of your options, then you will need to install it. The comand is `pip install clickhouse-connect`, and more info is [available here](https://pypi.org/project/clickhouse-connect/).
|
||||
If **ClickHouse Connect** is not one of your options, then you will need to install it. The command is `pip install clickhouse-connect`, and more info is [available here](https://pypi.org/project/clickhouse-connect/).
|
||||
:::
|
||||
|
||||
#### Add your connection details:
|
||||
|
@ -261,5 +261,5 @@ The results look like
|
||||
```
|
||||
|
||||
:::note
|
||||
As mentioned in the [GitHub repo](https://github.com/GoogleCloudPlatform/covid-19-open-data), the datset is no longer updated as of September 15, 2022.
|
||||
As mentioned in the [GitHub repo](https://github.com/GoogleCloudPlatform/covid-19-open-data), the dataset is no longer updated as of September 15, 2022.
|
||||
:::
|
@ -208,7 +208,7 @@ Default value: `3600` (1 hour).
|
||||
## database_catalog_unused_dir_rm_timeout_sec {#database_catalog_unused_dir_rm_timeout_sec}
|
||||
|
||||
Parameter of a task that cleans up garbage from `store/` directory.
|
||||
If some subdirectory is not used by clickhouse-server and it was previousely "hidden"
|
||||
If some subdirectory is not used by clickhouse-server and it was previously "hidden"
|
||||
(see [database_catalog_unused_dir_hide_timeout_sec](../../operations/server-configuration-parameters/settings.md#database_catalog_unused_dir_hide_timeout_sec))
|
||||
and this directory was not modified for last
|
||||
`database_catalog_unused_dir_rm_timeout_sec` seconds, the task will remove this directory.
|
||||
|
@ -1027,7 +1027,7 @@ Timeout to close idle TCP connections after specified number of seconds.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer (0 - close immediatly, after 0 seconds).
|
||||
- Positive integer (0 - close immediately, after 0 seconds).
|
||||
|
||||
Default value: 3600.
|
||||
|
||||
@ -1733,7 +1733,7 @@ Possible values:
|
||||
|
||||
Default value: 1.
|
||||
|
||||
By default, async inserts are inserted into replicated tables by the `INSERT` statement enabling [async_isnert](#async-insert) are deduplicated (see [Data Replication](../../engines/table-engines/mergetree-family/replication.md)).
|
||||
By default, async inserts made into replicated tables by an `INSERT` statement with [async_insert](#async-insert) enabled are deduplicated (see [Data Replication](../../engines/table-engines/mergetree-family/replication.md)).
|
||||
For the replicated tables, by default, only 10000 of the most recent inserts for each partition are deduplicated (see [replicated_deduplication_window_for_async_inserts](merge-tree-settings.md/#replicated-deduplication-window-async-inserts), [replicated_deduplication_window_seconds_for_async_inserts](merge-tree-settings.md/#replicated-deduplication-window-seconds-async-inserts)).
|
||||
We recommend enabling the [async_block_ids_cache](merge-tree-settings.md/#use-async-block-ids-cache) to increase the efficiency of deduplication.
|
||||
This function does not work for non-replicated tables.
|
||||
@ -1939,8 +1939,8 @@ Do not merge aggregation states from different servers for distributed query pro
|
||||
Possible values:
|
||||
|
||||
- `0` — Disabled (final query processing is done on the initiator node).
|
||||
- `1` - Do not merge aggregation states from different servers for distributed query processing (query completelly processed on the shard, initiator only proxy the data), can be used in case it is for certain that there are different keys on different shards.
|
||||
- `2` - Same as `1` but applies `ORDER BY` and `LIMIT` (it is not possible when the query processed completelly on the remote node, like for `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`).
|
||||
- `1` - Do not merge aggregation states from different servers for distributed query processing (the query is completely processed on the shard, the initiator only proxies the data), can be used in case it is for certain that there are different keys on different shards.
|
||||
- `2` - Same as `1` but applies `ORDER BY` and `LIMIT` (it is not possible when the query is processed completely on the remote node, like for `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`).
|
||||
|
||||
Default value: `0`
|
||||
|
||||
@ -4110,7 +4110,7 @@ Enabled by default.
|
||||
|
||||
## use_hedged_requests {#use_hedged_requests}
|
||||
|
||||
Enables hadged requests logic for remote queries. It allows to establish many connections with different replicas for query.
|
||||
Enables hedged requests logic for remote queries. It allows to establish many connections with different replicas for query.
|
||||
New connection is enabled in case existent connection(s) with replica(s) were not established within `hedged_connection_timeout`
|
||||
or no data was received within `receive_data_timeout`. Query uses the first connection which sends a non-empty progress packet (or data packet, if `allow_changing_replica_until_first_data_packet`);
|
||||
other connections are cancelled. Queries with `max_parallel_replicas > 1` are supported.
|
||||
|
@ -183,7 +183,7 @@ Arguments:
|
||||
- `-S`, `--structure` — table structure for input data.
|
||||
- `--input-format` — input format, `TSV` by default.
|
||||
- `-f`, `--file` — path to data, `stdin` by default.
|
||||
- `-q`, `--query` — queries to execute with `;` as delimeter. You must specify either `query` or `queries-file` option.
|
||||
- `-q`, `--query` — queries to execute with `;` as delimiter. You must specify either `query` or `queries-file` option.
|
||||
- `--queries-file` - file path with queries to execute. You must specify either `query` or `queries-file` option.
|
||||
- `-N`, `--table` — table name where to put output data, `table` by default.
|
||||
- `--format`, `--output-format` — output format, `TSV` by default.
|
||||
|
@ -23,7 +23,7 @@ Alias: `medianDeterministic`.
|
||||
|
||||
- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
|
||||
- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
|
||||
- `determinator` — Number whose hash is used instead of a random number generator in the reservoir sampling algorithm to make the result of sampling deterministic. As a determinator you can use any deterministic positive number, for example, a user id or an event id. If the same determinator value occures too often, the function works incorrectly.
|
||||
- `determinator` — Number whose hash is used instead of a random number generator in the reservoir sampling algorithm to make the result of sampling deterministic. As a determinator you can use any deterministic positive number, for example, a user id or an event id. If the same determinator value occurs too often, the function works incorrectly.
|
||||
|
||||
**Returned value**
|
||||
|
||||
|
@ -949,7 +949,7 @@ SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source wher
|
||||
...
|
||||
```
|
||||
|
||||
For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronious and asynchronious updates are supported.
|
||||
For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronous and asynchronous updates are supported.
|
||||
|
||||
It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to only request data that was changed after the previous update. If `update_field` is specified as part of the dictionary source configuration, value of the previous update time in seconds will be added to the data request. Depending on the source type (Executable, HTTP, MySQL, PostgreSQL, ClickHouse, or ODBC), different logic will be applied to `update_field` before requesting data from an external source.
|
||||
|
||||
|
@ -314,7 +314,7 @@ SELECT bitTestAny(number, index1, index2, index3, index4, ...)
|
||||
|
||||
**Returned values**
|
||||
|
||||
Returns result of logical disjuction.
|
||||
Returns result of logical disjunction.
|
||||
|
||||
Type: `UInt8`.
|
||||
|
||||
|
@ -256,7 +256,7 @@ Result:
|
||||
|
||||
## bitmapCardinality
|
||||
|
||||
Rerturn the cardinality of a bitmap.
|
||||
Returns the cardinality of a bitmap.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -14,7 +14,7 @@ The following types can be compared:
|
||||
- dates
|
||||
- dates with times
|
||||
|
||||
Only values within the same group can be compared (e.g. UInt16 and UInt64) but not accross groups (e.g. UInt16 and DateTime).
|
||||
Only values within the same group can be compared (e.g. UInt16 and UInt64) but not across groups (e.g. UInt16 and DateTime).
|
||||
|
||||
Strings are compared byte-by-byte. Note that this may lead to unexpected results if one of the strings contains UTF-8 encoded multi-byte characters.
|
||||
|
||||
|
@ -289,7 +289,7 @@ Aliases: `DAYOFMONTH`, `DAY`.
|
||||
|
||||
Converts a date or date with time to the number of the day in the week as UInt8 value.
|
||||
|
||||
The two-argument form of `toDayOfWeek()` enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or 1 to 7. If the mode argument is ommited, the default mode is 0. The time zone of the date can be specified as the third argument.
|
||||
The two-argument form of `toDayOfWeek()` enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or 1 to 7. If the mode argument is omitted, the default mode is 0. The time zone of the date can be specified as the third argument.
|
||||
|
||||
| Mode | First day of week | Range |
|
||||
|------|-------------------|------------------------------------------------|
|
||||
|
@ -84,7 +84,7 @@ Result:
|
||||
|
||||
## s2GetNeighbors
|
||||
|
||||
Returns S2 neighbor indixes corresponding to the provided [S2](#s2index). Each cell in the S2 system is a quadrilateral bounded by four geodesics. So, each cell has 4 neighbors.
|
||||
Returns S2 neighbor indexes corresponding to the provided [S2](#s2index). Each cell in the S2 system is a quadrilateral bounded by four geodesics. So, each cell has 4 neighbors.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -206,7 +206,7 @@ s2CapUnion(center1, radius1, center2, radius2)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `center1`, `center2` — S2 point indixes corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
- `center1`, `center2` — S2 point indexes corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
- `radius1`, `radius2` — Radius of the two input caps in degrees. [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
**Returned values**
|
||||
|
@ -64,7 +64,7 @@ This is a cryptographic hash function. It works at least three times faster than
|
||||
The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm:
|
||||
|
||||
1. The first and the second hash value are concatenated to an array which is hashed.
|
||||
2. The previously calculated hash value and the hash of the third input paramter are hashed in a similar way.
|
||||
2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
|
||||
3. This calculation is repeated for all remaining hash values of the original input.
|
||||
|
||||
**Arguments**
|
||||
|
@ -84,7 +84,7 @@ Alias: The [OR Operator](../../sql-reference/operators/index.md#logical-or-opera
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `1`, if at least one argument evalutes to `true`,
|
||||
- `1`, if at least one argument evaluates to `true`,
|
||||
- `0`, if all arguments evaluate to `false`,
|
||||
- `NULL`, if all arguments evaluate to `false` and at least one argument is `NULL`.
|
||||
|
||||
@ -173,7 +173,7 @@ xor(val1, val2...)
|
||||
**Returned value**
|
||||
|
||||
- `1`, for two values: if one of the values evaluates to `false` and other does not,
|
||||
- `0`, for two values: if both values evalute to `false` or to both `true`,
|
||||
- `0`, for two values: if both values evaluate to `false` or to both `true`,
|
||||
- `NULL`, if at least one of the inputs is `NULL`
|
||||
|
||||
Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
|
||||
|
@ -187,7 +187,7 @@ detectLanguageMixed('text_to_be_analyzed')
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a perentage of text found for that language
|
||||
- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a percentage of text found for that language
|
||||
|
||||
|
||||
**Examples**
|
||||
|
@ -306,7 +306,7 @@ You can use this function in table engine parameters in a CREATE TABLE query whe
|
||||
|
||||
## currentUser()
|
||||
|
||||
Returns the login of current user. Login of user, that initiated query, will be returned in case distibuted query.
|
||||
Returns the login of the current user. In case of a distributed query, the login of the user that initiated the query is returned.
|
||||
|
||||
``` sql
|
||||
SELECT currentUser();
|
||||
@ -317,7 +317,7 @@ Alias: `user()`, `USER()`.
|
||||
**Returned values**
|
||||
|
||||
- Login of current user.
|
||||
- Login of user that initiated query in case of disributed query.
|
||||
- Login of user that initiated query in case of distributed query.
|
||||
|
||||
Type: `String`.
|
||||
|
||||
|
@ -19,13 +19,13 @@ The random numbers are generated by non-cryptographic algorithms.
|
||||
|
||||
## rand, rand32
|
||||
|
||||
Returns a random UInt32 number, evenly distributed accross the range of all possible UInt32 numbers.
|
||||
Returns a random UInt32 number, evenly distributed across the range of all possible UInt32 numbers.
|
||||
|
||||
Uses a linear congruential generator.
|
||||
|
||||
## rand64
|
||||
|
||||
Returns a random UInt64 number, evenly distributed accross the range of all possible UInt64 numbers.
|
||||
Returns a random UInt64 number, evenly distributed across the range of all possible UInt64 numbers.
|
||||
|
||||
Uses a linear congruential generator.
|
||||
|
||||
|
@ -310,7 +310,7 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b');
|
||||
|
||||
## repeat
|
||||
|
||||
Conatenates a string as many times with itself as specified.
|
||||
Concatenates a string as many times with itself as specified.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -133,7 +133,7 @@ Tuples should have the same type of the elements.
|
||||
|
||||
- The Hamming distance.
|
||||
|
||||
Type: The result type is calculed the same way it is for [Arithmetic functions](../../sql-reference/functions/arithmetic-functions.md), based on the number of elements in the input tuples.
|
||||
Type: The result type is calculated the same way it is for [Arithmetic functions](../../sql-reference/functions/arithmetic-functions.md), based on the number of elements in the input tuples.
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
@ -223,7 +223,7 @@ Result:
|
||||
└───────────────────────────────────────┘
|
||||
```
|
||||
|
||||
It is possible to transform colums to rows using this function:
|
||||
It is possible to transform columns to rows using this function:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE tupletest (col Tuple(CPU Float64, Memory Float64, Disk Float64)) ENGINE = Memory;
|
||||
|
@ -449,7 +449,7 @@ mapExtractKeyLike(map, pattern)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A map contained elements the key of which matchs the specified pattern. If there are no elements matched the pattern, it will return an empty map.
|
||||
- A map containing the elements whose keys match the specified pattern. If no elements match the pattern, an empty map is returned.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -116,7 +116,7 @@ The column description can specify a default value expression in the form of `DE
|
||||
|
||||
The expression `expr` is optional. If it is omitted, the column type must be specified explicitly and the default value will be `0` for numeric columns, `''` (the empty string) for string columns, `[]` (the empty array) for array columns, `1970-01-01` for date columns, or `NULL` for nullable columns.
|
||||
|
||||
The column type of a default value column can be omitted in which case it is infered from `expr`'s type. For example the type of column `EventDate DEFAULT toDate(EventTime)` will be date.
|
||||
The column type of a default value column can be omitted in which case it is inferred from `expr`'s type. For example the type of column `EventDate DEFAULT toDate(EventTime)` will be date.
|
||||
|
||||
If both a data type and a default value expression are specified, an implicit type casting function is inserted which converts the expression to the specified type. Example: `Hits UInt32 DEFAULT 0` is internally represented as `Hits UInt32 DEFAULT toUInt32(0)`.
|
||||
|
||||
|
@ -34,7 +34,7 @@ If the `alter_sync` is set to `2` and some replicas are not active for more than
|
||||
|
||||
## BY expression
|
||||
|
||||
If you want to perform deduplication on custom set of columns rather than on all, you can specify list of columns explicitly or use any combination of [`*`](../../sql-reference/statements/select/index.md#asterisk), [`COLUMNS`](../../sql-reference/statements/select/index.md#columns-expression) or [`EXCEPT`](../../sql-reference/statements/select/index.md#except-modifier) expressions. The explictly written or implicitly expanded list of columns must include all columns specified in row ordering expression (both primary and sorting keys) and partitioning expression (partitioning key).
|
||||
If you want to perform deduplication on custom set of columns rather than on all, you can specify list of columns explicitly or use any combination of [`*`](../../sql-reference/statements/select/index.md#asterisk), [`COLUMNS`](../../sql-reference/statements/select/index.md#columns-expression) or [`EXCEPT`](../../sql-reference/statements/select/index.md#except-modifier) expressions. The explicitly written or implicitly expanded list of columns must include all columns specified in row ordering expression (both primary and sorting keys) and partitioning expression (partitioning key).
|
||||
|
||||
:::note
|
||||
Notice that `*` behaves just like in `SELECT`: [MATERIALIZED](../../sql-reference/statements/create/table.md#materialized) and [ALIAS](../../sql-reference/statements/create/table.md#alias) columns are not used for expansion.
|
||||
|
@ -6,7 +6,7 @@ sidebar_label: hdfsCluster
|
||||
|
||||
# hdfsCluster Table Function
|
||||
|
||||
Allows processing files from HDFS in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterics in HDFS file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
|
||||
Allows processing files from HDFS in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterisks in HDFS file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -53,7 +53,7 @@ The `remote` table function can be useful in the following cases:
|
||||
- Infrequent distributed requests that are made manually.
|
||||
- Distributed requests where the set of servers is re-defined each time.
|
||||
|
||||
### Adresses
|
||||
### Addresses
|
||||
|
||||
``` text
|
||||
example01-01-1
|
||||
|
@ -5,7 +5,7 @@ sidebar_label: s3Cluster
|
||||
title: "s3Cluster Table Function"
|
||||
---
|
||||
|
||||
Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterics in S3 file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
|
||||
Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterisks in S3 file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -80,7 +80,7 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
|
||||
- `PARTITION BY` - defines how to break a resultset into groups.
|
||||
- `ORDER BY` - defines how to order rows inside the group during calculation aggregate_function.
|
||||
- `ROWS or RANGE` - defines bounds of a frame, aggregate_function is calculated within a frame.
|
||||
- `WINDOW` - allows to reuse a window definition with multiple exressions.
|
||||
- `WINDOW` - allows to reuse a window definition with multiple expressions.
|
||||
|
||||
### Functions
|
||||
|
||||
|
@ -107,7 +107,7 @@ SELECT comment, hex(secret) FROM encryption_test WHERE comment LIKE '%gcm%';
|
||||
|
||||
## aes_encrypt_mysql {#aes_encrypt_mysql}
|
||||
|
||||
Совместима с шифрованием myqsl, результат может быть расшифрован функцией [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt).
|
||||
Совместима с шифрованием mysql, результат может быть расшифрован функцией [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt).
|
||||
|
||||
При одинаковых входящих значениях зашифрованный текст будет совпадать с результатом, возвращаемым функцией `encrypt`. Однако если `key` или `iv` длиннее, чем должны быть, `aes_encrypt_mysql` будет работать аналогично функции `aes_encrypt` в MySQL: свернет ключ и проигнорирует лишнюю часть `iv`.
|
||||
|
||||
@ -298,7 +298,7 @@ SELECT comment, decrypt('aes-256-ofb', secret, '12345678910121314151617181920212
|
||||
|
||||
## aes_decrypt_mysql {#aes_decrypt_mysql}
|
||||
|
||||
Совместима с шифрованием myqsl и может расшифровать данные, зашифрованные функцией [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt).
|
||||
Совместима с шифрованием mysql и может расшифровать данные, зашифрованные функцией [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt).
|
||||
|
||||
При одинаковых входящих значениях расшифрованный текст будет совпадать с результатом, возвращаемым функцией `decrypt`. Однако если `key` или `iv` длиннее, чем должны быть, `aes_decrypt_mysql` будет работать аналогично функции `aes_decrypt` в MySQL: свернет ключ и проигнорирует лишнюю часть `iv`.
|
||||
|
||||
|
@ -778,7 +778,7 @@ TCP端口,用于与客户端进行安全通信。 使用它与 [OpenSSL](#serv
|
||||
|
||||
## zookeeper {#server-settings_zookeeper}
|
||||
|
||||
包含允许ClickHouse与 [zookpeer](http://zookeeper.apache.org/) 集群。
|
||||
包含允许ClickHouse与 [ZooKeeper](http://zookeeper.apache.org/) 集群交互的设置。
|
||||
|
||||
ClickHouse使用ZooKeeper存储复制表副本的元数据。 如果未使用复制的表,则可以省略此部分参数。
|
||||
|
||||
|
@ -37,7 +37,7 @@ using RWLock = std::shared_ptr<RWLockImpl>;
|
||||
///
|
||||
/// NOTE: it is dangerous to acquire lock with NO_QUERY, because FastPath doesn't
|
||||
/// exist for this case and deadlock, described in previous note,
|
||||
/// may accur in case of recursive locking.
|
||||
/// may occur in case of recursive locking.
|
||||
class RWLockImpl : public std::enable_shared_from_this<RWLockImpl>
|
||||
{
|
||||
public:
|
||||
|
@ -44,7 +44,7 @@ struct NetworkInterfaces
|
||||
std::optional<Poco::Net::IPAddress> interface_address;
|
||||
switch (family)
|
||||
{
|
||||
/// We interested only in IP-adresses
|
||||
/// We are interested only in IP addresses
|
||||
case AF_INET:
|
||||
{
|
||||
interface_address.emplace(*(iface->ifa_addr));
|
||||
|
@ -219,7 +219,7 @@ ClusterPtr ClusterDiscovery::makeCluster(const ClusterInfo & cluster_info)
|
||||
{
|
||||
std::vector<Strings> shards;
|
||||
{
|
||||
std::map<size_t, Strings> replica_adresses;
|
||||
std::map<size_t, Strings> replica_addresses;
|
||||
|
||||
for (const auto & [_, node] : cluster_info.nodes_info)
|
||||
{
|
||||
@ -228,11 +228,11 @@ ClusterPtr ClusterDiscovery::makeCluster(const ClusterInfo & cluster_info)
|
||||
LOG_WARNING(log, "Node '{}' in cluster '{}' has different 'secure' value, skipping it", node.address, cluster_info.name);
|
||||
continue;
|
||||
}
|
||||
replica_adresses[node.shard_id].emplace_back(node.address);
|
||||
replica_addresses[node.shard_id].emplace_back(node.address);
|
||||
}
|
||||
|
||||
shards.reserve(replica_adresses.size());
|
||||
for (auto & [_, replicas] : replica_adresses)
|
||||
shards.reserve(replica_addresses.size());
|
||||
for (auto & [_, replicas] : replica_addresses)
|
||||
shards.emplace_back(std::move(replicas));
|
||||
}
|
||||
|
||||
|
@ -290,7 +290,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(
|
||||
.callback = read_task_callback.value(),
|
||||
.count_participating_replicas = client_info.count_participating_replicas,
|
||||
.number_of_current_replica = client_info.number_of_current_replica,
|
||||
.colums_to_read = required_columns
|
||||
.columns_to_read = required_columns
|
||||
};
|
||||
|
||||
/// We have a special logic for local replica. It has to read less data, because in some cases it should
|
||||
@ -734,7 +734,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder(
|
||||
.callback = read_task_callback.value(),
|
||||
.count_participating_replicas = client_info.count_participating_replicas,
|
||||
.number_of_current_replica = client_info.number_of_current_replica,
|
||||
.colums_to_read = column_names
|
||||
.columns_to_read = column_names
|
||||
};
|
||||
|
||||
auto min_marks_for_concurrent_read = info.min_marks_for_concurrent_read;
|
||||
|
@ -34,7 +34,7 @@ struct ParallelReadingExtension
|
||||
/// This is needed to estimate the number of bytes
|
||||
/// between a pair of marks to perform one request
|
||||
/// over the network for a 1Gb of data.
|
||||
Names colums_to_read;
|
||||
Names columns_to_read;
|
||||
};
|
||||
|
||||
/// Base class for MergeTreeThreadSelectAlgorithm and MergeTreeSelectAlgorithm
|
||||
|
@ -378,7 +378,7 @@ MergeTreeReadPoolParallelReplicas::~MergeTreeReadPoolParallelReplicas() = defaul
|
||||
|
||||
Block MergeTreeReadPoolParallelReplicas::getHeader() const
|
||||
{
|
||||
return storage_snapshot->getSampleBlockForColumns(extension.colums_to_read);
|
||||
return storage_snapshot->getSampleBlockForColumns(extension.columns_to_read);
|
||||
}
|
||||
|
||||
MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicas::getTask(size_t thread)
|
||||
|
@ -15,7 +15,7 @@ class Context;
|
||||
/**
|
||||
* hdfsCluster(cluster, URI, format, structure, compression_method)
|
||||
* A table function, which allows to process many files from HDFS on a specific cluster
|
||||
* On initiator it creates a connection to _all_ nodes in cluster, discloses asterics
|
||||
* On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks
|
||||
* in HDFS file path and dispatch each file dynamically.
|
||||
* On worker node it asks initiator about next task to process, processes it.
|
||||
* This is repeated until the tasks are finished.
|
||||
|
@ -17,7 +17,7 @@ class Context;
|
||||
/**
|
||||
* s3cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure)
|
||||
* A table function, which allows to process many files from S3 on a specific cluster
|
||||
* On initiator it creates a connection to _all_ nodes in cluster, discloses asterics
|
||||
* On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks
|
||||
* in S3 file path and dispatch each file dynamically.
|
||||
* On worker node it asks initiator about next task to process, processes it.
|
||||
* This is repeated until the tasks are finished.
|
||||
|
@ -110,16 +110,16 @@ def test_select_all(started_cluster):
|
||||
ORDER BY (name, value, polygon)"""
|
||||
)
|
||||
# print(pure_s3)
|
||||
s3_distibuted = node.query(
|
||||
s3_distributed = node.query(
|
||||
"""
|
||||
SELECT * from s3Cluster(
|
||||
'cluster_simple',
|
||||
'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV',
|
||||
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon)"""
|
||||
)
|
||||
# print(s3_distibuted)
|
||||
# print(s3_distributed)
|
||||
|
||||
assert TSV(pure_s3) == TSV(s3_distibuted)
|
||||
assert TSV(pure_s3) == TSV(s3_distributed)
|
||||
|
||||
|
||||
def test_count(started_cluster):
|
||||
@ -132,16 +132,16 @@ def test_count(started_cluster):
|
||||
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')"""
|
||||
)
|
||||
# print(pure_s3)
|
||||
s3_distibuted = node.query(
|
||||
s3_distributed = node.query(
|
||||
"""
|
||||
SELECT count(*) from s3Cluster(
|
||||
'cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*',
|
||||
'minio', 'minio123', 'CSV',
|
||||
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')"""
|
||||
)
|
||||
# print(s3_distibuted)
|
||||
# print(s3_distributed)
|
||||
|
||||
assert TSV(pure_s3) == TSV(s3_distibuted)
|
||||
assert TSV(pure_s3) == TSV(s3_distributed)
|
||||
|
||||
|
||||
def test_count_macro(started_cluster):
|
||||
@ -154,17 +154,17 @@ def test_count_macro(started_cluster):
|
||||
'minio', 'minio123', 'CSV',
|
||||
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')"""
|
||||
)
|
||||
# print(s3_distibuted)
|
||||
s3_distibuted = node.query(
|
||||
# print(s3_distributed)
|
||||
s3_distributed = node.query(
|
||||
"""
|
||||
SELECT count(*) from s3Cluster(
|
||||
'cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*',
|
||||
'minio', 'minio123', 'CSV',
|
||||
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')"""
|
||||
)
|
||||
# print(s3_distibuted)
|
||||
# print(s3_distributed)
|
||||
|
||||
assert TSV(s3_macro) == TSV(s3_distibuted)
|
||||
assert TSV(s3_macro) == TSV(s3_distributed)
|
||||
|
||||
|
||||
def test_union_all(started_cluster):
|
||||
@ -187,7 +187,7 @@ def test_union_all(started_cluster):
|
||||
"""
|
||||
)
|
||||
# print(pure_s3)
|
||||
s3_distibuted = node.query(
|
||||
s3_distributed = node.query(
|
||||
"""
|
||||
SELECT * FROM
|
||||
(
|
||||
@ -204,9 +204,9 @@ def test_union_all(started_cluster):
|
||||
ORDER BY (name, value, polygon)
|
||||
"""
|
||||
)
|
||||
# print(s3_distibuted)
|
||||
# print(s3_distributed)
|
||||
|
||||
assert TSV(pure_s3) == TSV(s3_distibuted)
|
||||
assert TSV(pure_s3) == TSV(s3_distributed)
|
||||
|
||||
|
||||
def test_wrong_cluster(started_cluster):
|
||||
|
@ -661,7 +661,7 @@ def test_virtual_columns(started_cluster):
|
||||
time.sleep(0.5)
|
||||
result = instance.query("SELECT count() FROM test.postgresql_replica;")
|
||||
|
||||
# just check that it works, no check with `expected` becuase _version is taken as LSN, which will be different each time.
|
||||
# just check that it works, no check with `expected` because _version is taken as LSN, which will be different each time.
|
||||
result = instance.query(
|
||||
"SELECT key, value, _sign, _version FROM test.postgresql_replica;"
|
||||
)
|
||||
|
@ -13,7 +13,7 @@ SELECT c FROM testing ORDER BY e, d;
|
||||
4
|
||||
1
|
||||
3
|
||||
-- update all colums used by proj_1
|
||||
-- update all columns used by proj_1
|
||||
ALTER TABLE testing UPDATE c = c+1, d = d+2 WHERE True SETTINGS mutations_sync=2;
|
||||
SELECT * FROM system.mutations WHERE database = currentDatabase() AND table = 'testing' AND not is_done;
|
||||
SELECT c FROM testing ORDER BY d;
|
||||
|
@ -25,7 +25,7 @@ OPTIMIZE TABLE testing FINAL;
|
||||
SELECT c FROM testing ORDER BY d;
|
||||
SELECT c FROM testing ORDER BY e, d;
|
||||
|
||||
-- update all colums used by proj_1
|
||||
-- update all columns used by proj_1
|
||||
ALTER TABLE testing UPDATE c = c+1, d = d+2 WHERE True SETTINGS mutations_sync=2;
|
||||
|
||||
SELECT * FROM system.mutations WHERE database = currentDatabase() AND table = 'testing' AND not is_done;
|
||||
|
@ -1,3 +1,3 @@
|
||||
-- check that distinct with and w/o optimization produce the same result
|
||||
-- DISTINCT colums are the same as in ORDER BY
|
||||
-- DISTINCT colums has prefix in ORDER BY columns
|
||||
-- DISTINCT columns are the same as in ORDER BY
|
||||
-- DISTINCT columns has prefix in ORDER BY columns
|
||||
|
@ -3,7 +3,7 @@ select '-- check that distinct with and w/o optimization produce the same result
|
||||
drop table if exists distinct_in_order sync;
|
||||
drop table if exists ordinary_distinct sync;
|
||||
|
||||
select '-- DISTINCT colums are the same as in ORDER BY';
|
||||
select '-- DISTINCT columns are the same as in ORDER BY';
|
||||
create table distinct_in_order (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate);
|
||||
insert into distinct_in_order select distinct CounterID, EventDate from test.hits order by CounterID, EventDate settings optimize_distinct_in_order=1;
|
||||
create table ordinary_distinct (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate);
|
||||
@ -13,7 +13,7 @@ select distinct * from distinct_in_order except select * from ordinary_distinct;
|
||||
drop table if exists distinct_in_order sync;
|
||||
drop table if exists ordinary_distinct sync;
|
||||
|
||||
select '-- DISTINCT colums has prefix in ORDER BY columns';
|
||||
select '-- DISTINCT columns has prefix in ORDER BY columns';
|
||||
create table distinct_in_order (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate);
|
||||
insert into distinct_in_order select distinct CounterID, EventDate from test.hits order by CounterID settings optimize_distinct_in_order=1;
|
||||
create table ordinary_distinct (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate);
|
||||
|
@ -85,7 +85,7 @@ SELECT 1.0, 1
|
||||
----
|
||||
1.000 1
|
||||
|
||||
# mess with colums count
|
||||
# mess with columns count
|
||||
query RT nosort
|
||||
SELECT 1.0
|
||||
----
|
||||
|
Loading…
Reference in New Issue
Block a user