Merge branch 'master' into in_memory_raft

This commit is contained in:
alesapin 2021-02-09 16:06:31 +03:00
commit 9667bdcbd2
87 changed files with 2350 additions and 461 deletions

View File

@ -12,6 +12,9 @@ List of supported integrations:
- [ODBC](../../../engines/table-engines/integrations/odbc.md)
- [JDBC](../../../engines/table-engines/integrations/jdbc.md)
- [MySQL](../../../engines/table-engines/integrations/mysql.md)
- [MongoDB](../../../engines/table-engines/integrations/mongodb.md)
- [HDFS](../../../engines/table-engines/integrations/hdfs.md)
- [S3](../../../engines/table-engines/integrations/s3.md)
- [Kafka](../../../engines/table-engines/integrations/kafka.md)
- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md)
- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)

View File

@ -0,0 +1,57 @@
---
toc_priority: 7
toc_title: MongoDB
---
# MongoDB {#mongodb}
MongoDB engine is a read-only table engine which allows reading data (`SELECT` queries) from a remote MongoDB collection. The engine supports only non-nested data types. `INSERT` queries are not supported.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name
(
name1 [type1],
name2 [type2],
...
) ENGINE = MongoDB(host:port, database, collection, user, password);
```
**Engine Parameters**
- `host:port` — MongoDB server address.
- `database` — Remote database name.
- `collection` — Remote collection name.
- `user` — MongoDB user.
- `password` — User password.
## Usage Example {#usage-example}
Table in ClickHouse that allows reading data from a MongoDB collection:
``` sql
CREATE TABLE mongo_table
(
key UInt64,
data String
) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse');
```
Query:
``` sql
SELECT COUNT() FROM mongo_table;
```
``` text
┌─count()─┐
│ 4 │
└─────────┘
```
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/integrations/mongodb/) <!--hide-->

View File

@ -29,6 +29,8 @@ Let's look at the section of the users.xml file that defines quotas.
<!-- Unlimited. Just collect data for the specified time interval. -->
<queries>0</queries>
<query_selects>0</query_selects>
<query_inserts>0</query_inserts>
<errors>0</errors>
<result_rows>0</result_rows>
<read_rows>0</read_rows>
@ -48,6 +50,8 @@ The resource consumption calculated for each interval is output to the server lo
<duration>3600</duration>
<queries>1000</queries>
<query_selects>100</query_selects>
<query_inserts>100</query_inserts>
<errors>100</errors>
<result_rows>1000000000</result_rows>
<read_rows>100000000000</read_rows>
@ -58,6 +62,8 @@ The resource consumption calculated for each interval is output to the server lo
<duration>86400</duration>
<queries>10000</queries>
<query_selects>10000</query_selects>
<query_inserts>10000</query_inserts>
<errors>1000</errors>
<result_rows>5000000000</result_rows>
<read_rows>500000000000</read_rows>
@ -74,6 +80,10 @@ Here are the amounts that can be restricted:
`queries` — The total number of requests.
`query_selects` — The total number of select requests.
`query_inserts` — The total number of insert requests.
`errors` — The number of queries that threw an exception.
`result_rows` — The total number of rows given as a result.

View File

@ -6,29 +6,65 @@ This table contains information about events that occurred with [data parts](../
The `system.part_log` table contains the following columns:
- `event_type` (Enum) — Type of the event that occurred with the data part. Can have one of the following values:
- `query_id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the `INSERT` query that created this data part.
- `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event that occurred with the data part. Can have one of the following values:
- `NEW_PART` — Inserting of a new data part.
- `MERGE_PARTS` — Merging of data parts.
- `DOWNLOAD_PART` — Downloading a data part.
- `REMOVE_PART` — Removing or detaching a data part using [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition).
- `MUTATE_PART` — Mutating of a data part.
- `MOVE_PART` — Moving the data part from the one disk to another one.
- `event_date` (Date) — Event date.
- `event_time` (DateTime) — Event time.
- `duration_ms` (UInt64) — Duration of the event in milliseconds.
- `database` (String) — Name of the database the data part is in.
- `table` (String) — Name of the table the data part is in.
- `part_name` (String) — Name of the data part.
- `partition_id` (String) — ID of the partition that the data part was inserted to. The column takes the `all` value if the partitioning is by `tuple()`.
- `rows` (UInt64) — The number of rows in the data part.
- `size_in_bytes` (UInt64) — Size of the data part in bytes.
- `merged_from` (Array(String)) — An array of names of the parts which the current part was made up from (after the merge).
- `bytes_uncompressed` (UInt64) — Number of uncompressed bytes.
- `read_rows` (UInt64) — The number of rows read during the merge.
- `read_bytes` (UInt64) — The number of bytes read during the merge.
- `error` (UInt16) — Code of the error that occurred.
- `exception` (String) — Text message of the error that occurred.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds precision.
- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration of the event in milliseconds.
- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database the data part is in.
- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table the data part is in.
- `part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part.
- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition that the data part was inserted to. The column takes the `all` value if the partitioning is by `tuple()`.
- `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files.
- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows in the data part.
- `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of the data part in bytes.
- `merged_from` ([Array(String)](../../sql-reference/data-types/array.md)) — An array of names of the parts which the current part was made up from (after the merge).
- `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of uncompressed bytes.
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows read during the merge.
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes read during the merge.
- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in the context of this thread.
- `error` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Code of the error that occurred.
- `exception` ([String](../../sql-reference/data-types/string.md)) — Text message of the error that occurred.
The `system.part_log` table is created after the first insert of data into a `MergeTree` table.
**Example**
``` sql
SELECT * FROM system.part_log LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
query_id: 983ad9c7-28d5-4ae1-844e-603116b7de31
event_type: NewPart
event_date: 2021-02-02
event_time: 2021-02-02 11:14:28
event_time_microseconds: 2021-02-02 11:14:28.861919
duration_ms: 35
database: default
table: log_mt_2
part_name: all_1_1_0
partition_id: all
path_on_disk: db/data/default/log_mt_2/all_1_1_0/
rows: 115418
size_in_bytes: 1074311
merged_from: []
bytes_uncompressed: 0
read_rows: 0
read_bytes: 0
peak_memory_usage: 0
error: 0
exception:
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/part_log) <!--hide-->

View File

@ -9,6 +9,8 @@ Columns:
- `0` — Interval is not randomized.
- `1` — Interval is randomized.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of queries.
- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of select queries.
- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of insert queries.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of result rows.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum volume of RAM in bytes used to store a query's result.

View File

@ -9,6 +9,8 @@ Columns:
- `end_time`([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — End time for calculating resource consumption.
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Length of the time interval for calculating resource consumption, in seconds.
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of requests in this interval.
- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of select requests in this interval.
- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of insert requests in this interval.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of requests.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The number of queries that threw an exception.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors.

View File

@ -11,6 +11,10 @@ Columns:
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — Length of the time interval for calculating resource consumption, in seconds.
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of requests in this interval.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of requests.
- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of select requests in this interval.
- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of select requests.
- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of insert requests in this interval.
- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of insert requests.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The number of queries that threw an exception.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors.
- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of rows given as a result.

View File

@ -1,12 +1,16 @@
# system.zookeeper {#system-zookeeper}
The table does not exist if ZooKeeper is not configured. Allows reading data from the ZooKeeper cluster defined in the config.
The query must have a path equality condition in the WHERE clause. This is the path in ZooKeeper for the children that you want to get data for.
The query must have either a `path =` condition or a `path IN` condition in the `WHERE` clause, as shown below. This corresponds to the path of the children in ZooKeeper that you want to get data for.
The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs data for all children on the `/clickhouse` node.
To output data for all root nodes, write path = '/'.
If the path specified in 'path' doesn't exist, an exception will be thrown.
The query `SELECT * FROM system.zookeeper WHERE path IN ('/', '/clickhouse')` outputs data for all children on the `/` and `/clickhouse` nodes.
If the specified path collection contains a path that doesn't exist, an exception will be thrown.
It can be used to do a batch of ZooKeeper path queries.
Columns:
- `name` (String) — The name of the node.

View File

@ -4,13 +4,42 @@ toc_priority: 106
# argMax {#agg-function-argmax}
Syntax: `argMax(arg, val)` or `argMax(tuple(arg, val))`
Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, returns the first of these values encountered.
Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, the first of these values encountered is output.
Tuple version of this function will return the tuple with the maximum `val` value. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md).
Tuple version of this function will return the tuple with the maximum `val` value. It is convenient for use with `SimpleAggregateFunction`.
**Syntax**
**Example:**
``` sql
argMax(arg, val)
```
or
``` sql
argMax(tuple(arg, val))
```
**Parameters**
- `arg` — Argument.
- `val` — Value.
**Returned value**
- `arg` value that corresponds to maximum `val` value.
Type: matches `arg` type.
For a tuple in the input:
- Tuple `(arg, val)`, where `val` is the maximum value and `arg` is a corresponding value.
Type: [Tuple](../../../sql-reference/data-types/tuple.md).
**Example**
Input table:
``` text
┌─user─────┬─salary─┐
@ -20,12 +49,18 @@ Tuple version of this function will return the tuple with the maximum `val` valu
└──────────┴────────┘
```
Query:
``` sql
SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary
SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary;
```
Result:
``` text
┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐
│ director │ ('director',5000) │
└──────────────────────┴─────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmax/) <!--hide-->

View File

@ -4,13 +4,42 @@ toc_priority: 105
# argMin {#agg-function-argmin}
Syntax: `argMin(arg, val)` or `argMin(tuple(arg, val))`
Calculates the `arg` value for a minimum `val` value. If there are several different values of `arg` for minimum values of `val`, returns the first of these values encountered.
Calculates the `arg` value for a minimal `val` value. If there are several different values of `arg` for minimal values of `val`, the first of these values encountered is output.
Tuple version of this function will return the tuple with the minimum `val` value. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md).
Tuple version of this function will return the tuple with the minimum `val` value. It is convenient for use with `SimpleAggregateFunction`.
**Syntax**
**Example:**
``` sql
argMin(arg, val)
```
or
``` sql
argMin(tuple(arg, val))
```
**Parameters**
- `arg` — Argument.
- `val` — Value.
**Returned value**
- `arg` value that corresponds to minimum `val` value.
Type: matches `arg` type.
For a tuple in the input:
- Tuple `(arg, val)`, where `val` is the minimum value and `arg` is a corresponding value.
Type: [Tuple](../../../sql-reference/data-types/tuple.md).
**Example**
Input table:
``` text
┌─user─────┬─salary─┐
@ -20,12 +49,18 @@ Tuple version of this function will return the tuple with the minimal `val` valu
└──────────┴────────┘
```
Query:
``` sql
SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary
SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary;
```
Result:
``` text
┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─┐
│ worker │ ('worker',1000) │
└──────────────────────┴─────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmin/) <!--hide-->

View File

@ -0,0 +1,71 @@
---
toc_priority: 310
toc_title: mannWhitneyUTest
---
# mannWhitneyUTest {#mannwhitneyutest}
Applies the Mann-Whitney rank test to samples from two populations.
**Syntax**
``` sql
mannWhitneyUTest[(alternative[, continuity_correction])](sample_data, sample_index)
```
Values of both samples are in the `sample_data` column. If `sample_index` equals 0, the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population.
The null hypothesis is that the two populations are stochastically equal. One-sided hypotheses can also be tested. This test does not assume that the data are normally distributed.
**Parameters**
- `alternative` — Alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md).
- `'two-sided'`;
- `'greater'`;
- `'less'`.
- `continuity_correction` — If not 0, continuity correction in the normal approximation for the p-value is applied. (Optional, default: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md).
- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
**Returned values**
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
- calculated U-statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
**Example**
Input table:
``` text
┌─sample_data─┬─sample_index─┐
│ 10 │ 0 │
│ 11 │ 0 │
│ 12 │ 0 │
│ 1 │ 1 │
│ 2 │ 1 │
│ 3 │ 1 │
└─────────────┴──────────────┘
```
Query:
``` sql
SELECT mannWhitneyUTest('greater')(sample_data, sample_index) FROM mww_ttest;
```
Result:
``` text
┌─mannWhitneyUTest('greater')(sample_data, sample_index)─┐
│ (9,0.04042779918503192) │
└────────────────────────────────────────────────────────┘
```
**See Also**
- [MannWhitney U test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test)
- [Stochastic ordering](https://en.wikipedia.org/wiki/Stochastic_ordering)
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest/) <!--hide-->

View File

@ -0,0 +1,65 @@
---
toc_priority: 300
toc_title: studentTTest
---
# studentTTest {#studentttest}
Applies Student's t-test to samples from two populations.
**Syntax**
``` sql
studentTTest(sample_data, sample_index)
```
Values of both samples are in the `sample_data` column. If `sample_index` equals 0, the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population.
The null hypothesis is that the means of the populations are equal. A normal distribution with equal variances is assumed.
**Parameters**
- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
**Returned values**
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
**Example**
Input table:
``` text
┌─sample_data─┬─sample_index─┐
│ 20.3 │ 0 │
│ 21.1 │ 0 │
│ 21.9 │ 1 │
│ 21.7 │ 0 │
│ 19.9 │ 1 │
│ 21.8 │ 1 │
└─────────────┴──────────────┘
```
Query:
``` sql
SELECT studentTTest(sample_data, sample_index) FROM student_ttest;
```
Result:
``` text
┌─studentTTest(sample_data, sample_index)───┐
│ (-0.21739130434783777,0.8385421208415731) │
└───────────────────────────────────────────┘
```
**See Also**
- [Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test)
- [welchTTest function](welchttest.md#welchttest)
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/studentttest/) <!--hide-->

View File

@ -0,0 +1,65 @@
---
toc_priority: 301
toc_title: welchTTest
---
# welchTTest {#welchttest}
Applies Welch's t-test to samples from two populations.
**Syntax**
``` sql
welchTTest(sample_data, sample_index)
```
Values of both samples are in the `sample_data` column. If `sample_index` equals 0, the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population.
The null hypothesis is that the means of the populations are equal. A normal distribution is assumed. The populations may have unequal variance.
**Parameters**
- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
**Returned values**
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
**Example**
Input table:
``` text
┌─sample_data─┬─sample_index─┐
│ 20.3 │ 0 │
│ 22.1 │ 0 │
│ 21.9 │ 0 │
│ 18.9 │ 1 │
│ 20.3 │ 1 │
│ 19 │ 1 │
└─────────────┴──────────────┘
```
Query:
``` sql
SELECT welchTTest(sample_data, sample_index) FROM welch_ttest;
```
Result:
``` text
┌─welchTTest(sample_data, sample_index)─────┐
│ (2.7988719532211235,0.051807360348581945) │
└───────────────────────────────────────────┘
```
**See Also**
- [Welch's t-test](https://en.wikipedia.org/wiki/Welch%27s_t-test)
- [studentTTest function](studentttest.md#studentttest)
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/welchTTest/) <!--hide-->

View File

@ -5,7 +5,7 @@ toc_title: QUOTA
# ALTER QUOTA {#alter-quota-statement}
Changes [quotas](../../../operations/access-rights.md#quotas-management).
Changes quotas.
Syntax:
@ -14,13 +14,13 @@ ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
[RENAME TO new_name]
[KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
[FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
{MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
{MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
NO LIMITS | TRACKING ONLY} [,...]]
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
```
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
Parameters `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
`ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).

View File

@ -13,14 +13,14 @@ Syntax:
CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
[KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
[FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
{MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
{MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
NO LIMITS | TRACKING ONLY} [,...]]
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
```
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
Parameters `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
`ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).

View File

@ -12,7 +12,10 @@ toc_priority: 30
- [ODBC](../../../engines/table-engines/integrations/odbc.md)
- [JDBC](../../../engines/table-engines/integrations/jdbc.md)
- [MySQL](../../../engines/table-engines/integrations/mysql.md)
- [MongoDB](../../../engines/table-engines/integrations/mongodb.md)
- [HDFS](../../../engines/table-engines/integrations/hdfs.md)
- [Kafka](../../../engines/table-engines/integrations/kafka.md)
- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md)
- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)
[Original article](https://clickhouse.tech/docs/ru/engines/table-engines/integrations/) <!--hide-->

View File

@ -0,0 +1,57 @@
---
toc_priority: 7
toc_title: MongoDB
---
# MongoDB {#mongodb}
The MongoDB table engine allows reading data from MongoDB collections. Only flat (non-nested) data types are allowed in the tables. Writing (`INSERT` queries) is not supported.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name
(
name1 [type1],
name2 [type2],
...
) ENGINE = MongoDB(host:port, database, collection, user, password);
```
**Engine Parameters**
- `host:port` — MongoDB server address.
- `database` — Name of the database on the remote server.
- `collection` — Name of the collection on the remote server.
- `user` — MongoDB user.
- `password` — User password.
## Usage Example {#usage-example}
Table in ClickHouse for reading data from a MongoDB collection:
``` sql
CREATE TABLE mongo_table
(
key UInt64,
data String
) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse');
```
Query to the table:
``` sql
SELECT COUNT() FROM mongo_table;
```
``` text
┌─count()─┐
│ 4 │
└─────────┘
```
[Original article](https://clickhouse.tech/docs/ru/operations/table_engines/integrations/mongodb/) <!--hide-->

View File

@ -6,29 +6,62 @@
Columns:
- `event_type` (Enum) — Type of the event. The column can contain one of the following values:
- `query_id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the `INSERT` query that created this data part.
- `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event. The column can contain one of the following values:
- `NEW_PART` — Inserting a new data part.
- `MERGE_PARTS` — Merging data parts.
- `DOWNLOAD_PART` — Downloading from a replica.
- `REMOVE_PART` — Removing or detaching a data part from the table using [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition).
- `MUTATE_PART` — Mutating a data part.
- `MOVE_PART` — Moving a data part between disks.
- `event_date` (Date) — Event date.
- `event_time` (DateTime) — Event time.
- `duration_ms` (UInt64) — Duration of the event in milliseconds.
- `database` (String) — Name of the database the data part is in.
- `table` (String) — Name of the table the data part is in.
- `part_name` (String) — Name of the data part.
- `partition_id` (String) — ID of the partition that the data part was inserted to. The column takes the `all` value if the partitioning is by `tuple()`.
- `rows` (UInt64) — The number of rows in the data part.
- `size_in_bytes` (UInt64) — Size of the data part in bytes.
- `merged_from` (Array(String)) — An array of names of the parts which the current part was made up from (the column is also filled when downloading an already merged part).
- `bytes_uncompressed` (UInt64) — Number of uncompressed bytes read.
- `read_rows` (UInt64) — The number of rows read during the merge.
- `read_bytes` (UInt64) — The number of bytes read during the merge.
- `error` (UInt16) — Code of the error that occurred during the event.
- `exception` (String) — Text of the error.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration of the event in milliseconds.
- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database the data part is in.
- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table the data part is in.
- `part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part.
- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition that the data part was inserted to. The column takes the `all` value if the partitioning is by `tuple()`.
- `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files.
- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows in the data part.
- `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of the data part in bytes.
- `merged_from` ([Array(String)](../../sql-reference/data-types/array.md)) — An array of names of the parts which the current part was made up from (the column is also filled when downloading an already merged part).
- `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of uncompressed bytes read.
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows read during the merge.
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes read during the merge.
- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in the context of this thread.
- `error` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Code of the error that occurred during the event.
- `exception` ([String](../../sql-reference/data-types/string.md)) — Text of the error.
The `system.part_log` system table is created after the first insert of data into a `MergeTree` table.
**Example**
``` sql
SELECT * FROM system.part_log LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
query_id: 983ad9c7-28d5-4ae1-844e-603116b7de31
event_type: NewPart
event_date: 2021-02-02
event_time: 2021-02-02 11:14:28
duration_ms: 35
database: default
table: log_mt_2
part_name: all_1_1_0
partition_id: all
path_on_disk: db/data/default/log_mt_2/all_1_1_0/
rows: 115418
size_in_bytes: 1074311
merged_from: []
bytes_uncompressed: 0
read_rows: 0
read_bytes: 0
peak_memory_usage: 0
error: 0
exception:
```
[Original article](https://clickhouse.tech/docs/ru/operations/system_tables/part_log) <!--hide-->

View File

@ -4,8 +4,63 @@ toc_priority: 106
# argMax {#agg-function-argmax}
Syntax: `argMax(arg, val)`
Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, returns the first of these values encountered.
Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, the first of these values encountered is output.
If a tuple is passed to the function, the tuple with the maximum `val` value is returned. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md).
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmax/) <!--hide-->
**Syntax**
``` sql
argMax(arg, val)
```
or
``` sql
argMax(tuple(arg, val))
```
**Parameters**
- `arg` — Argument.
- `val` — Value.
**Returned value**
- `arg` value that corresponds to the maximum `val` value.
Type: matches `arg` type.
If a tuple is passed:
- Tuple `(arg, val)` with the maximum `val` value and the corresponding `arg`.
Type: [Tuple](../../../sql-reference/data-types/tuple.md).
**Example**
Input table:
``` text
┌─user─────┬─salary─┐
│ director │ 5000 │
│ manager │ 3000 │
│ worker │ 1000 │
└──────────┴────────┘
```
Query:
``` sql
SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary;
```
Result:
``` text
┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐
│ director │ ('director',5000) │
└──────────────────────┴─────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/argmax/) <!--hide-->

View File

@ -4,11 +4,42 @@ toc_priority: 105
# argMin {#agg-function-argmin}
Syntax: `argMin(arg, val)`
Calculates the `arg` value for a minimum `val` value. If there are several different values of `arg` for minimum values of `val`, returns the first of these values encountered.
Calculates the `arg` value for a minimum `val` value. If there are several different values of `arg` for minimum values of `val`, the first of these values encountered is output.
If a tuple is passed to the function, the tuple with the minimum `val` value is returned. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md).
**Example:**
**Syntax**
``` sql
argMin(arg, val)
```
or
``` sql
argMin(tuple(arg, val))
```
**Parameters**
- `arg` — Argument.
- `val` — Value.
**Returned value**
- `arg` value that corresponds to the minimum `val` value.
Type: matches `arg` type.
If a tuple is passed:
- Tuple `(arg, val)` with the minimum `val` value and the corresponding `arg`.
Type: [Tuple](../../../sql-reference/data-types/tuple.md).
**Example**
Input table:
``` text
┌─user─────┬─salary─┐
@ -18,14 +49,18 @@ toc_priority: 105
└──────────┴────────┘
```
Query:
``` sql
SELECT argMin(user, salary) FROM salary
SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary;
```
Result:
``` text
┌─argMin(user, salary)─┐
│ worker │
└──────────────────────┘
┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─┐
│ worker │ ('worker',1000) │
└──────────────────────┴─────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmin/) <!--hide-->
[Original article](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/argmin/) <!--hide-->

View File

@ -0,0 +1,71 @@
---
toc_priority: 310
toc_title: mannWhitneyUTest
---
# mannWhitneyUTest {#mannwhitneyutest}
Applies the Mann-Whitney rank test to samples from two populations.
**Syntax**
``` sql
mannWhitneyUTest[(alternative[, continuity_correction])](sample_data, sample_index)
```
Values of both samples are in the `sample_data` column. If `sample_index` equals 0, the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population.
The null hypothesis is that the two populations are stochastically equal. One-sided hypotheses can also be tested.
The Mann-Whitney U test does not require the populations to be normally distributed.
**Parameters**
- `alternative` — Alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md).
- `'two-sided'`;
- `'greater'`;
- `'less'`.
- `continuity_correction` — If not 0, continuity correction in the normal approximation for the p-value is applied. (Optional, default: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md).
- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
**Returned values**
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
- calculated U-statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
**Example**
Input table:
``` text
┌─sample_data─┬─sample_index─┐
│ 10 │ 0 │
│ 11 │ 0 │
│ 12 │ 0 │
│ 1 │ 1 │
│ 2 │ 1 │
│ 3 │ 1 │
└─────────────┴──────────────┘
```
Query:
``` sql
SELECT mannWhitneyUTest('greater')(sample_data, sample_index) FROM mww_ttest;
```
Result:
``` text
┌─mannWhitneyUTest('greater')(sample_data, sample_index)─┐
│ (9,0.04042779918503192) │
└────────────────────────────────────────────────────────┘
```
**See Also**
- [Mann-Whitney U test](https://ru.wikipedia.org/wiki/U-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%9C%D0%B0%D0%BD%D0%BD%D0%B0_%E2%80%94_%D0%A3%D0%B8%D1%82%D0%BD%D0%B8)
[Original article](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest/) <!--hide-->

View File

@ -0,0 +1,65 @@
---
toc_priority: 300
toc_title: studentTTest
---
# studentTTest {#studentttest}
Applies Student's t-test to samples from two populations.
**Syntax**
``` sql
studentTTest(sample_data, sample_index)
```
Values of both samples are in the `sample_data` column. If `sample_index` equals 0, the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population.
The null hypothesis is that the means of the populations are equal. Student's t-test requires the populations to be normally distributed with equal variances.
**Parameters**
- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
**Returned values**
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
**Example**
Input table:
``` text
┌─sample_data─┬─sample_index─┐
│ 20.3 │ 0 │
│ 21.1 │ 0 │
│ 21.9 │ 1 │
│ 21.7 │ 0 │
│ 19.9 │ 1 │
│ 21.8 │ 1 │
└─────────────┴──────────────┘
```
Query:
``` sql
SELECT studentTTest(sample_data, sample_index) FROM student_ttest;
```
Result:
``` text
┌─studentTTest(sample_data, sample_index)───┐
│ (-0.21739130434783777,0.8385421208415731) │
└───────────────────────────────────────────┘
```
**See Also**
- [Student's t-test](https://ru.wikipedia.org/wiki/T-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%A1%D1%82%D1%8C%D1%8E%D0%B4%D0%B5%D0%BD%D1%82%D0%B0)
- [welchTTest](welchttest.md#welchttest)
[Original article](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/studentttest/) <!--hide-->

View File

@ -0,0 +1,65 @@
---
toc_priority: 301
toc_title: welchTTest
---
# welchTTest {#welchttest}
Applies Welch's t-test to samples from two populations.
**Syntax**
``` sql
welchTTest(sample_data, sample_index)
```
Values of both samples are in the `sample_data` column. If `sample_index` equals 0, the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population.
The null hypothesis is that the means of the populations are equal. Welch's t-test requires the populations to be normally distributed; the variances may be unequal.
**Parameters**
- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
**Returned values**
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
**Example**
Input table:
``` text
┌─sample_data─┬─sample_index─┐
│ 20.3 │ 0 │
│ 22.1 │ 0 │
│ 21.9 │ 0 │
│ 18.9 │ 1 │
│ 20.3 │ 1 │
│ 19 │ 1 │
└─────────────┴──────────────┘
```
Query:
``` sql
SELECT welchTTest(sample_data, sample_index) FROM welch_ttest;
```
Result:
``` text
┌─welchTTest(sample_data, sample_index)─────┐
│ (2.7988719532211235,0.051807360348581945) │
└───────────────────────────────────────────┘
```
**See Also**
- [Welch's t-test](https://ru.wikipedia.org/wiki/T-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%A3%D1%8D%D0%BB%D1%87%D0%B0)
- [studentTTest](studentttest.md#studentttest)
[Original article](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/welchTTest/) <!--hide-->

View File

@ -37,7 +37,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
VersionedCollapsingMergeTree(sign, version)
```
- `sign` — Name of the column with the type of row: `1` is a “state” row, `-1` is a “cancel” row.
The column data type should be `Int8`.

View File

@ -6,12 +6,16 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
# system.zookeeper {#system-zookeeper}
The table does not exist if ZooKeeper is not configured. Allows reading data from the ZooKeeper cluster defined in the config.
The query must have a 'path' equality condition in the WHERE clause. This is the path in ZooKeeper of the children for which you want to get data.
The query must have either a 'path =' condition or a 'path IN' condition in the WHERE clause. This corresponds to the path in ZooKeeper of the children for which you want to get data.
The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs data for all children on the `/clickhouse` node.
To output data for all root nodes, write path = '/'.
If the path specified in 'path' doesn't exist, an exception will be thrown.
The query `SELECT * FROM system.zookeeper WHERE path IN ('/', '/clickhouse')` outputs data for all children on the `/` and `/clickhouse` nodes.
If the specified path collection contains a path that doesn't exist, an exception will be thrown.
It can be used to do a batch of ZooKeeper path queries.
Columns:
- `name` (String) — The name of the node.

View File

@ -316,9 +316,6 @@ void ClusterCopier::process(const ConnectionTimeouts & timeouts)
}
}
/// Delete helping tables in both cases (whole table is done or not)
dropHelpingTables(task_table);
if (!table_is_done)
{
throw Exception("Too many tries to process table " + task_table.table_id + ". Abort remaining execution",
@ -1044,6 +1041,11 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab
{
LOG_INFO(log, "Table {} is not processed yet.Copied {} of {}, will retry", task_table.table_id, finished_partitions, required_partitions);
}
else
{
/// Delete helping tables in case that whole table is done
dropHelpingTables(task_table);
}
return table_is_done;
}

View File

@ -31,6 +31,8 @@ struct Quota : public IAccessEntity
enum ResourceType
{
QUERIES, /// Number of queries.
QUERY_SELECTS, /// Number of select queries.
QUERY_INSERTS, /// Number of insert queries.
ERRORS, /// Number of queries with exceptions.
RESULT_ROWS, /// Number of rows returned as result.
RESULT_BYTES, /// Number of bytes returned as result.
@ -152,6 +154,16 @@ inline const Quota::ResourceTypeInfo & Quota::ResourceTypeInfo::get(ResourceType
static const auto info = make_info("QUERIES", 1);
return info;
}
case Quota::QUERY_SELECTS:
{
static const auto info = make_info("QUERY_SELECTS", 1);
return info;
}
case Quota::QUERY_INSERTS:
{
static const auto info = make_info("QUERY_INSERTS", 1);
return info;
}
case Quota::ERRORS:
{
static const auto info = make_info("ERRORS", 1);

View File

@ -147,7 +147,7 @@ public:
}
if (params[0].getType() != Field::Types::String)
throw Exception("Aggregate function " + getName() + " require require first parameter to be a String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
throw Exception("Aggregate function " + getName() + " require first parameter to be a String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
auto param = params[0].get<String>();
if (param == "two-sided")
@ -158,13 +158,13 @@ public:
alternative = Alternative::Greater;
else
throw Exception("Unknown parameter in aggregate function " + getName() +
". It must be one of: 'two sided', 'less', 'greater'", ErrorCodes::BAD_ARGUMENTS);
". It must be one of: 'two-sided', 'less', 'greater'", ErrorCodes::BAD_ARGUMENTS);
if (params.size() != 2)
return;
if (params[1].getType() != Field::Types::UInt64)
throw Exception("Aggregate function " + getName() + " require require second parameter to be a UInt64", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
throw Exception("Aggregate function " + getName() + " require second parameter to be a UInt64", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
continuity_correction = static_cast<bool>(params[1].get<UInt64>());
}

View File

@ -75,8 +75,28 @@ void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_)
ColumnAggregateFunction::~ColumnAggregateFunction()
{
if (!func->hasTrivialDestructor() && !src)
for (auto * val : data)
func->destroy(val);
{
if (copiedDataInfo.empty())
{
for (auto * val : data)
{
func->destroy(val);
}
}
else
{
size_t pos;
for (Map::iterator it = copiedDataInfo.begin(), it_end = copiedDataInfo.end(); it != it_end; ++it)
{
pos = it->getValue().second;
if (data[pos] != nullptr)
{
func->destroy(data[pos]);
data[pos] = nullptr;
}
}
}
}
}
void ColumnAggregateFunction::addArena(ConstArenaPtr arena_)
@ -455,14 +475,37 @@ void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n)
/// (only as a whole, see comment above).
ensureOwnership();
insertDefault();
insertMergeFrom(from, n);
insertCopyFrom(assert_cast<const ColumnAggregateFunction &>(from).data[n]);
}
void ColumnAggregateFunction::insertFrom(ConstAggregateDataPtr place)
{
ensureOwnership();
insertDefault();
insertMergeFrom(place);
insertCopyFrom(place);
}
void ColumnAggregateFunction::insertCopyFrom(ConstAggregateDataPtr place)
{
Map::LookupResult result;
result = copiedDataInfo.find(place);
if (result == nullptr)
{
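/// insertCopyFrom() is called right after insertDefault(), so data.back() is a fresh default state at position data.size() - 1.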
copiedDataInfo[place] = data.size()-1;
func->merge(data.back(), place, &createOrGetArena());
}
else
{
size_t pos = result->getValue().second;
if (pos != data.size() - 1)
{
data[data.size() - 1] = data[pos];
}
else /// insert same data to same pos, merge them.
{
func->merge(data.back(), place, &createOrGetArena());
}
}
}
void ColumnAggregateFunction::insertMergeFrom(ConstAggregateDataPtr place)
@ -697,5 +740,4 @@ MutableColumnPtr ColumnAggregateFunction::cloneResized(size_t size) const
return cloned_col;
}
}
}
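A note on the deduplication scheme above: `insertCopyFrom` shares aggregate-state pointers when the same source state is inserted repeatedly, and the destructor then has to free every distinct state exactly once, through the position recorded in `copiedDataInfo`. Below is a minimal standalone sketch of that ownership pattern, assuming a toy `State` type with plain `new`/`delete` (the real column merges into arena-allocated states instead):

``` cpp
#include <cstddef>
#include <unordered_map>
#include <vector>

struct State { int value = 0; }; /// stand-in for an aggregate function state

class SharedStateColumn
{
public:
    void insertCopyFrom(const State * src)
    {
        auto it = copied_info.find(src);
        if (it == copied_info.end())
        {
            /// First time this source is seen: materialize one copy
            /// and remember which position owns it.
            data.push_back(new State(*src));
            copied_info.emplace(src, data.size() - 1);
        }
        else
        {
            /// Seen before: share the existing copy instead of copying again.
            data.push_back(data[it->second]);
        }
    }

    ~SharedStateColumn()
    {
        /// Destroy each distinct state exactly once, via its owner position.
        for (const auto & entry : copied_info)
        {
            delete data[entry.second];
            data[entry.second] = nullptr;
        }
    }

private:
    std::vector<State *> data;                              /// may contain shared pointers
    std::unordered_map<const State *, size_t> copied_info;  /// src pointer -> owner position
};
```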

View File

@ -13,6 +13,8 @@
#include <Functions/FunctionHelpers.h>
#include <Common/HashTable/HashMap.h>
namespace DB
{
@ -82,6 +84,17 @@ private:
/// Name of the type to distinguish different aggregation states.
String type_string;
/// Merged data records, used to avoid duplicated data copies.
/// Key: src pointer, value: position in the current column.
using Map = HashMap<
ConstAggregateDataPtr,
size_t,
DefaultHash<ConstAggregateDataPtr>,
HashTableGrower<3>,
HashTableAllocatorWithStackMemory<sizeof(std::pair<ConstAggregateDataPtr, size_t>) * (1 << 3)>>;
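/// HashTableGrower<3> starts the table at 2^3 = 8 cells, and the stack-memory allocator reserves matching inline storage, so small maps avoid a heap allocation.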
Map copiedDataInfo;
ColumnAggregateFunction() {}
/// Create a new column that has another column as a source.
@ -140,6 +153,8 @@ public:
void insertFrom(ConstAggregateDataPtr place);
void insertCopyFrom(ConstAggregateDataPtr place);
/// Merge state at last row with specified state in another column.
void insertMergeFrom(ConstAggregateDataPtr place);

View File

@ -63,9 +63,6 @@ public:
/// Call from master thread as soon as possible (e.g. when thread accepted connection)
static void initializeQuery();
/// Sets query_context for current thread group
static void attachQueryContext(Context & query_context);
/// You must call one of these methods when create a query child thread:
/// Add current thread to a group associated with the thread group
static void attachTo(const ThreadGroupStatusPtr & thread_group);
@ -99,6 +96,10 @@ public:
private:
static void defaultThreadDeleter();
/// Sets query_context for current thread group
/// Can be used only through QueryScope
static void attachQueryContext(Context & query_context);
};
}

View File

@ -539,7 +539,8 @@
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \
M(1001, STD_EXCEPTION) \
M(1002, UNKNOWN_EXCEPTION)
M(1002, UNKNOWN_EXCEPTION) \
M(1003, INVALID_SHARD_ID)
/* See END */

View File

@ -109,6 +109,11 @@ struct HashMapCell
DB::assertChar(',', rb);
DB::readDoubleQuoted(value.second, rb);
}
static bool constexpr need_to_notify_cell_during_move = false;
static void move(HashMapCell * /* old_location */, HashMapCell * /* new_location */) {}
};
template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>

View File

@ -69,11 +69,16 @@ namespace ZeroTraits
{
template <typename T>
bool check(const T x) { return x == 0; }
inline bool check(const T x) { return x == 0; }
template <typename T>
void set(T & x) { x = 0; }
inline void set(T & x) { x = 0; }
template <>
inline bool check(const char * x) { return x == nullptr; }
template <>
inline void set(const char *& x){ x = nullptr; }
}
@ -204,6 +209,13 @@ struct HashTableCell
/// Deserialization, in binary and text form.
void read(DB::ReadBuffer & rb) { DB::readBinary(key, rb); }
void readText(DB::ReadBuffer & rb) { DB::readDoubleQuoted(key, rb); }
/// When cell pointer is moved during erase, reinsert or resize operations
static constexpr bool need_to_notify_cell_during_move = false;
static void move(HashTableCell * /* old_location */, HashTableCell * /* new_location */) {}
};
/**
@ -334,6 +346,32 @@ struct ZeroValueStorage<false, Cell>
};
template <bool enable, typename Allocator, typename Cell>
struct AllocatorBufferDeleter;
template <typename Allocator, typename Cell>
struct AllocatorBufferDeleter<false, Allocator, Cell>
{
AllocatorBufferDeleter(Allocator &, size_t) {}
void operator()(Cell *) const {}
};
template <typename Allocator, typename Cell>
struct AllocatorBufferDeleter<true, Allocator, Cell>
{
AllocatorBufferDeleter(Allocator & allocator_, size_t size_)
: allocator(allocator_)
, size(size_) {}
void operator()(Cell * buffer) const { allocator.free(buffer, size); }
Allocator & allocator;
size_t size;
};
// The HashTable
template
<
@ -427,7 +465,6 @@ protected:
}
}
/// Increase the size of the buffer.
void resize(size_t for_num_elems = 0, size_t for_buf_size = 0)
{
@ -460,7 +497,24 @@ protected:
new_grower.increaseSize();
/// Expand the space.
buf = reinterpret_cast<Cell *>(Allocator::realloc(buf, getBufferSizeInBytes(), new_grower.bufSize() * sizeof(Cell)));
size_t old_buffer_size = getBufferSizeInBytes();
/** If the cell must be notified during moves, we need to temporarily keep the old buffer
* because realloc does not guarantee that the reallocated buffer has the same base address.
*/
using Deleter = AllocatorBufferDeleter<Cell::need_to_notify_cell_during_move, Allocator, Cell>;
Deleter buffer_deleter(*this, old_buffer_size);
std::unique_ptr<Cell, Deleter> old_buffer(buf, buffer_deleter);
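/// When cells do not need move notification, the deleter is a no-op: the realloc branch below either reuses or frees the old buffer itself.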
if constexpr (Cell::need_to_notify_cell_during_move)
{
buf = reinterpret_cast<Cell *>(Allocator::alloc(new_grower.bufSize() * sizeof(Cell)));
memcpy(reinterpret_cast<void *>(buf), reinterpret_cast<const void *>(old_buffer.get()), old_buffer_size);
}
else
buf = reinterpret_cast<Cell *>(Allocator::realloc(buf, old_buffer_size, new_grower.bufSize() * sizeof(Cell)));
grower = new_grower;
/** Now some items may need to be moved to a new location.
@ -470,7 +524,12 @@ protected:
size_t i = 0;
for (; i < old_size; ++i)
if (!buf[i].isZero(*this))
reinsert(buf[i], buf[i].getHash(*this));
{
size_t updated_place_value = reinsert(buf[i], buf[i].getHash(*this));
if constexpr (Cell::need_to_notify_cell_during_move)
Cell::move(&(old_buffer.get())[i], &buf[updated_place_value]);
}
/** There is also a special case:
* if the element was to be at the end of the old buffer, [ x]
@ -481,7 +540,13 @@ protected:
* process tail from the collision resolution chain immediately after it [ o x ]
*/
for (; !buf[i].isZero(*this); ++i)
reinsert(buf[i], buf[i].getHash(*this));
{
size_t updated_place_value = reinsert(buf[i], buf[i].getHash(*this));
if constexpr (Cell::need_to_notify_cell_during_move)
if (&buf[i] != &buf[updated_place_value])
Cell::move(&buf[i], &buf[updated_place_value]);
}
#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
watch.stop();
@ -495,20 +560,20 @@ protected:
/** Paste into the new buffer the value that was in the old buffer.
* Used when increasing the buffer size.
*/
void reinsert(Cell & x, size_t hash_value)
size_t reinsert(Cell & x, size_t hash_value)
{
size_t place_value = grower.place(hash_value);
/// If the element is in its place.
if (&x == &buf[place_value])
return;
return place_value;
/// Compute a new location, taking into account the collision resolution chain.
place_value = findCell(Cell::getKey(x.getValue()), hash_value, place_value);
/// If the item remains in its place in the old collision resolution chain.
if (!buf[place_value].isZero(*this))
return;
return place_value;
/// Copy to a new location and zero the old one.
x.setHash(hash_value);
@ -516,6 +581,7 @@ protected:
x.setZero();
/// Then the elements that previously were in collision with this can move to the old place.
return place_value;
}
@ -881,7 +947,11 @@ public:
/// Reinsert node pointed to by iterator
void ALWAYS_INLINE reinsert(iterator & it, size_t hash_value)
{
reinsert(*it.getPtr(), hash_value);
size_t place_value = reinsert(*it.getPtr(), hash_value);
if constexpr (Cell::need_to_notify_cell_during_move)
if (it.getPtr() != &buf[place_value])
Cell::move(it.getPtr(), &buf[place_value]);
}
@ -958,8 +1028,14 @@ public:
return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x, hash_value);
}
std::enable_if_t<Grower::performs_linear_probing_with_single_step, void>
std::enable_if_t<Grower::performs_linear_probing_with_single_step, bool>
ALWAYS_INLINE erase(const Key & x)
{
return erase(x, hash(x));
}
std::enable_if_t<Grower::performs_linear_probing_with_single_step, bool>
ALWAYS_INLINE erase(const Key & x, size_t hash_value)
{
/** Deletion from open addressing hash table without tombstones
*
@ -977,21 +1053,19 @@ public:
{
--m_size;
this->clearHasZero();
return true;
}
else
{
return;
return false;
}
}
size_t hash_value = hash(x);
size_t erased_key_position = findCell(x, hash_value, grower.place(hash_value));
/// Key is not found
if (buf[erased_key_position].isZero(*this))
{
return;
}
return false;
/// We need to guarantee loop termination because there will be empty position
assert(m_size < grower.bufSize());
@ -1056,12 +1130,18 @@ public:
/// Move the element to the freed place
memcpy(static_cast<void *>(&buf[erased_key_position]), static_cast<void *>(&buf[next_position]), sizeof(Cell));
if constexpr (Cell::need_to_notify_cell_during_move)
Cell::move(&buf[next_position], &buf[erased_key_position]);
/// Now we have another freed place
erased_key_position = next_position;
}
buf[erased_key_position].setZero();
--m_size;
return true;
}
bool ALWAYS_INLINE has(const Key & x) const
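The `erase` added above is deletion from an open-addressing table without tombstones: after the key is removed, the tail of its collision-resolution chain is shifted backward so lookups keep terminating correctly. A self-contained toy sketch of the same backward-shift idea (not ClickHouse's `HashTable`; the multiplicative hash and the zero-as-empty convention are simplifications of this sketch):

``` cpp
#include <cassert>
#include <cstddef>
#include <vector>

struct ToyHashSet
{
    std::vector<unsigned> buf; /// 0 is reserved as the "empty" marker in this sketch
    size_t mask;

    explicit ToyHashSet(size_t size_log2) : buf(size_t(1) << size_log2, 0), mask(buf.size() - 1) {}

    size_t place(unsigned key) const { return (key * 2654435761u) & mask; }

    void insert(unsigned key)
    {
        size_t pos = place(key);
        while (buf[pos] != 0 && buf[pos] != key)
            pos = (pos + 1) & mask; /// linear probing with single step
        buf[pos] = key;
    }

    bool erase(unsigned key)
    {
        size_t pos = place(key);
        while (buf[pos] != key)
        {
            if (buf[pos] == 0)
                return false; /// key is not found
            pos = (pos + 1) & mask;
        }

        /// Backward shift: walk the chain after the hole and pull back every
        /// element whose ideal slot does not lie in the cyclic range (hole, next].
        /// This is the point where the real table also calls Cell::move.
        size_t hole = pos;
        size_t next = (pos + 1) & mask;
        while (buf[next] != 0)
        {
            size_t ideal = place(buf[next]);
            bool stays = hole < next ? (ideal > hole && ideal <= next)
                                     : (ideal > hole || ideal <= next);
            if (!stays)
            {
                buf[hole] = buf[next];
                hole = next;
            }
            next = (next + 1) & mask;
        }
        buf[hole] = 0; /// the final hole becomes empty; no tombstone needed
        return true;
    }
};

int main()
{
    ToyHashSet set(4);                               /// 16 cells
    set.insert(17); set.insert(33); set.insert(49);  /// all collide at the same slot
    assert(set.erase(33));
    assert(!set.erase(33));                          /// second erase finds nothing
}
```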

View File

@ -0,0 +1,244 @@
#pragma once
#include <common/types.h>
#include <boost/intrusive/trivial_value_traits.hpp>
#include <boost/intrusive/list.hpp>
#include <boost/noncopyable.hpp>
#include <Core/Defines.h>
#include <Common/Exception.h>
#include <Common/HashTable/HashMap.h>
#include <Common/PODArray.h>
template <typename TKey, typename TMapped, typename Hash, bool save_hash_in_cell>
struct LRUHashMapCell :
public std::conditional_t<save_hash_in_cell,
HashMapCellWithSavedHash<TKey, TMapped, Hash, HashTableNoState>,
HashMapCell<TKey, TMapped, Hash, HashTableNoState>>
{
public:
using Key = TKey;
using Base = std::conditional_t<save_hash_in_cell,
HashMapCellWithSavedHash<TKey, TMapped, Hash, HashTableNoState>,
HashMapCell<TKey, TMapped, Hash, HashTableNoState>>;
using Mapped = typename Base::Mapped;
using State = typename Base::State;
using mapped_type = Mapped;
using key_type = Key;
using Base::Base;
static bool constexpr need_to_notify_cell_during_move = true;
static void move(LRUHashMapCell * __restrict old_location, LRUHashMapCell * __restrict new_location)
{
/** We update the new location's prev and next pointers here, because during
* a hash table resize they may already have been updated while moving another cell.
*/
new_location->prev = old_location->prev;
new_location->next = old_location->next;
LRUHashMapCell * prev = new_location->prev;
LRUHashMapCell * next = new_location->next;
/// Updated previous next and next previous nodes of list to point to new location
if (prev)
prev->next = new_location;
if (next)
next->prev = new_location;
}
private:
template<typename, typename, typename, bool>
friend class LRUHashMapCellNodeTraits;
LRUHashMapCell * next = nullptr;
LRUHashMapCell * prev = nullptr;
};
template<typename Key, typename Value, typename Hash, bool save_hash_in_cell>
struct LRUHashMapCellNodeTraits
{
using node = LRUHashMapCell<Key, Value, Hash, save_hash_in_cell>;
using node_ptr = LRUHashMapCell<Key, Value, Hash, save_hash_in_cell> *;
using const_node_ptr = const LRUHashMapCell<Key, Value, Hash, save_hash_in_cell> *;
static node * get_next(const node * ptr) { return ptr->next; }
static void set_next(node * __restrict ptr, node * __restrict next) { ptr->next = next; }
static node * get_previous(const node * ptr) { return ptr->prev; }
static void set_previous(node * __restrict ptr, node * __restrict prev) { ptr->prev = prev; }
};
template <typename TKey, typename TValue, typename Hash, bool save_hash_in_cells>
class LRUHashMapImpl :
private HashMapTable<
TKey,
LRUHashMapCell<TKey, TValue, Hash, save_hash_in_cells>,
Hash,
HashTableGrower<>,
HashTableAllocator>
{
using Base = HashMapTable<
TKey,
LRUHashMapCell<TKey, TValue, Hash, save_hash_in_cells>,
Hash,
HashTableGrower<>,
HashTableAllocator>;
public:
using Key = TKey;
using Value = TValue;
using Cell = LRUHashMapCell<Key, Value, Hash, save_hash_in_cells>;
using LRUHashMapCellIntrusiveValueTraits =
boost::intrusive::trivial_value_traits<
LRUHashMapCellNodeTraits<Key, Value, Hash, save_hash_in_cells>,
boost::intrusive::link_mode_type::normal_link>;
using LRUList = boost::intrusive::list<
Cell,
boost::intrusive::value_traits<LRUHashMapCellIntrusiveValueTraits>,
boost::intrusive::constant_time_size<false>>;
using iterator = typename LRUList::iterator;
using const_iterator = typename LRUList::const_iterator;
using reverse_iterator = typename LRUList::reverse_iterator;
using const_reverse_iterator = typename LRUList::const_reverse_iterator;
LRUHashMapImpl(size_t max_size_, bool preallocate_max_size_in_hash_map = false)
: Base(preallocate_max_size_in_hash_map ? max_size_ : 32)
, max_size(max_size_)
{
assert(max_size > 0);
}
std::pair<Cell *, bool> insert(const Key & key, const Value & value)
{
return emplace(key, value);
}
std::pair<Cell *, bool> insert(const Key & key, Value && value)
{
return emplace(key, std::move(value));
}
template<typename ...Args>
std::pair<Cell *, bool> emplace(const Key & key, Args&&... args)
{
size_t hash_value = Base::hash(key);
Cell * it = Base::find(key, hash_value);
if (it)
{
/// The cell already contains the element: return it and move it to the end of the LRU list
lru_list.splice(lru_list.end(), lru_list, lru_list.iterator_to(*it));
return std::make_pair(it, false);
}
if (size() == max_size)
{
/// Erase least recently used element from front of the list
Cell & node = lru_list.front();
const Key & element_to_remove_key = node.getKey();
size_t key_hash = node.getHash(*this);
lru_list.pop_front();
[[maybe_unused]] bool erased = Base::erase(element_to_remove_key, key_hash);
assert(erased);
}
[[maybe_unused]] bool inserted;
/// Insert the value: first try zero storage; if the key is not the zero key, insert into the buffer
if (!Base::emplaceIfZero(key, it, inserted, hash_value))
Base::emplaceNonZero(key, it, inserted, hash_value);
assert(inserted);
new (&it->getMapped()) Value(std::forward<Args>(args)...);
/// Put cell to the end of lru list
lru_list.insert(lru_list.end(), *it);
return std::make_pair(it, true);
}
using Base::find;
Value & get(const Key & key)
{
auto it = Base::find(key);
assert(it);
Value & value = it->getMapped();
/// Put cell to the end of lru list
lru_list.splice(lru_list.end(), lru_list, lru_list.iterator_to(*it));
return value;
}
const Value & get(const Key & key) const
{
return const_cast<std::decay_t<decltype(*this)> *>(this)->get(key);
}
bool contains(const Key & key) const
{
return Base::has(key);
}
bool erase(const Key & key)
{
auto hash = Base::hash(key);
auto it = Base::find(key, hash);
if (!it)
return false;
lru_list.erase(lru_list.iterator_to(*it));
return Base::erase(key, hash);
}
void clear()
{
lru_list.clear();
Base::clear();
}
using Base::size;
size_t getMaxSize() const { return max_size; }
iterator begin() { return lru_list.begin(); }
const_iterator begin() const { return lru_list.cbegin(); }
iterator end() { return lru_list.end(); }
const_iterator end() const { return lru_list.cend(); }
reverse_iterator rbegin() { return lru_list.rbegin(); }
const_reverse_iterator rbegin() const { return lru_list.crbegin(); }
reverse_iterator rend() { return lru_list.rend(); }
const_reverse_iterator rend() const { return lru_list.crend(); }
private:
size_t max_size;
LRUList lru_list;
};
template <typename Key, typename Value, typename Hash = DefaultHash<Key>>
using LRUHashMap = LRUHashMapImpl<Key, Value, Hash, false>;
template <typename Key, typename Value, typename Hash = DefaultHash<Key>>
using LRUHashMapWithSavedHash = LRUHashMapImpl<Key, Value, Hash, true>;
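A usage sketch for the container defined above (using the same `Common/HashTable/LRUHashMap.h` include path as the tests below): a capacity-3 cache where `insert` evicts the least recently used key and `get` refreshes recency.

``` cpp
#include <cassert>
#include <Common/HashTable/LRUHashMap.h>

int main()
{
    LRUHashMap<int, int> cache(3);

    cache.insert(1, 10);
    cache.insert(2, 20);
    cache.insert(3, 30);

    cache.get(1);        /// Touch key 1: it becomes the most recently used.
    cache.insert(4, 40); /// Capacity exceeded: key 2, now the oldest, is evicted.

    assert(!cache.contains(2));
    assert(cache.contains(1) && cache.contains(3) && cache.contains(4));
    assert(cache.get(1) == 10);
    return 0;
}
```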

View File

@ -99,6 +99,11 @@ ThreadStatus::~ThreadStatus()
/// We've already allocated a little bit more than the limit and cannot track it in the thread memory tracker or its parent.
}
#if !defined(ARCADIA_BUILD)
/// It may cause a segfault if query_context was destroyed but the query was not detached
assert((!query_context && query_id.empty()) || (query_context && query_id == query_context->getCurrentQueryId()));
#endif
if (deleter)
deleter();
current_thread = nullptr;

View File

@ -201,7 +201,7 @@ public:
void setFatalErrorCallback(std::function<void()> callback);
void onFatalError();
/// Sets query context for current thread and its thread group
/// Sets query context for current master thread and its thread group
/// NOTE: query_context have to be alive until detachQuery() is called
void attachQueryContext(Context & query_context);

View File

@ -38,6 +38,9 @@ target_link_libraries (arena_with_free_lists PRIVATE dbms)
add_executable (pod_array pod_array.cpp)
target_link_libraries (pod_array PRIVATE clickhouse_common_io)
add_executable (lru_hash_map_perf lru_hash_map_perf.cpp)
target_link_libraries (lru_hash_map_perf PRIVATE clickhouse_common_io)
add_executable (thread_creation_latency thread_creation_latency.cpp)
target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io)

View File

@ -0,0 +1,161 @@
#include <iomanip>
#include <iostream>
#include <Common/HashTable/LRUHashMap.h>
#include <gtest/gtest.h>
template<typename LRUHashMap>
std::vector<typename LRUHashMap::Key> convertToVector(const LRUHashMap & map)
{
std::vector<typename LRUHashMap::Key> result;
result.reserve(map.size());
for (auto & node: map)
result.emplace_back(node.getKey());
return result;
}
void testInsert(size_t elements_to_insert_size, size_t map_size)
{
using LRUHashMap = LRUHashMap<int, int>;
LRUHashMap map(map_size);
std::vector<int> expected;
for (size_t i = 0; i < elements_to_insert_size; ++i)
map.insert(i, i);
for (size_t i = elements_to_insert_size - map_size; i < elements_to_insert_size; ++i)
expected.emplace_back(i);
std::vector<int> actual = convertToVector(map);
ASSERT_EQ(map.size(), actual.size());
ASSERT_EQ(actual, expected);
}
TEST(LRUHashMap, Insert)
{
{
using LRUHashMap = LRUHashMap<int, int>;
LRUHashMap map(3);
map.emplace(1, 1);
map.insert(2, 2);
int v = 3;
map.insert(3, v);
map.emplace(4, 4);
std::vector<int> expected = { 2, 3, 4 };
std::vector<int> actual = convertToVector(map);
ASSERT_EQ(actual, expected);
}
testInsert(1200000, 1200000);
testInsert(10, 5);
testInsert(1200000, 2);
testInsert(1200000, 1);
}
TEST(LRUHashMap, GetModify)
{
using LRUHashMap = LRUHashMap<int, int>;
LRUHashMap map(3);
map.emplace(1, 1);
map.emplace(2, 2);
map.emplace(3, 3);
map.get(3) = 4;
std::vector<int> expected = { 1, 2, 4 };
std::vector<int> actual;
actual.reserve(map.size());
for (auto & node : map)
actual.emplace_back(node.getMapped());
ASSERT_EQ(actual, expected);
}
TEST(LRUHashMap, SetRecentKeyToTop)
{
using LRUHashMap = LRUHashMap<int, int>;
LRUHashMap map(3);
map.emplace(1, 1);
map.emplace(2, 2);
map.emplace(3, 3);
map.emplace(1, 4);
std::vector<int> expected = { 2, 3, 1 };
std::vector<int> actual = convertToVector(map);
ASSERT_EQ(actual, expected);
}
TEST(LRUHashMap, GetRecentKeyToTop)
{
using LRUHashMap = LRUHashMap<int, int>;
LRUHashMap map(3);
map.emplace(1, 1);
map.emplace(2, 2);
map.emplace(3, 3);
map.get(1);
std::vector<int> expected = { 2, 3, 1 };
std::vector<int> actual = convertToVector(map);
ASSERT_EQ(actual, expected);
}
TEST(LRUHashMap, Contains)
{
using LRUHashMap = LRUHashMap<int, int>;
LRUHashMap map(3);
map.emplace(1, 1);
map.emplace(2, 2);
map.emplace(3, 3);
ASSERT_TRUE(map.contains(1));
ASSERT_TRUE(map.contains(2));
ASSERT_TRUE(map.contains(3));
ASSERT_EQ(map.size(), 3);
map.erase(1);
map.erase(2);
map.erase(3);
ASSERT_EQ(map.size(), 0);
ASSERT_FALSE(map.contains(1));
ASSERT_FALSE(map.contains(2));
ASSERT_FALSE(map.contains(3));
}
TEST(LRUHashMap, Clear)
{
using LRUHashMap = LRUHashMap<int, int>;
LRUHashMap map(3);
map.emplace(1, 1);
map.emplace(2, 2);
map.emplace(3, 3);
map.clear();
std::vector<int> expected = {};
std::vector<int> actual = convertToVector(map);
ASSERT_EQ(actual, expected);
ASSERT_EQ(map.size(), 0);
}

View File

@ -0,0 +1,244 @@
#include <vector>
#include <list>
#include <map>
#include <random>
#include <pcg_random.hpp>
#include <Common/Stopwatch.h>
#include <Common/HashTable/LRUHashMap.h>
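/// Reference LRU cache built from std::unordered_map plus std::list, used below
/// as a baseline to compare against the intrusive-list LRUHashMap.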
template<class Key, class Value>
class LRUHashMapBasic
{
public:
using key_type = Key;
using value_type = Value;
using list_type = std::list<key_type>;
using node = std::pair<value_type, typename list_type::iterator>;
using map_type = std::unordered_map<key_type, node, DefaultHash<Key>>;
LRUHashMapBasic(size_t max_size_, bool preallocated)
: hash_map(preallocated ? max_size_ : 32)
, max_size(max_size_)
{
}
void insert(const Key &key, const Value &value)
{
auto it = hash_map.find(key);
if (it == hash_map.end())
{
if (size() >= max_size)
{
auto iterator_to_remove = list.begin();
hash_map.erase(*iterator_to_remove);
list.erase(iterator_to_remove);
}
list.push_back(key);
hash_map[key] = std::make_pair(value, --list.end());
}
else
{
auto & [value_to_update, iterator_in_list_to_update] = it->second;
list.splice(list.end(), list, iterator_in_list_to_update);
/// splice() does not invalidate iterators: the stored iterator still points to the same node, now at the back.
value_to_update = value;
}
}
value_type & get(const key_type &key)
{
auto iterator_in_map = hash_map.find(key);
assert(iterator_in_map != hash_map.end());
auto & [value_to_return, iterator_in_list_to_update] = iterator_in_map->second;
list.splice(list.end(), list, iterator_in_list_to_update);
/// splice() does not invalidate iterators: the stored iterator still points to the same node, now at the back.
return value_to_return;
}
const value_type & get(const key_type & key) const
{
return const_cast<std::decay_t<decltype(*this)> *>(this)->get(key);
}
size_t getMaxSize() const
{
return max_size;
}
size_t size() const
{
return hash_map.size();
}
bool empty() const
{
return hash_map.empty();
}
bool contains(const Key & key)
{
return hash_map.find(key) != hash_map.end();
}
void clear()
{
hash_map.clear();
list.clear();
}
private:
map_type hash_map;
list_type list;
size_t max_size;
};
std::vector<UInt64> generateNumbersToInsert(size_t numbers_to_insert_size)
{
std::vector<UInt64> numbers;
numbers.reserve(numbers_to_insert_size);
std::random_device rd;
pcg64 gen(rd());
UInt64 min = std::numeric_limits<UInt64>::min();
UInt64 max = std::numeric_limits<UInt64>::max();
auto distribution = std::uniform_int_distribution<UInt64>(min, max);
for (size_t i = 0; i < numbers_to_insert_size; ++i)
{
UInt64 number = distribution(gen);
numbers.emplace_back(number);
}
return numbers;
}
void testInsertElementsIntoHashMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
{
size_t numbers_to_insert_size = numbers_to_insert.size();
std::cout << "TestInsertElementsIntoHashMap preallocated map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
std::cout << std::endl;
HashMap<int, int> hash_map(preallocated ? map_size : 32);
Stopwatch watch;
for (size_t i = 0; i < numbers_to_insert_size; ++i)
hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
UInt64 summ = 0;
for (size_t i = 0; i < numbers_to_insert_size; ++i)
{
auto * it = hash_map.find(numbers_to_insert[i]);
if (it)
summ += it->getMapped();
}
std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
}
void testInsertElementsIntoStandardMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
{
size_t numbers_to_insert_size = numbers_to_insert.size();
std::cout << "TestInsertElementsIntoStandardMap map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
std::cout << std::endl;
std::unordered_map<int, int> hash_map(preallocated ? map_size : 32);
Stopwatch watch;
for (size_t i = 0; i < numbers_to_insert_size; ++i)
hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
UInt64 summ = 0;
for (size_t i = 0; i < numbers_to_insert_size; ++i)
{
auto it = hash_map.find(numbers_to_insert[i]);
if (it != hash_map.end())
summ += it->second;
}
std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
}
template<typename LRUCache>
UInt64 testInsertIntoEmptyCache(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
{
size_t numbers_to_insert_size = numbers_to_insert.size();
std::cout << "Test testInsertPreallocated preallocated map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
std::cout << std::endl;
LRUCache cache(map_size, preallocated);
Stopwatch watch;
for (size_t i = 0; i < numbers_to_insert_size; ++i)
{
cache.insert(numbers_to_insert[i], numbers_to_insert[i]);
}
std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
UInt64 summ = 0;
for (size_t i = 0; i < numbers_to_insert_size; ++i)
if (cache.contains(numbers_to_insert[i]))
summ += cache.get(numbers_to_insert[i]);
std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
return summ;
}
int main(int argc, char ** argv)
{
(void)(argc);
(void)(argv);
size_t hash_map_size = 1200000;
size_t numbers_to_insert_size = 12000000;
std::vector<UInt64> numbers = generateNumbersToInsert(numbers_to_insert_size);
std::cout << "Test insert into HashMap preallocated=0" << std::endl;
testInsertElementsIntoHashMap(hash_map_size, numbers, false);
std::cout << std::endl;
std::cout << "Test insert into HashMap preallocated=1" << std::endl;
testInsertElementsIntoHashMap(hash_map_size, numbers, true);
std::cout << std::endl;
std::cout << "Test LRUHashMap preallocated=0" << std::endl;
testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, false);
std::cout << std::endl;
std::cout << "Test LRUHashMap preallocated=1" << std::endl;
testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, true);
std::cout << std::endl;
std::cout << "Test LRUHashMapBasic preallocated=0" << std::endl;
testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, false);
std::cout << std::endl;
std::cout << "Test LRUHashMapBasic preallocated=1" << std::endl;
testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, true);
std::cout << std::endl;
return 0;
}

View File

@ -86,8 +86,6 @@ class IColumn;
\
M(Bool, optimize_move_to_prewhere, true, "Allows disabling WHERE to PREWHERE optimization in SELECT queries from MergeTree.", 0) \
\
M(Milliseconds, insert_in_memory_parts_timeout, 600000, "", 0) \
\
M(UInt64, replication_alter_partitions_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) \
M(UInt64, replication_alter_columns_timeout, 60, "Wait for actions to change the table structure within the specified number of seconds. 0 - wait unlimited time.", 0) \
\
@ -420,6 +418,7 @@ class IColumn;
M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
\
M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \
M(Bool, allow_experimental_query_deduplication, false, "Allow sending parts' UUIDs for a query in order to deduplicate data parts if any", 0) \
\
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \

View File

@ -734,6 +734,23 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
return actions_dag;
}
ActionsDAGPtr ActionsDAG::makeAddingColumnActions(ColumnWithTypeAndName column)
{
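/// The column comes in as a constant; wrapping it in materialize() makes
/// consumers of the resulting expression see an ordinary full column under the original name.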
auto adding_column_action = std::make_shared<ActionsDAG>();
FunctionOverloadResolverPtr func_builder_materialize =
std::make_shared<FunctionOverloadResolverAdaptor>(
std::make_unique<DefaultOverloadResolver>(
std::make_shared<FunctionMaterialize>()));
auto column_name = column.name;
const auto & column_node = adding_column_action->addColumn(std::move(column));
Inputs inputs = {const_cast<Node *>(&column_node)};
auto & function_node = adding_column_action->addFunction(func_builder_materialize, std::move(inputs), {}, true);
adding_column_action->addAlias(function_node, std::move(column_name), true);
return adding_column_action;
}
ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
{
/// first: x (1), x (2), y ==> x (2), z, x (3)

View File

@ -250,6 +250,9 @@ public:
MatchColumnsMode mode,
bool ignore_constant_values = false); /// Do not check that constants are same. Use value from result_header.
/// Create expression which add const column and then materialize it.
static ActionsDAGPtr makeAddingColumnActions(ColumnWithTypeAndName column);
/// Create ActionsDAG which represents expression equivalent to applying first and second actions consequently.
/// Is used to replace `(first -> second)` expression chain to single `merge(first, second)` expression.
/// If first.settings.project_input is set, then outputs of `first` must include inputs of `second`.

View File

@ -1139,12 +1139,6 @@ String Context::getCurrentDatabase() const
}
String Context::getCurrentQueryId() const
{
return client_info.current_query_id;
}
String Context::getInitialQueryId() const
{
return client_info.initial_query_id;

View File

@ -441,7 +441,7 @@ public:
StoragePtr getViewSource();
String getCurrentDatabase() const;
String getCurrentQueryId() const;
String getCurrentQueryId() const { return client_info.current_query_id; }
/// Id of initiating query for distributed queries; or current query id if it's not a distributed query.
String getInitialQueryId() const;

View File

@ -610,12 +610,14 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, Exec
ReadBufferFromString istr(query_to_execute);
String dummy_string;
WriteBufferFromString ostr(dummy_string);
std::optional<CurrentThread::QueryScope> query_scope;
try
{
auto current_context = std::make_unique<Context>(context);
current_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
current_context->setCurrentQueryId(""); // generate random query_id
query_scope.emplace(*current_context);
executeQuery(istr, ostr, false, *current_context, {});
}
catch (...)
@ -632,20 +634,6 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, Exec
return true;
}
void DDLWorker::attachToThreadGroup()
{
if (thread_group)
{
/// Put all threads to one thread pool
CurrentThread::attachToIfDetached(thread_group);
}
else
{
CurrentThread::initializeQuery();
thread_group = CurrentThread::getGroup();
}
}
void DDLWorker::enqueueTask(DDLTaskPtr task_ptr)
{
@ -1148,8 +1136,6 @@ void DDLWorker::runMainThread()
{
try
{
attachToThreadGroup();
cleanup_event->set();
scheduleTasks();

View File

@ -162,8 +162,6 @@ private:
void runMainThread();
void runCleanupThread();
void attachToThreadGroup();
private:
Context context;
Poco::Logger * log;
@ -196,8 +194,6 @@ private:
/// How many tasks could be in the queue
size_t max_tasks_in_queue = 1000;
ThreadGroupStatusPtr thread_group;
std::atomic<UInt64> max_id = 0;
friend class DDLQueryStatusInputStream;

View File

@ -37,7 +37,7 @@ void DNSCacheUpdater::run()
* - automatically throttle when DNS requests take longer time;
* - add natural randomization on huge clusters - avoid sending all requests at the same moment of time from different servers.
*/
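/// Note: the cast below presumably avoids 32-bit overflow; update_period_seconds is a 32-bit
/// value, and computing `seconds * 1000` in 32-bit arithmetic could overflow for large periods.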
task_handle->scheduleAfter(update_period_seconds * 1000);
task_handle->scheduleAfter(size_t(update_period_seconds) * 1000);
}
void DNSCacheUpdater::start()

View File

@ -1,9 +1,8 @@
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeEnum.h>
@ -31,33 +30,38 @@ Block PartLogElement::createBlock()
}
);
return
{
{ColumnString::create(), std::make_shared<DataTypeString>(), "query_id"},
{ColumnInt8::create(), std::move(event_type_datatype), "event_type"},
{ColumnUInt16::create(), std::make_shared<DataTypeDate>(), "event_date"},
{ColumnUInt32::create(), std::make_shared<DataTypeDateTime>(), "event_time"},
{ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "duration_ms"},
ColumnsWithTypeAndName columns_with_type_and_name;
{ColumnString::create(), std::make_shared<DataTypeString>(), "database"},
{ColumnString::create(), std::make_shared<DataTypeString>(), "table"},
{ColumnString::create(), std::make_shared<DataTypeString>(), "part_name"},
{ColumnString::create(), std::make_shared<DataTypeString>(), "partition_id"},
{ColumnString::create(), std::make_shared<DataTypeString>(), "path_on_disk"},
return {
{ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "rows"},
{ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "size_in_bytes"}, // On disk
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "query_id"),
columns_with_type_and_name.emplace_back(std::move(event_type_datatype), "event_type"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDate>(), "event_date"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDateTime>(), "event_time"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDateTime64>(6), "event_time_microseconds"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "duration_ms"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "database"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "table"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "part_name"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "partition_id"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "path_on_disk"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "rows"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "size_in_bytes"), // On disk
/// Merge-specific info
{ColumnArray::create(ColumnString::create()), std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "merged_from"},
{ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "bytes_uncompressed"}, // Result bytes
{ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "read_rows"},
{ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "read_bytes"},
{ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "peak_memory_usage"},
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "merged_from"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "bytes_uncompressed"), // Result bytes
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "read_rows"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "read_bytes"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "peak_memory_usage"),
/// Is there an error during the execution or commit
{ColumnUInt16::create(), std::make_shared<DataTypeUInt16>(), "error"},
{ColumnString::create(), std::make_shared<DataTypeString>(), "exception"},
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt16>(), "error"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "exception"),
};
}
@ -69,6 +73,7 @@ void PartLogElement::appendToBlock(MutableColumns & columns) const
columns[i++]->insert(event_type);
columns[i++]->insert(DateLUT::instance().toDayNum(event_time));
columns[i++]->insert(event_time);
columns[i++]->insert(event_time_microseconds);
columns[i++]->insert(duration_ms);
columns[i++]->insert(database_name);
@ -97,13 +102,25 @@ void PartLogElement::appendToBlock(MutableColumns & columns) const
}
bool PartLog::addNewPart(Context & current_context, const MutableDataPartPtr & part, UInt64 elapsed_ns, const ExecutionStatus & execution_status)
bool PartLog::addNewPart(
Context & current_context, const MutableDataPartPtr & part, UInt64 elapsed_ns, const ExecutionStatus & execution_status)
{
return addNewParts(current_context, {part}, elapsed_ns, execution_status);
}
bool PartLog::addNewParts(Context & current_context, const PartLog::MutableDataPartsVector & parts, UInt64 elapsed_ns,
const ExecutionStatus & execution_status)
inline UInt64 time_in_microseconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
{
return std::chrono::duration_cast<std::chrono::microseconds>(timepoint.time_since_epoch()).count();
}
inline UInt64 time_in_seconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
{
return std::chrono::duration_cast<std::chrono::seconds>(timepoint.time_since_epoch()).count();
}
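/// Illustrative check, a sketch that is not part of this patch (assumes <cassert> is available):
/// because both values are derived from one time_point, truncation guarantees
/// event_time == event_time_microseconds / 1000000 for every logged row.
[[maybe_unused]] static void checkEventTimeSplitInvariant()
{
const auto now = std::chrono::system_clock::now();
assert(time_in_seconds(now) == time_in_microseconds(now) / 1000000);
}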
bool PartLog::addNewParts(
Context & current_context, const PartLog::MutableDataPartsVector & parts, UInt64 elapsed_ns, const ExecutionStatus & execution_status)
{
if (parts.empty())
return true;
@ -127,7 +144,12 @@ bool PartLog::addNewParts(Context & current_context, const PartLog::MutableDataP
elem.query_id.insert(0, query_id.data, query_id.size);
elem.event_type = PartLogElement::NEW_PART;
elem.event_time = time(nullptr);
// construct event_time and event_time_microseconds using the same time point
// so that the two times will always be equal up to a precision of a second.
const auto time_now = std::chrono::system_clock::now();
elem.event_time = time_in_seconds(time_now);
elem.event_time_microseconds = time_in_microseconds(time_now);
elem.duration_ms = elapsed_ns / 1000000;
elem.database_name = table_id.database_name;

View File

@ -23,6 +23,7 @@ struct PartLogElement
Type event_type = NEW_PART;
time_t event_time = 0;
Decimal64 event_time_microseconds = 0;
UInt64 duration_ms = 0;
String database_name;

View File

@ -500,6 +500,8 @@ CurrentThread::QueryScope::QueryScope(Context & query_context)
{
CurrentThread::initializeQuery();
CurrentThread::attachQueryContext(query_context);
if (!query_context.hasQueryContext())
query_context.makeQueryContext();
}
void CurrentThread::QueryScope::logPeakMemoryUsage()

View File

@ -343,13 +343,10 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
{
const auto current_time = std::chrono::system_clock::now();
/// If we are already executing a query and it requires an internal query to be executed, then
/// don't replace the thread context with the given one (it can be temporary). Otherwise, attach the context to the thread.
if (!internal)
{
context.makeQueryContext();
CurrentThread::attachQueryContext(context);
}
#if !defined(ARCADIA_BUILD)
assert(internal || CurrentThread::get().getQueryContext());
assert(internal || CurrentThread::get().getQueryContext()->getCurrentQueryId() == CurrentThread::getQueryId());
#endif
const Settings & settings = context.getSettingsRef();
@ -524,6 +521,14 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
quota = context.getQuota();
if (quota)
{
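/// Account the query against the per-statement-type counter first, then against the
/// generic QUERIES counter; only SELECT and INSERT get a dedicated counter.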
if (ast->as<ASTSelectQuery>() || ast->as<ASTSelectWithUnionQuery>())
{
quota->used(Quota::QUERY_SELECTS, 1);
}
else if (ast->as<ASTInsertQuery>())
{
quota->used(Quota::QUERY_INSERTS, 1);
}
quota->used(Quota::QUERIES, 1);
quota->checkExceeded(Quota::ERRORS);
}

View File

@ -12,7 +12,7 @@ DelayedPortsProcessor::DelayedPortsProcessor(
const Block & header, size_t num_ports, const PortNumbers & delayed_ports, bool assert_main_ports_empty)
: IProcessor(InputPorts(num_ports, header),
OutputPorts((assert_main_ports_empty ? delayed_ports.size() : num_ports), header))
, num_delayed(delayed_ports.size())
, num_delayed_ports(delayed_ports.size())
{
port_pairs.resize(num_ports);
output_to_pair.reserve(outputs.size());
@ -36,21 +36,24 @@ DelayedPortsProcessor::DelayedPortsProcessor(
}
}
void DelayedPortsProcessor::finishPair(PortsPair & pair)
{
if (!pair.is_finished)
{
pair.is_finished = true;
++num_finished_pairs;
if (pair.output_port)
++num_finished_outputs;
}
}
bool DelayedPortsProcessor::processPair(PortsPair & pair)
{
auto finish = [&]()
{
if (!pair.is_finished)
{
pair.is_finished = true;
++num_finished;
}
};
if (pair.output_port && pair.output_port->isFinished())
{
pair.input_port->close();
finish();
finishPair(pair);
return false;
}
@ -58,7 +61,7 @@ bool DelayedPortsProcessor::processPair(PortsPair & pair)
{
if (pair.output_port)
pair.output_port->finish();
finish();
finishPair(pair);
return false;
}
@ -72,7 +75,7 @@ bool DelayedPortsProcessor::processPair(PortsPair & pair)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Input port for DelayedPortsProcessor is assumed to have no data, but it has one");
pair.output_port->pushData(pair.input_port->pullData());
pair.output_port->pushData(pair.input_port->pullData(true));
}
return true;
@ -80,7 +83,7 @@ bool DelayedPortsProcessor::processPair(PortsPair & pair)
IProcessor::Status DelayedPortsProcessor::prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs)
{
bool skip_delayed = (num_finished + num_delayed) < port_pairs.size();
bool skip_delayed = (num_finished_pairs + num_delayed_ports) < port_pairs.size();
bool need_data = false;
if (!are_inputs_initialized && !updated_outputs.empty())
@ -95,9 +98,27 @@ IProcessor::Status DelayedPortsProcessor::prepare(const PortNumbers & updated_in
for (const auto & output_number : updated_outputs)
{
auto pair_num = output_to_pair[output_number];
if (!skip_delayed || !port_pairs[pair_num].is_delayed)
need_data = processPair(port_pairs[pair_num]) || need_data;
auto & pair = port_pairs[output_to_pair[output_number]];
/// Finish pair of ports earlier if possible.
if (!pair.is_finished && pair.output_port && pair.output_port->isFinished())
finishPair(pair);
else if (!skip_delayed || !pair.is_delayed)
need_data = processPair(pair) || need_data;
}
/// Do not wait for delayed ports if all output ports are finished.
if (num_finished_outputs == outputs.size())
{
for (auto & pair : port_pairs)
{
if (pair.output_port)
pair.output_port->finish();
pair.input_port->close();
}
return Status::Finished;
}
for (const auto & input_number : updated_inputs)
@ -107,14 +128,14 @@ IProcessor::Status DelayedPortsProcessor::prepare(const PortNumbers & updated_in
}
/// In case the main streams finished at the current iteration, start processing the delayed streams.
if (skip_delayed && (num_finished + num_delayed) >= port_pairs.size())
if (skip_delayed && (num_finished_pairs + num_delayed_ports) >= port_pairs.size())
{
for (auto & pair : port_pairs)
if (pair.is_delayed)
need_data = processPair(pair) || need_data;
}
if (num_finished == port_pairs.size())
if (num_finished_pairs == port_pairs.size())
return Status::Finished;
if (need_data)

View File

@ -28,13 +28,15 @@ private:
};
std::vector<PortsPair> port_pairs;
size_t num_delayed;
size_t num_finished = 0;
const size_t num_delayed_ports;
size_t num_finished_pairs = 0;
size_t num_finished_outputs = 0;
std::vector<size_t> output_to_pair;
bool are_inputs_initialized = false;
bool processPair(PortsPair & pair);
void finishPair(PortsPair & pair);
};
}

View File

@ -1,41 +0,0 @@
#include <Processors/QueryPlan/AddingConstColumnStep.h>
#include <Processors/QueryPipeline.h>
#include <Processors/Transforms/AddingConstColumnTransform.h>
#include <IO/Operators.h>
namespace DB
{
static ITransformingStep::Traits getTraits()
{
return ITransformingStep::Traits
{
{
.preserves_distinct_columns = true,
.returns_single_stream = false,
.preserves_number_of_streams = true,
.preserves_sorting = true,
},
{
.preserves_number_of_rows = true,
}
};
}
AddingConstColumnStep::AddingConstColumnStep(const DataStream & input_stream_, ColumnWithTypeAndName column_)
: ITransformingStep(input_stream_,
AddingConstColumnTransform::transformHeader(input_stream_.header, column_),
getTraits())
, column(std::move(column_))
{
}
void AddingConstColumnStep::transformPipeline(QueryPipeline & pipeline)
{
pipeline.addSimpleTransform([&](const Block & header)
{
return std::make_shared<AddingConstColumnTransform>(header, column);
});
}
}

View File

@ -1,22 +0,0 @@
#pragma once
#include <Processors/QueryPlan/ITransformingStep.h>
namespace DB
{
/// Adds a materialized const column with a specified value.
class AddingConstColumnStep : public ITransformingStep
{
public:
AddingConstColumnStep(const DataStream & input_stream_, ColumnWithTypeAndName column_);
String getName() const override { return "AddingConstColumn"; }
void transformPipeline(QueryPipeline & pipeline) override;
private:
ColumnWithTypeAndName column;
};
}

View File

@ -1,43 +0,0 @@
#pragma once
#include <Processors/ISimpleTransform.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/// Adds a materialized const column to the chunk with a specified value.
class AddingConstColumnTransform : public ISimpleTransform
{
public:
AddingConstColumnTransform(const Block & header, ColumnWithTypeAndName column_)
: ISimpleTransform(header, transformHeader(header, column_), false)
, column(std::move(column_))
{
if (!column.column || !isColumnConst(*column.column) || !column.column->empty())
throw Exception("AddingConstColumnTransform expected empty const column", ErrorCodes::LOGICAL_ERROR);
}
String getName() const override { return "AddingConstColumnTransform"; }
static Block transformHeader(Block header, ColumnWithTypeAndName & column_)
{
header.insert(column_);
return header;
}
protected:
void transform(Chunk & chunk) override
{
auto num_rows = chunk.getNumRows();
chunk.addColumn(column.column->cloneResized(num_rows)->convertToFullColumnIfConst());
}
private:
ColumnWithTypeAndName column;
};
}

View File

@ -92,7 +92,6 @@ SRCS(
Pipe.cpp
Port.cpp
QueryPipeline.cpp
QueryPlan/AddingConstColumnStep.cpp
QueryPlan/AddingDelayedSourceStep.cpp
QueryPlan/AddingMissedStep.cpp
QueryPlan/AggregatingStep.cpp

View File

@ -652,7 +652,6 @@ namespace
/// Create context.
query_context.emplace(iserver.context());
query_scope.emplace(*query_context);
/// Authentication.
query_context->setUser(user, password, user_address);
@ -670,6 +669,8 @@ namespace
query_context->setSessionContext(session->context);
}
query_scope.emplace(*query_context);
/// Set client info.
ClientInfo & client_info = query_context->getClientInfo();
client_info.query_kind = ClientInfo::QueryKind::INITIAL_QUERY;

View File

@ -24,6 +24,7 @@
#include <regex>
#include <Access/User.h>
#include <Access/AccessControlManager.h>
#include <Common/setThreadName.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config_version.h>
@ -86,6 +87,8 @@ MySQLHandler::MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & so
void MySQLHandler::run()
{
setThreadName("MySQLHandler");
ThreadStatus thread_status;
connection_context.makeSessionContext();
connection_context.getClientInfo().interface = ClientInfo::Interface::MYSQL;
connection_context.setDefaultFormat("MySQLWire");
@ -340,7 +343,9 @@ void MySQLHandler::comQuery(ReadBuffer & payload)
affected_rows += progress.written_rows;
});
executeQuery(should_replace ? replacement : payload, *out, true, query_context,
CurrentThread::QueryScope query_scope{query_context};
executeQuery(should_replace ? replacement : payload, *out, false, query_context,
[&with_output](const String &, const String &, const String &, const String &)
{
with_output = true;

View File

@ -5,6 +5,7 @@
#include <Interpreters/executeQuery.h>
#include "PostgreSQLHandler.h"
#include <Parsers/parseQuery.h>
#include <Common/setThreadName.h>
#include <random>
#if !defined(ARCADIA_BUILD)
@ -49,6 +50,8 @@ void PostgreSQLHandler::changeIO(Poco::Net::StreamSocket & socket)
void PostgreSQLHandler::run()
{
setThreadName("PostgresHandler");
ThreadStatus thread_status;
connection_context.makeSessionContext();
connection_context.getClientInfo().interface = ClientInfo::Interface::POSTGRESQL;
connection_context.setDefaultFormat("PostgreSQLWire");
@ -273,8 +276,10 @@ void PostgreSQLHandler::processQuery()
for (const auto & spl_query : queries)
{
/// FIXME why do we execute all queries in a single connection context?
CurrentThread::QueryScope query_scope{connection_context};
ReadBufferFromString read_buf(spl_query);
executeQuery(read_buf, *out, true, connection_context, {});
executeQuery(read_buf, *out, false, connection_context, {});
PostgreSQLProtocol::Messaging::CommandComplete::Command command =
PostgreSQLProtocol::Messaging::CommandComplete::classifyQuery(spl_query);

View File

@ -75,7 +75,9 @@ static Block adoptBlock(const Block & header, const Block & block, Poco::Logger
ConvertingBlockInputStream::MatchColumnsMode::Name);
return convert.read();
}
static void writeBlockConvert(const BlockOutputStreamPtr & out, const Block & block, const size_t repeats, Poco::Logger * log)
static void writeBlockConvert(const BlockOutputStreamPtr & out, const Block & block, size_t repeats, Poco::Logger * log)
{
Block adopted_block = adoptBlock(out->getHeader(), block, log);
for (size_t i = 0; i < repeats; ++i)
@ -387,11 +389,18 @@ void DistributedBlockOutputStream::writeSync(const Block & block)
bool random_shard_insert = settings.insert_distributed_one_random_shard && !storage.has_sharding_key;
size_t start = 0;
size_t end = shards_info.size();
if (random_shard_insert)
if (settings.insert_shard_id)
{
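/// insert_shard_id is 1-based in the setting, while shards_info is indexed from 0.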
start = settings.insert_shard_id - 1;
end = settings.insert_shard_id;
}
else if (random_shard_insert)
{
start = storage.getRandomShardIndex(shards_info);
end = start + 1;
}
size_t num_shards = end - start;
if (!pool)
@ -549,7 +558,7 @@ void DistributedBlockOutputStream::writeSplitAsync(const Block & block)
}
void DistributedBlockOutputStream::writeAsyncImpl(const Block & block, const size_t shard_id)
void DistributedBlockOutputStream::writeAsyncImpl(const Block & block, size_t shard_id)
{
const auto & shard_info = cluster->getShardsInfo()[shard_id];
const auto & settings = context.getSettingsRef();
@ -585,7 +594,7 @@ void DistributedBlockOutputStream::writeAsyncImpl(const Block & block, const siz
}
void DistributedBlockOutputStream::writeToLocal(const Block & block, const size_t repeats)
void DistributedBlockOutputStream::writeToLocal(const Block & block, size_t repeats)
{
/// Async insert does not support settings forwarding yet whereas sync one supports
InterpreterInsertQuery interp(query_ast, context);

View File

@ -62,10 +62,10 @@ private:
void writeSplitAsync(const Block & block);
void writeAsyncImpl(const Block & block, const size_t shard_id = 0);
void writeAsyncImpl(const Block & block, size_t shard_id = 0);
/// Increments finished_writings_count after each repeat.
void writeToLocal(const Block & block, const size_t repeats);
void writeToLocal(const Block & block, size_t repeats);
void writeToShard(const Block & block, const std::vector<std::string> & dir_names);

View File

@ -3675,6 +3675,17 @@ bool MergeTreeData::canReplacePartition(const DataPartPtr & src_part) const
return true;
}
inline UInt64 time_in_microseconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
{
return std::chrono::duration_cast<std::chrono::microseconds>(timepoint.time_since_epoch()).count();
}
inline UInt64 time_in_seconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
{
return std::chrono::duration_cast<std::chrono::seconds>(timepoint.time_since_epoch()).count();
}
void MergeTreeData::writePartLog(
PartLogElement::Type type,
const ExecutionStatus & execution_status,
@ -3697,7 +3708,12 @@ try
part_log_elem.error = static_cast<UInt16>(execution_status.code);
part_log_elem.exception = execution_status.message;
part_log_elem.event_time = time(nullptr);
// construct event_time and event_time_microseconds using the same time point
// so that the two times will always be equal up to a precision of a second.
const auto time_now = std::chrono::system_clock::now();
part_log_elem.event_time = time_in_seconds(time_now);
part_log_elem.event_time_microseconds = time_in_microseconds(time_now);
/// TODO: Stop stopwatch in outer code to exclude ZK timings and so on
part_log_elem.duration_ms = elapsed_ns / 1000000;
@ -3754,18 +3770,6 @@ MergeTreeData::CurrentlyMovingPartsTagger::~CurrentlyMovingPartsTagger()
}
}
bool MergeTreeData::selectPartsAndMove()
{
if (parts_mover.moves_blocker.isCancelled())
return false;
auto moving_tagger = selectPartsForMove();
if (moving_tagger->parts_to_move.empty())
return false;
return moveParts(std::move(moving_tagger));
}
std::optional<JobAndPool> MergeTreeData::getDataMovingJob()
{
if (parts_mover.moves_blocker.isCancelled())

View File

@ -465,9 +465,6 @@ public:
DataPartsVector removePartsInRangeFromWorkingSet(const MergeTreePartInfo & drop_range, bool clear_without_timeout,
bool skip_intersecting_parts, DataPartsLock & lock);
/// Renames the part to detached/<prefix>_<part> and removes it from working set.
void removePartsFromWorkingSetAndCloneToDetached(const DataPartsVector & parts, bool clear_without_timeout, const String & prefix = "");
/// Renames the part to detached/<prefix>_<part> and removes it from data_parts,
//// so it will not be deleted in clearOldParts.
/// If restore_covered is true, adds to the working set inactive parts, which were merged into the deleted part.
@ -916,9 +913,6 @@ protected:
/// Moves part to specified space, used in ALTER ... MOVE ... queries
bool movePartsToSpace(const DataPartsVector & parts, SpacePtr space);
/// Selects parts for move and moves them, used in background process
bool selectPartsAndMove();
private:
/// RAII Wrapper for atomic work with currently moving parts

View File

@ -28,12 +28,10 @@
#include <Processors/QueryPlan/FilterStep.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/ReadFromPreparedSource.h>
#include <Processors/QueryPlan/AddingConstColumnStep.h>
#include <Processors/QueryPlan/ReverseRowsStep.h>
#include <Processors/QueryPlan/MergingSortedStep.h>
#include <Processors/QueryPlan/UnionStep.h>
#include <Processors/QueryPlan/MergingFinal.h>
#include <Processors/QueryPlan/ReadNothingStep.h>
#include <Core/UUID.h>
#include <DataTypes/DataTypeDate.h>
@ -841,7 +839,9 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
column.type = std::make_shared<DataTypeFloat64>();
column.column = column.type->createColumnConst(0, Field(used_sample_factor));
auto adding_column = std::make_unique<AddingConstColumnStep>(plan->getCurrentDataStream(), std::move(column));
auto adding_column_action = ActionsDAG::makeAddingColumnActions(std::move(column));
auto adding_column = std::make_unique<ExpressionStep>(plan->getCurrentDataStream(), std::move(adding_column_action));
adding_column->setStepDescription("Add _sample_factor column");
plan->addStep(std::move(adding_column));
}

View File

@ -83,6 +83,7 @@ namespace ErrorCodes
extern const int TYPE_MISMATCH;
extern const int TOO_MANY_ROWS;
extern const int UNABLE_TO_SKIP_UNUSED_SHARDS;
extern const int INVALID_SHARD_ID;
}
namespace ActionLocks
@ -542,22 +543,29 @@ BlockOutputStreamPtr StorageDistributed::write(const ASTPtr &, const StorageMeta
const auto & settings = context.getSettingsRef();
/// Ban an attempt to make async insert into the table belonging to DatabaseMemory
if (!storage_policy && !owned_cluster && !settings.insert_distributed_sync)
if (!storage_policy && !owned_cluster && !settings.insert_distributed_sync && !settings.insert_shard_id)
{
throw Exception("Storage " + getName() + " must have own data directory to enable asynchronous inserts",
ErrorCodes::BAD_ARGUMENTS);
}
auto shard_num = cluster->getLocalShardCount() + cluster->getRemoteShardCount();
/// If the sharding key is not specified, you can only write to a cluster that contains a single shard
if (!settings.insert_distributed_one_random_shard && !has_sharding_key
&& ((cluster->getLocalShardCount() + cluster->getRemoteShardCount()) >= 2))
if (!settings.insert_shard_id && !settings.insert_distributed_one_random_shard && !has_sharding_key && shard_num >= 2)
{
throw Exception("Method write is not supported by storage " + getName() + " with more than one shard and no sharding key provided",
ErrorCodes::STORAGE_REQUIRES_PARAMETER);
throw Exception(
"Method write is not supported by storage " + getName() + " with more than one shard and no sharding key provided",
ErrorCodes::STORAGE_REQUIRES_PARAMETER);
}
if (settings.insert_shard_id && settings.insert_shard_id > shard_num)
{
throw Exception("Shard id should be range from 1 to shard number", ErrorCodes::INVALID_SHARD_ID);
}
/// Force sync insertion if it is remote() table function
bool insert_sync = settings.insert_distributed_sync || owned_cluster;
bool insert_sync = settings.insert_distributed_sync || settings.insert_shard_id || owned_cluster;
auto timeout = settings.insert_distributed_timeout;
/// DistributedBlockOutputStream will not own cluster, but will own ConnectionPools of the cluster

View File

@ -24,7 +24,6 @@
#include <Parsers/queryToString.h>
#include <Processors/Transforms/MaterializingTransform.h>
#include <Processors/ConcatProcessor.h>
#include <Processors/Transforms/AddingConstColumnTransform.h>
#include <Processors/Transforms/ExpressionTransform.h>
@ -364,9 +363,13 @@ Pipe StorageMerge::createSources(
column.name = "_table";
column.type = std::make_shared<DataTypeString>();
column.column = column.type->createColumnConst(0, Field(table_name));
auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column));
auto adding_column_actions = std::make_shared<ExpressionActions>(std::move(adding_column_dag));
pipe.addSimpleTransform([&](const Block & stream_header)
{
return std::make_shared<AddingConstColumnTransform>(stream_header, column);
return std::make_shared<ExpressionTransform>(stream_header, adding_column_actions);
});
}

View File

@ -3699,7 +3699,7 @@ void StorageReplicatedMergeTree::shutdown()
/// We clear all old parts after stopping all background operations. It's
/// important, because background operations can produce temporary parts
/// which will remove themselves in their descrutors. If so, we may have
/// which will remove themselves in their destructors. If so, we may have
/// race condition between our remove call and background process.
clearOldPartsFromFilesystem(true);
}

View File

@ -12,6 +12,9 @@
#include <Interpreters/evaluateConstantExpression.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/typeid_cast.h>
#include <Parsers/ASTSubquery.h>
#include <Interpreters/Set.h>
#include <Interpreters/interpretSubquery.h>
namespace DB
@ -43,8 +46,24 @@ NamesAndTypesList StorageSystemZooKeeper::getNamesAndTypes()
};
}
using Paths = Strings;
static bool extractPathImpl(const IAST & elem, String & res, const Context & context)
static String pathCorrected(const String & path)
{
String path_corrected;
/// The path should start with '/', otherwise ZBADARGUMENTS will be thrown in
/// ZooKeeper::sendThread and the session will fail.
if (path[0] != '/')
path_corrected = '/';
path_corrected += path;
/// In all cases except the root, path must not end with a slash.
if (path_corrected != "/" && path_corrected.back() == '/')
path_corrected.resize(path_corrected.size() - 1);
return path_corrected;
}
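/// pathCorrected behaviour sketch (illustrative examples, not part of the patch):
///   pathCorrected("clickhouse")   -> "/clickhouse"  (leading slash added)
///   pathCorrected("/clickhouse/") -> "/clickhouse"  (trailing slash dropped)
///   pathCorrected("/")            -> "/"            (the root is kept as is)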
static bool extractPathImpl(const IAST & elem, Paths & res, const Context & context)
{
const auto * function = elem.as<ASTFunction>();
if (!function)
@ -59,15 +78,65 @@ static bool extractPathImpl(const IAST & elem, String & res, const Context & con
return false;
}
if (function->name == "equals")
{
const auto & args = function->arguments->as<ASTExpressionList &>();
ASTPtr value;
const auto & args = function->arguments->as<ASTExpressionList &>();
if (args.children.size() != 2)
return false;
if (args.children.size() != 2)
if (function->name == "in")
{
const ASTIdentifier * ident = args.children.at(0)->as<ASTIdentifier>();
if (!ident || ident->name() != "path")
return false;
ASTPtr value = args.children.at(1);
if (value->as<ASTSubquery>())
{
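/// Execute the subquery once, collect its result rows into a Set, and read the
/// set's single column back as the list of paths to query.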
auto interpreter_subquery = interpretSubquery(value, context, {}, {});
auto stream = interpreter_subquery->execute().getInputStream();
SizeLimits limits(context.getSettingsRef().max_rows_in_set, context.getSettingsRef().max_bytes_in_set, OverflowMode::THROW);
Set set(limits, true, context.getSettingsRef().transform_null_in);
set.setHeader(stream->getHeader());
stream->readPrefix();
while (Block block = stream->read())
{
set.insertFromBlock(block);
}
set.finishInsert();
stream->readSuffix();
set.checkColumnsNumber(1);
const auto & set_column = *set.getSetElements()[0];
for (size_t row = 0; row < set_column.size(); ++row)
res.emplace_back(set_column[row].safeGet<String>());
}
else
{
auto evaluated = evaluateConstantExpressionAsLiteral(value, context);
const auto * literal = evaluated->as<ASTLiteral>();
if (!literal)
return false;
if (String str; literal->value.tryGet(str))
{
res.emplace_back(str);
}
else if (Tuple tuple; literal->value.tryGet(tuple))
{
for (auto element : tuple)
res.emplace_back(element.safeGet<String>());
}
else
return false;
}
return true;
}
else if (function->name == "equals")
{
const ASTIdentifier * ident;
ASTPtr value;
if ((ident = args.children.at(0)->as<ASTIdentifier>()))
value = args.children.at(1);
else if ((ident = args.children.at(1)->as<ASTIdentifier>()))
@ -86,7 +155,7 @@ static bool extractPathImpl(const IAST & elem, String & res, const Context & con
if (literal->value.getType() != Field::Types::String)
return false;
res = literal->value.safeGet<String>();
res.emplace_back(literal->value.safeGet<String>());
return true;
}
@ -96,69 +165,69 @@ static bool extractPathImpl(const IAST & elem, String & res, const Context & con
/** Retrieve from the query conditions of the form `path = 'path'` or `path IN (...)`, from conjunctions in the WHERE clause.
*/
static String extractPath(const ASTPtr & query, const Context & context)
static Paths extractPath(const ASTPtr & query, const Context & context)
{
const auto & select = query->as<ASTSelectQuery &>();
if (!select.where())
return "";
return Paths();
String res;
return extractPathImpl(*select.where(), res, context) ? res : "";
Paths res;
return extractPathImpl(*select.where(), res, context) ? res : Paths();
}
void StorageSystemZooKeeper::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & query_info) const
{
String path = extractPath(query_info.query, context);
if (path.empty())
throw Exception("SELECT from system.zookeeper table must contain condition like path = 'path' in WHERE clause.", ErrorCodes::BAD_ARGUMENTS);
const Paths & paths = extractPath(query_info.query, context);
if (paths.empty())
throw Exception("SELECT from system.zookeeper table must contain condition like path = 'path' or path IN ('path1','path2'...) or path IN (subquery) in WHERE clause.", ErrorCodes::BAD_ARGUMENTS);
zkutil::ZooKeeperPtr zookeeper = context.getZooKeeper();
String path_corrected;
/// The path should start with '/', otherwise ZBADARGUMENTS will be thrown in
/// ZooKeeper::sendThread and the session will fail.
if (path[0] != '/')
path_corrected = '/';
path_corrected += path;
/// In all cases except the root, path must not end with a slash.
if (path_corrected != "/" && path_corrected.back() == '/')
path_corrected.resize(path_corrected.size() - 1);
zkutil::Strings nodes = zookeeper->getChildren(path_corrected);
String path_part = path_corrected;
if (path_part == "/")
path_part.clear();
std::vector<std::future<Coordination::GetResponse>> futures;
futures.reserve(nodes.size());
for (const String & node : nodes)
futures.push_back(zookeeper->asyncTryGet(path_part + '/' + node));
for (size_t i = 0, size = nodes.size(); i < size; ++i)
std::unordered_set<String> paths_corrected;
for (const auto & path : paths)
{
auto res = futures[i].get();
if (res.error == Coordination::Error::ZNONODE)
continue; /// Node was deleted meanwhile.
const String & path_corrected = pathCorrected(path);
auto [it, inserted] = paths_corrected.emplace(path_corrected);
if (!inserted) /// Do not repeat processing.
continue;
const Coordination::Stat & stat = res.stat;
zkutil::Strings nodes = zookeeper->getChildren(path_corrected);
size_t col_num = 0;
res_columns[col_num++]->insert(nodes[i]);
res_columns[col_num++]->insert(res.data);
res_columns[col_num++]->insert(stat.czxid);
res_columns[col_num++]->insert(stat.mzxid);
res_columns[col_num++]->insert(UInt64(stat.ctime / 1000));
res_columns[col_num++]->insert(UInt64(stat.mtime / 1000));
res_columns[col_num++]->insert(stat.version);
res_columns[col_num++]->insert(stat.cversion);
res_columns[col_num++]->insert(stat.aversion);
res_columns[col_num++]->insert(stat.ephemeralOwner);
res_columns[col_num++]->insert(stat.dataLength);
res_columns[col_num++]->insert(stat.numChildren);
res_columns[col_num++]->insert(stat.pzxid);
res_columns[col_num++]->insert(path); /// This is the original path. In order to process the request, condition in WHERE should be triggered.
String path_part = path_corrected;
if (path_part == "/")
path_part.clear();
std::vector<std::future<Coordination::GetResponse>> futures;
futures.reserve(nodes.size());
for (const String & node : nodes)
futures.push_back(zookeeper->asyncTryGet(path_part + '/' + node));
for (size_t i = 0, size = nodes.size(); i < size; ++i)
{
auto res = futures[i].get();
if (res.error == Coordination::Error::ZNONODE)
continue; /// Node was deleted meanwhile.
const Coordination::Stat & stat = res.stat;
size_t col_num = 0;
res_columns[col_num++]->insert(nodes[i]);
res_columns[col_num++]->insert(res.data);
res_columns[col_num++]->insert(stat.czxid);
res_columns[col_num++]->insert(stat.mzxid);
res_columns[col_num++]->insert(UInt64(stat.ctime / 1000));
res_columns[col_num++]->insert(UInt64(stat.mtime / 1000));
res_columns[col_num++]->insert(stat.version);
res_columns[col_num++]->insert(stat.cversion);
res_columns[col_num++]->insert(stat.aversion);
res_columns[col_num++]->insert(stat.ephemeralOwner);
res_columns[col_num++]->insert(stat.dataLength);
res_columns[col_num++]->insert(stat.numChildren);
res_columns[col_num++]->insert(stat.pzxid);
res_columns[col_num++]->insert(
path); /// This is the original path. In order to process the request, condition in WHERE should be triggered.
}
}
}

View File

@ -8,6 +8,8 @@
<!-- Normal limits. -->
<queries>1000</queries>
<query_selects>500</query_selects>
<query_inserts>500</query_inserts>
<errors>0</errors>
<read_rows>1000</read_rows>
<result_rows>0</result_rows>

View File

@ -28,7 +28,7 @@ def system_quota_limits(canonical):
def system_quota_usage(canonical):
canonical_tsv = TSV(canonical)
query = "SELECT quota_name, quota_key, duration, queries, max_queries, errors, max_errors, result_rows, max_result_rows," \
query = "SELECT quota_name, quota_key, duration, queries, max_queries, query_selects, max_query_selects, query_inserts, max_query_inserts, errors, max_errors, result_rows, max_result_rows," \
"result_bytes, max_result_bytes, read_rows, max_read_rows, read_bytes, max_read_bytes, max_execution_time " \
"FROM system.quota_usage ORDER BY duration"
r = TSV(instance.query(query))
@ -38,7 +38,7 @@ def system_quota_usage(canonical):
def system_quotas_usage(canonical):
canonical_tsv = TSV(canonical)
query = "SELECT quota_name, quota_key, is_current, duration, queries, max_queries, errors, max_errors, result_rows, max_result_rows, " \
query = "SELECT quota_name, quota_key, is_current, duration, queries, max_queries, query_selects, max_query_selects, query_inserts, max_query_inserts, errors, max_errors, result_rows, max_result_rows, " \
"result_bytes, max_result_bytes, read_rows, max_read_rows, read_bytes, max_read_bytes, max_execution_time " \
"FROM system.quotas_usage ORDER BY quota_name, quota_key, duration"
r = TSV(instance.query(query))
@ -73,6 +73,7 @@ def reset_quotas_and_usage_info():
try:
yield
finally:
instance.query("DROP QUOTA IF EXISTS qA, qB")
copy_quota_xml('simpliest.xml') # To reset usage info.
copy_quota_xml('normal_limits.xml')
@ -81,18 +82,18 @@ def reset_quotas_and_usage_info():
def test_quota_from_users_xml():
check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952],
0, "['default']", "[]"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
system_quotas_usage(
[["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
[["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
instance.query("SELECT * from test_table")
system_quota_usage(
[["myQuota", "default", 31556952, 1, 1000, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"]])
[["myQuota", "default", 31556952, 1, 1000, 1, 500, 0, 500, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"]])
instance.query("SELECT COUNT() from test_table")
system_quota_usage(
[["myQuota", "default", 31556952, 2, 1000, 0, "\\N", 51, "\\N", 208, "\\N", 50, 1000, 200, "\\N", "\\N"]])
[["myQuota", "default", 31556952, 2, 1000, 2, 500, 0, 500, 0, "\\N", 51, "\\N", 208, "\\N", 50, 1000, 200, "\\N", "\\N"]])
def test_simpliest_quota():
@ -102,11 +103,11 @@ def test_simpliest_quota():
"['default']", "[]"]])
system_quota_limits("")
system_quota_usage(
[["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
[["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
instance.query("SELECT * from test_table")
system_quota_usage(
[["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
[["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
def test_tracking_quota():
@ -114,16 +115,16 @@ def test_tracking_quota():
copy_quota_xml('tracking.xml')
check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]",
0, "['default']", "[]"]])
system_quota_limits([["myQuota", 31556952, 0, "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
system_quota_usage([["myQuota", "default", 31556952, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", "\\N"]])
system_quota_limits([["myQuota", 31556952, 0, "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
system_quota_usage([["myQuota", "default", 31556952, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", "\\N"]])
instance.query("SELECT * from test_table")
system_quota_usage(
[["myQuota", "default", 31556952, 1, "\\N", 0, "\\N", 50, "\\N", 200, "\\N", 50, "\\N", 200, "\\N", "\\N"]])
[["myQuota", "default", 31556952, 1, "\\N", 1, "\\N", 0, "\\N", 0, "\\N", 50, "\\N", 200, "\\N", 50, "\\N", 200, "\\N", "\\N"]])
instance.query("SELECT COUNT() from test_table")
system_quota_usage(
[["myQuota", "default", 31556952, 2, "\\N", 0, "\\N", 51, "\\N", 208, "\\N", 50, "\\N", 200, "\\N", "\\N"]])
[["myQuota", "default", 31556952, 2, "\\N", 2, "\\N", 0, "\\N", 0, "\\N", 51, "\\N", 208, "\\N", 50, "\\N", 200, "\\N", "\\N"]])
def test_exceed_quota():
@ -131,55 +132,55 @@ def test_exceed_quota():
copy_quota_xml('tiny_limits.xml')
check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]",
0, "['default']", "[]"]])
system_quota_limits([["myQuota", 31556952, 0, 1, 1, 1, "\\N", 1, "\\N", "\\N"]])
system_quota_usage([["myQuota", "default", 31556952, 0, 1, 0, 1, 0, 1, 0, "\\N", 0, 1, 0, "\\N", "\\N"]])
system_quota_limits([["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N"]])
system_quota_usage([["myQuota", "default", 31556952, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, "\\N", 0, 1, 0, "\\N", "\\N"]])
assert re.search("Quota.*has\ been\ exceeded", instance.query_and_get_error("SELECT * from test_table"))
system_quota_usage([["myQuota", "default", 31556952, 1, 1, 1, 1, 0, 1, 0, "\\N", 50, 1, 0, "\\N", "\\N"]])
system_quota_usage([["myQuota", "default", 31556952, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, "\\N", 50, 1, 0, "\\N", "\\N"]])
# Change quota, now the limits are enough to execute queries.
copy_quota_xml('normal_limits.xml')
check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]",
0, "['default']", "[]"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
system_quota_usage([["myQuota", "default", 31556952, 1, 1000, 1, "\\N", 0, "\\N", 0, "\\N", 50, 1000, 0, "\\N", "\\N"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
system_quota_usage([["myQuota", "default", 31556952, 1, 1000, 1, 500, 0, 500, 1, "\\N", 0, "\\N", 0, "\\N", 50, 1000, 0, "\\N", "\\N"]])
instance.query("SELECT * from test_table")
system_quota_usage(
[["myQuota", "default", 31556952, 2, 1000, 1, "\\N", 50, "\\N", 200, "\\N", 100, 1000, 200, "\\N", "\\N"]])
[["myQuota", "default", 31556952, 2, 1000, 2, 500, 0, 500, 1, "\\N", 50, "\\N", 200, "\\N", 100, 1000, 200, "\\N", "\\N"]])
def test_add_remove_interval():
check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952],
0, "['default']", "[]"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
# Add interval.
copy_quota_xml('two_intervals.xml')
check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']",
"[31556952,63113904]", 0, "['default']", "[]"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"],
["myQuota", 63113904, 1, "\\N", "\\N", "\\N", 30000, "\\N", 20000, 120]])
system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"],
["myQuota", "default", 63113904, 0, "\\N", 0, "\\N", 0, "\\N", 0, 30000, 0, "\\N", 0, 20000, 120]])
system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", "\\N", "\\N", 1000, "\\N", "\\N"],
["myQuota", 63113904, 1, "\\N", "\\N", "\\N", "\\N", "\\N", 30000, "\\N", 20000, 120]])
system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"],
["myQuota", "default", 63113904, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, 30000, 0, "\\N", 0, 20000, 120]])
instance.query("SELECT * from test_table")
system_quota_usage(
[["myQuota", "default", 31556952, 1, 1000, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"],
["myQuota", "default", 63113904, 1, "\\N", 0, "\\N", 50, "\\N", 200, 30000, 50, "\\N", 200, 20000, 120]])
[["myQuota", "default", 31556952, 1, 1000, 1, "\\N", 0, "\\N", 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"],
["myQuota", "default", 63113904, 1, "\\N", 1, "\\N", 0, "\\N", 0, "\\N", 50, "\\N", 200, 30000, 50, "\\N", 200, 20000, 120]])
# Remove interval.
copy_quota_xml('normal_limits.xml')
check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952],
0, "['default']", "[]"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
system_quota_usage(
[["myQuota", "default", 31556952, 1, 1000, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"]])
[["myQuota", "default", 31556952, 1, 1000, 1, 500, 0, 500, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"]])
instance.query("SELECT * from test_table")
system_quota_usage(
[["myQuota", "default", 31556952, 2, 1000, 0, "\\N", 100, "\\N", 400, "\\N", 100, 1000, 400, "\\N", "\\N"]])
[["myQuota", "default", 31556952, 2, 1000, 2, 500, 0, 500, 0, "\\N", 100, "\\N", 400, "\\N", 100, 1000, 400, "\\N", "\\N"]])
# Remove all intervals.
copy_quota_xml('simpliest.xml')
@ -187,26 +188,26 @@ def test_add_remove_interval():
"['default']", "[]"]])
system_quota_limits("")
system_quota_usage(
[["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
[["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
instance.query("SELECT * from test_table")
system_quota_usage(
[["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
[["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]])
# Add one interval back.
copy_quota_xml('normal_limits.xml')
check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952],
0, "['default']", "[]"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
def test_add_remove_quota():
check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952],
0, "['default']", "[]"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
system_quotas_usage(
[["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
[["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
# Add quota.
copy_quota_xml('two_quotas.xml')
@ -214,19 +215,19 @@ def test_add_remove_quota():
0, "['default']", "[]"],
["myQuota2", "4590510c-4d13-bf21-ec8a-c2187b092e73", "users.xml", "['client_key','user_name']",
"[3600,2629746]", 0, "[]", "[]"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"],
["myQuota2", 3600, 1, "\\N", "\\N", 4000, 400000, 4000, 400000, 60],
["myQuota2", 2629746, 0, "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", 1800]])
system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", "\\N", "\\N", 1000, "\\N", "\\N"],
["myQuota2", 3600, 1, "\\N", "\\N", "\\N", "\\N", 4000, 400000, 4000, 400000, 60],
["myQuota2", 2629746, 0, "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", 1800]])
system_quotas_usage(
[["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
[["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
# Drop quota.
copy_quota_xml('normal_limits.xml')
check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]",
0, "['default']", "[]"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
system_quotas_usage(
[["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
[["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
# Drop all quotas.
copy_quota_xml('no_quotas.xml')
@ -238,15 +239,15 @@ def test_add_remove_quota():
copy_quota_xml('normal_limits.xml')
check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]",
0, "['default']", "[]"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
system_quotas_usage(
[["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
[["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
def test_reload_users_xml_by_timer():
check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]",
0, "['default']", "[]"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
time.sleep(1) # The modification time of the 'quota.xml' file should be different,
# because config files are reload by timer only when the modification time is changed.
@ -255,25 +256,25 @@ def test_reload_users_xml_by_timer():
["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", ['user_name'], "[31556952]", 0, "['default']",
"[]"]])
assert_eq_with_retry(instance, "SELECT * FROM system.quota_limits",
[["myQuota", 31556952, 0, 1, 1, 1, "\\N", 1, "\\N", "\\N"]])
[["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N"]])
def test_dcl_introspection():
assert instance.query("SHOW QUOTAS") == "myQuota\n"
assert instance.query(
"SHOW CREATE QUOTA") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n"
"SHOW CREATE QUOTA") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, query_selects = 500, query_inserts = 500, read_rows = 1000 TO default\n"
assert instance.query(
"SHOW CREATE QUOTAS") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n"
"SHOW CREATE QUOTAS") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, query_selects = 500, query_inserts = 500, read_rows = 1000 TO default\n"
assert re.match(
"myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t1000\\t0\\t\\\\N\\t.*\\t\\\\N\n",
"myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t500\\t0\\t500\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t1000\\t0\\t\\\\N\\t.*\\t\\\\N\n",
instance.query("SHOW QUOTA"))
instance.query("SELECT * from test_table")
assert re.match(
"myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n",
"myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n",
instance.query("SHOW QUOTA"))
expected_access = "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n"
expected_access = "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, query_selects = 500, query_inserts = 500, read_rows = 1000 TO default\n"
assert expected_access in instance.query("SHOW ACCESS")
# Add interval.
@ -282,8 +283,8 @@ def test_dcl_introspection():
assert instance.query(
"SHOW CREATE QUOTA") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000, FOR RANDOMIZED INTERVAL 2 year MAX result_bytes = 30000, read_bytes = 20000, execution_time = 120 TO default\n"
assert re.match(
"myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n"
"myQuota\\tdefault\\t.*\\t63113904\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t30000\\t0\\t\\\\N\\t0\\t20000\\t.*\\t120",
"myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n"
"myQuota\\tdefault\\t.*\\t63113904\\t0\\t\\\\N\t0\\t\\\\N\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t30000\\t0\\t\\\\N\\t0\\t20000\\t.*\\t120",
instance.query("SHOW QUOTA"))
# Drop interval, add quota.
@ -297,7 +298,7 @@ def test_dcl_introspection():
"SHOW CREATE QUOTAS") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n" \
"CREATE QUOTA myQuota2 KEYED BY client_key, user_name FOR RANDOMIZED INTERVAL 1 hour MAX result_rows = 4000, result_bytes = 400000, read_rows = 4000, read_bytes = 400000, execution_time = 60, FOR INTERVAL 1 month MAX execution_time = 1800\n"
assert re.match(
"myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n",
"myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n",
instance.query("SHOW QUOTA"))
# Drop all quotas.
@ -315,12 +316,12 @@ def test_dcl_management():
assert instance.query(
"SHOW CREATE QUOTA qA") == "CREATE QUOTA qA FOR INTERVAL 5 quarter MAX queries = 123 TO default\n"
assert re.match(
"qA\\t\\t.*\\t39446190\\t0\\t123\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t\\\\N\n",
"qA\\t\\t.*\\t39446190\\t0\\t123\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t\\\\N\n",
instance.query("SHOW QUOTA"))
instance.query("SELECT * from test_table")
assert re.match(
"qA\\t\\t.*\\t39446190\\t1\\t123\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
"qA\\t\\t.*\\t39446190\\t1\\t123\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
instance.query("SHOW QUOTA"))
instance.query(
@ -328,37 +329,37 @@ def test_dcl_management():
assert instance.query(
"SHOW CREATE QUOTA qA") == "CREATE QUOTA qA FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default\n"
assert re.match(
"qA\\t\\t.*\\t1800\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t0.5\n"
"qA\\t\\t.*\\t39446190\\t1\\t321\\t0\\t10\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
"qA\\t\\t.*\\t1800\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t0.5\n"
"qA\\t\\t.*\\t39446190\\t1\\t321\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
instance.query("SHOW QUOTA"))
instance.query("SELECT * from test_table")
assert re.match(
"qA\\t\\t.*\\t1800\\t1\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t0.5\n"
"qA\\t\\t.*\\t39446190\\t2\\t321\\t0\\t10\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n",
"qA\\t\\t.*\\t1800\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t0.5\n"
"qA\\t\\t.*\\t39446190\\t2\\t321\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n",
instance.query("SHOW QUOTA"))
instance.query(
"ALTER QUOTA qA FOR INTERVAL 15 MONTH NO LIMITS, FOR RANDOMIZED INTERVAL 16 MONTH TRACKING ONLY, FOR INTERVAL 1800 SECOND NO LIMITS")
assert re.match(
"qA\\t\\t.*\\t42075936\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t\\\\N\n",
"qA\\t\\t.*\\t42075936\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t\\\\N\n",
instance.query("SHOW QUOTA"))
instance.query("SELECT * from test_table")
assert re.match(
"qA\\t\\t.*\\t42075936\\t1\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
"qA\\t\\t.*\\t42075936\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
instance.query("SHOW QUOTA"))
instance.query("ALTER QUOTA qA RENAME TO qB")
assert instance.query(
"SHOW CREATE QUOTA qB") == "CREATE QUOTA qB FOR RANDOMIZED INTERVAL 16 month TRACKING ONLY TO default\n"
assert re.match(
"qB\\t\\t.*\\t42075936\\t1\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
"qB\\t\\t.*\\t42075936\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
instance.query("SHOW QUOTA"))
instance.query("SELECT * from test_table")
assert re.match(
"qB\\t\\t.*\\t42075936\\t2\\t\\\\N\\t0\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n",
"qB\\t\\t.*\\t42075936\\t2\\t\\\\N\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n",
instance.query("SHOW QUOTA"))
instance.query("DROP QUOTA qB")
@ -367,3 +368,15 @@ def test_dcl_management():
def test_users_xml_is_readonly():
assert re.search("storage is readonly", instance.query_and_get_error("DROP QUOTA myQuota"))
def test_query_inserts():
check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952],
0, "['default']", "[]"]])
system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]])
system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
system_quotas_usage(
[["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])
instance.query("INSERT INTO test_table values(1)")
system_quota_usage(
[["myQuota", "default", 31556952, 1, 1000, 0, 500, 1, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]])

View File

@ -8,6 +8,8 @@
<!-- Tiny limits. -->
<queries>1</queries>
<query_selects>1</query_selects>
<query_inserts>1</query_inserts>
<errors>1</errors>
<read_rows>1</read_rows>
<result_rows>1</result_rows>

View File

@ -8,6 +8,8 @@
<!-- No limits. Just calculate resource usage for time interval. -->
<queries>0</queries>
<query_selects>0</query_selects>
<query_inserts>0</query_inserts>
<errors>0</errors>
<read_rows>0</read_rows>
<result_rows>0</result_rows>

View File

@ -0,0 +1,24 @@
<test max_ignored_relative_change="0.2">
<create_query>drop table if EXISTS test_bm2;</create_query>
<create_query>drop table if EXISTS test_bm_join2;</create_query>
<create_query>create table test_bm2(
dim UInt64,
id UInt64)
ENGINE = MergeTree()
ORDER BY( dim )
SETTINGS index_granularity = 8192;
</create_query>
<create_query>
create table test_bm_join2(
dim UInt64,
ids AggregateFunction(groupBitmap, UInt64) )
ENGINE = MergeTree()
ORDER BY(dim)
SETTINGS index_granularity = 8192;
</create_query>
<fill_query>insert into test_bm2 SELECT 1,number FROM numbers(0, 1000)</fill_query>
<fill_query>insert into test_bm_join2 SELECT 1, bitmapBuild(range(toUInt64(0),toUInt64(11000000)))</fill_query>
<query>select a.dim,bitmapCardinality(b.ids) from test_bm2 a left join test_bm_join2 b using(dim)</query>
<drop_query>drop table if exists test_bm2</drop_query>
<drop_query>drop table if exists test_bm_join2</drop_query>
</test>

View File

@ -19,7 +19,7 @@
toInt256(number) as d,
toString(number) as f,
toFixedString(f, 20) as g
FROM numbers_mt(20000000)
SETTINGS max_threads = 8
FORMAT Null
</query>
@ -38,7 +38,7 @@
toInt256(number) as d,
toString(number) as f,
toFixedString(f, 20) as g
FROM numbers_mt(20000000)
SETTINGS max_threads = 8
FORMAT Null
</query>
@ -76,7 +76,7 @@
toInt256(number) as d,
toString(number) as f,
toFixedString(f, 20) as g
FROM numbers_mt(20000000)
SETTINGS max_threads = 8
FORMAT Null
</query>
@ -95,7 +95,7 @@
toInt256(number) as d,
toString(number) as f,
toFixedString(f, 20) as g
FROM numbers_mt(10000000)
SETTINGS max_threads = 8
FORMAT Null
</query>
@ -115,7 +115,7 @@
toInt256(number) as d,
toString(number) as f,
toFixedString(f, 20) as g
FROM numbers_mt(20000000)
SETTINGS max_threads = 8
FORMAT Null
</query>
@ -134,7 +134,7 @@
toInt256(number) as d,
toString(number) as f,
toFixedString(f, 20) as g
FROM numbers_mt(20000000)
SETTINGS max_threads = 8
FORMAT Null
</query>
@ -153,7 +153,7 @@
toInt256(number) as d,
toString(number) as f,
toFixedString(f, 20) as g
FROM numbers_mt(20000000)
SETTINGS max_threads = 8
FORMAT Null
</query>
@ -172,7 +172,7 @@
toInt256(number) as d,
toString(number) as f,
toFixedString(f, 20) as g
FROM numbers_mt(20000000)
SETTINGS max_threads = 8
FORMAT Null
</query>
@ -191,7 +191,7 @@
toInt256(number) as d,
toString(number) as f,
toFixedString(f, 20) as g
FROM numbers_mt(20000000)
SETTINGS max_threads = 8
FORMAT Null
</query>
@ -210,7 +210,7 @@
toInt256(number) as d,
toString(number) as f,
toFixedString(f, 20) as g
FROM numbers_mt(10000000)
SETTINGS max_threads = 8
FORMAT Null
</query>
@ -230,7 +230,7 @@
toInt256(number) as d,
toString(number) as f,
toFixedString(f, 20) as g
FROM numbers_mt(2000000)
SETTINGS max_threads = 8
FORMAT Null
</query>
@ -249,7 +249,7 @@
toInt256(number) as d,
toString(number) as f,
toFixedString(f, 20) as g
FROM numbers_mt(20000000)
SETTINGS max_threads = 8
FORMAT Null
</query>

View File

@ -57,7 +57,10 @@ q2_01297 local directory [] [5259492] 0 ['r1_01297','u1_01297'] []
q3_01297 local directory ['client_key','user_name'] [5259492,15778476] 0 [] []
q4_01297 local directory [] [604800] 1 [] ['u1_01297']
-- system.quota_limits
q2_01297 5259492 0 100 \N \N 11 1000 10000 1001 10001 2.5
q3_01297 5259492 0 \N \N \N \N 1002 \N \N \N \N
q3_01297 15778476 0 100 \N \N 11 \N \N \N \N \N
q4_01297 604800 0 \N \N \N \N \N \N \N \N \N
-- query_selects query_inserts
CREATE QUOTA q1_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_selects = 1 TO r1_01297
CREATE QUOTA q2_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_inserts = 1 TO r1_01297

View File

@ -125,5 +125,13 @@ SELECT '-- system.quota_limits';
SELECT * FROM system.quota_limits WHERE quota_name LIKE 'q%\_01297' ORDER BY quota_name, duration;
DROP QUOTA q1_01297, q2_01297, q3_01297, q4_01297;
SELECT '-- query_selects query_inserts';
CREATE QUOTA q1_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_selects = 1 TO r1_01297;
CREATE QUOTA q2_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_inserts = 1 TO r1_01297;
SHOW CREATE QUOTA q1_01297;
SHOW CREATE QUOTA q2_01297;
DROP QUOTA q1_01297, q2_01297;
DROP ROLE r1_01297;
DROP USER u1_01297;

View File

@ -0,0 +1,120 @@
0
0
1
1
2
2
3
3
4
4
5
5
6
6
7
7
8
8
9
9
0
0
1
1
2
2
3
3
4
4
5
5
6
6
7
7
8
8
9
9
0
0
1
1
2
2
3
3
4
4
5
5
6
6
7
7
8
8
9
9
10
10
11
11
12
12
13
13
14
14
15
15
16
16
17
17
18
18
19
19
0
0
1
1
2
2
3
3
4
4
5
5
6
6
7
7
8
8
9
9
10
10
11
11
12
12
13
13
14
14
15
15
16
16
17
17
18
18
19
19

View File

@ -0,0 +1,37 @@
DROP TABLE IF EXISTS x;
DROP TABLE IF EXISTS x_dist;
DROP TABLE IF EXISTS y;
DROP TABLE IF EXISTS y_dist;
CREATE TABLE x AS system.numbers ENGINE = MergeTree ORDER BY number;
CREATE TABLE y AS system.numbers ENGINE = MergeTree ORDER BY number;
CREATE TABLE x_dist as x ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), x);
CREATE TABLE y_dist as y ENGINE = Distributed('test_cluster_two_shards_localhost', currentDatabase(), y);
-- insert into first shard
INSERT INTO x_dist SELECT * FROM numbers(10) settings insert_shard_id = 1;
INSERT INTO y_dist SELECT * FROM numbers(10) settings insert_shard_id = 1;
SELECT * FROM x_dist ORDER by number;
SELECT * FROM y_dist ORDER by number;
-- insert into second shard
INSERT INTO x_dist SELECT * FROM numbers(10, 10) settings insert_shard_id = 2;
INSERT INTO y_dist SELECT * FROM numbers(10, 10) settings insert_shard_id = 2;
SELECT * FROM x_dist ORDER by number;
SELECT * FROM y_dist ORDER by number;
-- no sharding key
INSERT INTO x_dist SELECT * FROM numbers(10); -- { serverError 55 }
INSERT INTO y_dist SELECT * FROM numbers(10); -- { serverError 55 }
-- invalid shard id
INSERT INTO x_dist SELECT * FROM numbers(10) settings insert_shard_id = 3; -- { serverError 1003 }
INSERT INTO y_dist SELECT * FROM numbers(10) settings insert_shard_id = 3; -- { serverError 1003 }
DROP TABLE x;
DROP TABLE x_dist;
DROP TABLE y;
DROP TABLE y_dist;

View File

@ -0,0 +1,23 @@
DROP TABLE IF EXISTS table_with_single_pk;
CREATE TABLE table_with_single_pk
(
key UInt8,
value String
)
ENGINE = MergeTree
ORDER BY key;
INSERT INTO table_with_single_pk SELECT number, toString(number % 10) FROM numbers(10000000);
SYSTEM FLUSH LOGS;
WITH (
SELECT (event_time, event_time_microseconds)
FROM system.part_log
ORDER BY event_time DESC
LIMIT 1
) AS time
SELECT if(dateDiff('second', toDateTime(time.2), toDateTime(time.1)) = 0, 'ok', 'fail');
DROP TABLE IF EXISTS table_with_single_pk;
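
As a quick aside, the new column can also be inspected directly; a minimal sketch (the output naturally depends on the server's recent merge and insert activity):

``` sql
SELECT event_type, event_time, event_time_microseconds
FROM system.part_log
ORDER BY event_time DESC
LIMIT 5;
```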

View File

@ -0,0 +1,7 @@
clickhouse
task_queue
clickhouse
task_queue
clickhouse
task_queue
ddl

View File

@ -0,0 +1,6 @@
SELECT name FROM system.zookeeper WHERE path = '/';
SELECT name FROM system.zookeeper WHERE path = 'clickhouse';
SELECT name FROM system.zookeeper WHERE path IN ('/');
SELECT name FROM system.zookeeper WHERE path IN ('clickhouse');
SELECT name FROM system.zookeeper WHERE path IN ('/','/clickhouse');
SELECT name FROM system.zookeeper WHERE path IN (SELECT concat('/clickhouse/',name) FROM system.zookeeper WHERE (path = '/clickhouse/'));

View File

@ -202,3 +202,4 @@
01674_executable_dictionary_implicit_key
01686_rocksdb
01683_dist_INSERT_block_structure_mismatch
01686_event_time_microseconds_part_log

View File

@ -508,6 +508,7 @@
"01294_lazy_database_concurrent_recreate_reattach_and_show_tables",
"01294_system_distributed_on_cluster",
"01296_create_row_policy_in_current_database",
"01297_create_quota",
"01305_replica_create_drop_zookeeper",
"01307_multiple_leaders_zookeeper",
"01318_long_unsuccessful_mutation_zookeeper",

View File

@ -0,0 +1,8 @@
#!/usr/bin/env bash
dir=$(dirname "$0")
"$dir"/check-style -n
"$dir"/check-typos
"$dir"/check-whitespaces -n
"$dir"/check-duplicate-includes.sh
"$dir"/shellcheck-run.sh