Merge branch 'master' of https://github.com/ClickHouse/ClickHouse into master

This commit is contained in:
George 2021-02-11 17:04:49 +03:00
commit 32b8bd7428
254 changed files with 5068 additions and 1373 deletions

View File

@ -1,32 +0,0 @@
# See the example here: https://github.com/github/codeql-action
name: "CodeQL Scanning"
on:
schedule:
- cron: '0 19 * * *'
jobs:
CodeQL-Build:
runs-on: self-hosted
timeout-minutes: 1440
steps:
- name: Checkout repository
uses: actions/checkout@v2
with:
fetch-depth: 2
submodules: 'recursive'
- name: Initialize CodeQL
uses: github/codeql-action/init@v1
with:
languages: cpp
- run: sudo apt-get update && sudo apt-get install -y git cmake python ninja-build gcc-10 g++-10 && mkdir build
- run: cd build && CC=gcc-10 CXX=g++-10 cmake ..
- run: cd build && ninja
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v1

View File

@ -11,7 +11,6 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-ly9m4w1x-6j7x5Ts_pQZqrctAbRZ3cg) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time.
* [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation.
* [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian.
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.

View File

@ -986,7 +986,7 @@ void BaseDaemon::setupWatchdog()
if (errno == ECHILD)
{
logger().information("Child process no longer exists.");
_exit(status);
_exit(WEXITSTATUS(status));
}
if (WIFEXITED(status))
@ -1020,7 +1020,7 @@ void BaseDaemon::setupWatchdog()
/// Automatic restart is not enabled but you can play with it.
#if 1
_exit(status);
_exit(WEXITSTATUS(status));
#else
logger().information("Will restart.");
if (argv0)

View File

@ -120,7 +120,7 @@ function clone_root
git checkout FETCH_HEAD
echo 'Clonned merge head'
else
git fetch
git fetch origin "+refs/pull/$PULL_REQUEST_NUMBER/head"
git checkout "$COMMIT_SHA"
echo 'Checked out to commit'
fi

View File

@ -190,7 +190,7 @@ case "$stage" in
# Lost connection to the server. This probably means that the server died
# with abort.
echo "failure" > status.txt
if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*" server.log > description.txt
if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log > description.txt
then
echo "Lost connection to server. See the logs." > description.txt
fi

View File

@ -44,6 +44,7 @@ parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated l
parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.')
parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.')
parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.')
parser.add_argument('--max-query-seconds', type=int, default=10, help='For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.')
parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.')
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
@ -323,7 +324,7 @@ for query_index in queries_to_run:
server_seconds += elapsed
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}')
if elapsed > 10:
if elapsed > args.max_query_seconds:
# Stop processing pathologically slow queries, to avoid timing out
# the entire test task. This shouldn't really happen, so we don't
# need much handling for this case and can just exit.

View File

@ -5,7 +5,10 @@ RUN apt-get update -y && \
apt-get install -y --no-install-recommends \
python3-pip \
python3-setuptools \
python3-wheel
python3-wheel \
brotli \
netcat-openbsd \
zstd
RUN python3 -m pip install \
wheel \
@ -15,7 +18,10 @@ RUN python3 -m pip install \
pytest-randomly \
pytest-rerunfailures \
pytest-timeout \
pytest-xdist
pytest-xdist \
pandas \
numpy \
scipy
CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb; \

View File

@ -93,6 +93,7 @@ ClickHouse has only one physical order, which is determined by `ORDER BY` clause
- Cascade `UPDATE/DELETE` queries are not supported by the `MaterializeMySQL` engine.
- Replication can be easily broken.
- Manual operations on database and tables are forbidden.
- `MaterializeMySQL` is influenced by [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged in the corresponding table in the `MaterializeMySQL` database when a table in the MySQL server changes.
## Examples of Use {#examples-of-use}
@ -156,4 +157,4 @@ SELECT * FROM mysql.test;
└───┴─────┴──────┘
```
[Original article](https://clickhouse.tech/docs/en/database_engines/materialize-mysql/) <!--hide-->
[Original article](https://clickhouse.tech/docs/en/engines/database-engines/materialize-mysql/) <!--hide-->

View File

@ -12,6 +12,9 @@ List of supported integrations:
- [ODBC](../../../engines/table-engines/integrations/odbc.md)
- [JDBC](../../../engines/table-engines/integrations/jdbc.md)
- [MySQL](../../../engines/table-engines/integrations/mysql.md)
- [MongoDB](../../../engines/table-engines/integrations/mongodb.md)
- [HDFS](../../../engines/table-engines/integrations/hdfs.md)
- [S3](../../../engines/table-engines/integrations/s3.md)
- [Kafka](../../../engines/table-engines/integrations/kafka.md)
- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md)
- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)

View File

@ -0,0 +1,57 @@
---
toc_priority: 7
toc_title: MongoDB
---
# MongoDB {#mongodb}
The MongoDB engine is a read-only table engine which allows reading data (`SELECT` queries) from a remote MongoDB collection. The engine supports only non-nested data types. `INSERT` queries are not supported.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name
(
name1 [type1],
name2 [type2],
...
) ENGINE = MongoDB(host:port, database, collection, user, password);
```
**Engine Parameters**
- `host:port` — MongoDB server address.
- `database` — Remote database name.
- `collection` — Remote collection name.
- `user` — MongoDB user.
- `password` — User password.
## Usage Example {#usage-example}
A table in ClickHouse for reading data from the MongoDB collection:
``` text
CREATE TABLE mongo_table
(
key UInt64,
data String
) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse');
```
Query:
``` sql
SELECT COUNT() FROM mongo_table;
```
``` text
┌─count()─┐
│ 4 │
└─────────┘
```
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/integrations/mongodb/) <!--hide-->

View File

@ -104,7 +104,8 @@ For a description of parameters, see the [CREATE query description](../../../sql
- `max_parts_in_total` — Maximum number of parts in all partitions.
- `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. You can also specify this setting in the global settings (see [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size) setting). The value specified when table is created overrides the global value for this setting.
- `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. You can also specify this setting in the global settings (see [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size) setting). The value specified when table is created overrides the global value for this setting.
- `max_partitions_to_read` — Limits the maximum number of partitions that can be accessed in one query. You can also specify setting [max_partitions_to_read](../../../operations/settings/merge-tree-settings.md#max-partitions-to-read) in the global setting.
**Example of Sections Setting**
``` sql

View File

@ -81,6 +81,7 @@ toc_title: Adopters
| <a href="https://posthog.com/" class="favicon">PostHog</a> | Product Analytics | Main Product | — | — | [Release Notes, Oct 2020](https://posthog.com/blog/the-posthog-array-1-15-0) |
| <a href="https://postmates.com/" class="favicon">Postmates</a> | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) |
| <a href="http://www.pragma-innovation.fr/" class="favicon">Pragma Innovation</a> | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) |
| <a href="https://prana-system.com/en/" class="favicon">PRANA</a> | Industrial predictive analytics | Main product | — | — | [News (russian), Feb 2021](https://habr.com/en/news/t/541392/) |
| <a href="https://www.qingcloud.com/" class="favicon">QINGCLOUD</a> | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
| <a href="https://qrator.net" class="favicon">Qrator</a> | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
| <a href="https://www.rbinternational.com/" class="favicon">Raiffeisenbank</a> | Banking | Analytics | — | — | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html) |

View File

@ -29,6 +29,8 @@ Lets look at the section of the users.xml file that defines quotas.
<!-- Unlimited. Just collect data for the specified time interval. -->
<queries>0</queries>
<query_selects>0</query_selects>
<query_inserts>0</query_inserts>
<errors>0</errors>
<result_rows>0</result_rows>
<read_rows>0</read_rows>
@ -48,6 +50,8 @@ The resource consumption calculated for each interval is output to the server lo
<duration>3600</duration>
<queries>1000</queries>
<query_selects>100</query_selects>
<query_inserts>100</query_inserts>
<errors>100</errors>
<result_rows>1000000000</result_rows>
<read_rows>100000000000</read_rows>
@ -58,6 +62,8 @@ The resource consumption calculated for each interval is output to the server lo
<duration>86400</duration>
<queries>10000</queries>
<query_selects>10000</query_selects>
<query_inserts>10000</query_inserts>
<errors>1000</errors>
<result_rows>5000000000</result_rows>
<read_rows>500000000000</read_rows>
@ -74,6 +80,10 @@ Here are the amounts that can be restricted:
`queries` – The total number of requests.
`query_selects` – The total number of select requests.
`query_inserts` – The total number of insert requests.
`errors` – The number of queries that threw an exception.
`result_rows` – The total number of rows given as a result.

View File

@ -186,5 +186,16 @@ Possible values:
Default value: auto (number of CPU cores).
During startup ClickHouse reads all parts of all tables (reads files with metadata of parts) to build a list of all parts in memory. In some systems with a large number of parts this process can take a long time, and this time might be shortened by increasing `max_part_loading_threads` (if this process is not CPU and disk I/O bound).
## max_partitions_to_read {#max-partitions-to-read}
Limits the maximum number of partitions that can be accessed in one query.
The setting value specified when the table is created can be overridden via query-level setting.
Possible values:
- Any positive integer.
Default value: -1 (unlimited).
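A quick way to see the effect is to override the table-level value for a single query via the `SETTINGS` clause; the table and column names below are hypothetical:
``` sql
-- Read at most 2 partitions in this query, regardless of the value set on the table.
SELECT count()
FROM hits
WHERE EventDate >= '2021-01-01'
SETTINGS max_partitions_to_read = 2;
```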
[Original article](https://clickhouse.tech/docs/en/operations/settings/merge_tree_settings/) <!--hide-->

View File

@ -2592,4 +2592,58 @@ Possible values:
Default value: `16`.
## optimize_on_insert {#optimize-on-insert}
Enables or disables data transformation before the insertion, as if merge was done on this block (according to table engine).
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: 1.
**Example**
The difference between enabled and disabled:
Query:
```sql
SET optimize_on_insert = 1;
CREATE TABLE test1 (`FirstTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY FirstTable;
INSERT INTO test1 SELECT number % 2 FROM numbers(5);
SELECT * FROM test1;
SET optimize_on_insert = 0;
CREATE TABLE test2 (`SecondTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY SecondTable;
INSERT INTO test2 SELECT number % 2 FROM numbers(5);
SELECT * FROM test2;
```
Result:
``` text
┌─FirstTable─┐
│ 0 │
│ 1 │
└────────────┘
┌─SecondTable─┐
│ 0 │
│ 0 │
│ 0 │
│ 1 │
│ 1 │
└─────────────┘
```
Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) behaviour.
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->

View File

@ -6,29 +6,65 @@ This table contains information about events that occurred with [data parts](../
The `system.part_log` table contains the following columns:
- `event_type` (Enum) — Type of the event that occurred with the data part. Can have one of the following values:
- `query_id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the `INSERT` query that created this data part.
- `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event that occurred with the data part. Can have one of the following values:
- `NEW_PART` — Inserting of a new data part.
- `MERGE_PARTS` — Merging of data parts.
- `DOWNLOAD_PART` — Downloading a data part.
- `REMOVE_PART` — Removing or detaching a data part using [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition).
- `MUTATE_PART` — Mutating of a data part.
- `MOVE_PART` — Moving the data part from the one disk to another one.
- `event_date` (Date) — Event date.
- `event_time` (DateTime) — Event time.
- `duration_ms` (UInt64) — Duration.
- `database` (String) — Name of the database the data part is in.
- `table` (String) — Name of the table the data part is in.
- `part_name` (String) — Name of the data part.
- `partition_id` (String) — ID of the partition that the data part was inserted to. The column takes the all value if the partitioning is by `tuple()`.
- `rows` (UInt64) — The number of rows in the data part.
- `size_in_bytes` (UInt64) — Size of the data part in bytes.
- `merged_from` (Array(String)) — An array of names of the parts which the current part was made up from (after the merge).
- `bytes_uncompressed` (UInt64) — Size of uncompressed bytes.
- `read_rows` (UInt64) — The number of rows was read during the merge.
- `read_bytes` (UInt64) — The number of bytes was read during the merge.
- `error` (UInt16) — The code number of the occurred error.
- `exception` (String) — Text message of the occurred error.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds precision.
- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration.
- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database the data part is in.
- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table the data part is in.
- `part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part.
- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition that the data part was inserted to. The column takes the `all` value if the partitioning is by `tuple()`.
- `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files.
- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows in the data part.
- `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of the data part in bytes.
- `merged_from` ([Array(String)](../../sql-reference/data-types/array.md)) — An array of names of the parts which the current part was made up from (after the merge).
- `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of uncompressed bytes.
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows read during the merge.
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes read during the merge.
- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread.
- `error` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The code number of the occurred error.
- `exception` ([String](../../sql-reference/data-types/string.md)) — Text message of the occurred error.
The `system.part_log` table is created after the first insertion of data into a `MergeTree` table.
**Example**
``` sql
SELECT * FROM system.part_log LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
query_id: 983ad9c7-28d5-4ae1-844e-603116b7de31
event_type: NewPart
event_date: 2021-02-02
event_time: 2021-02-02 11:14:28
event_time_microseconds: 2021-02-02 11:14:28.861919
duration_ms: 35
database: default
table: log_mt_2
part_name: all_1_1_0
partition_id: all
path_on_disk: db/data/default/log_mt_2/all_1_1_0/
rows: 115418
size_in_bytes: 1074311
merged_from: []
bytes_uncompressed: 0
read_rows: 0
read_bytes: 0
peak_memory_usage: 0
error: 0
exception:
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/part_log) <!--hide-->

View File

@ -9,6 +9,8 @@ Columns:
- `0` — Interval is not randomized.
- `1` — Interval is randomized.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of queries.
- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of select queries.
- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of insert queries.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of result rows.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum amount of RAM in bytes used to store a query result.
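As an illustrative check (assuming this is the `system.quota_limits` table), the new per-interval limits can be inspected alongside the existing ones:
``` sql
-- List configured limits, including the newly added select/insert counters.
SELECT quota_name, duration, max_queries, max_query_selects, max_query_inserts, max_errors
FROM system.quota_limits;
```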

View File

@ -9,6 +9,8 @@ Columns:
- `end_time`([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — End time for calculating resource consumption.
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Length of the time interval for calculating resource consumption, in seconds.
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of requests on this interval.
- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of select requests on this interval.
- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of insert requests on this interval.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of requests.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The number of queries that threw an exception.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors.

View File

@ -11,6 +11,10 @@ Columns:
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — Length of the time interval for calculating resource consumption, in seconds.
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of requests in this interval.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of requests.
- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of select requests in this interval.
- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of select requests.
- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of insert requests in this interval.
- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of insert requests.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The number of queries that threw an exception.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors.
- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of rows given as a result.

View File

@ -1,12 +1,16 @@
# system.zookeeper {#system-zookeeper}
The table does not exist if ZooKeeper is not configured. Allows reading data from the ZooKeeper cluster defined in the config.
The query must have a path equality condition in the WHERE clause. This is the path in ZooKeeper for the children that you want to get data for.
The query must have either a `path =` condition or a `path IN` condition in the `WHERE` clause, as shown below. This corresponds to the path in ZooKeeper of the children that you want to get data for.
The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs data for all children on the `/clickhouse` node.
To output data for all root nodes, write path = '/'.
If the path specified in 'path' doesn't exist, an exception will be thrown.
The query `SELECT * FROM system.zookeeper WHERE path IN ('/', '/clickhouse')` outputs data for all children on the `/` and `/clickhouse` node.
If any path in the specified collection doesn't exist, an exception will be thrown.
It can be used to do a batch of ZooKeeper path queries.
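A minimal sketch of such a batch query (the paths are illustrative and must exist in your ZooKeeper):
``` sql
-- Fetch children of several ZooKeeper paths in a single query.
SELECT name, path, value
FROM system.zookeeper
WHERE path IN ('/clickhouse', '/clickhouse/tables');
```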
Columns:
- `name` (String) — The name of the node.

View File

@ -4,13 +4,42 @@ toc_priority: 106
# argMax {#agg-function-argmax}
Syntax: `argMax(arg, val)` or `argMax(tuple(arg, val))`
Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, returns the first of these values encountered.
Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, the first of these values encountered is output.
Tuple version of this function will return the tuple with the maximum `val` value. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md).
Tuple version of this function will return the tuple with the maximum `val` value. It is convinient for use with `SimpleAggregateFunction`.
**Syntax**
**Example:**
``` sql
argMax(arg, val)
```
or
``` sql
argMax(tuple(arg, val))
```
**Parameters**
- `arg` — Argument.
- `val` — Value.
**Returned value**
- `arg` value that corresponds to maximum `val` value.
Type: matches `arg` type.
For tuple in the input:
- Tuple `(arg, val)`, where `val` is the maximum value and `arg` is a corresponding value.
Type: [Tuple](../../../sql-reference/data-types/tuple.md).
**Example**
Input table:
``` text
┌─user─────┬─salary─┐
@ -20,12 +49,18 @@ Tuple version of this function will return the tuple with the maximum `val` valu
└──────────┴────────┘
```
Query:
``` sql
SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary
SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary;
```
Result:
``` text
┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐
│ director │ ('director',5000) │
└──────────────────────┴─────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmax/) <!--hide-->

View File

@ -4,13 +4,42 @@ toc_priority: 105
# argMin {#agg-function-argmin}
Syntax: `argMin(arg, val)` or `argMin(tuple(arg, val))`
Calculates the `arg` value for a minimum `val` value. If there are several different values of `arg` for minimum values of `val`, returns the first of these values encountered.
Calculates the `arg` value for a minimal `val` value. If there are several different values of `arg` for minimal values of `val`, the first of these values encountered is output.
Tuple version of this function will return the tuple with the minimum `val` value. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md).
Tuple version of this function will return the tuple with the minimal `val` value. It is convinient for use with `SimpleAggregateFunction`.
**Syntax**
**Example:**
``` sql
argMin(arg, val)
```
or
``` sql
argMin(tuple(arg, val))
```
**Parameters**
- `arg` — Argument.
- `val` — Value.
**Returned value**
- `arg` value that corresponds to minimum `val` value.
Type: matches `arg` type.
For tuple in the input:
- Tuple `(arg, val)`, where `val` is the minimum value and `arg` is a corresponding value.
Type: [Tuple](../../../sql-reference/data-types/tuple.md).
**Example**
Input table:
``` text
┌─user─────┬─salary─┐
@ -20,12 +49,18 @@ Tuple version of this function will return the tuple with the minimal `val` valu
└──────────┴────────┘
```
Query:
``` sql
SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary
SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary;
```
Result:
``` text
┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─┐
│ worker │ ('worker',1000) │
└──────────────────────┴─────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmin/) <!--hide-->

View File

@ -0,0 +1,19 @@
---
toc_priority: 141
---
# deltaSum {#agg_functions-deltasum}
Syntax: `deltaSum(value)`
Adds the differences between consecutive rows. If the difference is negative, it is ignored.
`value` must be some integer or floating point type.
Example:
```sql
select deltaSum(arrayJoin([1, 2, 3])); -- => 2
select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3])); -- => 7
select deltaSum(arrayJoin([2.25, 3, 4.5])); -- => 2.25
```

View File

@ -0,0 +1,71 @@
---
toc_priority: 310
toc_title: mannWhitneyUTest
---
# mannWhitneyUTest {#mannwhitneyutest}
Applies the Mann-Whitney rank test to samples from two populations.
**Syntax**
``` sql
mannWhitneyUTest[(alternative[, continuity_correction])](sample_data, sample_index)
```
Values of both samples are in the `sample_data` column. If `sample_index` equals 0, then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population.
The null hypothesis is that the two populations are stochastically equal. One-sided hypotheses can also be tested. This test does not assume that the data have a normal distribution.
**Parameters**
- `alternative` — alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md).
- `'two-sided'`;
- `'greater'`;
- `'less'`.
- `continuity_correction` - if not 0 then continuity correction in the normal approximation for the p-value is applied. (Optional, default: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md).
- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
**Returned values**
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
- calculated U-statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
**Example**
Input table:
``` text
┌─sample_data─┬─sample_index─┐
│ 10 │ 0 │
│ 11 │ 0 │
│ 12 │ 0 │
│ 1 │ 1 │
│ 2 │ 1 │
│ 3 │ 1 │
└─────────────┴──────────────┘
```
Query:
``` sql
SELECT mannWhitneyUTest('greater')(sample_data, sample_index) FROM mww_ttest;
```
Result:
``` text
┌─mannWhitneyUTest('greater')(sample_data, sample_index)─┐
│ (9,0.04042779918503192) │
└────────────────────────────────────────────────────────┘
```
**See Also**
- [MannWhitney U test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test)
- [Stochastic ordering](https://en.wikipedia.org/wiki/Stochastic_ordering)
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest/) <!--hide-->

View File

@ -0,0 +1,65 @@
---
toc_priority: 300
toc_title: studentTTest
---
# studentTTest {#studentttest}
Applies Student's t-test to samples from two populations.
**Syntax**
``` sql
studentTTest(sample_data, sample_index)
```
Values of both samples are in the `sample_data` column. If `sample_index` equals 0, then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population.
The null hypothesis is that the means of the populations are equal. A normal distribution with equal variances is assumed.
**Parameters**
- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
**Returned values**
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
**Example**
Input table:
``` text
┌─sample_data─┬─sample_index─┐
│ 20.3 │ 0 │
│ 21.1 │ 0 │
│ 21.9 │ 1 │
│ 21.7 │ 0 │
│ 19.9 │ 1 │
│ 21.8 │ 1 │
└─────────────┴──────────────┘
```
Query:
``` sql
SELECT studentTTest(sample_data, sample_index) FROM student_ttest;
```
Result:
``` text
┌─studentTTest(sample_data, sample_index)───┐
│ (-0.21739130434783777,0.8385421208415731) │
└───────────────────────────────────────────┘
```
**See Also**
- [Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test)
- [welchTTest function](welchttest.md#welchttest)
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/studentttest/) <!--hide-->

View File

@ -0,0 +1,65 @@
---
toc_priority: 301
toc_title: welchTTest
---
# welchTTest {#welchttest}
Applies Welch's t-test to samples from two populations.
**Syntax**
``` sql
welchTTest(sample_data, sample_index)
```
Values of both samples are in the `sample_data` column. If `sample_index` equals 0, then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population.
The null hypothesis is that the means of the populations are equal. A normal distribution is assumed. The populations may have unequal variance.
**Parameters**
- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
**Returned values**
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
**Example**
Input table:
``` text
┌─sample_data─┬─sample_index─┐
│ 20.3 │ 0 │
│ 22.1 │ 0 │
│ 21.9 │ 0 │
│ 18.9 │ 1 │
│ 20.3 │ 1 │
│ 19 │ 1 │
└─────────────┴──────────────┘
```
Query:
``` sql
SELECT welchTTest(sample_data, sample_index) FROM welch_ttest;
```
Result:
``` text
┌─welchTTest(sample_data, sample_index)─────┐
│ (2.7988719532211235,0.051807360348581945) │
└───────────────────────────────────────────┘
```
**See Also**
- [Welch's t-test](https://en.wikipedia.org/wiki/Welch%27s_t-test)
- [studentTTest function](studentttest.md#studentttest)
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/welchTTest/) <!--hide-->

View File

@ -208,8 +208,8 @@ This function returns the value for the specified `id`s and the date range that
Details of the algorithm:
- If the `id` is not found or a range is not found for the `id`, it returns the default value for the dictionary.
- If there are overlapping ranges, you can use any.
- If the range delimiter is `NULL` or an invalid date (such as 1900-01-01 or 2039-01-01), the range is left open. The range can be open on both sides.
- If there are overlapping ranges, it returns the value for any (random) range.
- If the range delimiter is `NULL` or an invalid date (such as 1900-01-01), the range is open. The range can be open on both sides.
Configuration example:

View File

@ -5,7 +5,7 @@ toc_title: QUOTA
# ALTER QUOTA {#alter-quota-statement}
Changes [quotas](../../../operations/access-rights.md#quotas-management).
Changes quotas.
Syntax:
@ -14,13 +14,13 @@ ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
[RENAME TO new_name]
[KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
[FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
{MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
{MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
NO LIMITS | TRACKING ONLY} [,...]]
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
```
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
Parameters `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
`ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
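For instance, the new counters can be set like any other limit; the quota name and the numbers below are hypothetical:
``` sql
-- Limit an existing quota to 100 SELECT and 50 INSERT queries per hour.
ALTER QUOTA IF EXISTS qA
    FOR INTERVAL 1 hour MAX query_selects = 100, query_inserts = 50
    TO default;
```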

View File

@ -13,14 +13,14 @@ Syntax:
CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
[KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
[FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
{MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
{MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
NO LIMITS | TRACKING ONLY} [,...]]
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
```
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
Parameters `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
`ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
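A hedged example of creating a quota with the new parameters (the name, intervals, and values are illustrative):
``` sql
-- Only track usage for 30-minute intervals, and cap SELECT/INSERT counts per 5-quarter interval.
CREATE QUOTA IF NOT EXISTS qB
    FOR INTERVAL 30 minute TRACKING ONLY,
    FOR INTERVAL 5 quarter MAX query_selects = 321, query_inserts = 123
    TO default;
```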

View File

@ -59,6 +59,10 @@ A `SELECT` query can contain `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`… Note
The execution of [ALTER](../../../sql-reference/statements/alter/index.md) queries on materialized views has limitations, so they might be inconvenient. If the materialized view uses the construction `TO [db.]name`, you can `DETACH` the view, run `ALTER` for the target table, and then `ATTACH` the previously detached (`DETACH`) view.
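A rough sketch of that workflow, assuming a materialized view `mv` created with `TO dst` (both names are hypothetical):
``` sql
-- Detach the materialized view, alter its target table, then re-attach the view.
DETACH TABLE mv;
ALTER TABLE dst ADD COLUMN extra UInt32 DEFAULT 0;
ATTACH TABLE mv;
```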
Note that a materialized view is influenced by the [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged before the insertion into the view.
Views look the same as normal tables. For example, they are listed in the result of the `SHOW TABLES` query.
There isn't a separate query for deleting views. To delete a view, use [DROP TABLE](../../../sql-reference/statements/drop.md).
[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/create/view/) <!--hide-->

View File

@ -62,8 +62,6 @@ If a list of columns doesn't include all existing columns, the rest of the colum
- The values calculated from the `DEFAULT` expressions specified in the table definition.
- Zeros and empty strings, if `DEFAULT` expressions are not defined.
If [strict\_insert\_defaults=1](../../operations/settings/settings.md), columns that do not have `DEFAULT` defined must be listed in the query.
Data can be passed to the INSERT in any [format](../../interfaces/formats.md#formats) supported by ClickHouse. The format must be specified explicitly in the query:
``` sql

View File

@ -93,6 +93,7 @@ DDL-запросы в MySQL конвертируются в соответств
- Cascading `UPDATE/DELETE` queries are not supported by the `MaterializeMySQL` engine.
- Replication can be easily broken.
- Direct data-modification operations on `MaterializeMySQL` databases and tables are forbidden.
- `MaterializeMySQL` is affected by the [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert) setting. When a table on the MySQL server changes, the data is merged into the corresponding table in the `MaterializeMySQL` database.
## Examples of Use {#examples-of-use}
@ -156,4 +157,4 @@ SELECT * FROM mysql.test;
└───┴─────┴──────┘
```
[Original article](https://clickhouse.tech/docs/ru/database_engines/materialize-mysql/) <!--hide-->
[Original article](https://clickhouse.tech/docs/ru/engines/database-engines/materialize-mysql/) <!--hide-->

View File

@ -12,7 +12,10 @@ toc_priority: 30
- [ODBC](../../../engines/table-engines/integrations/odbc.md)
- [JDBC](../../../engines/table-engines/integrations/jdbc.md)
- [MySQL](../../../engines/table-engines/integrations/mysql.md)
- [MongoDB](../../../engines/table-engines/integrations/mongodb.md)
- [HDFS](../../../engines/table-engines/integrations/hdfs.md)
- [Kafka](../../../engines/table-engines/integrations/kafka.md)
- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md)
- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)
[Original article](https://clickhouse.tech/docs/ru/engines/table-engines/integrations/) <!--hide-->

View File

@ -0,0 +1,57 @@
---
toc_priority: 7
toc_title: MongoDB
---
# MongoDB {#mongodb}
The MongoDB table engine allows reading data from collections of the MongoDB DBMS. Only flat (non-nested) data types are allowed in the tables. Writing (`INSERT` queries) is not supported.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name
(
name1 [type1],
name2 [type2],
...
) ENGINE = MongoDB(host:port, database, collection, user, password);
```
**Engine Parameters**
- `host:port` — MongoDB server address.
- `database` — Name of the database on the remote server.
- `collection` — Name of the collection on the remote server.
- `user` — MongoDB user.
- `password` — User password.
## Usage Example {#usage-example}
A table in ClickHouse for reading data from a MongoDB collection:
``` text
CREATE TABLE mongo_table
(
key UInt64,
data String
) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse');
```
Query to the table:
``` sql
SELECT COUNT() FROM mongo_table;
```
``` text
┌─count()─┐
│ 4 │
└─────────┘
```
[Original article](https://clickhouse.tech/docs/ru/operations/table_engines/integrations/mongodb/) <!--hide-->

View File

@ -94,6 +94,7 @@ ORDER BY expr
- `max_parts_in_total` — Maximum number of parts in all partitions.
- `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. You can also set this parameter in the global settings (see [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size)). The value specified when the table is created overrides the global value for this setting.
- `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. You can also set this parameter in the global settings (see [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size)). The value specified when the table is created overrides the global value for this setting.
- `max_partitions_to_read` — Limits the maximum number of partitions that can be read in one query. The [max_partitions_to_read](../../../operations/settings/merge-tree-settings.md#max-partitions-to-read) setting can also be specified globally.
**Example of Sections Setting**
@ -711,4 +712,4 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
After background merges or mutations are performed, old parts are not deleted immediately, but only after some time (the `old_parts_lifetime` table setting). They are also not moved to other volumes or disks, so until they are deleted they are still counted when calculating the occupied disk space.
[Original article](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/) <!--hide-->
[Original article](https://clickhouse.tech/docs/ru/engines/table-engines/mergetree-family/mergetree/) <!--hide-->

View File

@ -181,4 +181,16 @@ Eсли суммарное число активных кусков во все
At startup, ClickHouse reads all parts of all tables (it reads the files with part metadata) to build a list of all parts in memory. On some systems with a large number of parts this process can take a long time, and it can be shortened by increasing `max_part_loading_threads` (if the process is not CPU and disk I/O bound).
{## [Original article](https://clickhouse.tech/docs/ru/operations/settings/merge-tree-settings/) ##}
## max_partitions_to_read {#max-partitions-to-read}
Limits the maximum number of partitions that can be read in one query.
The setting value specified when the table is created can be overridden by the query-level setting.
Possible values:
- Any positive integer.
Default value: -1 (unlimited).
[Original article](https://clickhouse.tech/docs/ru/operations/settings/merge_tree_settings/) <!--hide-->

View File

@ -2473,4 +2473,58 @@ SELECT SUM(-1), MAX(0) FROM system.one WHERE 0;
Default value: `16`.
## optimize_on_insert {#optimize-on-insert}
Enables or disables data transformation before insertion into the table, as if a merge had already been performed on the inserted block (according to the table engine).
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: 1.
**Example**
Compare inserting data with the setting enabled and disabled:
Query:
```sql
SET optimize_on_insert = 1;
CREATE TABLE test1 (`FirstTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY FirstTable;
INSERT INTO test1 SELECT number % 2 FROM numbers(5);
SELECT * FROM test1;
SET optimize_on_insert = 0;
CREATE TABLE test2 (`SecondTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY SecondTable;
INSERT INTO test2 SELECT number % 2 FROM numbers(5);
SELECT * FROM test2;
```
Result:
``` text
┌─FirstTable─┐
│ 0 │
│ 1 │
└────────────┘
┌─SecondTable─┐
│ 0 │
│ 0 │
│ 0 │
│ 1 │
│ 1 │
└─────────────┘
```
Note that this setting affects the behaviour of [materialized views](../../sql-reference/statements/create/view.md#materialized) and the [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) database.
[Original article](https://clickhouse.tech/docs/ru/operations/settings/settings/) <!--hide-->

View File

@ -6,29 +6,62 @@
Columns:
- `event_type` (Enum) — Type of the event. The column can contain one of the following values:
- `query_id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the `INSERT` query that created this data part.
- `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event. The column can contain one of the following values:
- `NEW_PART` — Inserting a new data part.
- `MERGE_PARTS` — Merging data parts.
- `DOWNLOAD_PART` — Downloading from a replica.
- `REMOVE_PART` — Removing or detaching from the table using [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition).
- `MUTATE_PART` — Mutating a data part.
- `MOVE_PART` — Moving a data part between disks.
- `event_date` (Date) — Event date.
- `event_time` (DateTime) — Event time.
- `duration_ms` (UInt64) — Duration.
- `database` (String) — Name of the database the data part is in.
- `table` (String) — Name of the table the data part is in.
- `part_name` (String) — Name of the data part.
- `partition_id` (String) — ID of the partition that the data part was inserted to. The column takes the value all if the partitioning is by `tuple()`.
- `rows` (UInt64) — Number of rows in the data part.
- `size_in_bytes` (UInt64) — Size of the data part in bytes.
- `merged_from` (Array(String)) — Array of names of the parts from which the current part was formed as a result of the merge (the column is also filled when downloading an already merged part).
- `bytes_uncompressed` (UInt64) — Number of uncompressed bytes read.
- `read_rows` (UInt64) — Number of rows read during the merge.
- `read_bytes` (UInt64) — Number of bytes read during the merge.
- `error` (UInt16) — Code of the error that occurred during the current event.
- `exception` (String) — Text of the error.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration.
- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database the data part is in.
- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table the data part is in.
- `part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part.
- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition that the data part was inserted to. The column takes the value `all` if the partitioning is by `tuple()`.
- `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with the data part files.
- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of rows in the data part.
- `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of the data part in bytes.
- `merged_from` ([Array(String)](../../sql-reference/data-types/array.md)) — Array of names of the parts from which the current part was formed as a result of the merge (the column is also filled when downloading an already merged part).
- `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of uncompressed bytes read.
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of rows read during the merge.
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of bytes read during the merge.
- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — Maximum difference between the amount of allocated and freed memory in the context of this thread.
- `error` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Code of the error that occurred during the current event.
- `exception` ([String](../../sql-reference/data-types/string.md)) — Text of the error.
The `system.part_log` system table is created after the first insertion of data into a `MergeTree` table.
**Example**
``` sql
SELECT * FROM system.part_log LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
query_id: 983ad9c7-28d5-4ae1-844e-603116b7de31
event_type: NewPart
event_date: 2021-02-02
event_time: 2021-02-02 11:14:28
duration_ms: 35
database: default
table: log_mt_2
part_name: all_1_1_0
partition_id: all
path_on_disk: db/data/default/log_mt_2/all_1_1_0/
rows: 115418
size_in_bytes: 1074311
merged_from: []
bytes_uncompressed: 0
read_rows: 0
read_bytes: 0
peak_memory_usage: 0
error: 0
exception:
```
[Original article](https://clickhouse.tech/docs/ru/operations/system_tables/part_log) <!--hide-->

View File

@ -4,8 +4,63 @@ toc_priority: 106
# argMax {#agg-function-argmax}
Syntax: `argMax(arg, val)`
Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, returns the first of these values encountered.
Calculates the arg value for a maximum val value. If there are several different values of arg for maximum values of val, the first of these values encountered is output.
If the function is passed a tuple, it will output the tuple with the maximum `val` value. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md).
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmax/) <!--hide-->
**Syntax**
``` sql
argMax(arg, val)
```
or
``` sql
argMax(tuple(arg, val))
```
**Parameters**
- `arg` — Argument.
- `val` — Value.
**Returned value**
- `arg` value that corresponds to the maximum `val` value.
Type: matches `arg` type.
If a tuple is passed:
- Tuple `(arg, val)`, where `val` is the maximum value and `arg` is the corresponding value.
Type: [Tuple](../../../sql-reference/data-types/tuple.md).
**Example**
Input table:
``` text
┌─user─────┬─salary─┐
│ director │ 5000 │
│ manager │ 3000 │
│ worker │ 1000 │
└──────────┴────────┘
```
Query:
``` sql
SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary;
```
Result:
``` text
┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐
│ director │ ('director',5000) │
└──────────────────────┴─────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/argmax/) <!--hide-->

View File

@ -4,11 +4,42 @@ toc_priority: 105
# argMin {#agg-function-argmin}
Syntax: `argMin(arg, val)`
Calculates the `arg` value for a minimum `val` value. If there are several different values of `arg` for minimum values of `val`, returns the first of these values encountered.
Calculates the arg value for a minimum val value. If there are several different values of arg for minimum values of val, the first of these values encountered is output.
If the function is passed a tuple, it will output the tuple with the minimum `val` value. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md).
**Example:**
**Syntax**
``` sql
argMin(arg, val)
```
or
``` sql
argMin(tuple(arg, val))
```
**Parameters**
- `arg` — Argument.
- `val` — Value.
**Returned value**
- `arg` value that corresponds to the minimum `val` value.
Type: matches `arg` type.
If a tuple is passed:
- Tuple `(arg, val)`, where `val` is the minimum value and `arg` is the corresponding value.
Type: [Tuple](../../../sql-reference/data-types/tuple.md).
**Example**
Input table:
``` text
┌─user─────┬─salary─┐
@ -18,14 +49,18 @@ toc_priority: 105
└──────────┴────────┘
```
Query:
``` sql
SELECT argMin(user, salary) FROM salary
SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary;
```
Result:
``` text
┌─argMin(user, salary)─┐
│ worker │
└──────────────────────┘
┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─┐
│ worker │ ('worker',1000) │
└──────────────────────┴─────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmin/) <!--hide-->
[Original article](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/argmin/) <!--hide-->

View File

@ -0,0 +1,71 @@
---
toc_priority: 310
toc_title: mannWhitneyUTest
---
# mannWhitneyUTest {#mannwhitneyutest}
Applies the Mann-Whitney U test to samples from two populations.
**Syntax**
``` sql
mannWhitneyUTest[(alternative[, continuity_correction])](sample_data, sample_index)
```
Values of both samples are taken from the `sample_data` column. If `sample_index` equals 0, then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population.
The null hypothesis is that the two populations are stochastically equal. One-sided hypotheses can also be tested.
The Mann-Whitney U test does not require the populations to have a normal distribution.
**Parameters**
- `alternative` — Alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md).
- `'two-sided'`;
- `'greater'`;
- `'less'`.
- `continuity_correction` - if not 0, continuity correction in the normal approximation for the p-value is applied. (Optional, default: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md).
- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
**Returned values**
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
- calculated U-statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
**Example**
Table:
``` text
┌─sample_data─┬─sample_index─┐
│ 10 │ 0 │
│ 11 │ 0 │
│ 12 │ 0 │
│ 1 │ 1 │
│ 2 │ 1 │
│ 3 │ 1 │
└─────────────┴──────────────┘
```
Query:
``` sql
SELECT mannWhitneyUTest('greater')(sample_data, sample_index) FROM mww_ttest;
```
Result:
``` text
┌─mannWhitneyUTest('greater')(sample_data, sample_index)─┐
│ (9,0.04042779918503192) │
└────────────────────────────────────────────────────────┘
```
**See Also**
- [Mann-Whitney U test](https://ru.wikipedia.org/wiki/U-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%9C%D0%B0%D0%BD%D0%BD%D0%B0_%E2%80%94_%D0%A3%D0%B8%D1%82%D0%BD%D0%B8)
[Original article](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest/) <!--hide-->

View File

@ -0,0 +1,65 @@
---
toc_priority: 300
toc_title: studentTTest
---
# studentTTest {#studentttest}
Applies Student's t-test to samples from two populations.
**Syntax**
``` sql
studentTTest(sample_data, sample_index)
```
Values of both samples are taken from the `sample_data` column. If `sample_index` equals 0, the value in that row belongs to the first sample. Otherwise it belongs to the second sample.
The null hypothesis is that the means of the populations are equal. Student's t-test requires the populations to be normally distributed and to have equal variances.
**Parameters**
- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
**Returned values**
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
**Example**
Input table:
``` text
┌─sample_data─┬─sample_index─┐
│ 20.3 │ 0 │
│ 21.1 │ 0 │
│ 21.9 │ 1 │
│ 21.7 │ 0 │
│ 19.9 │ 1 │
│ 21.8 │ 1 │
└─────────────┴──────────────┘
```
Query:
``` sql
SELECT studentTTest(sample_data, sample_index) FROM student_ttest;
```
Result:
``` text
┌─studentTTest(sample_data, sample_index)───┐
│ (-0.21739130434783777,0.8385421208415731) │
└───────────────────────────────────────────┘
```
**See also**
- [Student's t-test](https://ru.wikipedia.org/wiki/T-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%A1%D1%82%D1%8C%D1%8E%D0%B4%D0%B5%D0%BD%D1%82%D0%B0)
- [welchTTest](welchttest.md#welchttest)
[Original article](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/studentttest/) <!--hide-->

View File

@ -0,0 +1,65 @@
---
toc_priority: 301
toc_title: welchTTest
---
# welchTTest {#welchttest}
Applies Welch's t-test to samples from two populations.
**Syntax**
``` sql
welchTTest(sample_data, sample_index)
```
Values of both samples are taken from the `sample_data` column. If `sample_index` equals 0, the value in that row belongs to the first sample. Otherwise it belongs to the second sample.
The null hypothesis is that the means of the populations are equal. Welch's t-test requires the populations to be normally distributed; the variances do not have to be equal.
**Parameters**
- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
**Returned values**
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
**Example**
Input table:
``` text
┌─sample_data─┬─sample_index─┐
│ 20.3 │ 0 │
│ 22.1 │ 0 │
│ 21.9 │ 0 │
│ 18.9 │ 1 │
│ 20.3 │ 1 │
│ 19 │ 1 │
└─────────────┴──────────────┘
```
Query:
``` sql
SELECT welchTTest(sample_data, sample_index) FROM welch_ttest;
```
Result:
``` text
┌─welchTTest(sample_data, sample_index)─────┐
│ (2.7988719532211235,0.051807360348581945) │
└───────────────────────────────────────────┘
```
**See also**
- [Welch's t-test](https://ru.wikipedia.org/wiki/T-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%A3%D1%8D%D0%BB%D1%87%D0%B0)
- [studentTTest](studentttest.md#studentttest)
[Original article](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/welchTTest/) <!--hide-->

View File

@ -205,8 +205,8 @@ RANGE(MIN first MAX last)
Algorithm details:
- If the `id` is not found, or no range is found for that `id`, the dictionary's default value is returned.
- If ranges overlap, any suitable one may be used.
- If a range boundary is `NULL` or an invalid date (1900-01-01, 2039-01-01), the range is open. A range can be open on both sides.
- If ranges overlap, the value from any (random) suitable range is returned.
- If a range boundary is `NULL` or an invalid date (1900-01-01), the range is open. A range can be open on both sides (see the lookup sketch after this list).
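To make the lookup behavior concrete, here is a hypothetical query against such a ranged dictionary. The dictionary name `discounts`, the attribute `amount`, and the key and date values are assumptions used only for illustration:

``` sql
SELECT dictGetFloat64('discounts', 'amount', toUInt64(1), toDate('2020-05-20'));
```

The date argument selects the range that covers it; if the matching range has an open boundary (`NULL` or an invalid date), the same lookup still resolves to that range.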
Configuration example:

View File

@ -56,9 +56,10 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
The execution of `ALTER` queries on materialized views is not fully implemented, so they can be inconvenient to use. If a materialized view uses the `TO [db.]name` construction, you can `DETACH` the view, run `ALTER` on the target table, and then `ATTACH` the previously detached view, as sketched below.
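A minimal sketch of that workaround, assuming a materialized view `mv` defined with `TO dest` and a purely additive change to the target table (the names and the added column are illustrative):

``` sql
DETACH TABLE mv;
ALTER TABLE dest ADD COLUMN extra_value UInt32 DEFAULT 0;
ATTACH TABLE mv;
```

If the change also affects the columns produced by the view's `SELECT`, the view itself has to be recreated.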
Note that materialized views are affected by the [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) setting: data is merged before being inserted into the view.
Views look the same as normal tables. For example, they are listed in the result of the `SHOW TABLES` query.
There is no separate query for deleting views. To delete a view, use `DROP TABLE`.
[Original article](https://clickhouse.tech/docs/ru/sql-reference/statements/create/view)
<!--hide-->
[Original article](https://clickhouse.tech/docs/ru/sql-reference/statements/create/view) <!--hide-->

View File

@ -63,8 +63,6 @@ SELECT * FROM insert_select_testtable
- Values calculated from the `DEFAULT` expressions specified in the table definition.
- Zeros and empty strings, if `DEFAULT` expressions are not defined.
If [strict_insert_defaults=1](../../operations/settings/settings.md), columns without a defined `DEFAULT` must be listed in the query.
Data can be passed to INSERT in any [format](../../interfaces/formats.md#formats) supported by ClickHouse. The format must be specified explicitly in the query:
``` sql

View File

@ -37,7 +37,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
VersionedCollapsingMergeTree(sign, version)
```
- `sign` — Name of the column with the type of row: `1` is a "state" row, `-1` is a "cancel" row
- `sign` — Name of the column with the type of row: `1` is a "state" row, `-1` is a "cancel" row
The column data type must be `Int8`.

View File

@ -6,12 +6,16 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
# system.zookeeper {#system-zookeeper}
The table does not exist unless ZooKeeper is configured. It allows reading data from the ZooKeeper cluster defined in the config.
The query must have an equality condition on 'path' in the WHERE clause. This is the path in ZooKeeper to the children whose data you want to get.
The query must have an equality condition or an IN condition on 'path' in the WHERE clause. This is the path in ZooKeeper to the children whose data you want to get.
The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs data for all children of the `/clickhouse` node.
To output data for all root nodes, write path = '/'.
If the path specified in 'path' does not exist, an exception will be thrown.
The query `SELECT * FROM system.zookeeper WHERE path IN ('/', '/clickhouse')` outputs data for all children of the `/` and `/clickhouse` nodes.
If any path in the specified 'path' set does not exist, an exception will be thrown.
It can be used to run a batch of ZooKeeper path queries, as in the example below.
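For instance, both lookup forms described above (the node paths are illustrative):

``` sql
SELECT * FROM system.zookeeper WHERE path = '/clickhouse';
SELECT * FROM system.zookeeper WHERE path IN ('/', '/clickhouse');
```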
Columns:
- `name` (String) — The name of the node.

View File

@ -1374,7 +1374,30 @@ private:
{
// Probably the server is dead because we found an assertion
// failure. Fail fast.
fmt::print(stderr, "Lost connection to the server\n");
fmt::print(stderr, "Lost connection to the server.\n");
// Print the changed settings because they might be needed to
// reproduce the error.
const auto & changes = context.getSettingsRef().changes();
if (!changes.empty())
{
fmt::print(stderr, "Changed settings: ");
for (size_t i = 0; i < changes.size(); ++i)
{
if (i)
{
fmt::print(stderr, ", ");
}
fmt::print(stderr, "{} = '{}'", changes[i].name,
toString(changes[i].value));
}
fmt::print(stderr, "\n");
}
else
{
fmt::print(stderr, "No changed settings.\n");
}
return false;
}

View File

@ -363,6 +363,16 @@ void QueryFuzzer::fuzzWindowFrame(WindowFrame & frame)
frame.end_offset = getRandomField(0).get<Int64>();
break;
}
case 5:
{
frame.begin_preceding = fuzz_rand() % 2;
break;
}
case 6:
{
frame.end_preceding = fuzz_rand() % 2;
break;
}
default:
break;
}

View File

@ -29,4 +29,25 @@
<test>{display_name} \x01\e[1;32m\x02:)\x01\e[0m\x02 </test> <!-- if it matched to the substring "test" in the server display name - -->
<production>{display_name} \x01\e[1;31m\x02:)\x01\e[0m\x02 </production> <!-- if it matched to the substring "production" in the server display name -->
</prompt_by_server_display_name>
<!--
Settings adjustable via command-line parameters
can take their defaults from that config file, see examples:
<host>127.0.0.1</host>
<port>9440</port>
<secure>1</secure>
<user>dbuser</user>
<password>dbpwd123</password>
<format>PrettyCompactMonoBlock</format>
<multiline>1</multiline>
<multiquery>1</multiquery>
<stacktrace>1</stacktrace>
<database>default2</database>
<pager>less -SR</pager>
<history_file>/home/user/clickhouse_custom_history.log</history_file>
<max_parser_depth>2500</max_parser_depth>
The same can be done on user-level configuration, just create & adjust: ~/.clickhouse-client/config.xml
-->
</config>

View File

@ -316,9 +316,6 @@ void ClusterCopier::process(const ConnectionTimeouts & timeouts)
}
}
/// Delete helping tables in both cases (whole table is done or not)
dropHelpingTables(task_table);
if (!table_is_done)
{
throw Exception("Too many tries to process table " + task_table.table_id + ". Abort remaining execution",
@ -1044,6 +1041,11 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab
{
LOG_INFO(log, "Table {} is not processed yet.Copied {} of {}, will retry", task_table.table_id, finished_partitions, required_partitions);
}
else
{
/// Delete helping tables in case that whole table is done
dropHelpingTables(task_table);
}
return table_is_done;
}

View File

@ -31,6 +31,8 @@ struct Quota : public IAccessEntity
enum ResourceType
{
QUERIES, /// Number of queries.
QUERY_SELECTS, /// Number of select queries.
QUERY_INSERTS, /// Number of inserts queries.
ERRORS, /// Number of queries with exceptions.
RESULT_ROWS, /// Number of rows returned as result.
RESULT_BYTES, /// Number of bytes returned as result.
@ -152,6 +154,16 @@ inline const Quota::ResourceTypeInfo & Quota::ResourceTypeInfo::get(ResourceType
static const auto info = make_info("QUERIES", 1);
return info;
}
case Quota::QUERY_SELECTS:
{
static const auto info = make_info("QUERY_SELECTS", 1);
return info;
}
case Quota::QUERY_INSERTS:
{
static const auto info = make_info("QUERY_INSERTS", 1);
return info;
}
case Quota::ERRORS:
{
static const auto info = make_info("ERRORS", 1);

View File

@ -0,0 +1,49 @@
#include <AggregateFunctions/AggregateFunctionDeltaSum.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
namespace
{
AggregateFunctionPtr createAggregateFunctionDeltaSum(
const String & name,
const DataTypes & arguments,
const Array & params)
{
assertNoParameters(name, params);
if (arguments.size() != 1)
throw Exception("Incorrect number of arguments for aggregate function " + name,
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
DataTypePtr data_type = arguments[0];
if (isInteger(data_type) || isFloat(data_type))
return AggregateFunctionPtr(createWithNumericType<AggregationFunctionDeltaSum>(
*data_type, arguments, params));
else
throw Exception("Illegal type " + arguments[0]->getName() + " of argument for aggregate function " + name,
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
}
void registerAggregateFunctionDeltaSum(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = true };
factory.registerFunction("deltaSum", { createAggregateFunctionDeltaSum, properties });
}
}

View File

@ -0,0 +1,129 @@
#pragma once
#include <type_traits>
#include <experimental/type_traits>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <AggregateFunctions/IAggregateFunction.h>
namespace DB
{
template <typename T>
struct AggregationFunctionDeltaSumData
{
T sum = 0;
T last = 0;
T first = 0;
bool seen_last = false;
bool seen_first = false;
};
template <typename T>
class AggregationFunctionDeltaSum final
: public IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>
{
public:
AggregationFunctionDeltaSum(const DataTypes & arguments, const Array & params)
: IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>{arguments, params}
{}
AggregationFunctionDeltaSum()
: IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>{}
{}
String getName() const override { return "deltaSum"; }
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeNumber<T>>(); }
void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
{
auto value = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
if ((this->data(place).last < value) && this->data(place).seen_last)
{
this->data(place).sum += (value - this->data(place).last);
}
this->data(place).last = value;
this->data(place).seen_last = true;
if (!this->data(place).seen_first)
{
this->data(place).first = value;
this->data(place).seen_first = true;
}
}
void NO_SANITIZE_UNDEFINED ALWAYS_INLINE merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
{
auto place_data = &this->data(place);
auto rhs_data = &this->data(rhs);
if ((place_data->last < rhs_data->first) && place_data->seen_last && rhs_data->seen_first)
{
// If the lhs last number seen is less than the first number the rhs saw, the lhs is before
// the rhs, for example [0, 2] [4, 7]. So we want to add the deltasums, but also add the
// difference between lhs last number and rhs first number (the 2 and 4). Then we want to
// take last value from the rhs, so first and last become 0 and 7.
place_data->sum += rhs_data->sum + (rhs_data->first - place_data->last);
place_data->last = rhs_data->last;
}
else if ((rhs_data->last < place_data->first && rhs_data->seen_last && place_data->seen_first))
{
// In the opposite scenario, the lhs comes after the rhs, e.g. [4, 6] [1, 2]. Since we
// assume the input interval states are sorted by time, we assume this is a counter
// reset, and therefore do *not* add the difference between our first value and the
// rhs last value.
place_data->sum += rhs_data->sum;
place_data->first = rhs_data->first;
}
else if (rhs_data->seen_first)
{
// If we're here then the lhs is an empty state and the rhs does have some state, so
// we'll just take that state.
place_data->first = rhs_data->first;
place_data->seen_first = rhs_data->seen_first;
place_data->last = rhs_data->last;
place_data->seen_last = rhs_data->seen_last;
place_data->sum = rhs_data->sum;
}
// Otherwise lhs either has data or is uninitialized, so we don't need to modify its values.
}
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
{
writeIntBinary(this->data(place).sum, buf);
writeIntBinary(this->data(place).first, buf);
writeIntBinary(this->data(place).last, buf);
writePODBinary<bool>(this->data(place).seen_first, buf);
writePODBinary<bool>(this->data(place).seen_last, buf);
}
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
{
readIntBinary(this->data(place).sum, buf);
readIntBinary(this->data(place).first, buf);
readIntBinary(this->data(place).last, buf);
readPODBinary<bool>(this->data(place).seen_first, buf);
readPODBinary<bool>(this->data(place).seen_last, buf);
}
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
{
assert_cast<ColumnVector<T> &>(to).getData().push_back(this->data(place).sum);
}
};
}
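The add and merge logic above sums only the positive increments of the input sequence and treats any drop as a counter reset. A usage sketch under that reading (the subquery and values are illustrative, and the function is order-dependent, so a real query should guarantee row order):

``` sql
SELECT deltaSum(value)
FROM (SELECT arrayJoin([1, 2, 3, 0, 3]) AS value);
-- Increments: (2 - 1) + (3 - 2) + (3 - 0) = 5; the drop from 3 to 0 contributes nothing.
```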

View File

@ -16,6 +16,22 @@ namespace ErrorCodes
namespace
{
template <template <typename, typename> class AggregateFunctionTemplate, template <typename> class Data, typename... TArgs>
static IAggregateFunction * createWithIntegerType(const IDataType & argument_type, TArgs && ... args)
{
WhichDataType which(argument_type);
if (which.idx == TypeIndex::UInt8) return new AggregateFunctionTemplate<UInt8, Data<UInt8>>(std::forward<TArgs>(args)...);
if (which.idx == TypeIndex::UInt16) return new AggregateFunctionTemplate<UInt16, Data<UInt16>>(std::forward<TArgs>(args)...);
if (which.idx == TypeIndex::UInt32) return new AggregateFunctionTemplate<UInt32, Data<UInt32>>(std::forward<TArgs>(args)...);
if (which.idx == TypeIndex::UInt64) return new AggregateFunctionTemplate<UInt64, Data<UInt64>>(std::forward<TArgs>(args)...);
if (which.idx == TypeIndex::Int8) return new AggregateFunctionTemplate<Int8, Data<Int8>>(std::forward<TArgs>(args)...);
if (which.idx == TypeIndex::Int16) return new AggregateFunctionTemplate<Int16, Data<Int16>>(std::forward<TArgs>(args)...);
if (which.idx == TypeIndex::Int32) return new AggregateFunctionTemplate<Int32, Data<Int32>>(std::forward<TArgs>(args)...);
if (which.idx == TypeIndex::Int64) return new AggregateFunctionTemplate<Int64, Data<Int64>>(std::forward<TArgs>(args)...);
return nullptr;
}
template <template <typename> class Data>
AggregateFunctionPtr createAggregateFunctionBitmap(const std::string & name, const DataTypes & argument_types, const Array & parameters)
{
@ -28,7 +44,7 @@ namespace
+ " is illegal, because it cannot be used in Bitmap operations",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
AggregateFunctionPtr res(createWithUnsignedIntegerType<AggregateFunctionBitmap, Data>(*argument_types[0], argument_types[0]));
AggregateFunctionPtr res(createWithIntegerType<AggregateFunctionBitmap, Data>(*argument_types[0], argument_types[0]));
if (!res)
throw Exception(
@ -55,7 +71,7 @@ namespace
const DataTypeAggregateFunction & datatype_aggfunc = dynamic_cast<const DataTypeAggregateFunction &>(*argument_type_ptr);
AggregateFunctionPtr aggfunc = datatype_aggfunc.getFunction();
argument_type_ptr = aggfunc->getArgumentTypes()[0];
AggregateFunctionPtr res(createWithUnsignedIntegerType<AggregateFunctionTemplate, AggregateFunctionGroupBitmapData>(
AggregateFunctionPtr res(createWithIntegerType<AggregateFunctionTemplate, AggregateFunctionGroupBitmapData>(
*argument_type_ptr, argument_type_ptr));
if (!res)
throw Exception(

View File

@ -32,6 +32,7 @@ template <typename T, UInt8 small_set_size>
class RoaringBitmapWithSmallSet : private boost::noncopyable
{
private:
using UnsignedT = std::make_unsigned_t<T>;
SmallSet<T, small_set_size> small;
using ValueBuffer = std::vector<T>;
using RoaringBitmap = std::conditional_t<sizeof(T) >= 8, roaring::Roaring64Map, roaring::Roaring>;
@ -363,6 +364,7 @@ public:
/**
* Check whether the argument is the subset of this set.
* Empty set is a subset of any other set (consistent with hasAll).
* It's used in subset and currently only supports comparing the same type
*/
UInt8 rb_is_subset(const RoaringBitmapWithSmallSet & r1) const
{
@ -486,6 +488,7 @@ public:
/**
* Return a new set with the specified range (range_end is not included)
* It's used in subset and currently only supports UInt32
*/
UInt64 rb_range(UInt64 range_start, UInt64 range_end, RoaringBitmapWithSmallSet & r1) const
{
@ -525,6 +528,7 @@ public:
/**
* Return a new set of the smallest `limit` values in the set that are no less than `range_start`.
* It's used in subset and currently only supports UInt32
*/
UInt64 rb_limit(UInt64 range_start, UInt64 limit, RoaringBitmapWithSmallSet & r1) const
{
@ -578,10 +582,10 @@ public:
{
if (small.empty())
return 0;
auto min_val = std::numeric_limits<std::make_unsigned_t<T>>::max();
auto min_val = std::numeric_limits<UnsignedT>::max();
for (const auto & x : small)
{
auto val = x.getValue();
UnsignedT val = x.getValue();
if (val < min_val)
min_val = val;
}
@ -597,10 +601,10 @@ public:
{
if (small.empty())
return 0;
auto max_val = std::numeric_limits<std::make_unsigned_t<T>>::min();
UnsignedT max_val = 0;
for (const auto & x : small)
{
auto val = x.getValue();
UnsignedT val = x.getValue();
if (val > max_val)
max_val = val;
}
@ -611,7 +615,8 @@ public:
}
/**
* Replace value
* Replace value.
* It's used in transform and currently only supports UInt32
*/
void rb_replace(const UInt64 * from_vals, const UInt64 * to_vals, size_t num)
{

View File

@ -147,7 +147,7 @@ public:
}
if (params[0].getType() != Field::Types::String)
throw Exception("Aggregate function " + getName() + " require require first parameter to be a String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
throw Exception("Aggregate function " + getName() + " require first parameter to be a String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
auto param = params[0].get<String>();
if (param == "two-sided")
@ -158,13 +158,13 @@ public:
alternative = Alternative::Greater;
else
throw Exception("Unknown parameter in aggregate function " + getName() +
". It must be one of: 'two sided', 'less', 'greater'", ErrorCodes::BAD_ARGUMENTS);
". It must be one of: 'two-sided', 'less', 'greater'", ErrorCodes::BAD_ARGUMENTS);
if (params.size() != 2)
return;
if (params[1].getType() != Field::Types::UInt64)
throw Exception("Aggregate function " + getName() + " require require second parameter to be a UInt64", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
throw Exception("Aggregate function " + getName() + " require second parameter to be a UInt64", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
continuity_correction = static_cast<bool>(params[1].get<UInt64>());
}

View File

@ -11,6 +11,7 @@ class AggregateFunctionFactory;
void registerAggregateFunctionAvg(AggregateFunctionFactory &);
void registerAggregateFunctionAvgWeighted(AggregateFunctionFactory &);
void registerAggregateFunctionCount(AggregateFunctionFactory &);
void registerAggregateFunctionDeltaSum(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArray(AggregateFunctionFactory &);
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory &);
@ -66,6 +67,7 @@ void registerAggregateFunctions()
registerAggregateFunctionAvg(factory);
registerAggregateFunctionAvgWeighted(factory);
registerAggregateFunctionCount(factory);
registerAggregateFunctionDeltaSum(factory);
registerAggregateFunctionGroupArray(factory);
registerAggregateFunctionGroupUniqArray(factory);
registerAggregateFunctionGroupArrayInsertAt(factory);

View File

@ -19,6 +19,7 @@ SRCS(
AggregateFunctionCategoricalInformationValue.cpp
AggregateFunctionCombinatorFactory.cpp
AggregateFunctionCount.cpp
AggregateFunctionDeltaSum.cpp
AggregateFunctionDistinct.cpp
AggregateFunctionEntropy.cpp
AggregateFunctionFactory.cpp

View File

@ -100,8 +100,8 @@ endif()
list (APPEND clickhouse_common_io_sources ${CONFIG_BUILD})
list (APPEND clickhouse_common_io_headers ${CONFIG_VERSION} ${CONFIG_COMMON})
list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp)
list (APPEND dbms_headers Functions/IFunctionImpl.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h)
list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp Functions/replicate.cpp)
list (APPEND dbms_headers Functions/IFunctionImpl.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h Functions/replicate.h)
list (APPEND dbms_sources
AggregateFunctions/AggregateFunctionFactory.cpp

View File

@ -75,8 +75,28 @@ void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_)
ColumnAggregateFunction::~ColumnAggregateFunction()
{
if (!func->hasTrivialDestructor() && !src)
for (auto * val : data)
func->destroy(val);
{
if (copiedDataInfo.empty())
{
for (auto * val : data)
{
func->destroy(val);
}
}
else
{
size_t pos;
for (Map::iterator it = copiedDataInfo.begin(), it_end = copiedDataInfo.end(); it != it_end; ++it)
{
pos = it->getValue().second;
if (data[pos] != nullptr)
{
func->destroy(data[pos]);
data[pos] = nullptr;
}
}
}
}
}
void ColumnAggregateFunction::addArena(ConstArenaPtr arena_)
@ -455,14 +475,37 @@ void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n)
/// (only as a whole, see comment above).
ensureOwnership();
insertDefault();
insertMergeFrom(from, n);
insertCopyFrom(assert_cast<const ColumnAggregateFunction &>(from).data[n]);
}
void ColumnAggregateFunction::insertFrom(ConstAggregateDataPtr place)
{
ensureOwnership();
insertDefault();
insertMergeFrom(place);
insertCopyFrom(place);
}
void ColumnAggregateFunction::insertCopyFrom(ConstAggregateDataPtr place)
{
Map::LookupResult result;
result = copiedDataInfo.find(place);
if (result == nullptr)
{
copiedDataInfo[place] = data.size()-1;
func->merge(data.back(), place, &createOrGetArena());
}
else
{
size_t pos = result->getValue().second;
if (pos != data.size() - 1)
{
data[data.size() - 1] = data[pos];
}
else /// insert same data to same pos, merge them.
{
func->merge(data.back(), place, &createOrGetArena());
}
}
}
void ColumnAggregateFunction::insertMergeFrom(ConstAggregateDataPtr place)
@ -697,5 +740,4 @@ MutableColumnPtr ColumnAggregateFunction::cloneResized(size_t size) const
return cloned_col;
}
}
}

View File

@ -13,6 +13,8 @@
#include <Functions/FunctionHelpers.h>
#include <Common/HashTable/HashMap.h>
namespace DB
{
@ -82,6 +84,17 @@ private:
/// Name of the type to distinguish different aggregation states.
String type_string;
/// Records of merged data, used to avoid duplicated data copies.
/// Key: source pointer, value: position in the current column.
using Map = HashMap<
ConstAggregateDataPtr,
size_t,
DefaultHash<ConstAggregateDataPtr>,
HashTableGrower<3>,
HashTableAllocatorWithStackMemory<sizeof(std::pair<ConstAggregateDataPtr, size_t>) * (1 << 3)>>;
Map copiedDataInfo;
ColumnAggregateFunction() {}
/// Create a new column that has another column as a source.
@ -140,6 +153,8 @@ public:
void insertFrom(ConstAggregateDataPtr place);
void insertCopyFrom(ConstAggregateDataPtr place);
/// Merge state at last row with specified state in another column.
void insertMergeFrom(ConstAggregateDataPtr place);

View File

@ -28,12 +28,12 @@ namespace ColumnsHashing
/// For the case when there is one numeric key.
/// UInt8/16/32/64 for any type with corresponding bit width.
template <typename Value, typename Mapped, typename FieldType, bool use_cache = true>
template <typename Value, typename Mapped, typename FieldType, bool use_cache = true, bool need_offset = false>
struct HashMethodOneNumber
: public columns_hashing_impl::HashMethodBase<HashMethodOneNumber<Value, Mapped, FieldType, use_cache>, Value, Mapped, use_cache>
: public columns_hashing_impl::HashMethodBase<HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
{
using Self = HashMethodOneNumber<Value, Mapped, FieldType, use_cache>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
using Self = HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
const char * vec;
@ -70,12 +70,12 @@ struct HashMethodOneNumber
/// For the case when there is one string key.
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true>
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false>
struct HashMethodString
: public columns_hashing_impl::HashMethodBase<HashMethodString<Value, Mapped, place_string_to_arena, use_cache>, Value, Mapped, use_cache>
: public columns_hashing_impl::HashMethodBase<HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
{
using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
const IColumn::Offset * offsets;
const UInt8 * chars;
@ -108,12 +108,13 @@ protected:
/// For the case when there is one fixed-length string key.
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true>
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false>
struct HashMethodFixedString
: public columns_hashing_impl::HashMethodBase<HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache>, Value, Mapped, use_cache>
: public columns_hashing_impl::
HashMethodBase<HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
{
using Self = HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
using Self = HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
size_t n;
const ColumnFixedString::Chars * chars;
@ -454,13 +455,13 @@ template <>
struct LowCardinalityKeys<false> {};
/// For the case when all keys are of fixed length, and they fit in N (for example, 128) bits.
template <typename Value, typename Key, typename Mapped, bool has_nullable_keys_ = false, bool has_low_cardinality_ = false, bool use_cache = true>
template <typename Value, typename Key, typename Mapped, bool has_nullable_keys_ = false, bool has_low_cardinality_ = false, bool use_cache = true, bool need_offset = false>
struct HashMethodKeysFixed
: private columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>
, public columns_hashing_impl::HashMethodBase<HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache>, Value, Mapped, use_cache>
, public columns_hashing_impl::HashMethodBase<HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
{
using Self = HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache>;
using BaseHashed = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
using Self = HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache, need_offset>;
using BaseHashed = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
using Base = columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>;
static constexpr bool has_nullable_keys = has_nullable_keys_;
@ -540,13 +541,13 @@ protected:
};
/// For the case when there is one string key.
template <typename Value, typename Mapped, bool use_cache = true>
template <typename Value, typename Mapped, bool use_cache = true, bool need_offset = false>
struct HashMethodHashed
: public columns_hashing_impl::HashMethodBase<HashMethodHashed<Value, Mapped, use_cache>, Value, Mapped, use_cache>
: public columns_hashing_impl::HashMethodBase<HashMethodHashed<Value, Mapped, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
{
using Key = UInt128;
using Self = HashMethodHashed<Value, Mapped, use_cache>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
using Self = HashMethodHashed<Value, Mapped, use_cache, need_offset>;
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
ColumnRawPtrs key_columns;

View File

@ -87,34 +87,61 @@ public:
bool isInserted() const { return inserted; }
};
template <typename Mapped>
class FindResultImpl
/// FindResult optionally may contain pointer to value and offset in hashtable buffer.
/// Only bool found is required.
/// So we will have 4 different specializations for FindResultImpl
class FindResultImplBase
{
Mapped * value;
bool found;
public:
FindResultImpl(Mapped * value_, bool found_) : value(value_), found(found_) {}
explicit FindResultImplBase(bool found_) : found(found_) {}
bool isFound() const { return found; }
Mapped & getMapped() const { return *value; }
};
template <bool need_offset = false>
class FindResultImplOffsetBase
{
public:
constexpr static bool has_offset = need_offset;
explicit FindResultImplOffsetBase(size_t /* off */) {}
};
template <>
class FindResultImpl<void>
class FindResultImplOffsetBase<true>
{
bool found;
size_t offset;
public:
explicit FindResultImpl(bool found_) : found(found_) {}
bool isFound() const { return found; }
constexpr static bool has_offset = true;
explicit FindResultImplOffsetBase(size_t off) : offset(off) {}
ALWAYS_INLINE size_t getOffset() const { return offset; }
};
template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization>
template <typename Mapped, bool need_offset = false>
class FindResultImpl : public FindResultImplBase, public FindResultImplOffsetBase<need_offset>
{
Mapped * value;
public:
FindResultImpl(Mapped * value_, bool found_, size_t off)
: FindResultImplBase(found_), FindResultImplOffsetBase<need_offset>(off), value(value_) {}
Mapped & getMapped() const { return *value; }
};
template <bool need_offset>
class FindResultImpl<void, need_offset> : public FindResultImplBase, public FindResultImplOffsetBase<need_offset>
{
public:
FindResultImpl(bool found_, size_t off) : FindResultImplBase(found_), FindResultImplOffsetBase<need_offset>(off) {}
};
template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization, bool need_offset = false>
class HashMethodBase
{
public:
using EmplaceResult = EmplaceResultImpl<Mapped>;
using FindResult = FindResultImpl<Mapped>;
using FindResult = FindResultImpl<Mapped, need_offset>;
static constexpr bool has_mapped = !std::is_same<Mapped, void>::value;
using Cache = LastElementCache<Value, consecutive_keys_optimization>;
@ -217,12 +244,15 @@ protected:
{
if constexpr (Cache::consecutive_keys_optimization)
{
/// It's possible to support such a combination, but the code would become more complex.
/// Currently there is no place where we need these options enabled together.
static_assert(!FindResult::has_offset, "`consecutive_keys_optimization` and `has_offset` are conflicting options");
if (cache.check(key))
{
if constexpr (has_mapped)
return FindResult(&cache.value.second, cache.found);
return FindResult(&cache.value.second, cache.found, 0);
else
return FindResult(cache.found);
return FindResult(cache.found, 0);
}
}
@ -247,10 +277,15 @@ protected:
}
}
size_t offset = 0;
if constexpr (FindResult::has_offset)
{
offset = it ? data.offsetInternal(it) : 0;
}
if constexpr (has_mapped)
return FindResult(it ? &it->getMapped() : nullptr, it != nullptr);
return FindResult(it ? &it->getMapped() : nullptr, it != nullptr, offset);
else
return FindResult(it != nullptr);
return FindResult(it != nullptr, offset);
}
};

View File

@ -63,9 +63,6 @@ public:
/// Call from master thread as soon as possible (e.g. when thread accepted connection)
static void initializeQuery();
/// Sets query_context for current thread group
static void attachQueryContext(Context & query_context);
/// You must call one of these methods when create a query child thread:
/// Add current thread to a group associated with the thread group
static void attachTo(const ThreadGroupStatusPtr & thread_group);
@ -99,6 +96,10 @@ public:
private:
static void defaultThreadDeleter();
/// Sets query_context for current thread group
/// Can be used only through QueryScope
static void attachQueryContext(Context & query_context);
};
}

View File

@ -538,7 +538,8 @@
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \
M(1001, STD_EXCEPTION) \
M(1002, UNKNOWN_EXCEPTION)
M(1002, UNKNOWN_EXCEPTION) \
M(1003, INVALID_SHARD_ID)
/* See END */

View File

@ -476,6 +476,17 @@ public:
size_t getBufferSizeInCells() const { return NUM_CELLS; }
/// Return offset for result in internal buffer.
/// The result can have a value up to `getBufferSizeInCells() + 1`
/// because the offset for the zero value is considered to be 0,
/// and for other values it is `offset in buffer + 1`.
size_t offsetInternal(ConstLookupResult ptr) const
{
if (ptr->isZero(*this))
return 0;
return ptr - buf + 1;
}
const Cell * data() const { return buf; }
Cell * data() { return buf; }

View File

@ -109,6 +109,11 @@ struct HashMapCell
DB::assertChar(',', rb);
DB::readDoubleQuoted(value.second, rb);
}
static bool constexpr need_to_notify_cell_during_move = false;
static void move(HashMapCell * /* old_location */, HashMapCell * /* new_location */) {}
};
template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>

View File

@ -69,11 +69,16 @@ namespace ZeroTraits
{
template <typename T>
bool check(const T x) { return x == 0; }
inline bool check(const T x) { return x == 0; }
template <typename T>
void set(T & x) { x = 0; }
inline void set(T & x) { x = 0; }
template <>
inline bool check(const char * x) { return x == nullptr; }
template <>
inline void set(const char *& x){ x = nullptr; }
}
@ -204,6 +209,13 @@ struct HashTableCell
/// Deserialization, in binary and text form.
void read(DB::ReadBuffer & rb) { DB::readBinary(key, rb); }
void readText(DB::ReadBuffer & rb) { DB::readDoubleQuoted(key, rb); }
/// When cell pointer is moved during erase, reinsert or resize operations
static constexpr bool need_to_notify_cell_during_move = false;
static void move(HashTableCell * /* old_location */, HashTableCell * /* new_location */) {}
};
/**
@ -334,6 +346,32 @@ struct ZeroValueStorage<false, Cell>
};
template <bool enable, typename Allocator, typename Cell>
struct AllocatorBufferDeleter;
template <typename Allocator, typename Cell>
struct AllocatorBufferDeleter<false, Allocator, Cell>
{
AllocatorBufferDeleter(Allocator &, size_t) {}
void operator()(Cell *) const {}
};
template <typename Allocator, typename Cell>
struct AllocatorBufferDeleter<true, Allocator, Cell>
{
AllocatorBufferDeleter(Allocator & allocator_, size_t size_)
: allocator(allocator_)
, size(size_) {}
void operator()(Cell * buffer) const { allocator.free(buffer, size); }
Allocator & allocator;
size_t size;
};
// The HashTable
template
<
@ -427,7 +465,6 @@ protected:
}
}
/// Increase the size of the buffer.
void resize(size_t for_num_elems = 0, size_t for_buf_size = 0)
{
@ -460,7 +497,24 @@ protected:
new_grower.increaseSize();
/// Expand the space.
buf = reinterpret_cast<Cell *>(Allocator::realloc(buf, getBufferSizeInBytes(), new_grower.bufSize() * sizeof(Cell)));
size_t old_buffer_size = getBufferSizeInBytes();
/** If the cell needs to be notified when it is moved, we must temporarily keep the old buffer,
* because realloc does not guarantee that the reallocated buffer has the same base address.
*/
using Deleter = AllocatorBufferDeleter<Cell::need_to_notify_cell_during_move, Allocator, Cell>;
Deleter buffer_deleter(*this, old_buffer_size);
std::unique_ptr<Cell, Deleter> old_buffer(buf, buffer_deleter);
if constexpr (Cell::need_to_notify_cell_during_move)
{
buf = reinterpret_cast<Cell *>(Allocator::alloc(new_grower.bufSize() * sizeof(Cell)));
memcpy(reinterpret_cast<void *>(buf), reinterpret_cast<const void *>(old_buffer.get()), old_buffer_size);
}
else
buf = reinterpret_cast<Cell *>(Allocator::realloc(buf, old_buffer_size, new_grower.bufSize() * sizeof(Cell)));
grower = new_grower;
/** Now some items may need to be moved to a new location.
@ -470,7 +524,12 @@ protected:
size_t i = 0;
for (; i < old_size; ++i)
if (!buf[i].isZero(*this))
reinsert(buf[i], buf[i].getHash(*this));
{
size_t updated_place_value = reinsert(buf[i], buf[i].getHash(*this));
if constexpr (Cell::need_to_notify_cell_during_move)
Cell::move(&(old_buffer.get())[i], &buf[updated_place_value]);
}
/** There is also a special case:
* if the element was to be at the end of the old buffer, [ x]
@ -481,7 +540,13 @@ protected:
* process tail from the collision resolution chain immediately after it [ o x ]
*/
for (; !buf[i].isZero(*this); ++i)
reinsert(buf[i], buf[i].getHash(*this));
{
size_t updated_place_value = reinsert(buf[i], buf[i].getHash(*this));
if constexpr (Cell::need_to_notify_cell_during_move)
if (&buf[i] != &buf[updated_place_value])
Cell::move(&buf[i], &buf[updated_place_value]);
}
#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
watch.stop();
@ -495,20 +560,20 @@ protected:
/** Paste into the new buffer the value that was in the old buffer.
* Used when increasing the buffer size.
*/
void reinsert(Cell & x, size_t hash_value)
size_t reinsert(Cell & x, size_t hash_value)
{
size_t place_value = grower.place(hash_value);
/// If the element is in its place.
if (&x == &buf[place_value])
return;
return place_value;
/// Compute a new location, taking into account the collision resolution chain.
place_value = findCell(Cell::getKey(x.getValue()), hash_value, place_value);
/// If the item remains in its place in the old collision resolution chain.
if (!buf[place_value].isZero(*this))
return;
return place_value;
/// Copy to a new location and zero the old one.
x.setHash(hash_value);
@ -516,6 +581,7 @@ protected:
x.setZero();
/// Then the elements that previously were in collision with this can move to the old place.
return place_value;
}
@ -881,7 +947,11 @@ public:
/// Reinsert node pointed to by iterator
void ALWAYS_INLINE reinsert(iterator & it, size_t hash_value)
{
reinsert(*it.getPtr(), hash_value);
size_t place_value = reinsert(*it.getPtr(), hash_value);
if constexpr (Cell::need_to_notify_cell_during_move)
if (it.getPtr() != &buf[place_value])
Cell::move(it.getPtr(), &buf[place_value]);
}
@ -958,8 +1028,14 @@ public:
return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x, hash_value);
}
std::enable_if_t<Grower::performs_linear_probing_with_single_step, void>
std::enable_if_t<Grower::performs_linear_probing_with_single_step, bool>
ALWAYS_INLINE erase(const Key & x)
{
return erase(x, hash(x));
}
std::enable_if_t<Grower::performs_linear_probing_with_single_step, bool>
ALWAYS_INLINE erase(const Key & x, size_t hash_value)
{
/** Deletion from open addressing hash table without tombstones
*
@ -977,21 +1053,19 @@ public:
{
--m_size;
this->clearHasZero();
return true;
}
else
{
return;
return false;
}
}
size_t hash_value = hash(x);
size_t erased_key_position = findCell(x, hash_value, grower.place(hash_value));
/// Key is not found
if (buf[erased_key_position].isZero(*this))
{
return;
}
return false;
/// We need to guarantee loop termination because there will be empty position
assert(m_size < grower.bufSize());
@ -1056,12 +1130,18 @@ public:
/// Move the element to the freed place
memcpy(static_cast<void *>(&buf[erased_key_position]), static_cast<void *>(&buf[next_position]), sizeof(Cell));
if constexpr (Cell::need_to_notify_cell_during_move)
Cell::move(&buf[next_position], &buf[erased_key_position]);
/// Now we have another freed place
erased_key_position = next_position;
}
buf[erased_key_position].setZero();
--m_size;
return true;
}
bool ALWAYS_INLINE has(const Key & x) const
@ -1214,6 +1294,17 @@ public:
return grower.bufSize();
}
/// Return offset for result in internal buffer.
/// The result can have a value up to `getBufferSizeInCells() + 1`
/// because the offset for the zero value is considered to be 0,
/// and for other values it is `offset in buffer + 1`.
size_t offsetInternal(ConstLookupResult ptr) const
{
if (ptr->isZero(*this))
return 0;
return ptr - buf + 1;
}
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
size_t getCollisions() const
{

View File

@ -0,0 +1,244 @@
#pragma once
#include <common/types.h>
#include <boost/intrusive/trivial_value_traits.hpp>
#include <boost/intrusive/list.hpp>
#include <boost/noncopyable.hpp>
#include <Core/Defines.h>
#include <Common/Exception.h>
#include <Common/HashTable/HashMap.h>
#include <Common/PODArray.h>
template <typename TKey, typename TMapped, typename Hash, bool save_hash_in_cell>
struct LRUHashMapCell :
public std::conditional_t<save_hash_in_cell,
HashMapCellWithSavedHash<TKey, TMapped, Hash, HashTableNoState>,
HashMapCell<TKey, TMapped, Hash, HashTableNoState>>
{
public:
using Key = TKey;
using Base = std::conditional_t<save_hash_in_cell,
HashMapCellWithSavedHash<TKey, TMapped, Hash, HashTableNoState>,
HashMapCell<TKey, TMapped, Hash, HashTableNoState>>;
using Mapped = typename Base::Mapped;
using State = typename Base::State;
using mapped_type = Mapped;
using key_type = Key;
using Base::Base;
static bool constexpr need_to_notify_cell_during_move = true;
static void move(LRUHashMapCell * __restrict old_location, LRUHashMapCell * __restrict new_location)
{
/** We update the new location's prev and next pointers here, because during a hash table resize
* they can already have been updated by the move of another cell.
*/
new_location->prev = old_location->prev;
new_location->next = old_location->next;
LRUHashMapCell * prev = new_location->prev;
LRUHashMapCell * next = new_location->next;
/// Updated previous next and next previous nodes of list to point to new location
if (prev)
prev->next = new_location;
if (next)
next->prev = new_location;
}
private:
template<typename, typename, typename, bool>
friend class LRUHashMapCellNodeTraits;
LRUHashMapCell * next = nullptr;
LRUHashMapCell * prev = nullptr;
};
template<typename Key, typename Value, typename Hash, bool save_hash_in_cell>
struct LRUHashMapCellNodeTraits
{
using node = LRUHashMapCell<Key, Value, Hash, save_hash_in_cell>;
using node_ptr = LRUHashMapCell<Key, Value, Hash, save_hash_in_cell> *;
using const_node_ptr = const LRUHashMapCell<Key, Value, Hash, save_hash_in_cell> *;
static node * get_next(const node * ptr) { return ptr->next; }
static void set_next(node * __restrict ptr, node * __restrict next) { ptr->next = next; }
static node * get_previous(const node * ptr) { return ptr->prev; }
static void set_previous(node * __restrict ptr, node * __restrict prev) { ptr->prev = prev; }
};
template <typename TKey, typename TValue, typename Hash, bool save_hash_in_cells>
class LRUHashMapImpl :
private HashMapTable<
TKey,
LRUHashMapCell<TKey, TValue, Hash, save_hash_in_cells>,
Hash,
HashTableGrower<>,
HashTableAllocator>
{
using Base = HashMapTable<
TKey,
LRUHashMapCell<TKey, TValue, Hash, save_hash_in_cells>,
Hash,
HashTableGrower<>,
HashTableAllocator>;
public:
using Key = TKey;
using Value = TValue;
using Cell = LRUHashMapCell<Key, Value, Hash, save_hash_in_cells>;
using LRUHashMapCellIntrusiveValueTraits =
boost::intrusive::trivial_value_traits<
LRUHashMapCellNodeTraits<Key, Value, Hash, save_hash_in_cells>,
boost::intrusive::link_mode_type::normal_link>;
using LRUList = boost::intrusive::list<
Cell,
boost::intrusive::value_traits<LRUHashMapCellIntrusiveValueTraits>,
boost::intrusive::constant_time_size<false>>;
using iterator = typename LRUList::iterator;
using const_iterator = typename LRUList::const_iterator;
using reverse_iterator = typename LRUList::reverse_iterator;
using const_reverse_iterator = typename LRUList::const_reverse_iterator;
LRUHashMapImpl(size_t max_size_, bool preallocate_max_size_in_hash_map = false)
: Base(preallocate_max_size_in_hash_map ? max_size_ : 32)
, max_size(max_size_)
{
assert(max_size > 0);
}
std::pair<Cell *, bool> insert(const Key & key, const Value & value)
{
return emplace(key, value);
}
std::pair<Cell *, bool> insert(const Key & key, Value && value)
{
return emplace(key, std::move(value));
}
template<typename ...Args>
std::pair<Cell *, bool> emplace(const Key & key, Args&&... args)
{
size_t hash_value = Base::hash(key);
Cell * it = Base::find(key, hash_value);
if (it)
{
/// The cell already contains the element: return it and move it to the end of the LRU list.
lru_list.splice(lru_list.end(), lru_list, lru_list.iterator_to(*it));
return std::make_pair(it, false);
}
if (size() == max_size)
{
/// Erase least recently used element from front of the list
Cell & node = lru_list.front();
const Key & element_to_remove_key = node.getKey();
size_t key_hash = node.getHash(*this);
lru_list.pop_front();
[[maybe_unused]] bool erased = Base::erase(element_to_remove_key, key_hash);
assert(erased);
}
[[maybe_unused]] bool inserted;
/// Insert the value: first try the zero-value storage; if that does not apply, insert into the buffer.
if (!Base::emplaceIfZero(key, it, inserted, hash_value))
Base::emplaceNonZero(key, it, inserted, hash_value);
assert(inserted);
new (&it->getMapped()) Value(std::forward<Args>(args)...);
/// Put cell to the end of lru list
lru_list.insert(lru_list.end(), *it);
return std::make_pair(it, true);
}
using Base::find;
Value & get(const Key & key)
{
auto it = Base::find(key);
assert(it);
Value & value = it->getMapped();
/// Put cell to the end of lru list
lru_list.splice(lru_list.end(), lru_list, lru_list.iterator_to(*it));
return value;
}
const Value & get(const Key & key) const
{
return const_cast<std::decay_t<decltype(*this)> *>(this)->get(key);
}
bool contains(const Key & key) const
{
return Base::has(key);
}
bool erase(const Key & key)
{
auto hash = Base::hash(key);
auto it = Base::find(key, hash);
if (!it)
return false;
lru_list.erase(lru_list.iterator_to(*it));
return Base::erase(key, hash);
}
void clear()
{
lru_list.clear();
Base::clear();
}
using Base::size;
size_t getMaxSize() const { return max_size; }
iterator begin() { return lru_list.begin(); }
const_iterator begin() const { return lru_list.cbegin(); }
iterator end() { return lru_list.end(); }
const_iterator end() const { return lru_list.cend(); }
reverse_iterator rbegin() { return lru_list.rbegin(); }
const_reverse_iterator rbegin() const { return lru_list.crbegin(); }
reverse_iterator rend() { return lru_list.rend(); }
const_reverse_iterator rend() const { return lru_list.crend(); }
private:
size_t max_size;
LRUList lru_list;
};
template <typename Key, typename Value, typename Hash = DefaultHash<Key>>
using LRUHashMap = LRUHashMapImpl<Key, Value, Hash, false>;
template <typename Key, typename Value, typename Hash = DefaultHash<Key>>
using LRUHashMapWithSavedHash = LRUHashMapImpl<Key, Value, Hash, true>;

View File

@ -99,6 +99,11 @@ ThreadStatus::~ThreadStatus()
/// We've already allocated a little bit more than the limit and cannot track it in the thread memory tracker or its parent.
}
#if !defined(ARCADIA_BUILD)
/// It may cause segfault if query_context was destroyed, but was not detached
assert((!query_context && query_id.empty()) || (query_context && query_id == query_context->getCurrentQueryId()));
#endif
if (deleter)
deleter();
current_thread = nullptr;

View File

@ -201,7 +201,7 @@ public:
void setFatalErrorCallback(std::function<void()> callback);
void onFatalError();
/// Sets query context for current thread and its thread group
/// Sets query context for current master thread and its thread group
/// NOTE: query_context have to be alive until detachQuery() is called
void attachQueryContext(Context & query_context);

View File

@ -38,6 +38,9 @@ target_link_libraries (arena_with_free_lists PRIVATE dbms)
add_executable (pod_array pod_array.cpp)
target_link_libraries (pod_array PRIVATE clickhouse_common_io)
add_executable (lru_hash_map_perf lru_hash_map_perf.cpp)
target_link_libraries (lru_hash_map_perf PRIVATE clickhouse_common_io)
add_executable (thread_creation_latency thread_creation_latency.cpp)
target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io)

View File

@ -0,0 +1,161 @@
#include <iomanip>
#include <iostream>
#include <Common/HashTable/LRUHashMap.h>
#include <gtest/gtest.h>
template<typename LRUHashMap>
std::vector<typename LRUHashMap::Key> convertToVector(const LRUHashMap & map)
{
std::vector<typename LRUHashMap::Key> result;
result.reserve(map.size());
for (auto & node: map)
result.emplace_back(node.getKey());
return result;
}
void testInsert(size_t elements_to_insert_size, size_t map_size)
{
using LRUHashMap = LRUHashMap<int, int>;
LRUHashMap map(map_size);
std::vector<int> expected;
for (size_t i = 0; i < elements_to_insert_size; ++i)
map.insert(i, i);
for (size_t i = elements_to_insert_size - map_size; i < elements_to_insert_size; ++i)
expected.emplace_back(i);
std::vector<int> actual = convertToVector(map);
ASSERT_EQ(map.size(), actual.size());
ASSERT_EQ(actual, expected);
}
TEST(LRUHashMap, Insert)
{
{
using LRUHashMap = LRUHashMap<int, int>;
LRUHashMap map(3);
map.emplace(1, 1);
map.insert(2, 2);
int v = 3;
map.insert(3, v);
map.emplace(4, 4);
std::vector<int> expected = { 2, 3, 4 };
std::vector<int> actual = convertToVector(map);
ASSERT_EQ(actual, expected);
}
testInsert(1200000, 1200000);
testInsert(10, 5);
testInsert(1200000, 2);
testInsert(1200000, 1);
}
TEST(LRUHashMap, GetModify)
{
using LRUHashMap = LRUHashMap<int, int>;
LRUHashMap map(3);
map.emplace(1, 1);
map.emplace(2, 2);
map.emplace(3, 3);
map.get(3) = 4;
std::vector<int> expected = { 1, 2, 4 };
std::vector<int> actual;
actual.reserve(map.size());
for (auto & node : map)
actual.emplace_back(node.getMapped());
ASSERT_EQ(actual, expected);
}
TEST(LRUHashMap, SetRecentKeyToTop)
{
using LRUHashMap = LRUHashMap<int, int>;
LRUHashMap map(3);
map.emplace(1, 1);
map.emplace(2, 2);
map.emplace(3, 3);
map.emplace(1, 4);
std::vector<int> expected = { 2, 3, 1 };
std::vector<int> actual = convertToVector(map);
ASSERT_EQ(actual, expected);
}
TEST(LRUHashMap, GetRecentKeyToTop)
{
using LRUHashMap = LRUHashMap<int, int>;
LRUHashMap map(3);
map.emplace(1, 1);
map.emplace(2, 2);
map.emplace(3, 3);
map.get(1);
std::vector<int> expected = { 2, 3, 1 };
std::vector<int> actual = convertToVector(map);
ASSERT_EQ(actual, expected);
}
TEST(LRUHashMap, Contains)
{
using LRUHashMap = LRUHashMap<int, int>;
LRUHashMap map(3);
map.emplace(1, 1);
map.emplace(2, 2);
map.emplace(3, 3);
ASSERT_TRUE(map.contains(1));
ASSERT_TRUE(map.contains(2));
ASSERT_TRUE(map.contains(3));
ASSERT_EQ(map.size(), 3);
map.erase(1);
map.erase(2);
map.erase(3);
ASSERT_EQ(map.size(), 0);
ASSERT_FALSE(map.contains(1));
ASSERT_FALSE(map.contains(2));
ASSERT_FALSE(map.contains(3));
}
TEST(LRUHashMap, Clear)
{
using LRUHashMap = LRUHashMap<int, int>;
LRUHashMap map(3);
map.emplace(1, 1);
map.emplace(2, 2);
map.emplace(3, 3);
map.clear();
std::vector<int> expected = {};
std::vector<int> actual = convertToVector(map);
ASSERT_EQ(actual, expected);
ASSERT_EQ(map.size(), 0);
}

View File

@ -0,0 +1,244 @@
#include <vector>
#include <list>
#include <map>
#include <random>
#include <pcg_random.hpp>
#include <Common/Stopwatch.h>
#include <Common/HashTable/LRUHashMap.h>
template<class Key, class Value>
class LRUHashMapBasic
{
public:
using key_type = Key;
using value_type = Value;
using list_type = std::list<key_type>;
using node = std::pair<value_type, typename list_type::iterator>;
using map_type = std::unordered_map<key_type, node, DefaultHash<Key>>;
LRUHashMapBasic(size_t max_size_, bool preallocated)
: hash_map(preallocated ? max_size_ : 32)
, max_size(max_size_)
{
}
void insert(const Key &key, const Value &value)
{
auto it = hash_map.find(key);
if (it == hash_map.end())
{
if (size() >= max_size)
{
auto iterator_to_remove = list.begin();
hash_map.erase(*iterator_to_remove);
list.erase(iterator_to_remove);
}
list.push_back(key);
hash_map[key] = std::make_pair(value, --list.end());
}
else
{
auto & [value_to_update, iterator_in_list_to_update] = it->second;
list.splice(list.end(), list, iterator_in_list_to_update);
iterator_in_list_to_update = list.end();
value_to_update = value;
}
}
value_type & get(const key_type &key)
{
auto iterator_in_map = hash_map.find(key);
assert(iterator_in_map != hash_map.end());
auto & [value_to_return, iterator_in_list_to_update] = iterator_in_map->second;
list.splice(list.end(), list, iterator_in_list_to_update);
iterator_in_list_to_update = list.end();
return value_to_return;
}
const value_type & get(const key_type & key) const
{
return const_cast<std::decay_t<decltype(*this)> *>(this)->get(key);
}
size_t getMaxSize() const
{
return max_size;
}
size_t size() const
{
return hash_map.size();
}
bool empty() const
{
return hash_map.empty();
}
bool contains(const Key & key)
{
return hash_map.find(key) != hash_map.end();
}
void clear()
{
hash_map.clear();
list.clear();
}
private:
map_type hash_map;
list_type list;
size_t max_size;
};
std::vector<UInt64> generateNumbersToInsert(size_t numbers_to_insert_size)
{
std::vector<UInt64> numbers;
numbers.reserve(numbers_to_insert_size);
std::random_device rd;
pcg64 gen(rd());
UInt64 min = std::numeric_limits<UInt64>::min();
UInt64 max = std::numeric_limits<UInt64>::max();
auto distribution = std::uniform_int_distribution<UInt64>(min, max);
for (size_t i = 0; i < numbers_to_insert_size; ++i)
{
UInt64 number = distribution(gen);
numbers.emplace_back(number);
}
return numbers;
}
void testInsertElementsIntoHashMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
{
size_t numbers_to_insert_size = numbers_to_insert.size();
std::cout << "TestInsertElementsIntoHashMap preallocated map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
std::cout << std::endl;
HashMap<UInt64, UInt64> hash_map(preallocated ? map_size : 32);
Stopwatch watch;
for (size_t i = 0; i < numbers_to_insert_size; ++i)
hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
UInt64 summ = 0;
for (size_t i = 0; i < numbers_to_insert_size; ++i)
{
auto * it = hash_map.find(numbers_to_insert[i]);
if (it)
summ += it->getMapped();
}
std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
}
void testInsertElementsIntoStandardMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
{
size_t numbers_to_insert_size = numbers_to_insert.size();
std::cout << "TestInsertElementsIntoStandardMap map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
std::cout << std::endl;
std::unordered_map<UInt64, UInt64> hash_map(preallocated ? map_size : 32);
Stopwatch watch;
for (size_t i = 0; i < numbers_to_insert_size; ++i)
hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
UInt64 summ = 0;
for (size_t i = 0; i < numbers_to_insert_size; ++i)
{
auto it = hash_map.find(numbers_to_insert[i]);
if (it != hash_map.end())
summ += it->second;
}
std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
}
template<typename LRUCache>
UInt64 testInsertIntoEmptyCache(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
{
size_t numbers_to_insert_size = numbers_to_insert.size();
std::cout << "Test testInsertPreallocated preallocated map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
std::cout << std::endl;
LRUCache cache(map_size, preallocated);
Stopwatch watch;
for (size_t i = 0; i < numbers_to_insert_size; ++i)
{
cache.insert(numbers_to_insert[i], numbers_to_insert[i]);
}
std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
UInt64 summ = 0;
for (size_t i = 0; i < numbers_to_insert_size; ++i)
if (cache.contains(numbers_to_insert[i]))
summ += cache.get(numbers_to_insert[i]);
std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
return summ;
}
int main(int argc, char ** argv)
{
(void)(argc);
(void)(argv);
size_t hash_map_size = 1200000;
size_t numbers_to_insert_size = 12000000;
std::vector<UInt64> numbers = generateNumbersToInsert(numbers_to_insert_size);
std::cout << "Test insert into HashMap preallocated=0" << std::endl;
testInsertElementsIntoHashMap(hash_map_size, numbers, false);
std::cout << std::endl;
std::cout << "Test insert into HashMap preallocated=1" << std::endl;
testInsertElementsIntoHashMap(hash_map_size, numbers, true);
std::cout << std::endl;
std::cout << "Test LRUHashMap preallocated=0" << std::endl;
testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, false);
std::cout << std::endl;
std::cout << "Test LRUHashMap preallocated=1" << std::endl;
testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, true);
std::cout << std::endl;
std::cout << "Test LRUHashMapBasic preallocated=0" << std::endl;
testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, false);
std::cout << std::endl;
std::cout << "Test LRUHashMapBasic preallocated=1" << std::endl;
testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, true);
std::cout << std::endl;
return 0;
}
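The reference implementation above pairs an std::list that stores keys in recency order with an unordered_map from key to (value, list iterator): touching a key splices its list node to the back, and eviction pops from the front. As a self-contained illustration of that scheme, here is a minimal standalone sketch (independent of the ClickHouse headers, not the benchmarked class itself); the eviction order matches what the gtest cases above expect.

    #include <cassert>
    #include <list>
    #include <unordered_map>

    /// Minimal LRU cache: the least recently used key sits at the front of the list.
    template <typename Key, typename Value>
    class TinyLRU
    {
    public:
        explicit TinyLRU(size_t max_size_) : max_size(max_size_) {}

        void put(const Key & key, const Value & value)
        {
            auto it = map.find(key);
            if (it != map.end())
            {
                it->second.first = value;
                touch(it->second.second);
                return;
            }
            if (map.size() >= max_size)
            {
                map.erase(order.front());   /// evict the least recently used key
                order.pop_front();
            }
            order.push_back(key);
            map.emplace(key, std::make_pair(value, std::prev(order.end())));
        }

        Value * get(const Key & key)
        {
            auto it = map.find(key);
            if (it == map.end())
                return nullptr;
            touch(it->second.second);
            return &it->second.first;
        }

    private:
        void touch(typename std::list<Key>::iterator it)
        {
            /// splice keeps the iterator valid and pointing at the same node.
            order.splice(order.end(), order, it);
        }

        std::list<Key> order;
        std::unordered_map<Key, std::pair<Value, typename std::list<Key>::iterator>> map;
        size_t max_size;
    };

    int main()
    {
        TinyLRU<int, int> cache(3);
        cache.put(1, 1);
        cache.put(2, 2);
        cache.put(3, 3);
        cache.get(1);        /// 1 becomes the most recently used
        cache.put(4, 4);     /// evicts 2, the least recently used
        assert(cache.get(2) == nullptr);
        assert(cache.get(1) != nullptr);
    }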

View File

@ -1,18 +1,24 @@
#include <boost/program_options.hpp>
#include <DataStreams/IBlockOutputStream.h>
#include <DataStreams/AsynchronousBlockInputStream.h>
#include <DataTypes/DataTypeFactory.h>
#include <Storages/IStorage.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/ConstraintsDescription.h>
#include <Interpreters/Context.h>
#include <IO/copyData.h>
#include <Interpreters/DatabaseCatalog.h>
#include <IO/ReadBufferFromIStream.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/LimitReadBuffer.h>
#include <Storages/StorageMemory.h>
#include <Processors/Sources/SourceFromInputStream.h>
#include <Processors/Pipe.h>
#include <Processors/Sources/SinkToOutputStream.h>
#include <Processors/Executors/PipelineExecutor.h>
#include <Processors/Sources/SourceFromInputStream.h>
#include <Core/ExternalTable.h>
#include <Poco/Net/MessageHeader.h>
#include <Formats/FormatFactory.h>
#include <common/find_symbols.h>
@ -39,7 +45,7 @@ ExternalTableDataPtr BaseExternalTable::getData(const Context & context)
return data;
}
void BaseExternalTable::clean()
void BaseExternalTable::clear()
{
name.clear();
file.clear();
@ -49,17 +55,6 @@ void BaseExternalTable::clean()
read_buffer.reset();
}
/// Function for debugging information output
void BaseExternalTable::write()
{
std::cerr << "file " << file << std::endl;
std::cerr << "name " << name << std::endl;
std::cerr << "format " << format << std::endl;
std::cerr << "structure: \n";
for (const auto & elem : structure)
std::cerr << '\t' << elem.first << ' ' << elem.second << std::endl;
}
void BaseExternalTable::parseStructureFromStructureField(const std::string & argument)
{
std::vector<std::string> vals;
@ -182,7 +177,7 @@ void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header,
executor->execute(/*num_threads = */ 1);
/// We are ready to receive the next file, for this we clear all the information received
clean();
clear();
}
}

View File

@ -61,10 +61,7 @@ public:
protected:
/// Clear all accumulated information
void clean();
/// Function for debugging information output
void write();
void clear();
/// Construct the `structure` vector from the text field `structure`
virtual void parseStructureFromStructureField(const std::string & argument);

View File

@ -86,8 +86,6 @@ class IColumn;
\
M(Bool, optimize_move_to_prewhere, true, "Allows disabling WHERE to PREWHERE optimization in SELECT queries from MergeTree.", 0) \
\
M(Milliseconds, insert_in_memory_parts_timeout, 600000, "", 0) \
\
M(UInt64, replication_alter_partitions_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) \
M(UInt64, replication_alter_columns_timeout, 60, "Wait for actions to change the table structure within the specified number of seconds. 0 - wait unlimited time.", 0) \
\
@ -377,6 +375,7 @@ class IColumn;
M(Bool, optimize_respect_aliases, true, "If it is set to true, it will respect aliases in WHERE/GROUP BY/ORDER BY, that will help with partition pruning/secondary indexes/optimize_aggregation_in_order/optimize_read_in_order/optimize_trivial_count", 0) \
M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
M(Bool, optimize_move_functions_out_of_any, false, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that are semantically equal to count() as count().", 0) \
M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \
M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
M(Bool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \
@ -420,6 +419,7 @@ class IColumn;
M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
\
M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) functions to countIf() when logically equivalent", 0) \
M(UInt64, insert_shard_id, 0, "If non-zero, when inserting into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to the `shards_number` of the corresponding distributed table", 0) \
M(Bool, allow_experimental_query_deduplication, false, "Allow sending parts' UUIDs for a query in order to deduplicate data parts if any", 0) \
\
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \

View File

@ -1,13 +1,27 @@
#include <DataStreams/AddingDefaultBlockOutputStream.h>
#include <Interpreters/addMissingDefaults.h>
#include <Interpreters/ExpressionActions.h>
namespace DB
{
AddingDefaultBlockOutputStream::AddingDefaultBlockOutputStream(
const BlockOutputStreamPtr & output_,
const Block & header_,
const ColumnsDescription & columns_,
const Context & context_)
: output(output_), header(header_)
{
auto dag = addMissingDefaults(header_, output->getHeader().getNamesAndTypesList(), columns_, context_);
adding_defaults_actions = std::make_shared<ExpressionActions>(std::move(dag));
}
void AddingDefaultBlockOutputStream::write(const Block & block)
{
output->write(addMissingDefaults(block, output_block.getNamesAndTypesList(), columns, context));
auto copy = block;
adding_defaults_actions->execute(copy);
output->write(copy);
}
void AddingDefaultBlockOutputStream::flush()

View File

@ -8,6 +8,9 @@
namespace DB
{
class ExpressionActions;
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
class Context;
/** This stream adds three types of columns into block
@ -22,13 +25,8 @@ public:
AddingDefaultBlockOutputStream(
const BlockOutputStreamPtr & output_,
const Block & header_,
const Block & output_block_,
const ColumnsDescription & columns_,
const Context & context_)
: output(output_), header(header_), output_block(output_block_),
columns(columns_), context(context_)
{
}
const Context & context_);
Block getHeader() const override { return header; }
void write(const Block & block) override;
@ -41,10 +39,7 @@ public:
private:
BlockOutputStreamPtr output;
const Block header;
/// Blocks after this stream should have this structure
const Block output_block;
const ColumnsDescription columns;
const Context & context;
ExpressionActionsPtr adding_defaults_actions;
};

View File

@ -171,7 +171,12 @@ Block AddingDefaultsBlockInputStream::readImpl()
if (!evaluate_block.columns())
evaluate_block.insert({ColumnConst::create(ColumnUInt8::create(1, 0), res.rows()), std::make_shared<DataTypeUInt8>(), "_dummy"});
evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), columns, context, false);
auto dag = evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), columns, context, false);
if (dag)
{
auto actions = std::make_shared<ExpressionActions>(std::move(dag));
actions->execute(evaluate_block);
}
std::unordered_map<size_t, MutableColumnPtr> mixed_columns;

View File

@ -5,7 +5,6 @@
#include <Interpreters/Context.h>
#include <Core/Settings.h>
#include <DataStreams/MaterializingBlockOutputStream.h>
#include <DataStreams/SquashingBlockOutputStream.h>
#include <DataStreams/NativeBlockInputStream.h>
#include <Formats/FormatSettings.h>
#include <Processors/Formats/IRowInputFormat.h>

View File

@ -116,8 +116,35 @@ public:
DataTypes argument_types = {nested_type};
Array params_row;
AggregateFunctionProperties properties;
AggregateFunctionPtr bitmap_function = AggregateFunctionFactory::instance().get(
AggregateFunctionGroupBitmapData<UInt32>::name(), argument_types, params_row, properties);
AggregateFunctionPtr bitmap_function;
WhichDataType which(nested_type);
if (which.isUInt8())
bitmap_function = AggregateFunctionFactory::instance().get(
AggregateFunctionGroupBitmapData<UInt8>::name(), argument_types, params_row, properties);
else if (which.isUInt16())
bitmap_function = AggregateFunctionFactory::instance().get(
AggregateFunctionGroupBitmapData<UInt16>::name(), argument_types, params_row, properties);
else if (which.isUInt32())
bitmap_function = AggregateFunctionFactory::instance().get(
AggregateFunctionGroupBitmapData<UInt32>::name(), argument_types, params_row, properties);
else if (which.isUInt64())
bitmap_function = AggregateFunctionFactory::instance().get(
AggregateFunctionGroupBitmapData<UInt64>::name(), argument_types, params_row, properties);
else if (which.isInt8())
bitmap_function = AggregateFunctionFactory::instance().get(
AggregateFunctionGroupBitmapData<Int8>::name(), argument_types, params_row, properties);
else if (which.isInt16())
bitmap_function = AggregateFunctionFactory::instance().get(
AggregateFunctionGroupBitmapData<Int16>::name(), argument_types, params_row, properties);
else if (which.isInt32())
bitmap_function = AggregateFunctionFactory::instance().get(
AggregateFunctionGroupBitmapData<Int32>::name(), argument_types, params_row, properties);
else if (which.isInt64())
bitmap_function = AggregateFunctionFactory::instance().get(
AggregateFunctionGroupBitmapData<Int64>::name(), argument_types, params_row, properties);
else
throw Exception(
"Unexpected type " + array_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeAggregateFunction>(bitmap_function, argument_types, params_row);
}
@ -141,6 +168,14 @@ public:
return executeBitmapData<UInt32>(argument_types, arguments);
else if (which.isUInt64())
return executeBitmapData<UInt64>(argument_types, arguments);
else if (which.isInt8())
return executeBitmapData<Int8>(argument_types, arguments);
else if (which.isInt16())
return executeBitmapData<Int16>(argument_types, arguments);
else if (which.isInt32())
return executeBitmapData<Int32>(argument_types, arguments);
else if (which.isInt64())
return executeBitmapData<Int64>(argument_types, arguments);
else
throw Exception(
"Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -161,7 +196,7 @@ private:
Array params_row;
AggregateFunctionProperties properties;
AggregateFunctionPtr bitmap_function = AggregateFunctionFactory::instance().get(
AggregateFunctionGroupBitmapData<UInt32>::name(), argument_types, params_row, properties);
AggregateFunctionGroupBitmapData<T>::name(), argument_types, params_row, properties);
auto col_to = ColumnAggregateFunction::create(bitmap_function);
col_to->reserve(offsets.size());
@ -197,7 +232,7 @@ public:
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
const DataTypeAggregateFunction * bitmap_type = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
if (!(bitmap_type && bitmap_type->getFunctionName() == "groupBitmap"))
throw Exception(
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -230,6 +265,14 @@ public:
executeIntType<UInt32>(arguments, input_rows_count, res_data, res_offsets);
else if (which.isUInt64())
executeIntType<UInt64>(arguments, input_rows_count, res_data, res_offsets);
else if (which.isInt8())
executeIntType<Int8>(arguments, input_rows_count, res_data, res_offsets);
else if (which.isInt16())
executeIntType<Int16>(arguments, input_rows_count, res_data, res_offsets);
else if (which.isInt32())
executeIntType<Int32>(arguments, input_rows_count, res_data, res_offsets);
else if (which.isInt64())
executeIntType<Int64>(arguments, input_rows_count, res_data, res_offsets);
else
throw Exception(
"Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -279,7 +322,7 @@ public:
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
const DataTypeAggregateFunction * bitmap_type = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
if (!(bitmap_type && bitmap_type->getFunctionName() == "groupBitmap"))
throw Exception(
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -312,6 +355,14 @@ public:
return executeIntType<UInt32>(arguments, input_rows_count);
else if (which.isUInt64())
return executeIntType<UInt64>(arguments, input_rows_count);
else if (which.isInt8())
return executeIntType<Int8>(arguments, input_rows_count);
else if (which.isInt16())
return executeIntType<Int16>(arguments, input_rows_count);
else if (which.isInt32())
return executeIntType<Int32>(arguments, input_rows_count);
else if (which.isInt64())
return executeIntType<Int64>(arguments, input_rows_count);
else
throw Exception(
"Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -384,7 +435,11 @@ struct BitmapSubsetInRangeImpl
public:
static constexpr auto name = "bitmapSubsetInRange";
template <typename T>
static void apply(const AggregateFunctionGroupBitmapData<T> & bitmap_data_0, UInt64 range_start, UInt64 range_end, AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
static void apply(
const AggregateFunctionGroupBitmapData<T> & bitmap_data_0,
UInt64 range_start,
UInt64 range_end,
AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
{
bitmap_data_0.rbs.rb_range(range_start, range_end, bitmap_data_2.rbs);
}
@ -395,7 +450,11 @@ struct BitmapSubsetLimitImpl
public:
static constexpr auto name = "bitmapSubsetLimit";
template <typename T>
static void apply(const AggregateFunctionGroupBitmapData<T> & bitmap_data_0, UInt64 range_start, UInt64 range_end, AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
static void apply(
const AggregateFunctionGroupBitmapData<T> & bitmap_data_0,
UInt64 range_start,
UInt64 range_end,
AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
{
bitmap_data_0.rbs.rb_limit(range_start, range_end, bitmap_data_2.rbs);
}
@ -421,7 +480,7 @@ public:
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
const DataTypeAggregateFunction * bitmap_type = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
if (!(bitmap_type && bitmap_type->getFunctionName() == "groupBitmap"))
throw Exception(
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -456,6 +515,14 @@ public:
return executeIntType<UInt32>(arguments, input_rows_count);
else if (which.isUInt64())
return executeIntType<UInt64>(arguments, input_rows_count);
else if (which.isInt8())
return executeIntType<Int8>(arguments, input_rows_count);
else if (which.isInt16())
return executeIntType<Int16>(arguments, input_rows_count);
else if (which.isInt32())
return executeIntType<Int32>(arguments, input_rows_count);
else if (which.isInt64())
return executeIntType<Int64>(arguments, input_rows_count);
else
throw Exception(
"Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -579,7 +646,7 @@ public:
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
const auto * bitmap_type = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
if (!(bitmap_type && bitmap_type->getFunctionName() == "groupBitmap"))
throw Exception(
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -604,6 +671,14 @@ public:
executeIntType<UInt32>(arguments, input_rows_count, vec_to);
else if (which.isUInt64())
executeIntType<UInt64>(arguments, input_rows_count, vec_to);
else if (which.isInt8())
executeIntType<Int8>(arguments, input_rows_count, vec_to);
else if (which.isInt16())
executeIntType<Int16>(arguments, input_rows_count, vec_to);
else if (which.isInt32())
executeIntType<Int32>(arguments, input_rows_count, vec_to);
else if (which.isInt64())
executeIntType<Int64>(arguments, input_rows_count, vec_to);
else
throw Exception(
"Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -743,15 +818,15 @@ public:
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
const auto * bitmap_type0 = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
if (!(bitmap_type0 && bitmap_type0->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
if (!(bitmap_type0 && bitmap_type0->getFunctionName() == "groupBitmap"))
throw Exception(
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
WhichDataType which(arguments[1].get());
if (!(which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64()))
if (!which.isNativeInt() && !which.isNativeUInt())
throw Exception(
"Second argument for function " + getName() + " must be one of [UInt8, UInt16, UInt32, UInt64] but it has type " + arguments[1]->getName() + ".",
"Second argument for function " + getName() + " must be an native integer type but it has type " + arguments[1]->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeNumber<UInt8>>();
@ -775,6 +850,14 @@ public:
executeIntType<UInt32>(arguments, input_rows_count, vec_to);
else if (which.isUInt64())
executeIntType<UInt64>(arguments, input_rows_count, vec_to);
else if (which.isInt8())
executeIntType<Int8>(arguments, input_rows_count, vec_to);
else if (which.isInt16())
executeIntType<Int16>(arguments, input_rows_count, vec_to);
else if (which.isInt32())
executeIntType<Int32>(arguments, input_rows_count, vec_to);
else if (which.isInt64())
executeIntType<Int64>(arguments, input_rows_count, vec_to);
else
throw Exception(
"Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -839,15 +922,15 @@ public:
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
const auto * bitmap_type0 = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
if (!(bitmap_type0 && bitmap_type0->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
if (!(bitmap_type0 && bitmap_type0->getFunctionName() == "groupBitmap"))
throw Exception(
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const auto * bitmap_type1 = typeid_cast<const DataTypeAggregateFunction *>(arguments[1].get());
if (!(bitmap_type1 && bitmap_type1->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
if (!(bitmap_type1 && bitmap_type1->getFunctionName() == "groupBitmap"))
throw Exception(
"Second argument for function " + getName() + " must be a bitmap but it has type " + arguments[1]->getName() + ".",
"Second argument for function " + getName() + " must be a bitmap but it has type " + arguments[1]->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (bitmap_type0->getArgumentsDataTypes()[0]->getTypeId() != bitmap_type1->getArgumentsDataTypes()[0]->getTypeId())
@ -877,6 +960,14 @@ public:
executeIntType<UInt32>(arguments, input_rows_count, vec_to);
else if (which.isUInt64())
executeIntType<UInt64>(arguments, input_rows_count, vec_to);
else if (which.isInt8())
executeIntType<Int8>(arguments, input_rows_count, vec_to);
else if (which.isInt16())
executeIntType<Int16>(arguments, input_rows_count, vec_to);
else if (which.isInt32())
executeIntType<Int32>(arguments, input_rows_count, vec_to);
else if (which.isInt64())
executeIntType<Int64>(arguments, input_rows_count, vec_to);
else
throw Exception(
"Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -974,15 +1065,15 @@ public:
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
const auto * bitmap_type0 = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
if (!(bitmap_type0 && bitmap_type0->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
if (!(bitmap_type0 && bitmap_type0->getFunctionName() == "groupBitmap"))
throw Exception(
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const auto * bitmap_type1 = typeid_cast<const DataTypeAggregateFunction *>(arguments[1].get());
if (!(bitmap_type1 && bitmap_type1->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
if (!(bitmap_type1 && bitmap_type1->getFunctionName() == "groupBitmap"))
throw Exception(
"Second argument for function " + getName() + " must be a bitmap but it has type " + arguments[1]->getName() + ".",
"Second argument for function " + getName() + " must be a bitmap but it has type " + arguments[1]->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (bitmap_type0->getArgumentsDataTypes()[0]->getTypeId() != bitmap_type1->getArgumentsDataTypes()[0]->getTypeId())
@ -1009,6 +1100,14 @@ public:
return executeBitmapData<UInt32>(arguments, input_rows_count);
else if (which.isUInt64())
return executeBitmapData<UInt64>(arguments, input_rows_count);
else if (which.isInt8())
return executeBitmapData<Int8>(arguments, input_rows_count);
else if (which.isInt16())
return executeBitmapData<Int16>(arguments, input_rows_count);
else if (which.isInt32())
return executeBitmapData<Int32>(arguments, input_rows_count);
else if (which.isInt64())
return executeBitmapData<Int64>(arguments, input_rows_count);
else
throw Exception(
"Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

View File

@ -83,9 +83,9 @@ struct ArrayDifferenceImpl
}
res_ptr = ColumnArray::create(std::move(res_nested), array.getOffsetsPtr());
return true;
}
static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
{
ColumnPtr res;
@ -107,7 +107,6 @@ struct ArrayDifferenceImpl
else
throw Exception("Unexpected column for arrayDifference: " + mapped->getName(), ErrorCodes::ILLEGAL_COLUMN);
}
};
struct NameArrayDifference { static constexpr auto name = "arrayDifference"; };

View File

@ -27,6 +27,8 @@ public:
return name;
}
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
size_t getNumberOfArguments() const override
{
return 1;

View File

@ -1,3 +1,4 @@
#include <Functions/replicate.h>
#include <Functions/IFunctionImpl.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
@ -11,60 +12,50 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
}
namespace
DataTypePtr FunctionReplicate::getReturnTypeImpl(const DataTypes & arguments) const
{
if (arguments.size() < 2)
throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
"Function {} expect at least two arguments, got {}", getName(), arguments.size());
/** Creates an array, multiplying the column (the first argument) by the number of elements in the array (the second argument).
*/
class FunctionReplicate : public IFunction
{
public:
static constexpr auto name = "replicate";
static FunctionPtr create(const Context &)
for (size_t i = 1; i < arguments.size(); ++i)
{
return std::make_shared<FunctionReplicate>();
}
String getName() const override
{
return name;
}
size_t getNumberOfArguments() const override
{
return 2;
}
bool useDefaultImplementationForNulls() const override { return false; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[1].get());
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[i].get());
if (!array_type)
throw Exception("Second argument for function " + getName() + " must be array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeArray>(arguments[0]);
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Argument {} for function {} must be array.",
i + 1, getName());
}
return std::make_shared<DataTypeArray>(arguments[0]);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
ColumnPtr FunctionReplicate::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const
{
ColumnPtr first_column = arguments[0].column;
ColumnPtr offsets;
for (size_t i = 1; i < arguments.size(); ++i)
{
ColumnPtr first_column = arguments[0].column;
const ColumnArray * array_column = checkAndGetColumn<ColumnArray>(arguments[1].column.get());
const ColumnArray * array_column = checkAndGetColumn<ColumnArray>(arguments[i].column.get());
ColumnPtr temp_column;
if (!array_column)
{
const auto * const_array_column = checkAndGetColumnConst<ColumnArray>(arguments[1].column.get());
const auto * const_array_column = checkAndGetColumnConst<ColumnArray>(arguments[i].column.get());
if (!const_array_column)
throw Exception("Unexpected column for replicate", ErrorCodes::ILLEGAL_COLUMN);
temp_column = const_array_column->convertToFullColumn();
array_column = checkAndGetColumn<ColumnArray>(temp_column.get());
}
return ColumnArray::create(first_column->replicate(array_column->getOffsets())->convertToFullColumnIfConst(), array_column->getOffsetsPtr());
}
};
if (!offsets || offsets->empty())
offsets = array_column->getOffsetsPtr();
}
const auto & offsets_data = assert_cast<const ColumnArray::ColumnOffsets &>(*offsets).getData();
return ColumnArray::create(first_column->replicate(offsets_data)->convertToFullColumnIfConst(), offsets);
}
void registerFunctionReplicate(FunctionFactory & factory)

src/Functions/replicate.h (new file, 40 lines)
View File

@ -0,0 +1,40 @@
#pragma once
#include <Functions/IFunctionImpl.h>
namespace DB
{
class Context;
/// Creates an array, multiplying the column (the first argument) by the number of elements in the array (the second argument).
/// Function may accept more than two arguments. If so, the first array with non-empty offsets is chosen.
class FunctionReplicate : public IFunction
{
public:
static constexpr auto name = "replicate";
static FunctionPtr create(const Context &)
{
return std::make_shared<FunctionReplicate>();
}
String getName() const override
{
return name;
}
size_t getNumberOfArguments() const override
{
return 0;
}
bool isVariadic() const override { return true; }
bool useDefaultImplementationForNulls() const override { return false; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override;
};
}
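As the header comment describes, replicate repeats each value of the first column according to the sizes taken from an array column, and with several array arguments the first one with non-empty offsets wins. A standalone sketch of the replication-by-offsets step itself (plain vectors instead of IColumn; offsets are cumulative row ends, as in ClickHouse array columns):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    /// offsets[i] is the cumulative number of array elements up to and including row i,
    /// so row i spans offsets[i] - offsets[i - 1] elements.
    std::vector<int> replicateByOffsets(const std::vector<int> & values, const std::vector<size_t> & offsets)
    {
        assert(values.size() == offsets.size());
        std::vector<int> result;
        result.reserve(offsets.empty() ? 0 : offsets.back());

        size_t prev_offset = 0;
        for (size_t row = 0; row < values.size(); ++row)
        {
            for (size_t j = prev_offset; j < offsets[row]; ++j)
                result.push_back(values[row]);
            prev_offset = offsets[row];
        }
        return result;
    }

    int main()
    {
        /// Value 7 paired with a 2-element array, value 9 with a 3-element array.
        std::vector<int> values = {7, 9};
        std::vector<size_t> offsets = {2, 5};

        std::vector<int> replicated = replicateByOffsets(values, offsets);
        assert((replicated == std::vector<int>{7, 7, 9, 9, 9}));
    }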

View File

@ -13,7 +13,7 @@ namespace DB
*
* While using this object, no other allocations in arena are possible.
*/
class WriteBufferFromArena : public WriteBuffer
class WriteBufferFromArena final : public WriteBuffer
{
private:
Arena & arena;

View File

@ -39,16 +39,26 @@ ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_)
for (const auto & input : inputs_)
{
if (input.column && isColumnConst(*input.column))
{
addInput(input, true);
/// Here we also add the column itself.
/// It allows removing an input which is actually constant (after projection).
/// Also, some transforms from the query pipeline may randomly materialize constants,
/// without any respect to the header structure. So this is a way to drop the materialized column and use
/// the constant value from the header.
/// We cannot remove such an input right now because input positions are important in some cases.
addColumn(input, true);
}
else
addInput(input.name, input.type, true);
}
}
ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace)
ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace, bool add_to_index)
{
auto it = index.find(node.result_name);
if (it != index.end() && !can_replace)
if (it != index.end() && !can_replace && add_to_index)
throw Exception("Column '" + node.result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN);
auto & res = nodes.emplace_back(std::move(node));
@ -56,7 +66,8 @@ ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace)
if (res.type == ActionType::INPUT)
inputs.emplace_back(&res);
index.replace(&res);
if (add_to_index)
index.replace(&res);
return res;
}
@ -90,7 +101,7 @@ const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column, bool
return addNode(std::move(node), can_replace);
}
const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column, bool can_replace)
const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column, bool can_replace, bool materialize)
{
if (!column.column)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add column {} because it is nullptr", column.name);
@ -101,7 +112,22 @@ const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column, boo
node.result_name = std::move(column.name);
node.column = std::move(column.column);
return addNode(std::move(node), can_replace);
auto * res = &addNode(std::move(node), can_replace, !materialize);
if (materialize)
{
auto & name = res->result_name;
FunctionOverloadResolverPtr func_builder_materialize =
std::make_shared<FunctionOverloadResolverAdaptor>(
std::make_unique<DefaultOverloadResolver>(
std::make_shared<FunctionMaterialize>()));
res = &addFunction(func_builder_materialize, {res}, {}, true, false);
res = &addAlias(*res, name, true);
}
return *res;
}
const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::string alias, bool can_replace)
@ -116,7 +142,6 @@ ActionsDAG::Node & ActionsDAG::addAlias(Node & child, std::string alias, bool ca
node.result_type = child.result_type;
node.result_name = std::move(alias);
node.column = child.column;
node.allow_constant_folding = child.allow_constant_folding;
node.children.emplace_back(&child);
return addNode(std::move(node), can_replace);
@ -143,7 +168,8 @@ const ActionsDAG::Node & ActionsDAG::addFunction(
const FunctionOverloadResolverPtr & function,
const Names & argument_names,
std::string result_name,
const Context & context [[maybe_unused]])
const Context & context [[maybe_unused]],
bool can_replace)
{
const auto & all_settings = context.getSettingsRef();
settings.max_temporary_columns = all_settings.max_temporary_columns;
@ -162,14 +188,15 @@ const ActionsDAG::Node & ActionsDAG::addFunction(
for (const auto & name : argument_names)
children.push_back(&getNode(name));
return addFunction(function, children, std::move(result_name), false);
return addFunction(function, children, std::move(result_name), can_replace);
}
ActionsDAG::Node & ActionsDAG::addFunction(
const FunctionOverloadResolverPtr & function,
Inputs children,
std::string result_name,
bool can_replace)
bool can_replace,
bool add_to_index)
{
size_t num_arguments = children.size();
@ -184,7 +211,6 @@ ActionsDAG::Node & ActionsDAG::addFunction(
for (size_t i = 0; i < num_arguments; ++i)
{
auto & child = *node.children[i];
node.allow_constant_folding = node.allow_constant_folding && child.allow_constant_folding;
ColumnWithTypeAndName argument;
argument.column = child.column;
@ -250,7 +276,7 @@ ActionsDAG::Node & ActionsDAG::addFunction(
node.result_name = std::move(result_name);
return addNode(std::move(node), can_replace);
return addNode(std::move(node), can_replace, add_to_index);
}
@ -349,10 +375,15 @@ void ActionsDAG::removeUnusedActions()
stack.push(node);
}
/// We cannot remove arrayJoin because it changes the number of rows.
for (auto & node : nodes)
{
if (node.type == ActionType::ARRAY_JOIN && visited_nodes.count(&node) == 0)
/// We cannot remove a function with side effects even if it returns a constant (e.g. ignore(...)).
bool prevent_constant_folding = node.column && isColumnConst(*node.column) && !node.allow_constant_folding;
/// We cannot remove arrayJoin because it changes the number of rows.
bool is_array_join = node.type == ActionType::ARRAY_JOIN;
bool must_keep_node = is_array_join || prevent_constant_folding;
if (must_keep_node && visited_nodes.count(&node) == 0)
{
visited_nodes.insert(&node);
stack.push(&node);
@ -410,7 +441,6 @@ void ActionsDAG::addAliases(const NamesWithAliases & aliases, std::vector<Node *
node.result_type = child->result_type;
node.result_name = std::move(item.second);
node.column = child->column;
node.allow_constant_folding = child->allow_constant_folding;
node.children.emplace_back(child);
auto & alias = addNode(std::move(node), true);
@ -625,6 +655,26 @@ bool ActionsDAG::trivial() const
return true;
}
void ActionsDAG::addMaterializingOutputActions()
{
FunctionOverloadResolverPtr func_builder_materialize =
std::make_shared<FunctionOverloadResolverAdaptor>(
std::make_unique<DefaultOverloadResolver>(
std::make_shared<FunctionMaterialize>()));
Index new_index;
std::vector<Node *> index_nodes(index.begin(), index.end());
for (auto * node : index_nodes)
{
auto & name = node->result_name;
node = &addFunction(func_builder_materialize, {node}, {}, true, false);
node = &addAlias(*node, name, true);
new_index.insert(node);
}
index.swap(new_index);
}
ActionsDAGPtr ActionsDAG::makeConvertingActions(
const ColumnsWithTypeAndName & source,
const ColumnsWithTypeAndName & result,
@ -734,6 +784,23 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
return actions_dag;
}
ActionsDAGPtr ActionsDAG::makeAddingColumnActions(ColumnWithTypeAndName column)
{
auto adding_column_action = std::make_shared<ActionsDAG>();
FunctionOverloadResolverPtr func_builder_materialize =
std::make_shared<FunctionOverloadResolverAdaptor>(
std::make_unique<DefaultOverloadResolver>(
std::make_shared<FunctionMaterialize>()));
auto column_name = column.name;
const auto & column_node = adding_column_action->addColumn(std::move(column));
Inputs inputs = {const_cast<Node *>(&column_node)};
auto & function_node = adding_column_action->addFunction(func_builder_materialize, std::move(inputs), {}, true);
adding_column_action->addAlias(function_node, std::move(column_name), true);
return adding_column_action;
}
ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
{
/// first: x (1), x (2), y ==> x (2), z, x (3)
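addColumn(..., materialize = true) and addMaterializingOutputActions follow the same recipe: wrap the node in a materialize() call and then alias the result back to the original name, so downstream headers keep their column names while constant columns become full ones. What "materializing" a constant column means, as a standalone toy sketch (plain structs, not the ClickHouse IColumn or ActionsDAG API):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    /// A toy column: either a constant (one value plus a logical size) or a full vector.
    struct Column
    {
        bool is_const = false;
        int const_value = 0;
        size_t logical_size = 0;
        std::vector<int> data;

        size_t size() const { return is_const ? logical_size : data.size(); }
    };

    /// materialize() turns a constant column into an ordinary full column of the same size;
    /// full columns pass through unchanged.
    Column materialize(const Column & col)
    {
        if (!col.is_const)
            return col;
        Column full;
        full.data.assign(col.logical_size, col.const_value);
        return full;
    }

    int main()
    {
        Column c;
        c.is_const = true;
        c.const_value = 42;
        c.logical_size = 4;

        Column full = materialize(c);
        assert(!full.is_const);
        assert((full.data == std::vector<int>{42, 42, 42, 42}));
    }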

View File

@ -198,14 +198,15 @@ public:
const Node & addInput(std::string name, DataTypePtr type, bool can_replace = false);
const Node & addInput(ColumnWithTypeAndName column, bool can_replace = false);
const Node & addColumn(ColumnWithTypeAndName column, bool can_replace = false);
const Node & addColumn(ColumnWithTypeAndName column, bool can_replace = false, bool materialize = false);
const Node & addAlias(const std::string & name, std::string alias, bool can_replace = false);
const Node & addArrayJoin(const std::string & source_name, std::string result_name);
const Node & addFunction(
const FunctionOverloadResolverPtr & function,
const Names & argument_names,
std::string result_name,
const Context & context);
const Context & context,
bool can_replace = false);
/// Call addAlias several times.
void addAliases(const NamesWithAliases & aliases);
@ -232,6 +233,9 @@ public:
ActionsDAGPtr clone() const;
/// Applies the materialize() function to every output.
/// Also adds aliases so the result names remain unchanged.
void addMaterializingOutputActions();
enum class MatchColumnsMode
{
@ -250,6 +254,9 @@ public:
MatchColumnsMode mode,
bool ignore_constant_values = false); /// Do not check that constants are same. Use value from result_header.
/// Create an expression which adds a const column and then materializes it.
static ActionsDAGPtr makeAddingColumnActions(ColumnWithTypeAndName column);
/// Create ActionsDAG which represents expression equivalent to applying first and second actions consequently.
/// Is used to replace `(first -> second)` expression chain to single `merge(first, second)` expression.
/// If first.settings.project_input is set, then outputs of `first` must include inputs of `second`.
@ -272,7 +279,7 @@ public:
SplitResult splitActionsForFilter(const std::string & column_name) const;
private:
Node & addNode(Node node, bool can_replace = false);
Node & addNode(Node node, bool can_replace = false, bool add_to_index = true);
Node & getNode(const std::string & name);
Node & addAlias(Node & child, std::string alias, bool can_replace);
@ -280,7 +287,8 @@ private:
const FunctionOverloadResolverPtr & function,
Inputs children,
std::string result_name,
bool can_replace);
bool can_replace,
bool add_to_index = true);
ActionsDAGPtr cloneEmpty() const
{

View File

@ -7,6 +7,8 @@
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Interpreters/ArithmeticOperationsInAgrFuncOptimize.h>
#include <Poco/String.h>
namespace DB
{
@ -89,15 +91,18 @@ const String & changeNameIfNeeded(const String & func_name, const String & child
ASTPtr tryExchangeFunctions(const ASTFunction & func)
{
static const std::unordered_map<String, std::unordered_set<String>> supported = {
{ "sum", { "multiply", "divide" } },
{ "min", { "multiply", "divide", "plus", "minus" } },
{ "max", { "multiply", "divide", "plus", "minus" } }
};
static const std::unordered_map<String, std::unordered_set<String>> supported
= {{"sum", {"multiply", "divide"}},
{"min", {"multiply", "divide", "plus", "minus"}},
{"max", {"multiply", "divide", "plus", "minus"}},
{"avg", {"multiply", "divide", "plus", "minus"}}};
/// Aggregate function names [sum|min|max|avg] are case-insensitive, so we use the lower-cased name
auto lower_name = Poco::toLower(func.name);
const ASTFunction * child_func = getInternalFunction(func);
if (!child_func || !child_func->arguments || child_func->arguments->children.size() != 2 ||
!supported.count(func.name) || !supported.find(func.name)->second.count(child_func->name))
if (!child_func || !child_func->arguments || child_func->arguments->children.size() != 2 || !supported.count(lower_name)
|| !supported.find(lower_name)->second.count(child_func->name))
return {};
/// Cannot rewrite function with alias cause alias could become undefined
@ -116,12 +121,12 @@ ASTPtr tryExchangeFunctions(const ASTFunction & func)
if (child_func->name == "divide")
return {};
const String & new_name = changeNameIfNeeded(func.name, child_func->name, *first_literal);
const String & new_name = changeNameIfNeeded(lower_name, child_func->name, *first_literal);
optimized_ast = exchangeExtractFirstArgument(new_name, *child_func);
}
else if (second_literal) /// second or both are consts
{
const String & new_name = changeNameIfNeeded(func.name, child_func->name, *second_literal);
const String & new_name = changeNameIfNeeded(lower_name, child_func->name, *second_literal);
optimized_ast = exchangeExtractSecondArgument(new_name, *child_func);
}

View File

@ -11,7 +11,7 @@ class ASTFunction;
/// Extract constant arguments out of aggregate functions from child functions
/// 'sum(a * 2)' -> 'sum(a) * 2'
/// Rewrites: sum([multiply|divide]) -> [multiply|divide](sum)
/// [min|max]([multiply|divide|plus|minus]) -> [multiply|divide|plus|minus]([min|max])
/// [min|max|avg]([multiply|divide|plus|minus]) -> [multiply|divide|plus|minus]([min|max|avg])
/// TODO: groupBitAnd, groupBitOr, groupBitXor
/// TODO: better constant detection: f(const) is not detected as const.
/// TODO: 'f((2 * n) * n)' -> '2 * f(n * n)'
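A worked identity behind the rewrite and the name change: for a constant c, sum(c * a) = c * sum(a) holds unconditionally, and likewise avg(c * a) = c * avg(a); but min(c * a) = c * min(a) and max(c * a) = c * max(a) only hold for c >= 0. For c < 0 the extremes swap, min(c * a) = c * max(a) and max(c * a) = c * min(a), which is presumably what changeNameIfNeeded accounts for by inspecting the literal before choosing the outer aggregate name.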

View File

@ -331,7 +331,7 @@ struct ContextShared
mutable std::optional<ExternalModelsLoader> external_models_loader;
String default_profile_name; /// Default profile name used for default values.
String system_profile_name; /// Profile used by system processes
AccessControlManager access_control_manager;
std::unique_ptr<AccessControlManager> access_control_manager;
mutable UncompressedCachePtr uncompressed_cache; /// The cache of decompressed blocks.
mutable MarkCachePtr mark_cache; /// Cache of marks in compressed files.
ProcessList process_list; /// Executing queries at the moment.
@ -388,7 +388,8 @@ struct ContextShared
Context::ConfigReloadCallback config_reload_callback;
ContextShared()
: macros(std::make_unique<Macros>())
: access_control_manager(std::make_unique<AccessControlManager>())
, macros(std::make_unique<Macros>())
{
/// TODO: make it singleton (?)
static std::atomic<size_t> num_calls{0};
@ -434,6 +435,7 @@ struct ContextShared
/// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference).
/// TODO: Get rid of this.
access_control_manager.reset();
system_logs.reset();
embedded_dictionaries.reset();
external_dictionaries_loader.reset();
@ -640,7 +642,7 @@ void Context::setConfig(const ConfigurationPtr & config)
{
auto lock = getLock();
shared->config = config;
shared->access_control_manager.setExternalAuthenticatorsConfig(*shared->config);
shared->access_control_manager->setExternalAuthenticatorsConfig(*shared->config);
}
const Poco::Util::AbstractConfiguration & Context::getConfigRef() const
@ -652,25 +654,25 @@ const Poco::Util::AbstractConfiguration & Context::getConfigRef() const
AccessControlManager & Context::getAccessControlManager()
{
return shared->access_control_manager;
return *shared->access_control_manager;
}
const AccessControlManager & Context::getAccessControlManager() const
{
return shared->access_control_manager;
return *shared->access_control_manager;
}
void Context::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config)
{
auto lock = getLock();
shared->access_control_manager.setExternalAuthenticatorsConfig(config);
shared->access_control_manager->setExternalAuthenticatorsConfig(config);
}
void Context::setUsersConfig(const ConfigurationPtr & config)
{
auto lock = getLock();
shared->users_config = config;
shared->access_control_manager.setUsersConfig(*shared->users_config);
shared->access_control_manager->setUsersConfig(*shared->users_config);
}
ConfigurationPtr Context::getUsersConfig()
@ -1138,12 +1140,6 @@ String Context::getCurrentDatabase() const
}
String Context::getCurrentQueryId() const
{
return client_info.current_query_id;
}
String Context::getInitialQueryId() const
{
return client_info.initial_query_id;
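The comment about preemptive destruction is the reason access_control_manager is now held in a unique_ptr and reset explicitly: members that may hold references back into the shared context need to be torn down first, in a controlled order. A generic sketch of that teardown-ordering idea (toy types, not the actual ClickHouse classes):

    #include <iostream>
    #include <memory>

    struct Logger
    {
        ~Logger() { std::cout << "logger destroyed\n"; }
    };

    /// Holds a non-owning back-reference into its parent's other members.
    struct AccessManager
    {
        Logger * log = nullptr;
        ~AccessManager() { std::cout << "access manager destroyed\n"; }
    };

    struct SharedState
    {
        /// Relying on implicit destruction would destroy `logger` before `access`
        /// (members die in reverse declaration order), leaving `access` with a
        /// dangling pointer while its own destructor runs.
        std::unique_ptr<AccessManager> access;
        std::unique_ptr<Logger> logger;

        SharedState()
            : access(std::make_unique<AccessManager>())
            , logger(std::make_unique<Logger>())
        {
            access->log = logger.get();
        }

        ~SharedState()
        {
            /// Preemptive, explicit teardown: the member that references the others goes first.
            access.reset();
            logger.reset();
        }
    };

    int main()
    {
        SharedState state;
        /// Prints "access manager destroyed" before "logger destroyed".
    }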

View File

@ -441,7 +441,7 @@ public:
StoragePtr getViewSource();
String getCurrentDatabase() const;
String getCurrentQueryId() const;
String getCurrentQueryId() const { return client_info.current_query_id; }
/// Id of initiating query for distributed queries; or current query id if it's not a distributed query.
String getInitialQueryId() const;

View File

@ -610,12 +610,14 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, Exec
ReadBufferFromString istr(query_to_execute);
String dummy_string;
WriteBufferFromString ostr(dummy_string);
std::optional<CurrentThread::QueryScope> query_scope;
try
{
auto current_context = std::make_unique<Context>(context);
current_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
current_context->setCurrentQueryId(""); // generate random query_id
query_scope.emplace(*current_context);
executeQuery(istr, ostr, false, *current_context, {});
}
catch (...)
@ -632,20 +634,6 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, Exec
return true;
}
void DDLWorker::attachToThreadGroup()
{
if (thread_group)
{
/// Put all threads to one thread pool
CurrentThread::attachToIfDetached(thread_group);
}
else
{
CurrentThread::initializeQuery();
thread_group = CurrentThread::getGroup();
}
}
void DDLWorker::enqueueTask(DDLTaskPtr task_ptr)
{
@ -1148,8 +1136,6 @@ void DDLWorker::runMainThread()
{
try
{
attachToThreadGroup();
cleanup_event->set();
scheduleTasks();
@ -1217,7 +1203,7 @@ void DDLWorker::runCleanupThread()
}
class DDLQueryStatusInputStream : public IBlockInputStream
class DDLQueryStatusInputStream final : public IBlockInputStream
{
public:

View File

@ -162,8 +162,6 @@ private:
void runMainThread();
void runCleanupThread();
void attachToThreadGroup();
private:
Context context;
Poco::Logger * log;
@ -196,8 +194,6 @@ private:
/// How many tasks could be in the queue
size_t max_tasks_in_queue = 1000;
ThreadGroupStatusPtr thread_group;
std::atomic<UInt64> max_id = 0;
friend class DDLQueryStatusInputStream;

View File

@ -37,7 +37,7 @@ void DNSCacheUpdater::run()
* - automatically throttle when DNS requests take longer time;
* - add natural randomization on huge clusters - avoid sending all requests at the same moment of time from different servers.
*/
task_handle->scheduleAfter(update_period_seconds * 1000);
task_handle->scheduleAfter(size_t(update_period_seconds) * 1000);
}
void DNSCacheUpdater::start()
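The widening cast above matters if update_period_seconds is a 32-bit integer (an assumption for this sketch): without it the multiplication by 1000 is done in 32-bit arithmetic and can wrap before the result is ever converted to the wider parameter type. A tiny standalone illustration:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        /// A deliberately large 32-bit period: 5'000'000 * 1000 does not fit in 32 bits.
        uint32_t update_period_seconds = 5'000'000;

        uint64_t wrong = update_period_seconds * 1000u;           /// multiplication wraps in 32 bits, then widens
        uint64_t right = uint64_t(update_period_seconds) * 1000;  /// widen first, then multiply

        std::printf("wrong: %llu, right: %llu\n",
                    (unsigned long long) wrong, (unsigned long long) right);
    }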

View File

@ -96,7 +96,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr
ASTPtr subquery_select = subquery.children.at(0);
auto options = SelectQueryOptions(QueryProcessingStage::Complete, data.subquery_depth + 1);
auto options = SelectQueryOptions(QueryProcessingStage::Complete, data.subquery_depth + 1, true);
options.analyze(data.only_analyze);
auto interpreter = InterpreterSelectWithUnionQuery(subquery_select, subquery_context, options);

View File

@ -12,7 +12,6 @@
#include <Core/Block.h>
#include <Core/NamesAndTypes.h>
#include <Databases/IDatabase.h>
#include <Storages/StorageMemory.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/IdentifierSemantic.h>

Some files were not shown because too many files have changed in this diff.