Mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-11-24 00:22:29 +00:00.

Commit 32b8bd7428: Merge branch 'master' of https://github.com/ClickHouse/ClickHouse into master

32  .github/workflows/codeql-analysis.yml  vendored
@@ -1,32 +0,0 @@
# See the example here: https://github.com/github/codeql-action

name: "CodeQL Scanning"

on:
  schedule:
    - cron: '0 19 * * *'
jobs:
  CodeQL-Build:

    runs-on: self-hosted
    timeout-minutes: 1440

    steps:
    - name: Checkout repository
      uses: actions/checkout@v2
      with:
        fetch-depth: 2
        submodules: 'recursive'

    - name: Initialize CodeQL
      uses: github/codeql-action/init@v1

      with:
        languages: cpp

    - run: sudo apt-get update && sudo apt-get install -y git cmake python ninja-build gcc-10 g++-10 && mkdir build
    - run: cd build && CC=gcc-10 CXX=g++-10 cmake ..
    - run: cd build && ninja

    - name: Perform CodeQL Analysis
      uses: github/codeql-action/analyze@v1
@@ -11,7 +11,6 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-ly9m4w1x-6j7x5Ts_pQZqrctAbRZ3cg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real time.
* [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlighting and navigation.
* [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian.
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://clickhouse.tech/#meet) to meet the Yandex ClickHouse team in person.
@@ -986,7 +986,7 @@ void BaseDaemon::setupWatchdog()
        if (errno == ECHILD)
        {
            logger().information("Child process no longer exists.");
            _exit(status);
            _exit(WEXITSTATUS(status));
        }

        if (WIFEXITED(status))
@@ -1020,7 +1020,7 @@ void BaseDaemon::setupWatchdog()

    /// Automatic restart is not enabled but you can play with it.
#if 1
    _exit(status);
    _exit(WEXITSTATUS(status));
#else
    logger().information("Will restart.");
    if (argv0)
@@ -120,7 +120,7 @@ function clone_root
        git checkout FETCH_HEAD
        echo 'Cloned merge head'
    else
        git fetch
        git fetch origin "+refs/pull/$PULL_REQUEST_NUMBER/head"
        git checkout "$COMMIT_SHA"
        echo 'Checked out to commit'
    fi
@@ -190,7 +190,7 @@ case "$stage" in
            # Lost connection to the server. This probably means that the server died
            # with abort.
            echo "failure" > status.txt
            if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*" server.log > description.txt
            if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log > description.txt
            then
                echo "Lost connection to server. See the logs." > description.txt
            fi
@@ -44,6 +44,7 @@ parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated l
parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.')
parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.')
parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.')
parser.add_argument('--max-query-seconds', type=int, default=10, help='For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.')
parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.')
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
@@ -323,7 +324,7 @@ for query_index in queries_to_run:
        server_seconds += elapsed
        print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}')

        if elapsed > 10:
        if elapsed > args.max_query_seconds:
            # Stop processing pathologically slow queries, to avoid timing out
            # the entire test task. This shouldn't really happen, so we don't
            # need much handling for this case and can just exit.
@@ -5,7 +5,10 @@ RUN apt-get update -y && \
    apt-get install -y --no-install-recommends \
        python3-pip \
        python3-setuptools \
        python3-wheel
        python3-wheel \
        brotli \
        netcat-openbsd \
        zstd

RUN python3 -m pip install \
    wheel \
@@ -15,7 +18,10 @@ RUN python3 -m pip install \
    pytest-randomly \
    pytest-rerunfailures \
    pytest-timeout \
    pytest-xdist
    pytest-xdist \
    pandas \
    numpy \
    scipy

CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
    dpkg -i package_folder/clickhouse-common-static-dbg_*.deb; \
@@ -93,6 +93,7 @@ ClickHouse has only one physical order, which is determined by `ORDER BY` clause
- Cascade `UPDATE/DELETE` queries are not supported by the `MaterializeMySQL` engine.
- Replication can be easily broken.
- Manual operations on database and tables are forbidden.
- `MaterializeMySQL` is influenced by the [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged in the corresponding table in the `MaterializeMySQL` database when a table on the MySQL server changes.

## Examples of Use {#examples-of-use}

@@ -156,4 +157,4 @@ SELECT * FROM mysql.test;
└───┴─────┴──────┘
```

[Original article](https://clickhouse.tech/docs/en/database_engines/materialize-mysql/) <!--hide-->
[Original article](https://clickhouse.tech/docs/en/engines/database-engines/materialize-mysql/) <!--hide-->
@@ -12,6 +12,9 @@ List of supported integrations:
- [ODBC](../../../engines/table-engines/integrations/odbc.md)
- [JDBC](../../../engines/table-engines/integrations/jdbc.md)
- [MySQL](../../../engines/table-engines/integrations/mysql.md)
- [MongoDB](../../../engines/table-engines/integrations/mongodb.md)
- [HDFS](../../../engines/table-engines/integrations/hdfs.md)
- [S3](../../../engines/table-engines/integrations/s3.md)
- [Kafka](../../../engines/table-engines/integrations/kafka.md)
- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md)
- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)
57  docs/en/engines/table-engines/integrations/mongodb.md  Normal file
@@ -0,0 +1,57 @@
---
toc_priority: 7
toc_title: MongoDB
---

# MongoDB {#mongodb}

The MongoDB engine is a read-only table engine that allows reading data (`SELECT` queries) from a remote MongoDB collection. The engine supports only non-nested data types. `INSERT` queries are not supported.

## Creating a Table {#creating-a-table}

``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name
(
    name1 [type1],
    name2 [type2],
    ...
) ENGINE = MongoDB(host:port, database, collection, user, password);
```

**Engine Parameters**

- `host:port` — MongoDB server address.

- `database` — Remote database name.

- `collection` — Remote collection name.

- `user` — MongoDB user.

- `password` — User password.

## Usage Example {#usage-example}

A table in ClickHouse that reads data from a MongoDB collection:

``` sql
CREATE TABLE mongo_table
(
    key UInt64,
    data String
) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse');
```

Query:

``` sql
SELECT COUNT() FROM mongo_table;
```

``` text
┌─count()─┐
│       4 │
└─────────┘
```

[Original article](https://clickhouse.tech/docs/en/operations/table_engines/integrations/mongodb/) <!--hide-->
@@ -104,7 +104,8 @@ For a description of parameters, see the [CREATE query description](../../../sql
- `max_parts_in_total` — Maximum number of parts in all partitions.
- `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. You can also specify this setting in the global settings (see the [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size) setting). The value specified when the table is created overrides the global value for this setting.
- `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. You can also specify this setting in the global settings (see the [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size) setting). The value specified when the table is created overrides the global value for this setting.

- `max_partitions_to_read` — Limits the maximum number of partitions that can be accessed in one query. You can also specify the [max_partitions_to_read](../../../operations/settings/merge-tree-settings.md#max-partitions-to-read) setting globally.

**Example of Sections Setting**

``` sql
@ -81,6 +81,7 @@ toc_title: Adopters
|
||||
| <a href="https://posthog.com/" class="favicon">PostHog</a> | Product Analytics | Main Product | — | — | [Release Notes, Oct 2020](https://posthog.com/blog/the-posthog-array-1-15-0) |
|
||||
| <a href="https://postmates.com/" class="favicon">Postmates</a> | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) |
|
||||
| <a href="http://www.pragma-innovation.fr/" class="favicon">Pragma Innovation</a> | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) |
|
||||
| <a href="https://prana-system.com/en/" class="favicon">PRANA</a> | Industrial predictive analytics | Main product | — | — | [News (russian), Feb 2021](https://habr.com/en/news/t/541392/) |
|
||||
| <a href="https://www.qingcloud.com/" class="favicon">QINGCLOUD</a> | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
|
||||
| <a href="https://qrator.net" class="favicon">Qrator</a> | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
|
||||
| <a href="https://www.rbinternational.com/" class="favicon">Raiffeisenbank</a> | Banking | Analytics | — | — | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html) |
|
||||
|
@@ -29,6 +29,8 @@ Let’s look at the section of the ‘users.xml’ file that defines quotas.

        <!-- Unlimited. Just collect data for the specified time interval. -->
        <queries>0</queries>
        <query_selects>0</query_selects>
        <query_inserts>0</query_inserts>
        <errors>0</errors>
        <result_rows>0</result_rows>
        <read_rows>0</read_rows>
@@ -48,6 +50,8 @@ The resource consumption calculated for each interval is output to the server lo
            <duration>3600</duration>

            <queries>1000</queries>
            <query_selects>100</query_selects>
            <query_inserts>100</query_inserts>
            <errors>100</errors>
            <result_rows>1000000000</result_rows>
            <read_rows>100000000000</read_rows>
@@ -58,6 +62,8 @@ The resource consumption calculated for each interval is output to the server lo
            <duration>86400</duration>

            <queries>10000</queries>
            <query_selects>10000</query_selects>
            <query_inserts>10000</query_inserts>
            <errors>1000</errors>
            <result_rows>5000000000</result_rows>
            <read_rows>500000000000</read_rows>
@@ -74,6 +80,10 @@ Here are the amounts that can be restricted:

`queries` – The total number of requests.

`query_selects` – The total number of select requests.

`query_inserts` – The total number of insert requests.

`errors` – The number of queries that threw an exception.

`result_rows` – The total number of rows given as a result.
@@ -186,5 +186,16 @@ Possible values:
Default value: auto (number of CPU cores).

During startup ClickHouse reads all parts of all tables (reads files with metadata of parts) to build a list of all parts in memory. In some systems with a large number of parts this process can take a long time, and this time might be shortened by increasing `max_part_loading_threads` (if this process is not CPU and disk I/O bound).

## max_partitions_to_read {#max-partitions-to-read}

Limits the maximum number of partitions that can be accessed in one query.

The setting value specified when the table is created can be overridden at the query level.

Possible values:

- Any positive integer.

Default value: -1 (unlimited).
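For illustration, a minimal sketch of setting `max_partitions_to_read` per table and overriding it per query; the table and column names are hypothetical:

``` sql
CREATE TABLE partitions_example (dt Date, user_id UInt64)
ENGINE = MergeTree
PARTITION BY dt
ORDER BY user_id
SETTINGS max_partitions_to_read = 2;

-- The query-level setting overrides the value stored in the table definition.
SELECT count() FROM partitions_example SETTINGS max_partitions_to_read = 10;
```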

[Original article](https://clickhouse.tech/docs/en/operations/settings/merge_tree_settings/) <!--hide-->
@ -2592,4 +2592,58 @@ Possible values:
|
||||
|
||||
Default value: `16`.
|
||||
|
||||
## optimize_on_insert {#optimize-on-insert}
|
||||
|
||||
Enables or disables data transformation before the insertion, as if a merge was done on this block (according to the table engine).
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — Disabled.
|
||||
- 1 — Enabled.
|
||||
|
||||
Default value: 1.
|
||||
|
||||
**Example**
|
||||
|
||||
The difference between enabled and disabled:
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SET optimize_on_insert = 1;
|
||||
|
||||
CREATE TABLE test1 (`FirstTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY FirstTable;
|
||||
|
||||
INSERT INTO test1 SELECT number % 2 FROM numbers(5);
|
||||
|
||||
SELECT * FROM test1;
|
||||
|
||||
SET optimize_on_insert = 0;
|
||||
|
||||
CREATE TABLE test2 (`SecondTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY SecondTable;
|
||||
|
||||
INSERT INTO test2 SELECT number % 2 FROM numbers(5);
|
||||
|
||||
SELECT * FROM test2;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─FirstTable─┐
|
||||
│ 0 │
|
||||
│ 1 │
|
||||
└────────────┘
|
||||
|
||||
┌─SecondTable─┐
|
||||
│ 0 │
|
||||
│ 0 │
|
||||
│ 0 │
|
||||
│ 1 │
|
||||
│ 1 │
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) behaviour.
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
|
||||
|
@ -6,29 +6,65 @@ This table contains information about events that occurred with [data parts](../
|
||||
|
||||
The `system.part_log` table contains the following columns:
|
||||
|
||||
- `event_type` (Enum) — Type of the event that occurred with the data part. Can have one of the following values:
|
||||
- `query_id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the `INSERT` query that created this data part.
|
||||
- `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event that occurred with the data part. Can have one of the following values:
|
||||
- `NEW_PART` — Inserting of a new data part.
|
||||
- `MERGE_PARTS` — Merging of data parts.
|
||||
- `DOWNLOAD_PART` — Downloading a data part.
|
||||
- `REMOVE_PART` — Removing or detaching a data part using [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition).
|
||||
- `MUTATE_PART` — Mutating of a data part.
|
||||
- `MOVE_PART` — Moving the data part from one disk to another.
|
||||
- `event_date` (Date) — Event date.
|
||||
- `event_time` (DateTime) — Event time.
|
||||
- `duration_ms` (UInt64) — Duration.
|
||||
- `database` (String) — Name of the database the data part is in.
|
||||
- `table` (String) — Name of the table the data part is in.
|
||||
- `part_name` (String) — Name of the data part.
|
||||
- `partition_id` (String) — ID of the partition that the data part was inserted to. The column takes the ‘all’ value if the partitioning is by `tuple()`.
|
||||
- `rows` (UInt64) — The number of rows in the data part.
|
||||
- `size_in_bytes` (UInt64) — Size of the data part in bytes.
|
||||
- `merged_from` (Array(String)) — An array of names of the parts which the current part was made up from (after the merge).
|
||||
- `bytes_uncompressed` (UInt64) — Size of uncompressed bytes.
|
||||
- `read_rows` (UInt64) — The number of rows was read during the merge.
|
||||
- `read_bytes` (UInt64) — The number of bytes was read during the merge.
|
||||
- `error` (UInt16) — The code number of the occurred error.
|
||||
- `exception` (String) — Text message of the occurred error.
|
||||
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
|
||||
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
|
||||
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds precision.
|
||||
|
||||
- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration.
|
||||
- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database the data part is in.
|
||||
- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table the data part is in.
|
||||
- `part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part.
|
||||
- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition that the data part was inserted to. The column takes the `all` value if the partitioning is by `tuple()`.
|
||||
- `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files.
|
||||
- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows in the data part.
|
||||
- `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of the data part in bytes.
|
||||
- `merged_from` ([Array(String)](../../sql-reference/data-types/array.md)) — An array of names of the parts which the current part was made up from (after the merge).
|
||||
- `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Uncompressed size in bytes.
|
||||
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows read during the merge.
|
||||
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes read during the merge.
|
||||
- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread.
|
||||
- `error` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The code number of the occurred error.
|
||||
- `exception` ([String](../../sql-reference/data-types/string.md)) — Text message of the occurred error.
|
||||
|
||||
The `system.part_log` table is created after the first insert of data into a `MergeTree` table.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT * FROM system.part_log LIMIT 1 FORMAT Vertical;
|
||||
```
|
||||
|
||||
``` text
|
||||
Row 1:
|
||||
──────
|
||||
query_id: 983ad9c7-28d5-4ae1-844e-603116b7de31
|
||||
event_type: NewPart
|
||||
event_date: 2021-02-02
|
||||
event_time: 2021-02-02 11:14:28
|
||||
event_time_microseconds: 2021-02-02 11:14:28.861919
|
||||
duration_ms: 35
|
||||
database: default
|
||||
table: log_mt_2
|
||||
part_name: all_1_1_0
|
||||
partition_id: all
|
||||
path_on_disk: db/data/default/log_mt_2/all_1_1_0/
|
||||
rows: 115418
|
||||
size_in_bytes: 1074311
|
||||
merged_from: []
|
||||
bytes_uncompressed: 0
|
||||
read_rows: 0
|
||||
read_bytes: 0
|
||||
peak_memory_usage: 0
|
||||
error: 0
|
||||
exception:
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/part_log) <!--hide-->
|
||||
|
@ -9,6 +9,8 @@ Columns:
|
||||
- `0` — Interval is not randomized.
|
||||
- `1` — Interval is randomized.
|
||||
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of queries.
|
||||
- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of select queries.
|
||||
- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of insert queries.
|
||||
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors.
|
||||
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of result rows.
|
||||
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum amount of RAM in bytes used to store the result of queries.
|
||||
|
@ -9,6 +9,8 @@ Columns:
|
||||
- `end_time`([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — End time for calculating resource consumption.
|
||||
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Length of the time interval for calculating resource consumption, in seconds.
|
||||
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of requests on this interval.
|
||||
- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of select requests on this interval.
|
||||
- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of insert requests on this interval.
|
||||
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of requests.
|
||||
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The number of queries that threw an exception.
|
||||
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors.
|
||||
|
@ -11,6 +11,10 @@ Columns:
|
||||
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — Length of the time interval for calculating resource consumption, in seconds.
|
||||
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of requests in this interval.
|
||||
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of requests.
|
||||
- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of select requests in this interval.
|
||||
- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of select requests.
|
||||
- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of insert requests in this interval.
|
||||
- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of insert requests.
|
||||
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The number of queries that threw an exception.
|
||||
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors.
|
||||
- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of rows given as a result.
|
||||
|
@@ -1,12 +1,16 @@
# system.zookeeper {#system-zookeeper}

The table does not exist if ZooKeeper is not configured. Allows reading data from the ZooKeeper cluster defined in the config.
The query must have a ‘path’ equality condition in the WHERE clause. This is the path in ZooKeeper for the children that you want to get data for.
The query must either have a ‘path =’ condition or a `path IN` condition set with the `WHERE` clause as shown below. This corresponds to the path of the children in ZooKeeper that you want to get data for.

The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs data for all children of the `/clickhouse` node.
To output data for all root nodes, write path = ‘/’.
If the path specified in ‘path’ doesn’t exist, an exception will be thrown.

The query `SELECT * FROM system.zookeeper WHERE path IN ('/', '/clickhouse')` outputs data for all children of the `/` and `/clickhouse` nodes.
If any path in the specified ‘path’ collection doesn’t exist, an exception will be thrown.
This form can be used to run a batch of ZooKeeper path queries.
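For illustration, both forms side by side; the child nodes under `/clickhouse` are hypothetical:

``` sql
SELECT name, value, path FROM system.zookeeper WHERE path = '/clickhouse';
SELECT name, path FROM system.zookeeper WHERE path IN ('/', '/clickhouse/tables');
```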

Columns:

- `name` (String) — The name of the node.
@ -4,13 +4,42 @@ toc_priority: 106
|
||||
|
||||
# argMax {#agg-function-argmax}
|
||||
|
||||
Syntax: `argMax(arg, val)` or `argMax(tuple(arg, val))`
|
||||
Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, returns the first of these values encountered.
|
||||
|
||||
Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, the first of these values encountered is output.
|
||||
Tuple version of this function will return the tuple with the maximum `val` value. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md).
|
||||
|
||||
Tuple version of this function will return the tuple with the maximum `val` value. It is convenient for use with `SimpleAggregateFunction`.
|
||||
**Syntax**
|
||||
|
||||
**Example:**
|
||||
``` sql
|
||||
argMax(arg, val)
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
``` sql
|
||||
argMax(tuple(arg, val))
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `arg` — Argument.
|
||||
- `val` — Value.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `arg` value that corresponds to maximum `val` value.
|
||||
|
||||
Type: matches `arg` type.
|
||||
|
||||
For tuple in the input:
|
||||
|
||||
- Tuple `(arg, val)`, where `val` is the maximum value and `arg` is a corresponding value.
|
||||
|
||||
Type: [Tuple](../../../sql-reference/data-types/tuple.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Input table:
|
||||
|
||||
``` text
|
||||
┌─user─────┬─salary─┐
|
||||
@ -20,12 +49,18 @@ Tuple version of this function will return the tuple with the maximum `val` valu
|
||||
└──────────┴────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary
|
||||
SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐
|
||||
│ director │ ('director',5000) │
|
||||
└──────────────────────┴─────────────────────────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmax/) <!--hide-->
|
||||
|
@ -4,13 +4,42 @@ toc_priority: 105
|
||||
|
||||
# argMin {#agg-function-argmin}
|
||||
|
||||
Syntax: `argMin(arg, val)` or `argMin(tuple(arg, val))`
|
||||
Calculates the `arg` value for a minimum `val` value. If there are several different values of `arg` for minimum values of `val`, returns the first of these values encountered.
|
||||
|
||||
Calculates the `arg` value for a minimal `val` value. If there are several different values of `arg` for minimal values of `val`, the first of these values encountered is output.
|
||||
Tuple version of this function will return the tuple with the minimum `val` value. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md).
|
||||
|
||||
Tuple version of this function will return the tuple with the minimal `val` value. It is convenient for use with `SimpleAggregateFunction`.
|
||||
**Syntax**
|
||||
|
||||
**Example:**
|
||||
``` sql
|
||||
argMin(arg, val)
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
``` sql
|
||||
argMin(tuple(arg, val))
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `arg` — Argument.
|
||||
- `val` — Value.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `arg` value that corresponds to minimum `val` value.
|
||||
|
||||
Type: matches `arg` type.
|
||||
|
||||
For tuple in the input:
|
||||
|
||||
- Tuple `(arg, val)`, where `val` is the minimum value and `arg` is a corresponding value.
|
||||
|
||||
Type: [Tuple](../../../sql-reference/data-types/tuple.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Input table:
|
||||
|
||||
``` text
|
||||
┌─user─────┬─salary─┐
|
||||
@ -20,12 +49,18 @@ Tuple version of this function will return the tuple with the minimal `val` valu
|
||||
└──────────┴────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary
|
||||
SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─┐
|
||||
│ worker │ ('worker',1000) │
|
||||
└──────────────────────┴─────────────────────────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmin/) <!--hide-->
|
||||
|
@@ -0,0 +1,19 @@
---
toc_priority: 141
---

# deltaSum {#agg_functions-deltasum}

Syntax: `deltaSum(value)`

Adds the differences between consecutive rows. If the difference is negative, it is ignored.
`value` must be an integer or floating-point type.

Example:

```sql
select deltaSum(arrayJoin([1, 2, 3])); -- => 2
select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3])); -- => 7
select deltaSum(arrayJoin([2.25, 3, 4.5])); -- => 2.25
```
@ -0,0 +1,71 @@
|
||||
---
|
||||
toc_priority: 310
|
||||
toc_title: mannWhitneyUTest
|
||||
---
|
||||
|
||||
# mannWhitneyUTest {#mannwhitneyutest}
|
||||
|
||||
Applies the Mann-Whitney rank test to samples from two populations.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
mannWhitneyUTest[(alternative[, continuity_correction])](sample_data, sample_index)
|
||||
```
|
||||
|
||||
Values of both samples are in the `sample_data` column. If `sample_index` equals 0, the value in that row belongs to the sample from the first population. Otherwise, it belongs to the sample from the second population.
|
||||
The null hypothesis is that the two populations are stochastically equal. One-sided hypotheses can also be tested. This test does not assume that the data are normally distributed.
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `alternative` — alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md).
|
||||
- `'two-sided'`;
|
||||
- `'greater'`;
|
||||
- `'less'`.
|
||||
- `continuity_correction` — If not 0, continuity correction in the normal approximation for the p-value is applied. (Optional, default: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
|
||||
- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
|
||||
**Returned values**
|
||||
|
||||
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
|
||||
- calculated U-statistic. [Float64](../../../sql-reference/data-types/float.md).
|
||||
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
|
||||
**Example**
|
||||
|
||||
Input table:
|
||||
|
||||
``` text
|
||||
┌─sample_data─┬─sample_index─┐
|
||||
│ 10 │ 0 │
|
||||
│ 11 │ 0 │
|
||||
│ 12 │ 0 │
|
||||
│ 1 │ 1 │
|
||||
│ 2 │ 1 │
|
||||
│ 3 │ 1 │
|
||||
└─────────────┴──────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT mannWhitneyUTest('greater')(sample_data, sample_index) FROM mww_ttest;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mannWhitneyUTest('greater')(sample_data, sample_index)─┐
|
||||
│ (9,0.04042779918503192) │
|
||||
└────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Mann–Whitney U test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test)
|
||||
- [Stochastic ordering](https://en.wikipedia.org/wiki/Stochastic_ordering)
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest/) <!--hide-->
|
@ -0,0 +1,65 @@
|
||||
---
|
||||
toc_priority: 300
|
||||
toc_title: studentTTest
|
||||
---
|
||||
|
||||
# studentTTest {#studentttest}
|
||||
|
||||
Applies Student's t-test to samples from two populations.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
studentTTest(sample_data, sample_index)
|
||||
```
|
||||
|
||||
Values of both samples are in the `sample_data` column. If `sample_index` equals 0, the value in that row belongs to the sample from the first population. Otherwise, it belongs to the sample from the second population.
|
||||
The null hypothesis is that the means of the populations are equal. A normal distribution with equal variances is assumed.
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
|
||||
- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned values**
|
||||
|
||||
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
|
||||
- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
|
||||
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
|
||||
**Example**
|
||||
|
||||
Input table:
|
||||
|
||||
``` text
|
||||
┌─sample_data─┬─sample_index─┐
|
||||
│ 20.3 │ 0 │
|
||||
│ 21.1 │ 0 │
|
||||
│ 21.9 │ 1 │
|
||||
│ 21.7 │ 0 │
|
||||
│ 19.9 │ 1 │
|
||||
│ 21.8 │ 1 │
|
||||
└─────────────┴──────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT studentTTest(sample_data, sample_index) FROM student_ttest;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─studentTTest(sample_data, sample_index)───┐
|
||||
│ (-0.21739130434783777,0.8385421208415731) │
|
||||
└───────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test)
|
||||
- [welchTTest function](welchttest.md#welchttest)
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/studentttest/) <!--hide-->
|
@ -0,0 +1,65 @@
|
||||
---
|
||||
toc_priority: 301
|
||||
toc_title: welchTTest
|
||||
---
|
||||
|
||||
# welchTTest {#welchttest}
|
||||
|
||||
Applies Welch's t-test to samples from two populations.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
welchTTest(sample_data, sample_index)
|
||||
```
|
||||
|
||||
Values of both samples are in the `sample_data` column. If `sample_index` equals 0, the value in that row belongs to the sample from the first population. Otherwise, it belongs to the sample from the second population.
|
||||
The null hypothesis is that the means of the populations are equal. A normal distribution is assumed. The populations may have unequal variances.
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
|
||||
- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned values**
|
||||
|
||||
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
|
||||
- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
|
||||
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
|
||||
**Example**
|
||||
|
||||
Input table:
|
||||
|
||||
``` text
|
||||
┌─sample_data─┬─sample_index─┐
|
||||
│ 20.3 │ 0 │
|
||||
│ 22.1 │ 0 │
|
||||
│ 21.9 │ 0 │
|
||||
│ 18.9 │ 1 │
|
||||
│ 20.3 │ 1 │
|
||||
│ 19 │ 1 │
|
||||
└─────────────┴──────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT welchTTest(sample_data, sample_index) FROM welch_ttest;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─welchTTest(sample_data, sample_index)─────┐
|
||||
│ (2.7988719532211235,0.051807360348581945) │
|
||||
└───────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Welch's t-test](https://en.wikipedia.org/wiki/Welch%27s_t-test)
|
||||
- [studentTTest function](studentttest.md#studentttest)
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/welchTTest/) <!--hide-->
|
@@ -208,8 +208,8 @@ This function returns the value for the specified `id`s and the date range that
Details of the algorithm:

- If the `id` is not found or a range is not found for the `id`, it returns the default value for the dictionary.
- If there are overlapping ranges, you can use any.
- If the range delimiter is `NULL` or an invalid date (such as 1900-01-01 or 2039-01-01), the range is left open. The range can be open on both sides.
- If there are overlapping ranges, it returns the value for any (random) matching range.
- If the range delimiter is `NULL` or an invalid date (such as 1900-01-01), the range is open. The range can be open on both sides.
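For illustration, a hypothetical lookup against a `range_hashed` dictionary named `salary_dict` with a UInt64 `amount` attribute; the date argument selects the range that contains it:

``` sql
SELECT dictGetUInt64('salary_dict', 'amount', toUInt64(42), toDate('2020-06-15'));
```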

Configuration example:
@@ -5,7 +5,7 @@ toc_title: QUOTA

# ALTER QUOTA {#alter-quota-statement}

Changes [quotas](../../../operations/access-rights.md#quotas-management).
Changes quotas.

Syntax:

@@ -14,13 +14,13 @@ ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
    [RENAME TO new_name]
    [KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
    [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
        {MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
        {MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
        NO LIMITS | TRACKING ONLY} [,...]]
    [TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
```
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.

Parameters `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.

`ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
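For illustration, a minimal sketch of changing a quota; the quota name and limit values are hypothetical, not taken from this page:

``` sql
ALTER QUOTA IF EXISTS qA
    FOR INTERVAL 15 month
    MAX queries = 123, query_selects = 100, query_inserts = 10
    TO CURRENT_USER;
```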
@@ -13,14 +13,14 @@ Syntax:
CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
    [KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
    [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
        {MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
        {MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
        NO LIMITS | TRACKING ONLY} [,...]]
    [TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
```

Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.

Parameters `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.

`ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
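As a sketch, with a hypothetical quota name, key, and limits:

``` sql
CREATE QUOTA IF NOT EXISTS qB
    KEYED BY user_name
    FOR INTERVAL 1 hour MAX queries = 100, query_inserts = 10
    TO default;
```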
@@ -59,6 +59,10 @@ A `SELECT` query can contain `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`… Note

The execution of [ALTER](../../../sql-reference/statements/alter/index.md) queries on materialized views has limitations, so they might be inconvenient. If the materialized view uses the construction `TO [db.]name`, you can `DETACH` the view, run `ALTER` for the target table, and then `ATTACH` the previously detached view, as sketched below.
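A minimal sketch of that workaround; the view, target table, and column names are hypothetical:

``` sql
DETACH TABLE mv_to_target;                          -- detach the materialized view
ALTER TABLE target_table ADD COLUMN extra UInt32;   -- alter the target table it writes to
ATTACH TABLE mv_to_target;                          -- re-attach the view
```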

Note that materialized views are influenced by the [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged before insertion into the view.

Views look the same as normal tables. For example, they are listed in the result of the `SHOW TABLES` query.

There isn’t a separate query for deleting views. To delete a view, use [DROP TABLE](../../../sql-reference/statements/drop.md).

[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/create/view/) <!--hide-->
@@ -62,8 +62,6 @@ If a list of columns doesn't include all existing columns, the rest of the colum
- The values calculated from the `DEFAULT` expressions specified in the table definition.
- Zeros and empty strings, if `DEFAULT` expressions are not defined (see the sketch below).
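A minimal sketch of that default-filling behaviour; the table and column names are hypothetical:

``` sql
CREATE TABLE insert_defaults_example (id UInt64, status String DEFAULT 'new', score UInt32) ENGINE = MergeTree ORDER BY id;

INSERT INTO insert_defaults_example (id) VALUES (1);

SELECT * FROM insert_defaults_example;   -- id = 1, status = 'new', score = 0
```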

If [strict\_insert\_defaults=1](../../operations/settings/settings.md), columns that do not have `DEFAULT` defined must be listed in the query.

Data can be passed to the INSERT in any [format](../../interfaces/formats.md#formats) supported by ClickHouse. The format must be specified explicitly in the query:

``` sql
@ -93,6 +93,7 @@ DDL-запросы в MySQL конвертируются в соответств
|
||||
- Каскадные запросы `UPDATE/DELETE` не поддерживаются движком `MaterializeMySQL`.
|
||||
- Репликация может быть легко нарушена.
|
||||
- Прямые операции изменения данных в таблицах и базах данных `MaterializeMySQL` запрещены.
|
||||
- На работу `MaterializeMySQL` влияет настройка [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert). Когда таблица на MySQL сервере меняется, происходит слияние данных в соответсвующей таблице в базе данных `MaterializeMySQL`.
|
||||
|
||||
## Примеры использования {#examples-of-use}
|
||||
|
||||
@ -156,4 +157,4 @@ SELECT * FROM mysql.test;
|
||||
└───┴─────┴──────┘
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/database_engines/materialize-mysql/) <!--hide-->
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/engines/database-engines/materialize-mysql/) <!--hide-->
|
||||
|
@ -12,7 +12,10 @@ toc_priority: 30
|
||||
- [ODBC](../../../engines/table-engines/integrations/odbc.md)
|
||||
- [JDBC](../../../engines/table-engines/integrations/jdbc.md)
|
||||
- [MySQL](../../../engines/table-engines/integrations/mysql.md)
|
||||
- [MongoDB](../../../engines/table-engines/integrations/mongodb.md)
|
||||
- [HDFS](../../../engines/table-engines/integrations/hdfs.md)
|
||||
- [Kafka](../../../engines/table-engines/integrations/kafka.md)
|
||||
- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md)
|
||||
- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/engines/table-engines/integrations/) <!--hide-->
|
||||
|
57
docs/ru/engines/table-engines/integrations/mongodb.md
Normal file
@ -0,0 +1,57 @@
|
||||
---
|
||||
toc_priority: 7
|
||||
toc_title: MongoDB
|
||||
---
|
||||
|
||||
# MongoDB {#mongodb}
|
||||
|
||||
Движок таблиц MongoDB позволяет читать данные из коллекций СУБД MongoDB. В таблицах допустимы только плоские (не вложенные) типы данных. Запись (`INSERT`-запросы) не поддерживается.
|
||||
|
||||
## Создание таблицы {#creating-a-table}
|
||||
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name
|
||||
(
|
||||
name1 [type1],
|
||||
name2 [type2],
|
||||
...
|
||||
) ENGINE = MongoDB(host:port, database, collection, user, password);
|
||||
```
|
||||
|
||||
**Параметры движка**
|
||||
|
||||
- `host:port` — адрес сервера MongoDB.
|
||||
|
||||
- `database` — имя базы данных на удалённом сервере.
|
||||
|
||||
- `collection` — имя коллекции на удалённом сервере.
|
||||
|
||||
- `user` — пользователь MongoDB.
|
||||
|
||||
- `password` — пароль пользователя.
|
||||
|
||||
## Примеры использования {#usage-example}
|
||||
|
||||
Таблица в ClickHouse для чтения данных из колекции MongoDB:
|
||||
|
||||
``` text
|
||||
CREATE TABLE mongo_table
|
||||
(
|
||||
key UInt64,
|
||||
data String
|
||||
) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse');
|
||||
```
|
||||
|
||||
Запрос к таблице:
|
||||
|
||||
``` sql
|
||||
SELECT COUNT() FROM mongo_table;
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─count()─┐
|
||||
│ 4 │
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/ru/operations/table_engines/integrations/mongodb/) <!--hide-->
|
@ -94,6 +94,7 @@ ORDER BY expr
|
||||
- `max_parts_in_total` — максимальное количество кусков во всех партициях.
|
||||
- `max_compress_block_size` — максимальный размер блоков несжатых данных перед сжатием для записи в таблицу. Вы также можете задать этот параметр в глобальных настройках (смотрите [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная.
|
||||
- `min_compress_block_size` — минимальный размер блоков несжатых данных, необходимых для сжатия при записи следующей засечки. Вы также можете задать этот параметр в глобальных настройках (смотрите [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная.
|
||||
- `max_partitions_to_read` — Ограничивает максимальное число партиций для чтения в одном запросе. Также возможно указать настройку [max_partitions_to_read](../../../operations/settings/merge-tree-settings.md#max-partitions-to-read) в глобальных настройках.
|
||||
|
||||
**Пример задания секций**
|
||||
|
||||
@ -711,4 +712,4 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
|
||||
|
||||
После выполнения фоновых слияний или мутаций старые куски не удаляются сразу, а через некоторое время (табличная настройка `old_parts_lifetime`). Также они не перемещаются на другие тома или диски, поэтому до момента удаления они продолжают учитываться при подсчёте занятого дискового пространства.
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/) <!--hide-->
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/engines/table-engines/mergetree-family/mergetree/) <!--hide-->
|
||||
|
@ -181,4 +181,16 @@ Eсли суммарное число активных кусков во все
|
||||
|
||||
При старте ClickHouse читает все куски всех таблиц (читает файлы с метаданными кусков), чтобы построить в ОЗУ список всех кусков. В некоторых системах с большим количеством кусков этот процесс может занимать длительное время, и это время можно сократить, увеличив `max_part_loading_threads` (если при этом процессе есть недозагруженность CPU и диска).
|
||||
|
||||
{## [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/merge-tree-settings/) ##}
|
||||
## max_partitions_to_read {#max-partitions-to-read}
|
||||
|
||||
Ограничивает максимальное число партиций для чтения в одном запросе.
|
||||
|
||||
Указанное при создании таблицы значение настройки может быть переназначено настройкой на уровне запроса.
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- Любое положительное целое число.
|
||||
|
||||
Значение по умолчанию: -1 (неограниченно).
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/ru/operations/settings/merge_tree_settings/) <!--hide-->
|
||||
|
@ -2473,4 +2473,58 @@ SELECT SUM(-1), MAX(0) FROM system.one WHERE 0;
|
||||
|
||||
Значение по умолчанию: `16`.
|
||||
|
||||
## optimize_on_insert {#optimize-on-insert}
|
||||
|
||||
Включает или выключает преобразование данных перед добавлением в таблицу, как будто над добавляемым блоком предварительно было произведено слияние (в соответствии с движком таблицы).
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- 0 — выключена
|
||||
- 1 — включена.
|
||||
|
||||
Значение по умолчанию: 1.
|
||||
|
||||
**Пример**
|
||||
|
||||
Сравните добавление данных при включенной и выключенной настройке:
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
SET optimize_on_insert = 1;
|
||||
|
||||
CREATE TABLE test1 (`FirstTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY FirstTable;
|
||||
|
||||
INSERT INTO test1 SELECT number % 2 FROM numbers(5);
|
||||
|
||||
SELECT * FROM test1;
|
||||
|
||||
SET optimize_on_insert = 0;
|
||||
|
||||
CREATE TABLE test2 (`SecondTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY SecondTable;
|
||||
|
||||
INSERT INTO test2 SELECT number % 2 FROM numbers(5);
|
||||
|
||||
SELECT * FROM test2;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─FirstTable─┐
|
||||
│ 0 │
|
||||
│ 1 │
|
||||
└────────────┘
|
||||
|
||||
┌─SecondTable─┐
|
||||
│ 0 │
|
||||
│ 0 │
|
||||
│ 0 │
|
||||
│ 1 │
|
||||
│ 1 │
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
Обратите внимание на то, что эта настройка влияет на поведение [материализованных представлений](../../sql-reference/statements/create/view.md#materialized) и БД [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md).
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) <!--hide-->
|
||||
|
@ -6,29 +6,62 @@

Столбцы:

- `event_type` (Enum) — тип события. Столбец может содержать одно из следующих значений:
- `query_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор запроса `INSERT`, создавшего этот кусок.
- `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — тип события. Столбец может содержать одно из следующих значений:
    - `NEW_PART` — вставка нового куска.
    - `MERGE_PARTS` — слияние кусков.
    - `DOWNLOAD_PART` — загрузка с реплики.
    - `REMOVE_PART` — удаление или отсоединение из таблицы с помощью [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition).
    - `MUTATE_PART` — изменение куска.
    - `MOVE_PART` — перемещение куска между дисками.
- `event_date` (Date) — дата события.
- `event_time` (DateTime) — время события.
- `duration_ms` (UInt64) — длительность.
- `database` (String) — имя базы данных, в которой находится кусок.
- `table` (String) — имя таблицы, в которой находится кусок.
- `part_name` (String) — имя куска.
- `partition_id` (String) — идентификатор партиции, в которую был добавлен кусок. В столбце будет значение ‘all’, если таблица партициируется по выражению `tuple()`.
- `rows` (UInt64) — число строк в куске.
- `size_in_bytes` (UInt64) — размер куска данных в байтах.
- `merged_from` (Array(String)) — массив имён кусков, из которых образован текущий кусок в результате слияния (также столбец заполняется в случае скачивания уже смерженного куска).
- `bytes_uncompressed` (UInt64) — количество прочитанных разжатых байт.
- `read_rows` (UInt64) — сколько было прочитано строк при слиянии кусков.
- `read_bytes` (UInt64) — сколько было прочитано байт при слиянии кусков.
- `error` (UInt16) — код ошибки, возникшей при текущем событии.
- `exception` (String) — текст ошибки.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата события.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время события.
- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — длительность.
- `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных, в которой находится кусок.
- `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы, в которой находится кусок.
- `part_name` ([String](../../sql-reference/data-types/string.md)) — имя куска.
- `partition_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор партиции, в которую был добавлен кусок. В столбце будет значение `all`, если таблица партициируется по выражению `tuple()`.
- `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — абсолютный путь к папке с файлами кусков данных.
- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — число строк в куске.
- `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — размер куска данных в байтах.
- `merged_from` ([Array(String)](../../sql-reference/data-types/array.md)) — массив имён кусков, из которых образован текущий кусок в результате слияния (также столбец заполняется в случае скачивания уже смерженного куска).
- `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — количество прочитанных несжатых байт.
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — сколько было прочитано строк при слиянии кусков.
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — сколько было прочитано байт при слиянии кусков.
- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — максимальная разница между выделенной и освобождённой памятью в контексте потока.
- `error` ([UInt16](../../sql-reference/data-types/int-uint.md)) — код ошибки, возникшей при текущем событии.
- `exception` ([String](../../sql-reference/data-types/string.md)) — текст ошибки.

Системная таблица `system.part_log` будет создана после первой вставки данных в таблицу `MergeTree`.

**Пример**

``` sql
SELECT * FROM system.part_log LIMIT 1 FORMAT Vertical;
```

``` text
Row 1:
──────
query_id: 983ad9c7-28d5-4ae1-844e-603116b7de31
event_type: NewPart
event_date: 2021-02-02
event_time: 2021-02-02 11:14:28
duration_ms: 35
database: default
table: log_mt_2
part_name: all_1_1_0
partition_id: all
path_on_disk: db/data/default/log_mt_2/all_1_1_0/
rows: 115418
size_in_bytes: 1074311
merged_from: []
bytes_uncompressed: 0
read_rows: 0
read_bytes: 0
peak_memory_usage: 0
error: 0
exception:
```

[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/part_log) <!--hide-->
@ -4,8 +4,63 @@ toc_priority: 106

# argMax {#agg-function-argmax}

Синтаксис: `argMax(arg, val)`
Вычисляет значение `arg` при максимальном значении `val`. Если есть несколько разных значений `arg` для максимальных значений `val`, возвращает первое попавшееся из таких значений.

Вычисляет значение arg при максимальном значении val. Если есть несколько разных значений arg для максимальных значений val, то выдаётся первое попавшееся из таких значений.
Если функции передан кортеж, то будет выведен кортеж с максимальным значением `val`. Удобно использовать для работы с [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md).

[Оригинальная статья](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmax/) <!--hide-->
**Синтаксис**

``` sql
argMax(arg, val)
```

или

``` sql
argMax(tuple(arg, val))
```

**Параметры**

- `arg` — аргумент.
- `val` — значение.

**Возвращаемое значение**

- Значение `arg`, соответствующее максимальному значению `val`.

Тип: соответствует типу `arg`.

Если передан кортеж:

- Кортеж `(arg, val)` c максимальным значением `val` и соответствующим ему `arg`.

Тип: [Tuple](../../../sql-reference/data-types/tuple.md).

**Пример**

Исходная таблица:

``` text
┌─user─────┬─salary─┐
│ director │   5000 │
│ manager  │   3000 │
│ worker   │   1000 │
└──────────┴────────┘
```

Запрос:

``` sql
SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary;
```

Результат:

``` text
┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐
│ director             │ ('director',5000)           │
└──────────────────────┴─────────────────────────────┘
```

[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/argmax/) <!--hide-->
@ -4,11 +4,42 @@ toc_priority: 105

# argMin {#agg-function-argmin}

Синтаксис: `argMin(arg, val)`
Вычисляет значение `arg` при минимальном значении `val`. Если есть несколько разных значений `arg` для минимальных значений `val`, возвращает первое попавшееся из таких значений.

Вычисляет значение arg при минимальном значении val. Если есть несколько разных значений arg для минимальных значений val, то выдаётся первое попавшееся из таких значений.
Если функции передан кортеж, то будет выведен кортеж с минимальным значением `val`. Удобно использовать для работы с [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md).

**Пример:**
**Синтаксис**

``` sql
argMin(arg, val)
```

или

``` sql
argMin(tuple(arg, val))
```

**Параметры**

- `arg` — аргумент.
- `val` — значение.

**Возвращаемое значение**

- Значение `arg`, соответствующее минимальному значению `val`.

Тип: соответствует типу `arg`.

Если передан кортеж:

- Кортеж `(arg, val)` c минимальным значением `val` и соответствующим ему `arg`.

Тип: [Tuple](../../../sql-reference/data-types/tuple.md).

**Пример**

Исходная таблица:

``` text
┌─user─────┬─salary─┐
@ -18,14 +49,18 @@ toc_priority: 105
└──────────┴────────┘
```

Запрос:

``` sql
SELECT argMin(user, salary) FROM salary
SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary;
```

Результат:

``` text
┌─argMin(user, salary)─┐
│ worker               │
└──────────────────────┘
┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─┐
│ worker               │ ('worker',1000)             │
└──────────────────────┴─────────────────────────────┘
```

[Оригинальная статья](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmin/) <!--hide-->
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/argmin/) <!--hide-->
@ -0,0 +1,71 @@
---
toc_priority: 310
toc_title: mannWhitneyUTest
---

# mannWhitneyUTest {#mannwhitneyutest}

Вычисляет U-критерий Манна — Уитни для выборок из двух генеральных совокупностей.

**Синтаксис**

``` sql
mannWhitneyUTest[(alternative[, continuity_correction])](sample_data, sample_index)
```

Значения выборок берутся из столбца `sample_data`. Если `sample_index` равно 0, то значение из этой строки принадлежит первой выборке. Во всех остальных случаях значение принадлежит второй выборке.
Проверяется нулевая гипотеза, что генеральные совокупности стохастически равны. Наряду с двусторонней гипотезой могут быть проверены и односторонние.
Для применения U-критерия Манна — Уитни закон распределения генеральных совокупностей не обязан быть нормальным.

**Параметры**

- `alternative` — альтернативная гипотеза. (Необязательный параметр, по умолчанию: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md).
    - `'two-sided'`;
    - `'greater'`;
    - `'less'`.
- `continuity_correction` — если не 0, то при вычислении p-значения применяется коррекция непрерывности. (Необязательный параметр, по умолчанию: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md).
- `sample_data` — данные выборок. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) или [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — индексы выборок. [Integer](../../../sql-reference/data-types/int-uint.md).

**Возвращаемые значения**

[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами:
- вычисленное значение критерия Манна — Уитни. [Float64](../../../sql-reference/data-types/float.md).
- вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md).

**Пример**

Таблица:

``` text
┌─sample_data─┬─sample_index─┐
│          10 │            0 │
│          11 │            0 │
│          12 │            0 │
│           1 │            1 │
│           2 │            1 │
│           3 │            1 │
└─────────────┴──────────────┘
```

Запрос:

``` sql
SELECT mannWhitneyUTest('greater')(sample_data, sample_index) FROM mww_ttest;
```

Результат:

``` text
┌─mannWhitneyUTest('greater')(sample_data, sample_index)─┐
│ (9,0.04042779918503192)                                 │
└─────────────────────────────────────────────────────────┘
```

**Смотрите также**

- [U-критерий Манна — Уитни](https://ru.wikipedia.org/wiki/U-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%9C%D0%B0%D0%BD%D0%BD%D0%B0_%E2%80%94_%D0%A3%D0%B8%D1%82%D0%BD%D0%B8)

[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest/) <!--hide-->
@ -0,0 +1,65 @@
---
toc_priority: 300
toc_title: studentTTest
---

# studentTTest {#studentttest}

Вычисляет t-критерий Стьюдента для выборок из двух генеральных совокупностей.

**Синтаксис**

``` sql
studentTTest(sample_data, sample_index)
```

Значения выборок берутся из столбца `sample_data`. Если `sample_index` равно 0, то значение из этой строки принадлежит первой выборке. Во всех остальных случаях значение принадлежит второй выборке.
Проверяется нулевая гипотеза, что средние значения генеральных совокупностей совпадают. Для применения t-критерия Стьюдента распределение в генеральных совокупностях должно быть нормальным и дисперсии должны совпадать.

**Параметры**

- `sample_data` — данные выборок. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) или [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — индексы выборок. [Integer](../../../sql-reference/data-types/int-uint.md).

**Возвращаемые значения**

[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами:
- вычисленное значение критерия Стьюдента. [Float64](../../../sql-reference/data-types/float.md).
- вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md).

**Пример**

Таблица:

``` text
┌─sample_data─┬─sample_index─┐
│        20.3 │            0 │
│        21.1 │            0 │
│        21.9 │            1 │
│        21.7 │            0 │
│        19.9 │            1 │
│        21.8 │            1 │
└─────────────┴──────────────┘
```

Запрос:

``` sql
SELECT studentTTest(sample_data, sample_index) FROM student_ttest;
```

Результат:

``` text
┌─studentTTest(sample_data, sample_index)───┐
│ (-0.21739130434783777,0.8385421208415731) │
└───────────────────────────────────────────┘
```

**Смотрите также**

- [t-критерий Стьюдента](https://ru.wikipedia.org/wiki/T-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%A1%D1%82%D1%8C%D1%8E%D0%B4%D0%B5%D0%BD%D1%82%D0%B0)
- [welchTTest](welchttest.md#welchttest)

[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/studentttest/) <!--hide-->
@ -0,0 +1,65 @@
---
toc_priority: 301
toc_title: welchTTest
---

# welchTTest {#welchttest}

Вычисляет t-критерий Уэлча для выборок из двух генеральных совокупностей.

**Синтаксис**

``` sql
welchTTest(sample_data, sample_index)
```

Значения выборок берутся из столбца `sample_data`. Если `sample_index` равно 0, то значение из этой строки принадлежит первой выборке. Во всех остальных случаях значение принадлежит второй выборке.
Проверяется нулевая гипотеза, что средние значения генеральных совокупностей совпадают. Для применения t-критерия Уэлча распределение в генеральных совокупностях должно быть нормальным. Дисперсии могут не совпадать.

**Параметры**

- `sample_data` — данные выборок. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) или [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — индексы выборок. [Integer](../../../sql-reference/data-types/int-uint.md).

**Возвращаемые значения**

[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами:
- вычисленное значение критерия Уэлча. [Float64](../../../sql-reference/data-types/float.md).
- вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md).

**Пример**

Таблица:

``` text
┌─sample_data─┬─sample_index─┐
│        20.3 │            0 │
│        22.1 │            0 │
│        21.9 │            0 │
│        18.9 │            1 │
│        20.3 │            1 │
│          19 │            1 │
└─────────────┴──────────────┘
```

Запрос:

``` sql
SELECT welchTTest(sample_data, sample_index) FROM welch_ttest;
```

Результат:

``` text
┌─welchTTest(sample_data, sample_index)─────┐
│ (2.7988719532211235,0.051807360348581945) │
└───────────────────────────────────────────┘
```

**Смотрите также**

- [t-критерий Уэлча](https://ru.wikipedia.org/wiki/T-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%A3%D1%8D%D0%BB%D1%87%D0%B0)
- [studentTTest](studentttest.md#studentttest)

[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/welchTTest/) <!--hide-->
@ -205,8 +205,8 @@ RANGE(MIN first MAX last)
Особенности алгоритма:

- Если не найден `id` или для найденного `id` не найден диапазон, то возвращается значение по умолчанию для словаря.
- Если есть перекрывающиеся диапазоны, то можно использовать любой подходящий.
- Если граница диапазона `NULL` или некорректная дата (1900-01-01, 2039-01-01), то диапазон считается открытым. Диапазон может быть открытым с обеих сторон.
- Если есть перекрывающиеся диапазоны, то возвращается значение из любого (случайного) подходящего диапазона.
- Если граница диапазона `NULL` или некорректная дата (1900-01-01), то диапазон считается открытым. Диапазон может быть открытым с обеих сторон.

Пример конфигурации:
@ -56,9 +56,10 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na

Недоработано выполнение запросов `ALTER` над материализованными представлениями, поэтому они могут быть неудобными для использования. Если материализованное представление использует конструкцию `TO [db.]name`, то можно выполнить `DETACH` представления, `ALTER` для целевой таблицы и последующий `ATTACH` ранее отсоединенного (`DETACH`) представления.

Обратите внимание, что работа материализованного представления находится под влиянием настройки [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert). Перед вставкой данных в таблицу происходит их слияние.

Представления выглядят так же, как обычные таблицы. Например, они перечисляются в результате запроса `SHOW TABLES`.

Отсутствует отдельный запрос для удаления представлений. Чтобы удалить представление, следует использовать `DROP TABLE`.

[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/view)
<!--hide-->
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/view) <!--hide-->
@ -63,8 +63,6 @@ SELECT * FROM insert_select_testtable
- Значения, вычисляемые из `DEFAULT` выражений, указанных в определении таблицы.
- Нули и пустые строки, если `DEFAULT` не определены.

Если [strict_insert_defaults=1](../../operations/settings/settings.md), то столбцы, для которых не определены `DEFAULT`, необходимо перечислить в запросе.

В INSERT можно передавать данные любого [формата](../../interfaces/formats.md#formats), который поддерживает ClickHouse. Для этого формат необходимо указать в запросе в явном виде:

``` sql
@ -37,7 +37,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
VersionedCollapsingMergeTree(sign, version)
```

- `sign` — 指定行类型的列名: `1` 是一个 “state” 行, `-1` 是一个 “cancel” 划
- `sign` — 指定行类型的列名: `1` 是一个 “state” 行, `-1` 是一个 “cancel” 行

列数据类型应为 `Int8`.
@ -6,12 +6,16 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
# 系统。动物园管理员 {#system-zookeeper}

如果未配置ZooKeeper,则表不存在。 允许从配置中定义的ZooKeeper集群读取数据。
查询必须具有 ‘path’ WHERE子句中的平等条件。 这是ZooKeeper中您想要获取数据的孩子的路径。
查询必须具有 ‘path’ WHERE子句中的相等条件或者在某个集合中的条件。 这是ZooKeeper中您想要获取数据的孩子的路径。

查询 `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` 输出对所有孩子的数据 `/clickhouse` 节点。
要输出所有根节点的数据,write path= ‘/’.
如果在指定的路径 ‘path’ 不存在,将引发异常。

查询`SELECT * FROM system.zookeeper WHERE path IN ('/', '/clickhouse')` 输出`/` 和 `/clickhouse`节点上所有子节点的数据。
如果在指定的 ‘path’ 集合中有不存在的路径,将引发异常。
它可以用来做一批ZooKeeper路径查询。

列:

- `name` (String) — The name of the node.
@ -1374,7 +1374,30 @@ private:
|
||||
{
|
||||
// Probably the server is dead because we found an assertion
|
||||
// failure. Fail fast.
|
||||
fmt::print(stderr, "Lost connection to the server\n");
|
||||
fmt::print(stderr, "Lost connection to the server.\n");
|
||||
|
||||
// Print the changed settings because they might be needed to
|
||||
// reproduce the error.
|
||||
const auto & changes = context.getSettingsRef().changes();
|
||||
if (!changes.empty())
|
||||
{
|
||||
fmt::print(stderr, "Changed settings: ");
|
||||
for (size_t i = 0; i < changes.size(); ++i)
|
||||
{
|
||||
if (i)
|
||||
{
|
||||
fmt::print(stderr, ", ");
|
||||
}
|
||||
fmt::print(stderr, "{} = '{}'", changes[i].name,
|
||||
toString(changes[i].value));
|
||||
}
|
||||
fmt::print(stderr, "\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
fmt::print(stderr, "No changed settings.\n");
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -363,6 +363,16 @@ void QueryFuzzer::fuzzWindowFrame(WindowFrame & frame)
|
||||
frame.end_offset = getRandomField(0).get<Int64>();
|
||||
break;
|
||||
}
|
||||
case 5:
|
||||
{
|
||||
frame.begin_preceding = fuzz_rand() % 2;
|
||||
break;
|
||||
}
|
||||
case 6:
|
||||
{
|
||||
frame.end_preceding = fuzz_rand() % 2;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
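
For context, the switch above is one mutation step of the query fuzzer: each call picks one field of the window frame at random and perturbs it. A minimal sketch of that selection pattern, with an illustrative `WindowFrameSketch` stand-in rather than the real `WindowFrame`, could look like this:

```cpp
#include <cstdint>
#include <random>

// Illustrative stand-in for the frame being fuzzed; not the real WindowFrame struct.
struct WindowFrameSketch
{
    int64_t begin_offset = 0;
    int64_t end_offset = 0;
    bool begin_preceding = true;
    bool end_preceding = true;
};

// One fuzz step: pick a random mutation, leave everything else untouched.
void fuzzOneStep(WindowFrameSketch & frame, std::mt19937_64 & rng)
{
    switch (rng() % 4)
    {
        case 0: frame.begin_offset = static_cast<int64_t>(rng() % 100); break;
        case 1: frame.end_offset = static_cast<int64_t>(rng() % 100); break;
        case 2: frame.begin_preceding = rng() % 2; break;  // same idea as case 5 above
        case 3: frame.end_preceding = rng() % 2; break;    // same idea as case 6 above
    }
}

int main()
{
    std::mt19937_64 rng(42);
    WindowFrameSketch frame;
    for (int i = 0; i < 1000; ++i)
        fuzzOneStep(frame, rng);
    return 0;
}
```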
|
||||
|
@ -29,4 +29,25 @@
|
||||
<test>{display_name} \x01\e[1;32m\x02:)\x01\e[0m\x02 </test> <!-- if it matched to the substring "test" in the server display name - -->
|
||||
<production>{display_name} \x01\e[1;31m\x02:)\x01\e[0m\x02 </production> <!-- if it matched to the substring "production" in the server display name -->
|
||||
</prompt_by_server_display_name>
|
||||
|
||||
<!--
|
||||
Settings adjustable via command-line parameters
|
||||
can take their defaults from that config file, see examples:
|
||||
|
||||
<host>127.0.0.1</host>
|
||||
<port>9440</port>
|
||||
<secure>1</secure>
|
||||
<user>dbuser</user>
|
||||
<password>dbpwd123</password>
|
||||
<format>PrettyCompactMonoBlock</format>
|
||||
<multiline>1</multiline>
|
||||
<multiquery>1</multiquery>
|
||||
<stacktrace>1</stacktrace>
|
||||
<database>default2</database>
|
||||
<pager>less -SR</pager>
|
||||
<history_file>/home/user/clickhouse_custom_history.log</history_file>
|
||||
<max_parser_depth>2500</max_parser_depth>
|
||||
|
||||
The same can be done on user-level configuration, just create & adjust: ~/.clickhouse-client/config.xml
|
||||
-->
|
||||
</config>
|
||||
|
@ -316,9 +316,6 @@ void ClusterCopier::process(const ConnectionTimeouts & timeouts)
|
||||
}
|
||||
}
|
||||
|
||||
/// Delete helping tables in both cases (whole table is done or not)
|
||||
dropHelpingTables(task_table);
|
||||
|
||||
if (!table_is_done)
|
||||
{
|
||||
throw Exception("Too many tries to process table " + task_table.table_id + ". Abort remaining execution",
|
||||
@ -1044,6 +1041,11 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab
|
||||
{
|
||||
LOG_INFO(log, "Table {} is not processed yet. Copied {} of {}, will retry", task_table.table_id, finished_partitions, required_partitions);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Delete helping tables in case that whole table is done
|
||||
dropHelpingTables(task_table);
|
||||
}
|
||||
|
||||
return table_is_done;
|
||||
}
|
||||
|
@ -31,6 +31,8 @@ struct Quota : public IAccessEntity
|
||||
enum ResourceType
|
||||
{
|
||||
QUERIES, /// Number of queries.
|
||||
QUERY_SELECTS, /// Number of select queries.
|
||||
QUERY_INSERTS, /// Number of inserts queries.
|
||||
ERRORS, /// Number of queries with exceptions.
|
||||
RESULT_ROWS, /// Number of rows returned as result.
|
||||
RESULT_BYTES, /// Number of bytes returned as result.
|
||||
@ -152,6 +154,16 @@ inline const Quota::ResourceTypeInfo & Quota::ResourceTypeInfo::get(ResourceType
|
||||
static const auto info = make_info("QUERIES", 1);
|
||||
return info;
|
||||
}
|
||||
case Quota::QUERY_SELECTS:
|
||||
{
|
||||
static const auto info = make_info("QUERY_SELECTS", 1);
|
||||
return info;
|
||||
}
|
||||
case Quota::QUERY_INSERTS:
|
||||
{
|
||||
static const auto info = make_info("QUERY_INSERTS", 1);
|
||||
return info;
|
||||
}
|
||||
case Quota::ERRORS:
|
||||
{
|
||||
static const auto info = make_info("ERRORS", 1);
|
||||
|
49
src/AggregateFunctions/AggregateFunctionDeltaSum.cpp
Normal file
49
src/AggregateFunctions/AggregateFunctionDeltaSum.cpp
Normal file
@ -0,0 +1,49 @@
|
||||
#include <AggregateFunctions/AggregateFunctionDeltaSum.h>
|
||||
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionDeltaSum(
|
||||
const String & name,
|
||||
const DataTypes & arguments,
|
||||
const Array & params)
|
||||
{
|
||||
assertNoParameters(name, params);
|
||||
|
||||
if (arguments.size() != 1)
|
||||
throw Exception("Incorrect number of arguments for aggregate function " + name,
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
DataTypePtr data_type = arguments[0];
|
||||
|
||||
if (isInteger(data_type) || isFloat(data_type))
|
||||
return AggregateFunctionPtr(createWithNumericType<AggregationFunctionDeltaSum>(
|
||||
*data_type, arguments, params));
|
||||
else
|
||||
throw Exception("Illegal type " + arguments[0]->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
}
|
||||
|
||||
void registerAggregateFunctionDeltaSum(AggregateFunctionFactory & factory)
|
||||
{
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = true };
|
||||
|
||||
factory.registerFunction("deltaSum", { createAggregateFunctionDeltaSum, properties });
|
||||
}
|
||||
|
||||
}
|
129
src/AggregateFunctions/AggregateFunctionDeltaSum.h
Normal file
129
src/AggregateFunctions/AggregateFunctionDeltaSum.h
Normal file
@ -0,0 +1,129 @@
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include <experimental/type_traits>
|
||||
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
struct AggregationFunctionDeltaSumData
|
||||
{
|
||||
T sum = 0;
|
||||
T last = 0;
|
||||
T first = 0;
|
||||
bool seen_last = false;
|
||||
bool seen_first = false;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class AggregationFunctionDeltaSum final
|
||||
: public IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>
|
||||
{
|
||||
public:
|
||||
AggregationFunctionDeltaSum(const DataTypes & arguments, const Array & params)
|
||||
: IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>{arguments, params}
|
||||
{}
|
||||
|
||||
AggregationFunctionDeltaSum()
|
||||
: IAggregateFunctionDataHelper<AggregationFunctionDeltaSumData<T>, AggregationFunctionDeltaSum<T>>{}
|
||||
{}
|
||||
|
||||
String getName() const override { return "deltaSum"; }
|
||||
|
||||
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeNumber<T>>(); }
|
||||
|
||||
void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
auto value = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
|
||||
|
||||
if ((this->data(place).last < value) && this->data(place).seen_last)
|
||||
{
|
||||
this->data(place).sum += (value - this->data(place).last);
|
||||
}
|
||||
|
||||
this->data(place).last = value;
|
||||
this->data(place).seen_last = true;
|
||||
|
||||
if (!this->data(place).seen_first)
|
||||
{
|
||||
this->data(place).first = value;
|
||||
this->data(place).seen_first = true;
|
||||
}
|
||||
}
|
||||
|
||||
void NO_SANITIZE_UNDEFINED ALWAYS_INLINE merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
auto place_data = &this->data(place);
|
||||
auto rhs_data = &this->data(rhs);
|
||||
|
||||
if ((place_data->last < rhs_data->first) && place_data->seen_last && rhs_data->seen_first)
|
||||
{
|
||||
// If the lhs last number seen is less than the first number the rhs saw, the lhs is before
|
||||
// the rhs, for example [0, 2] [4, 7]. So we want to add the deltasums, but also add the
|
||||
// difference between lhs last number and rhs first number (the 2 and 4). Then we want to
|
||||
// take last value from the rhs, so first and last become 0 and 7.
|
||||
|
||||
place_data->sum += rhs_data->sum + (rhs_data->first - place_data->last);
|
||||
place_data->last = rhs_data->last;
|
||||
}
|
||||
else if ((rhs_data->last < place_data->first && rhs_data->seen_last && place_data->seen_first))
|
||||
{
|
||||
// In the opposite scenario, the lhs comes after the rhs, e.g. [4, 6] [1, 2]. Since we
|
||||
// assume the input interval states are sorted by time, we assume this is a counter
|
||||
// reset, and therefore do *not* add the difference between our first value and the
|
||||
// rhs last value.
|
||||
|
||||
place_data->sum += rhs_data->sum;
|
||||
place_data->first = rhs_data->first;
|
||||
}
|
||||
else if (rhs_data->seen_first)
|
||||
{
|
||||
// If we're here then the lhs is an empty state and the rhs does have some state, so
|
||||
// we'll just take that state.
|
||||
|
||||
place_data->first = rhs_data->first;
|
||||
place_data->seen_first = rhs_data->seen_first;
|
||||
place_data->last = rhs_data->last;
|
||||
place_data->seen_last = rhs_data->seen_last;
|
||||
place_data->sum = rhs_data->sum;
|
||||
}
|
||||
|
||||
// Otherwise lhs either has data or is uninitialized, so we don't need to modify its values.
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
||||
{
|
||||
writeIntBinary(this->data(place).sum, buf);
|
||||
writeIntBinary(this->data(place).first, buf);
|
||||
writeIntBinary(this->data(place).last, buf);
|
||||
writePODBinary<bool>(this->data(place).seen_first, buf);
|
||||
writePODBinary<bool>(this->data(place).seen_last, buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
|
||||
{
|
||||
readIntBinary(this->data(place).sum, buf);
|
||||
readIntBinary(this->data(place).first, buf);
|
||||
readIntBinary(this->data(place).last, buf);
|
||||
readPODBinary<bool>(this->data(place).seen_first, buf);
|
||||
readPODBinary<bool>(this->data(place).seen_last, buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
|
||||
{
|
||||
assert_cast<ColumnVector<T> &>(to).getData().push_back(this->data(place).sum);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
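
The add/merge logic above is easiest to follow on a stripped-down model. The following sketch (illustrative names, plain `double` instead of the templated column types, and only the non-overlapping merge branches) reproduces the bookkeeping described in the comments, including the `[0, 2]` + `[4, 7]` example:

```cpp
#include <cassert>
#include <initializer_list>

// Simplified model of the deltaSum state: the running sum of positive deltas
// plus the first and last values seen, so partial states can be merged later.
struct DeltaSumState
{
    double sum = 0, first = 0, last = 0;
    bool seen = false;

    void add(double value)
    {
        if (seen && value > last)
            sum += value - last;      // only positive increments are counted
        if (!seen)
            first = value;
        last = value;
        seen = true;
    }

    // Merge `rhs` into this state, assuming both cover time-ordered input.
    void merge(const DeltaSumState & rhs)
    {
        if (!rhs.seen)
            return;
        if (!seen)
        {
            *this = rhs;              // lhs was empty: take the rhs state wholesale
            return;
        }
        if (last < rhs.first)
        {
            // lhs interval precedes rhs: also count the gap between them, e.g. [0, 2] + [4, 7].
            sum += rhs.sum + (rhs.first - last);
            last = rhs.last;
        }
        else
        {
            // rhs starts at or below where lhs ended: treat it like a counter reset,
            // add rhs's own deltas but not the gap.
            sum += rhs.sum;
        }
    }
};

int main()
{
    DeltaSumState a, b;
    for (double v : {0.0, 1.0, 2.0}) a.add(v);  // deltas: 1 + 1 = 2
    for (double v : {4.0, 6.0, 7.0}) b.add(v);  // deltas: 2 + 1 = 3
    a.merge(b);                                  // plus the 2 -> 4 gap = 2
    assert(a.sum == 7.0);
    return 0;
}
```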
@ -16,6 +16,22 @@ namespace ErrorCodes
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <template <typename, typename> class AggregateFunctionTemplate, template <typename> class Data, typename... TArgs>
|
||||
static IAggregateFunction * createWithIntegerType(const IDataType & argument_type, TArgs && ... args)
|
||||
{
|
||||
WhichDataType which(argument_type);
|
||||
if (which.idx == TypeIndex::UInt8) return new AggregateFunctionTemplate<UInt8, Data<UInt8>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::UInt16) return new AggregateFunctionTemplate<UInt16, Data<UInt16>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::UInt32) return new AggregateFunctionTemplate<UInt32, Data<UInt32>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::UInt64) return new AggregateFunctionTemplate<UInt64, Data<UInt64>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::Int8) return new AggregateFunctionTemplate<Int8, Data<Int8>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::Int16) return new AggregateFunctionTemplate<Int16, Data<Int16>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::Int32) return new AggregateFunctionTemplate<Int32, Data<Int32>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::Int64) return new AggregateFunctionTemplate<Int64, Data<Int64>>(std::forward<TArgs>(args)...);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <template <typename> class Data>
|
||||
AggregateFunctionPtr createAggregateFunctionBitmap(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
@ -28,7 +44,7 @@ namespace
|
||||
+ " is illegal, because it cannot be used in Bitmap operations",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
AggregateFunctionPtr res(createWithUnsignedIntegerType<AggregateFunctionBitmap, Data>(*argument_types[0], argument_types[0]));
|
||||
AggregateFunctionPtr res(createWithIntegerType<AggregateFunctionBitmap, Data>(*argument_types[0], argument_types[0]));
|
||||
|
||||
if (!res)
|
||||
throw Exception(
|
||||
@ -55,7 +71,7 @@ namespace
|
||||
const DataTypeAggregateFunction & datatype_aggfunc = dynamic_cast<const DataTypeAggregateFunction &>(*argument_type_ptr);
|
||||
AggregateFunctionPtr aggfunc = datatype_aggfunc.getFunction();
|
||||
argument_type_ptr = aggfunc->getArgumentTypes()[0];
|
||||
AggregateFunctionPtr res(createWithUnsignedIntegerType<AggregateFunctionTemplate, AggregateFunctionGroupBitmapData>(
|
||||
AggregateFunctionPtr res(createWithIntegerType<AggregateFunctionTemplate, AggregateFunctionGroupBitmapData>(
|
||||
*argument_type_ptr, argument_type_ptr));
|
||||
if (!res)
|
||||
throw Exception(
|
||||
|
@ -32,6 +32,7 @@ template <typename T, UInt8 small_set_size>
|
||||
class RoaringBitmapWithSmallSet : private boost::noncopyable
|
||||
{
|
||||
private:
|
||||
using UnsignedT = std::make_unsigned_t<T>;
|
||||
SmallSet<T, small_set_size> small;
|
||||
using ValueBuffer = std::vector<T>;
|
||||
using RoaringBitmap = std::conditional_t<sizeof(T) >= 8, roaring::Roaring64Map, roaring::Roaring>;
|
||||
@ -363,6 +364,7 @@ public:
|
||||
/**
|
||||
* Check whether the argument is the subset of this set.
|
||||
* Empty set is a subset of any other set (consistent with hasAll).
|
||||
* It's used in subset and currently only support comparing same type
|
||||
*/
|
||||
UInt8 rb_is_subset(const RoaringBitmapWithSmallSet & r1) const
|
||||
{
|
||||
@ -486,6 +488,7 @@ public:
|
||||
|
||||
/**
|
||||
* Return new set with specified range (not include the range_end)
|
||||
* It's used in subset and currently only support UInt32
|
||||
*/
|
||||
UInt64 rb_range(UInt64 range_start, UInt64 range_end, RoaringBitmapWithSmallSet & r1) const
|
||||
{
|
||||
@ -525,6 +528,7 @@ public:
|
||||
|
||||
/**
|
||||
* Return new set of the smallest `limit` values in set which is no less than `range_start`.
|
||||
* It's used in subset and currently only support UInt32
|
||||
*/
|
||||
UInt64 rb_limit(UInt64 range_start, UInt64 limit, RoaringBitmapWithSmallSet & r1) const
|
||||
{
|
||||
@ -578,10 +582,10 @@ public:
|
||||
{
|
||||
if (small.empty())
|
||||
return 0;
|
||||
auto min_val = std::numeric_limits<std::make_unsigned_t<T>>::max();
|
||||
auto min_val = std::numeric_limits<UnsignedT>::max();
|
||||
for (const auto & x : small)
|
||||
{
|
||||
auto val = x.getValue();
|
||||
UnsignedT val = x.getValue();
|
||||
if (val < min_val)
|
||||
min_val = val;
|
||||
}
|
||||
@ -597,10 +601,10 @@ public:
|
||||
{
|
||||
if (small.empty())
|
||||
return 0;
|
||||
auto max_val = std::numeric_limits<std::make_unsigned_t<T>>::min();
|
||||
UnsignedT max_val = 0;
|
||||
for (const auto & x : small)
|
||||
{
|
||||
auto val = x.getValue();
|
||||
UnsignedT val = x.getValue();
|
||||
if (val > max_val)
|
||||
max_val = val;
|
||||
}
|
||||
@ -611,7 +615,8 @@ public:
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace value
|
||||
* Replace value.
|
||||
* It's used in transform and currently can only support UInt32
|
||||
*/
|
||||
void rb_replace(const UInt64 * from_vals, const UInt64 * to_vals, size_t num)
|
||||
{
|
||||
|
@ -147,7 +147,7 @@ public:
|
||||
}
|
||||
|
||||
if (params[0].getType() != Field::Types::String)
|
||||
throw Exception("Aggregate function " + getName() + " require require first parameter to be a String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
throw Exception("Aggregate function " + getName() + " require first parameter to be a String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
auto param = params[0].get<String>();
|
||||
if (param == "two-sided")
|
||||
@ -158,13 +158,13 @@ public:
|
||||
alternative = Alternative::Greater;
|
||||
else
|
||||
throw Exception("Unknown parameter in aggregate function " + getName() +
|
||||
". It must be one of: 'two sided', 'less', 'greater'", ErrorCodes::BAD_ARGUMENTS);
|
||||
". It must be one of: 'two-sided', 'less', 'greater'", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
if (params.size() != 2)
|
||||
return;
|
||||
|
||||
if (params[1].getType() != Field::Types::UInt64)
|
||||
throw Exception("Aggregate function " + getName() + " require require second parameter to be a UInt64", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
throw Exception("Aggregate function " + getName() + " require second parameter to be a UInt64", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
continuity_correction = static_cast<bool>(params[1].get<UInt64>());
|
||||
}
|
||||
|
@ -11,6 +11,7 @@ class AggregateFunctionFactory;
|
||||
void registerAggregateFunctionAvg(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionAvgWeighted(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionCount(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionDeltaSum(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionGroupArray(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory &);
|
||||
@ -66,6 +67,7 @@ void registerAggregateFunctions()
|
||||
registerAggregateFunctionAvg(factory);
|
||||
registerAggregateFunctionAvgWeighted(factory);
|
||||
registerAggregateFunctionCount(factory);
|
||||
registerAggregateFunctionDeltaSum(factory);
|
||||
registerAggregateFunctionGroupArray(factory);
|
||||
registerAggregateFunctionGroupUniqArray(factory);
|
||||
registerAggregateFunctionGroupArrayInsertAt(factory);
|
||||
|
@ -19,6 +19,7 @@ SRCS(
|
||||
AggregateFunctionCategoricalInformationValue.cpp
|
||||
AggregateFunctionCombinatorFactory.cpp
|
||||
AggregateFunctionCount.cpp
|
||||
AggregateFunctionDeltaSum.cpp
|
||||
AggregateFunctionDistinct.cpp
|
||||
AggregateFunctionEntropy.cpp
|
||||
AggregateFunctionFactory.cpp
|
||||
|
@ -100,8 +100,8 @@ endif()
|
||||
list (APPEND clickhouse_common_io_sources ${CONFIG_BUILD})
|
||||
list (APPEND clickhouse_common_io_headers ${CONFIG_VERSION} ${CONFIG_COMMON})
|
||||
|
||||
list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp)
|
||||
list (APPEND dbms_headers Functions/IFunctionImpl.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h)
|
||||
list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp Functions/replicate.cpp)
|
||||
list (APPEND dbms_headers Functions/IFunctionImpl.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h Functions/replicate.h)
|
||||
|
||||
list (APPEND dbms_sources
|
||||
AggregateFunctions/AggregateFunctionFactory.cpp
|
||||
|
@ -75,8 +75,28 @@ void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_)
|
||||
ColumnAggregateFunction::~ColumnAggregateFunction()
|
||||
{
|
||||
if (!func->hasTrivialDestructor() && !src)
|
||||
for (auto * val : data)
|
||||
func->destroy(val);
|
||||
{
|
||||
if (copiedDataInfo.empty())
|
||||
{
|
||||
for (auto * val : data)
|
||||
{
|
||||
func->destroy(val);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t pos;
|
||||
for (Map::iterator it = copiedDataInfo.begin(), it_end = copiedDataInfo.end(); it != it_end; ++it)
|
||||
{
|
||||
pos = it->getValue().second;
|
||||
if (data[pos] != nullptr)
|
||||
{
|
||||
func->destroy(data[pos]);
|
||||
data[pos] = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnAggregateFunction::addArena(ConstArenaPtr arena_)
|
||||
@ -455,14 +475,37 @@ void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n)
|
||||
/// (only as a whole, see comment above).
|
||||
ensureOwnership();
|
||||
insertDefault();
|
||||
insertMergeFrom(from, n);
|
||||
insertCopyFrom(assert_cast<const ColumnAggregateFunction &>(from).data[n]);
|
||||
}
|
||||
|
||||
void ColumnAggregateFunction::insertFrom(ConstAggregateDataPtr place)
|
||||
{
|
||||
ensureOwnership();
|
||||
insertDefault();
|
||||
insertMergeFrom(place);
|
||||
insertCopyFrom(place);
|
||||
}
|
||||
|
||||
void ColumnAggregateFunction::insertCopyFrom(ConstAggregateDataPtr place)
|
||||
{
|
||||
Map::LookupResult result;
|
||||
result = copiedDataInfo.find(place);
|
||||
if (result == nullptr)
|
||||
{
|
||||
copiedDataInfo[place] = data.size()-1;
|
||||
func->merge(data.back(), place, &createOrGetArena());
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t pos = result->getValue().second;
|
||||
if (pos != data.size() - 1)
|
||||
{
|
||||
data[data.size() - 1] = data[pos];
|
||||
}
|
||||
else /// insert same data to same pos, merge them.
|
||||
{
|
||||
func->merge(data.back(), place, &createOrGetArena());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnAggregateFunction::insertMergeFrom(ConstAggregateDataPtr place)
|
||||
@ -697,5 +740,4 @@ MutableColumnPtr ColumnAggregateFunction::cloneResized(size_t size) const
|
||||
return cloned_col;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
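
The `copiedDataInfo` map introduced here exists so that inserting the same source state many times does not trigger a merge per row. A rough, self-contained model of that idea (illustrative types, an `int` standing in for an aggregate state) is:

```cpp
#include <cassert>
#include <cstddef>
#include <unordered_map>
#include <vector>

// Illustrative model (not the real ColumnAggregateFunction): when the same source
// state pointer is inserted repeatedly, remember where it was first merged and
// reuse that result instead of merging the source into every new row.
struct StateColumnSketch
{
    std::vector<int> data;                                // stands in for per-row aggregate states
    std::unordered_map<const int *, size_t> copied_info;  // source pointer -> position in `data`

    void insertCopyFrom(const int * src)
    {
        data.push_back(0);                                // stands in for insertDefault()
        auto it = copied_info.find(src);
        if (it == copied_info.end())
        {
            copied_info[src] = data.size() - 1;           // first time: merge and remember the slot
            data.back() += *src;                          // stands in for func->merge(...)
        }
        else
        {
            data.back() = data[it->second];               // later times: reuse the already merged result
        }
    }
};

int main()
{
    int src = 42;
    StateColumnSketch col;
    for (int i = 0; i < 3; ++i)
        col.insertCopyFrom(&src);
    assert(col.data.size() == 3);
    for (int v : col.data)
        assert(v == 42);
    return 0;
}
```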
@ -13,6 +13,8 @@
|
||||
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -82,6 +84,17 @@ private:
|
||||
/// Name of the type to distinguish different aggregation states.
|
||||
String type_string;
|
||||
|
||||
/// Records of already merged source data, used to avoid copying the same state twice.
/// Key: source state pointer, value: position in the current column.
|
||||
using Map = HashMap<
|
||||
ConstAggregateDataPtr,
|
||||
size_t,
|
||||
DefaultHash<ConstAggregateDataPtr>,
|
||||
HashTableGrower<3>,
|
||||
HashTableAllocatorWithStackMemory<sizeof(std::pair<ConstAggregateDataPtr, size_t>) * (1 << 3)>>;
|
||||
|
||||
Map copiedDataInfo;
|
||||
|
||||
ColumnAggregateFunction() {}
|
||||
|
||||
/// Create a new column that has another column as a source.
|
||||
@ -140,6 +153,8 @@ public:
|
||||
|
||||
void insertFrom(ConstAggregateDataPtr place);
|
||||
|
||||
void insertCopyFrom(ConstAggregateDataPtr place);
|
||||
|
||||
/// Merge state at last row with specified state in another column.
|
||||
void insertMergeFrom(ConstAggregateDataPtr place);
|
||||
|
||||
|
@ -28,12 +28,12 @@ namespace ColumnsHashing
|
||||
|
||||
/// For the case when there is one numeric key.
|
||||
/// UInt8/16/32/64 for any type with corresponding bit width.
|
||||
template <typename Value, typename Mapped, typename FieldType, bool use_cache = true>
|
||||
template <typename Value, typename Mapped, typename FieldType, bool use_cache = true, bool need_offset = false>
|
||||
struct HashMethodOneNumber
|
||||
: public columns_hashing_impl::HashMethodBase<HashMethodOneNumber<Value, Mapped, FieldType, use_cache>, Value, Mapped, use_cache>
|
||||
: public columns_hashing_impl::HashMethodBase<HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
|
||||
{
|
||||
using Self = HashMethodOneNumber<Value, Mapped, FieldType, use_cache>;
|
||||
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
|
||||
using Self = HashMethodOneNumber<Value, Mapped, FieldType, use_cache, need_offset>;
|
||||
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
|
||||
|
||||
const char * vec;
|
||||
|
||||
@ -70,12 +70,12 @@ struct HashMethodOneNumber
|
||||
|
||||
|
||||
/// For the case when there is one string key.
|
||||
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true>
|
||||
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false>
|
||||
struct HashMethodString
|
||||
: public columns_hashing_impl::HashMethodBase<HashMethodString<Value, Mapped, place_string_to_arena, use_cache>, Value, Mapped, use_cache>
|
||||
: public columns_hashing_impl::HashMethodBase<HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
|
||||
{
|
||||
using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache>;
|
||||
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
|
||||
using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache, need_offset>;
|
||||
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
|
||||
|
||||
const IColumn::Offset * offsets;
|
||||
const UInt8 * chars;
|
||||
@ -108,12 +108,13 @@ protected:
|
||||
|
||||
|
||||
/// For the case when there is one fixed-length string key.
|
||||
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true>
|
||||
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true, bool need_offset = false>
|
||||
struct HashMethodFixedString
|
||||
: public columns_hashing_impl::HashMethodBase<HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache>, Value, Mapped, use_cache>
|
||||
: public columns_hashing_impl::
|
||||
HashMethodBase<HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
|
||||
{
|
||||
using Self = HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache>;
|
||||
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
|
||||
using Self = HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache, need_offset>;
|
||||
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
|
||||
|
||||
size_t n;
|
||||
const ColumnFixedString::Chars * chars;
|
||||
@ -454,13 +455,13 @@ template <>
|
||||
struct LowCardinalityKeys<false> {};
|
||||
|
||||
/// For the case when all keys are of fixed length, and they fit in N (for example, 128) bits.
|
||||
template <typename Value, typename Key, typename Mapped, bool has_nullable_keys_ = false, bool has_low_cardinality_ = false, bool use_cache = true>
|
||||
template <typename Value, typename Key, typename Mapped, bool has_nullable_keys_ = false, bool has_low_cardinality_ = false, bool use_cache = true, bool need_offset = false>
|
||||
struct HashMethodKeysFixed
|
||||
: private columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>
|
||||
, public columns_hashing_impl::HashMethodBase<HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache>, Value, Mapped, use_cache>
|
||||
, public columns_hashing_impl::HashMethodBase<HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
|
||||
{
|
||||
using Self = HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache>;
|
||||
using BaseHashed = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
|
||||
using Self = HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache, need_offset>;
|
||||
using BaseHashed = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
|
||||
using Base = columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>;
|
||||
|
||||
static constexpr bool has_nullable_keys = has_nullable_keys_;
|
||||
@ -540,13 +541,13 @@ protected:
|
||||
};
|
||||
|
||||
/// For the case when there is one string key.
|
||||
template <typename Value, typename Mapped, bool use_cache = true>
|
||||
template <typename Value, typename Mapped, bool use_cache = true, bool need_offset = false>
|
||||
struct HashMethodHashed
|
||||
: public columns_hashing_impl::HashMethodBase<HashMethodHashed<Value, Mapped, use_cache>, Value, Mapped, use_cache>
|
||||
: public columns_hashing_impl::HashMethodBase<HashMethodHashed<Value, Mapped, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
|
||||
{
|
||||
using Key = UInt128;
|
||||
using Self = HashMethodHashed<Value, Mapped, use_cache>;
|
||||
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
|
||||
using Self = HashMethodHashed<Value, Mapped, use_cache, need_offset>;
|
||||
using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache, need_offset>;
|
||||
|
||||
ColumnRawPtrs key_columns;
|
||||
|
||||
|
@ -87,34 +87,61 @@ public:
|
||||
bool isInserted() const { return inserted; }
|
||||
};
|
||||
|
||||
template <typename Mapped>
|
||||
class FindResultImpl
|
||||
/// FindResult may optionally contain a pointer to the value and an offset in the hash table buffer.
/// Only the bool `found` is always required.
/// So we will have 4 different specializations for FindResultImpl.
|
||||
class FindResultImplBase
|
||||
{
|
||||
Mapped * value;
|
||||
bool found;
|
||||
|
||||
public:
|
||||
FindResultImpl(Mapped * value_, bool found_) : value(value_), found(found_) {}
|
||||
explicit FindResultImplBase(bool found_) : found(found_) {}
|
||||
bool isFound() const { return found; }
|
||||
Mapped & getMapped() const { return *value; }
|
||||
};
|
||||
|
||||
template <bool need_offset = false>
|
||||
class FindResultImplOffsetBase
|
||||
{
|
||||
public:
|
||||
constexpr static bool has_offset = need_offset;
|
||||
explicit FindResultImplOffsetBase(size_t /* off */) {}
|
||||
};
|
||||
|
||||
template <>
|
||||
class FindResultImpl<void>
|
||||
class FindResultImplOffsetBase<true>
|
||||
{
|
||||
bool found;
|
||||
|
||||
size_t offset;
|
||||
public:
|
||||
explicit FindResultImpl(bool found_) : found(found_) {}
|
||||
bool isFound() const { return found; }
|
||||
constexpr static bool has_offset = true;
|
||||
|
||||
explicit FindResultImplOffsetBase(size_t off) : offset(off) {}
|
||||
ALWAYS_INLINE size_t getOffset() const { return offset; }
|
||||
};
|
||||
|
||||
template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization>
|
||||
template <typename Mapped, bool need_offset = false>
|
||||
class FindResultImpl : public FindResultImplBase, public FindResultImplOffsetBase<need_offset>
|
||||
{
|
||||
Mapped * value;
|
||||
|
||||
public:
|
||||
FindResultImpl(Mapped * value_, bool found_, size_t off)
|
||||
: FindResultImplBase(found_), FindResultImplOffsetBase<need_offset>(off), value(value_) {}
|
||||
Mapped & getMapped() const { return *value; }
|
||||
};
|
||||
|
||||
template <bool need_offset>
|
||||
class FindResultImpl<void, need_offset> : public FindResultImplBase, public FindResultImplOffsetBase<need_offset>
|
||||
{
|
||||
public:
|
||||
FindResultImpl(bool found_, size_t off) : FindResultImplBase(found_), FindResultImplOffsetBase<need_offset>(off) {}
|
||||
};
|
||||
|
||||
template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization, bool need_offset = false>
|
||||
class HashMethodBase
|
||||
{
|
||||
public:
|
||||
using EmplaceResult = EmplaceResultImpl<Mapped>;
|
||||
using FindResult = FindResultImpl<Mapped>;
|
||||
using FindResult = FindResultImpl<Mapped, need_offset>;
|
||||
static constexpr bool has_mapped = !std::is_same<Mapped, void>::value;
|
||||
using Cache = LastElementCache<Value, consecutive_keys_optimization>;
|
||||
|
||||
@ -217,12 +244,15 @@ protected:
|
||||
{
|
||||
if constexpr (Cache::consecutive_keys_optimization)
|
||||
{
|
||||
/// It's possible to support such combination, but code will became more complex.
|
||||
/// Now there's not place where we need this options enabled together
|
||||
static_assert(!FindResult::has_offset, "`consecutive_keys_optimization` and `has_offset` are conflicting options");
|
||||
if (cache.check(key))
|
||||
{
|
||||
if constexpr (has_mapped)
|
||||
return FindResult(&cache.value.second, cache.found);
|
||||
return FindResult(&cache.value.second, cache.found, 0);
|
||||
else
|
||||
return FindResult(cache.found);
|
||||
return FindResult(cache.found, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -247,10 +277,15 @@ protected:
|
||||
}
|
||||
}
|
||||
|
||||
size_t offset = 0;
|
||||
if constexpr (FindResult::has_offset)
|
||||
{
|
||||
offset = it ? data.offsetInternal(it) : 0;
|
||||
}
|
||||
if constexpr (has_mapped)
|
||||
return FindResult(it ? &it->getMapped() : nullptr, it != nullptr);
|
||||
return FindResult(it ? &it->getMapped() : nullptr, it != nullptr, offset);
|
||||
else
|
||||
return FindResult(it != nullptr);
|
||||
return FindResult(it != nullptr, offset);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -63,9 +63,6 @@ public:
|
||||
/// Call from master thread as soon as possible (e.g. when thread accepted connection)
|
||||
static void initializeQuery();
|
||||
|
||||
/// Sets query_context for current thread group
|
||||
static void attachQueryContext(Context & query_context);
|
||||
|
||||
/// You must call one of these methods when create a query child thread:
|
||||
/// Add current thread to a group associated with the thread group
|
||||
static void attachTo(const ThreadGroupStatusPtr & thread_group);
|
||||
@ -99,6 +96,10 @@ public:
|
||||
|
||||
private:
|
||||
static void defaultThreadDeleter();
|
||||
|
||||
/// Sets query_context for the current thread group.
/// Can be used only through QueryScope.
|
||||
static void attachQueryContext(Context & query_context);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -538,7 +538,8 @@
|
||||
M(999, KEEPER_EXCEPTION) \
|
||||
M(1000, POCO_EXCEPTION) \
|
||||
M(1001, STD_EXCEPTION) \
|
||||
M(1002, UNKNOWN_EXCEPTION)
|
||||
M(1002, UNKNOWN_EXCEPTION) \
|
||||
M(1003, INVALID_SHARD_ID)
|
||||
|
||||
/* See END */
|
||||
|
||||
|
@ -476,6 +476,17 @@ public:
|
||||
|
||||
size_t getBufferSizeInCells() const { return NUM_CELLS; }
|
||||
|
||||
/// Return offset for result in internal buffer.
|
||||
/// The result can have a value up to `getBufferSizeInCells() + 1`,
/// because the offset for the zero value is considered to be 0,
/// and for other values it is `offset in buffer + 1`.
|
||||
size_t offsetInternal(ConstLookupResult ptr) const
|
||||
{
|
||||
if (ptr->isZero(*this))
|
||||
return 0;
|
||||
return ptr - buf + 1;
|
||||
}
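
A toy illustration of the offset convention above, with an `int` cell type standing in for the real `Cell`:

```cpp
#include <cassert>
#include <cstddef>

// Offset 0 is reserved for the special "zero" cell stored outside the buffer;
// regular cells map to their buffer index + 1.
struct TinyTable
{
    static constexpr size_t num_cells = 4;
    int zero_cell = 0;
    int buf[num_cells] = {};

    size_t offsetInternal(const int * ptr) const
    {
        if (ptr == &zero_cell)
            return 0;
        return static_cast<size_t>(ptr - buf) + 1;
    }
};

int main()
{
    TinyTable t;
    assert(t.offsetInternal(&t.zero_cell) == 0);
    assert(t.offsetInternal(&t.buf[0]) == 1);
    assert(t.offsetInternal(&t.buf[3]) == TinyTable::num_cells);  // up to getBufferSizeInCells()
    return 0;
}
```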
|
||||
|
||||
const Cell * data() const { return buf; }
|
||||
Cell * data() { return buf; }
|
||||
|
||||
|
@ -109,6 +109,11 @@ struct HashMapCell
|
||||
DB::assertChar(',', rb);
|
||||
DB::readDoubleQuoted(value.second, rb);
|
||||
}
|
||||
|
||||
static bool constexpr need_to_notify_cell_during_move = false;
|
||||
|
||||
static void move(HashMapCell * /* old_location */, HashMapCell * /* new_location */) {}
|
||||
|
||||
};
|
||||
|
||||
template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>
|
||||
|
@ -69,11 +69,16 @@ namespace ZeroTraits
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
bool check(const T x) { return x == 0; }
|
||||
inline bool check(const T x) { return x == 0; }
|
||||
|
||||
template <typename T>
|
||||
void set(T & x) { x = 0; }
|
||||
inline void set(T & x) { x = 0; }
|
||||
|
||||
template <>
|
||||
inline bool check(const char * x) { return x == nullptr; }
|
||||
|
||||
template <>
|
||||
inline void set(const char *& x){ x = nullptr; }
|
||||
}
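
The `inline` added here matters because this namespace lives in a header: a full specialization of a function template is an ordinary function for ODR purposes, so without `inline` every translation unit including the header would emit its own definition. A minimal single-file illustration of the pattern (illustrative namespace name):

```cpp
#include <cassert>

namespace zero_traits_sketch
{
    // Primary template and a full specialization; both are marked inline so the pair
    // could live in a header included from many translation units without ODR violations.
    template <typename T>
    inline bool check(const T x) { return x == 0; }

    template <>
    inline bool check(const char * x) { return x == nullptr; }
}

int main()
{
    const char * s = nullptr;
    assert(zero_traits_sketch::check(0));    // primary template, T = int
    assert(zero_traits_sketch::check(s));    // specialization for const char *
    assert(!zero_traits_sketch::check(42));
    return 0;
}
```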
|
||||
|
||||
|
||||
@ -204,6 +209,13 @@ struct HashTableCell
|
||||
/// Deserialization, in binary and text form.
|
||||
void read(DB::ReadBuffer & rb) { DB::readBinary(key, rb); }
|
||||
void readText(DB::ReadBuffer & rb) { DB::readDoubleQuoted(key, rb); }
|
||||
|
||||
/// When cell pointer is moved during erase, reinsert or resize operations
|
||||
|
||||
static constexpr bool need_to_notify_cell_during_move = false;
|
||||
|
||||
static void move(HashTableCell * /* old_location */, HashTableCell * /* new_location */) {}
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
@ -334,6 +346,32 @@ struct ZeroValueStorage<false, Cell>
|
||||
};
|
||||
|
||||
|
||||
template <bool enable, typename Allocator, typename Cell>
|
||||
struct AllocatorBufferDeleter;
|
||||
|
||||
template <typename Allocator, typename Cell>
|
||||
struct AllocatorBufferDeleter<false, Allocator, Cell>
|
||||
{
|
||||
AllocatorBufferDeleter(Allocator &, size_t) {}
|
||||
|
||||
void operator()(Cell *) const {}
|
||||
|
||||
};
|
||||
|
||||
template <typename Allocator, typename Cell>
|
||||
struct AllocatorBufferDeleter<true, Allocator, Cell>
|
||||
{
|
||||
AllocatorBufferDeleter(Allocator & allocator_, size_t size_)
|
||||
: allocator(allocator_)
|
||||
, size(size_) {}
|
||||
|
||||
void operator()(Cell * buffer) const { allocator.free(buffer, size); }
|
||||
|
||||
Allocator & allocator;
|
||||
size_t size;
|
||||
};
|
||||
|
||||
|
||||
// The HashTable
|
||||
template
|
||||
<
|
||||
@ -427,7 +465,6 @@ protected:
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Increase the size of the buffer.
|
||||
void resize(size_t for_num_elems = 0, size_t for_buf_size = 0)
|
||||
{
|
||||
@ -460,7 +497,24 @@ protected:
|
||||
new_grower.increaseSize();
|
||||
|
||||
/// Expand the space.
|
||||
buf = reinterpret_cast<Cell *>(Allocator::realloc(buf, getBufferSizeInBytes(), new_grower.bufSize() * sizeof(Cell)));
|
||||
|
||||
size_t old_buffer_size = getBufferSizeInBytes();
|
||||
|
||||
/** If the cell must be notified during a move, we need to temporarily keep the old buffer,
 * because realloc does not guarantee that the reallocated buffer has the same base address.
 */
|
||||
using Deleter = AllocatorBufferDeleter<Cell::need_to_notify_cell_during_move, Allocator, Cell>;
|
||||
Deleter buffer_deleter(*this, old_buffer_size);
|
||||
std::unique_ptr<Cell, Deleter> old_buffer(buf, buffer_deleter);
|
||||
|
||||
if constexpr (Cell::need_to_notify_cell_during_move)
|
||||
{
|
||||
buf = reinterpret_cast<Cell *>(Allocator::alloc(new_grower.bufSize() * sizeof(Cell)));
|
||||
memcpy(reinterpret_cast<void *>(buf), reinterpret_cast<const void *>(old_buffer.get()), old_buffer_size);
|
||||
}
|
||||
else
|
||||
buf = reinterpret_cast<Cell *>(Allocator::realloc(buf, old_buffer_size, new_grower.bufSize() * sizeof(Cell)));
|
||||
|
||||
grower = new_grower;
|
||||
|
||||
/** Now some items may need to be moved to a new location.
|
||||
@ -470,7 +524,12 @@ protected:
|
||||
size_t i = 0;
|
||||
for (; i < old_size; ++i)
|
||||
if (!buf[i].isZero(*this))
|
||||
reinsert(buf[i], buf[i].getHash(*this));
|
||||
{
|
||||
size_t updated_place_value = reinsert(buf[i], buf[i].getHash(*this));
|
||||
|
||||
if constexpr (Cell::need_to_notify_cell_during_move)
|
||||
Cell::move(&(old_buffer.get())[i], &buf[updated_place_value]);
|
||||
}
|
||||
|
||||
/** There is also a special case:
|
||||
* if the element was to be at the end of the old buffer, [ x]
|
||||
@ -481,7 +540,13 @@ protected:
|
||||
* process tail from the collision resolution chain immediately after it [ o x ]
|
||||
*/
|
||||
for (; !buf[i].isZero(*this); ++i)
|
||||
reinsert(buf[i], buf[i].getHash(*this));
|
||||
{
|
||||
size_t updated_place_value = reinsert(buf[i], buf[i].getHash(*this));
|
||||
|
||||
if constexpr (Cell::need_to_notify_cell_during_move)
|
||||
if (&buf[i] != &buf[updated_place_value])
|
||||
Cell::move(&buf[i], &buf[updated_place_value]);
|
||||
}
|
||||
|
||||
#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
|
||||
watch.stop();
|
||||
@ -495,20 +560,20 @@ protected:
|
||||
/** Paste into the new buffer the value that was in the old buffer.
|
||||
* Used when increasing the buffer size.
|
||||
*/
|
||||
void reinsert(Cell & x, size_t hash_value)
|
||||
size_t reinsert(Cell & x, size_t hash_value)
|
||||
{
|
||||
size_t place_value = grower.place(hash_value);
|
||||
|
||||
/// If the element is in its place.
|
||||
if (&x == &buf[place_value])
|
||||
return;
|
||||
return place_value;
|
||||
|
||||
/// Compute a new location, taking into account the collision resolution chain.
|
||||
place_value = findCell(Cell::getKey(x.getValue()), hash_value, place_value);
|
||||
|
||||
/// If the item remains in its place in the old collision resolution chain.
|
||||
if (!buf[place_value].isZero(*this))
|
||||
return;
|
||||
return place_value;
|
||||
|
||||
/// Copy to a new location and zero the old one.
|
||||
x.setHash(hash_value);
|
||||
@ -516,6 +581,7 @@ protected:
|
||||
x.setZero();
|
||||
|
||||
/// Then the elements that previously were in collision with this can move to the old place.
|
||||
return place_value;
|
||||
}
|
||||
|
||||
|
||||
@ -881,7 +947,11 @@ public:
|
||||
/// Reinsert node pointed to by iterator
|
||||
void ALWAYS_INLINE reinsert(iterator & it, size_t hash_value)
|
||||
{
|
||||
reinsert(*it.getPtr(), hash_value);
|
||||
size_t place_value = reinsert(*it.getPtr(), hash_value);
|
||||
|
||||
if constexpr (Cell::need_to_notify_cell_during_move)
|
||||
if (it.getPtr() != &buf[place_value])
|
||||
Cell::move(it.getPtr(), &buf[place_value]);
|
||||
}
|
||||
|
||||
|
||||
@ -958,8 +1028,14 @@ public:
|
||||
return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x, hash_value);
|
||||
}
|
||||
|
||||
std::enable_if_t<Grower::performs_linear_probing_with_single_step, void>
|
||||
std::enable_if_t<Grower::performs_linear_probing_with_single_step, bool>
|
||||
ALWAYS_INLINE erase(const Key & x)
|
||||
{
|
||||
return erase(x, hash(x));
|
||||
}
|
||||
|
||||
std::enable_if_t<Grower::performs_linear_probing_with_single_step, bool>
|
||||
ALWAYS_INLINE erase(const Key & x, size_t hash_value)
|
||||
{
|
||||
/** Deletion from open addressing hash table without tombstones
|
||||
*
|
||||
@ -977,21 +1053,19 @@ public:
|
||||
{
|
||||
--m_size;
|
||||
this->clearHasZero();
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
size_t hash_value = hash(x);
|
||||
size_t erased_key_position = findCell(x, hash_value, grower.place(hash_value));
|
||||
|
||||
/// Key is not found
|
||||
if (buf[erased_key_position].isZero(*this))
|
||||
{
|
||||
return;
|
||||
}
|
||||
return false;
|
||||
|
||||
/// We need to guarantee loop termination because there will be an empty position
|
||||
assert(m_size < grower.bufSize());
|
||||
@ -1056,12 +1130,18 @@ public:
|
||||
|
||||
/// Move the element to the freed place
|
||||
memcpy(static_cast<void *>(&buf[erased_key_position]), static_cast<void *>(&buf[next_position]), sizeof(Cell));
|
||||
|
||||
if constexpr (Cell::need_to_notify_cell_during_move)
|
||||
Cell::move(&buf[next_position], &buf[erased_key_position]);
|
||||
|
||||
/// Now we have another freed place
|
||||
erased_key_position = next_position;
|
||||
}
|
||||
|
||||
buf[erased_key_position].setZero();
|
||||
--m_size;
|
||||
|
||||
return true;
|
||||
}
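The erase() above deletes from a linear-probing table without tombstones: after the key's slot is emptied, later elements of the collision chain are shifted back whenever the hole lies on their probe path. A simplified, self-contained model of that shifting rule with a fixed hash and int keys (a sketch only, not the table's actual code):

#include <cassert>
#include <optional>
#include <vector>

/// Minimal linear-probing set of ints with backward-shift deletion (no tombstones).
/// Capacity is a power of two, hash(key) = key & mask.
struct TinyOpenAddressingSet
{
    std::vector<std::optional<int>> slots;
    size_t mask;

    explicit TinyOpenAddressingSet(size_t capacity) : slots(capacity), mask(capacity - 1) {}

    size_t place(int key) const { return static_cast<size_t>(key) & mask; }

    void insert(int key)
    {
        size_t pos = place(key);
        while (slots[pos] && *slots[pos] != key)
            pos = (pos + 1) & mask;
        slots[pos] = key;
    }

    bool contains(int key) const
    {
        size_t pos = place(key);
        while (slots[pos])
        {
            if (*slots[pos] == key)
                return true;
            pos = (pos + 1) & mask;
        }
        return false;
    }

    bool erase(int key)
    {
        size_t hole = place(key);
        while (slots[hole] && *slots[hole] != key)
            hole = (hole + 1) & mask;
        if (!slots[hole])
            return false;

        slots[hole].reset();
        size_t next = hole;
        while (true)
        {
            next = (next + 1) & mask;
            if (!slots[next])
                break;
            size_t ideal = place(*slots[next]);
            /// Move the element back only if the hole lies cyclically between
            /// its ideal position and its current position.
            bool hole_on_probe_path = (hole <= next)
                ? (ideal <= hole || ideal > next)
                : (ideal <= hole && ideal > next);
            if (hole_on_probe_path)
            {
                slots[hole] = slots[next];
                slots[next].reset();
                hole = next;
            }
        }
        return true;
    }
};

int main()
{
    TinyOpenAddressingSet set(8);
    for (int key : {1, 9, 17, 2})   /// 1, 9 and 17 all collide on slot 1
        set.insert(key);
    assert(set.erase(9));
    assert(set.contains(1) && set.contains(17) && set.contains(2));
    assert(!set.contains(9));
}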
|
||||
|
||||
bool ALWAYS_INLINE has(const Key & x) const
|
||||
@ -1214,6 +1294,17 @@ public:
|
||||
return grower.bufSize();
|
||||
}
|
||||
|
||||
/// Return offset for result in internal buffer.
|
||||
/// Result can have value up to `getBufferSizeInCells() + 1`
|
||||
/// because offset for zero value considered to be 0
|
||||
/// and for other values it will be `offset in buffer + 1`
|
||||
size_t offsetInternal(ConstLookupResult ptr) const
|
||||
{
|
||||
if (ptr->isZero(*this))
|
||||
return 0;
|
||||
return ptr - buf + 1;
|
||||
}
|
||||
|
||||
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
|
||||
size_t getCollisions() const
|
||||
{
|
||||
|
244
src/Common/HashTable/LRUHashMap.h
Normal file
@ -0,0 +1,244 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/types.h>
|
||||
|
||||
#include <boost/intrusive/trivial_value_traits.hpp>
|
||||
#include <boost/intrusive/list.hpp>
|
||||
#include <boost/noncopyable.hpp>
|
||||
|
||||
#include <Core/Defines.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/PODArray.h>
|
||||
|
||||
|
||||
template <typename TKey, typename TMapped, typename Hash, bool save_hash_in_cell>
|
||||
struct LRUHashMapCell :
|
||||
public std::conditional_t<save_hash_in_cell,
|
||||
HashMapCellWithSavedHash<TKey, TMapped, Hash, HashTableNoState>,
|
||||
HashMapCell<TKey, TMapped, Hash, HashTableNoState>>
|
||||
{
|
||||
public:
|
||||
using Key = TKey;
|
||||
|
||||
using Base = std::conditional_t<save_hash_in_cell,
|
||||
HashMapCellWithSavedHash<TKey, TMapped, Hash, HashTableNoState>,
|
||||
HashMapCell<TKey, TMapped, Hash, HashTableNoState>>;
|
||||
|
||||
using Mapped = typename Base::Mapped;
|
||||
using State = typename Base::State;
|
||||
|
||||
using mapped_type = Mapped;
|
||||
using key_type = Key;
|
||||
|
||||
using Base::Base;
|
||||
|
||||
static bool constexpr need_to_notify_cell_during_move = true;
|
||||
|
||||
static void move(LRUHashMapCell * __restrict old_location, LRUHashMapCell * __restrict new_location)
|
||||
{
|
||||
/** We update the new location's prev and next pointers, because during hash table resize
* they can be updated while another cell is being moved.
*/
|
||||
|
||||
new_location->prev = old_location->prev;
|
||||
new_location->next = old_location->next;
|
||||
|
||||
LRUHashMapCell * prev = new_location->prev;
|
||||
LRUHashMapCell * next = new_location->next;
|
||||
|
||||
/// Update the previous node's next pointer and the next node's prev pointer to point to the new location
|
||||
|
||||
if (prev)
|
||||
prev->next = new_location;
|
||||
|
||||
if (next)
|
||||
next->prev = new_location;
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename, typename, typename, bool>
|
||||
friend class LRUHashMapCellNodeTraits;
|
||||
|
||||
LRUHashMapCell * next = nullptr;
|
||||
LRUHashMapCell * prev = nullptr;
|
||||
};
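Because LRUHashMapCell lives directly in the hash table buffer, a resize physically relocates the cell while its list neighbours still point at the old address, so move() has to patch both directions of the intrusive list. The same pointer fix-up on a plain doubly-linked node, as a standalone sketch (the Node type is illustrative, not the actual cell):

#include <cassert>

struct Node
{
    Node * prev = nullptr;
    Node * next = nullptr;
    int value = 0;
};

/// Relocate a node to new storage and repair the neighbours' links,
/// mirroring what LRUHashMapCell::move does for cells in the table buffer.
void relocate(Node * old_location, Node * new_location)
{
    new_location->prev = old_location->prev;
    new_location->next = old_location->next;
    new_location->value = old_location->value;

    if (new_location->prev)
        new_location->prev->next = new_location;
    if (new_location->next)
        new_location->next->prev = new_location;
}

int main()
{
    Node a{nullptr, nullptr, 1}, b{nullptr, nullptr, 2}, c{nullptr, nullptr, 3};
    a.next = &b; b.prev = &a; b.next = &c; c.prev = &b;

    Node b_moved;               /// pretend the table buffer was reallocated
    relocate(&b, &b_moved);

    assert(a.next == &b_moved && c.prev == &b_moved);
    assert(b_moved.prev == &a && b_moved.next == &c);
}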
|
||||
|
||||
template<typename Key, typename Value, typename Hash, bool save_hash_in_cell>
|
||||
struct LRUHashMapCellNodeTraits
|
||||
{
|
||||
using node = LRUHashMapCell<Key, Value, Hash, save_hash_in_cell>;
|
||||
using node_ptr = LRUHashMapCell<Key, Value, Hash, save_hash_in_cell> *;
|
||||
using const_node_ptr = const LRUHashMapCell<Key, Value, Hash, save_hash_in_cell> *;
|
||||
|
||||
static node * get_next(const node * ptr) { return ptr->next; }
|
||||
static void set_next(node * __restrict ptr, node * __restrict next) { ptr->next = next; }
|
||||
static node * get_previous(const node * ptr) { return ptr->prev; }
|
||||
static void set_previous(node * __restrict ptr, node * __restrict prev) { ptr->prev = prev; }
|
||||
};
|
||||
|
||||
template <typename TKey, typename TValue, typename Hash, bool save_hash_in_cells>
|
||||
class LRUHashMapImpl :
|
||||
private HashMapTable<
|
||||
TKey,
|
||||
LRUHashMapCell<TKey, TValue, Hash, save_hash_in_cells>,
|
||||
Hash,
|
||||
HashTableGrower<>,
|
||||
HashTableAllocator>
|
||||
{
|
||||
using Base = HashMapTable<
|
||||
TKey,
|
||||
LRUHashMapCell<TKey, TValue, Hash, save_hash_in_cells>,
|
||||
Hash,
|
||||
HashTableGrower<>,
|
||||
HashTableAllocator>;
|
||||
public:
|
||||
using Key = TKey;
|
||||
using Value = TValue;
|
||||
|
||||
using Cell = LRUHashMapCell<Key, Value, Hash, save_hash_in_cells>;
|
||||
|
||||
using LRUHashMapCellIntrusiveValueTraits =
|
||||
boost::intrusive::trivial_value_traits<
|
||||
LRUHashMapCellNodeTraits<Key, Value, Hash, save_hash_in_cells>,
|
||||
boost::intrusive::link_mode_type::normal_link>;
|
||||
|
||||
using LRUList = boost::intrusive::list<
|
||||
Cell,
|
||||
boost::intrusive::value_traits<LRUHashMapCellIntrusiveValueTraits>,
|
||||
boost::intrusive::constant_time_size<false>>;
|
||||
|
||||
using iterator = typename LRUList::iterator;
|
||||
using const_iterator = typename LRUList::const_iterator;
|
||||
using reverse_iterator = typename LRUList::reverse_iterator;
|
||||
using const_reverse_iterator = typename LRUList::const_reverse_iterator;
|
||||
|
||||
LRUHashMapImpl(size_t max_size_, bool preallocate_max_size_in_hash_map = false)
|
||||
: Base(preallocate_max_size_in_hash_map ? max_size_ : 32)
|
||||
, max_size(max_size_)
|
||||
{
|
||||
assert(max_size > 0);
|
||||
}
|
||||
|
||||
std::pair<Cell *, bool> insert(const Key & key, const Value & value)
|
||||
{
|
||||
return emplace(key, value);
|
||||
}
|
||||
|
||||
std::pair<Cell *, bool> insert(const Key & key, Value && value)
|
||||
{
|
||||
return emplace(key, std::move(value));
|
||||
}
|
||||
|
||||
template<typename ...Args>
|
||||
std::pair<Cell *, bool> emplace(const Key & key, Args&&... args)
|
||||
{
|
||||
size_t hash_value = Base::hash(key);
|
||||
|
||||
Cell * it = Base::find(key, hash_value);
|
||||
|
||||
if (it)
|
||||
{
|
||||
/// The cell already contains the element: return it and move it to the end of the LRU list
|
||||
lru_list.splice(lru_list.end(), lru_list, lru_list.iterator_to(*it));
|
||||
return std::make_pair(it, false);
|
||||
}
|
||||
|
||||
if (size() == max_size)
|
||||
{
|
||||
/// Erase least recently used element from front of the list
|
||||
Cell & node = lru_list.front();
|
||||
|
||||
const Key & element_to_remove_key = node.getKey();
|
||||
size_t key_hash = node.getHash(*this);
|
||||
|
||||
lru_list.pop_front();
|
||||
|
||||
[[maybe_unused]] bool erased = Base::erase(element_to_remove_key, key_hash);
|
||||
assert(erased);
|
||||
}
|
||||
|
||||
[[maybe_unused]] bool inserted;
|
||||
|
||||
/// Insert the value: first try the zero-value storage; if that does not apply, insert into the buffer
|
||||
if (!Base::emplaceIfZero(key, it, inserted, hash_value))
|
||||
Base::emplaceNonZero(key, it, inserted, hash_value);
|
||||
|
||||
assert(inserted);
|
||||
|
||||
new (&it->getMapped()) Value(std::forward<Args>(args)...);
|
||||
|
||||
/// Put cell to the end of lru list
|
||||
lru_list.insert(lru_list.end(), *it);
|
||||
|
||||
return std::make_pair(it, true);
|
||||
}
|
||||
|
||||
using Base::find;
|
||||
|
||||
Value & get(const Key & key)
|
||||
{
|
||||
auto it = Base::find(key);
|
||||
assert(it);
|
||||
|
||||
Value & value = it->getMapped();
|
||||
|
||||
/// Put cell to the end of lru list
|
||||
lru_list.splice(lru_list.end(), lru_list, lru_list.iterator_to(*it));
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
const Value & get(const Key & key) const
|
||||
{
|
||||
return const_cast<std::decay_t<decltype(*this)> *>(this)->get(key);
|
||||
}
|
||||
|
||||
bool contains(const Key & key) const
|
||||
{
|
||||
return Base::has(key);
|
||||
}
|
||||
|
||||
bool erase(const Key & key)
|
||||
{
|
||||
auto hash = Base::hash(key);
|
||||
auto it = Base::find(key, hash);
|
||||
|
||||
if (!it)
|
||||
return false;
|
||||
|
||||
lru_list.erase(lru_list.iterator_to(*it));
|
||||
|
||||
return Base::erase(key, hash);
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
lru_list.clear();
|
||||
Base::clear();
|
||||
}
|
||||
|
||||
using Base::size;
|
||||
|
||||
size_t getMaxSize() const { return max_size; }
|
||||
|
||||
iterator begin() { return lru_list.begin(); }
|
||||
const_iterator begin() const { return lru_list.cbegin(); }
|
||||
iterator end() { return lru_list.end(); }
|
||||
const_iterator end() const { return lru_list.cend(); }
|
||||
|
||||
reverse_iterator rbegin() { return lru_list.rbegin(); }
|
||||
const_reverse_iterator rbegin() const { return lru_list.crbegin(); }
|
||||
reverse_iterator rend() { return lru_list.rend(); }
|
||||
const_reverse_iterator rend() const { return lru_list.crend(); }
|
||||
|
||||
private:
|
||||
size_t max_size;
|
||||
LRUList lru_list;
|
||||
};
|
||||
|
||||
template <typename Key, typename Value, typename Hash = DefaultHash<Key>>
|
||||
using LRUHashMap = LRUHashMapImpl<Key, Value, Hash, false>;
|
||||
|
||||
template <typename Key, typename Value, typename Hash = DefaultHash<Key>>
|
||||
using LRUHashMapWithSavedHash = LRUHashMapImpl<Key, Value, Hash, true>;
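Based on the interface added above, typical use of the container looks roughly like the following sketch (it assumes the header path introduced by this patch and a ClickHouse build environment; it is not a test from the patch itself):

#include <cassert>
#include <cstdio>
#include <Common/HashTable/LRUHashMap.h>

int main()
{
    /// Capacity of three: inserting a fourth key evicts the least recently used one.
    LRUHashMap<UInt64, UInt64> cache(3);

    cache.insert(1, 10);
    cache.insert(2, 20);
    cache.insert(3, 30);

    cache.get(1);          /// touch key 1 so it becomes the most recently used
    cache.insert(4, 40);   /// evicts key 2, the least recently used

    assert(!cache.contains(2));
    assert(cache.contains(1) && cache.contains(3) && cache.contains(4));
    assert(cache.size() == 3);

    /// Iteration goes from least to most recently used.
    for (const auto & node : cache)
        std::printf("%llu -> %llu\n",
            static_cast<unsigned long long>(node.getKey()),
            static_cast<unsigned long long>(node.getMapped()));

    return 0;
}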
|
@ -99,6 +99,11 @@ ThreadStatus::~ThreadStatus()
|
||||
/// We've already allocated a little bit more than the limit and cannot track it in the thread memory tracker or its parent.
|
||||
}
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
/// It may cause segfault if query_context was destroyed, but was not detached
|
||||
assert((!query_context && query_id.empty()) || (query_context && query_id == query_context->getCurrentQueryId()));
|
||||
#endif
|
||||
|
||||
if (deleter)
|
||||
deleter();
|
||||
current_thread = nullptr;
|
||||
|
@ -201,7 +201,7 @@ public:
|
||||
void setFatalErrorCallback(std::function<void()> callback);
|
||||
void onFatalError();
|
||||
|
||||
/// Sets query context for current thread and its thread group
|
||||
/// Sets query context for current master thread and its thread group
|
||||
/// NOTE: query_context have to be alive until detachQuery() is called
|
||||
void attachQueryContext(Context & query_context);
|
||||
|
||||
|
@ -38,6 +38,9 @@ target_link_libraries (arena_with_free_lists PRIVATE dbms)
|
||||
add_executable (pod_array pod_array.cpp)
|
||||
target_link_libraries (pod_array PRIVATE clickhouse_common_io)
|
||||
|
||||
add_executable (lru_hash_map_perf lru_hash_map_perf.cpp)
|
||||
target_link_libraries (lru_hash_map_perf PRIVATE clickhouse_common_io)
|
||||
|
||||
add_executable (thread_creation_latency thread_creation_latency.cpp)
|
||||
target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io)
|
||||
|
||||
|
161
src/Common/tests/gtest_lru_hash_map.cpp
Normal file
@ -0,0 +1,161 @@
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
|
||||
#include <Common/HashTable/LRUHashMap.h>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
template<typename LRUHashMap>
|
||||
std::vector<typename LRUHashMap::Key> convertToVector(const LRUHashMap & map)
|
||||
{
|
||||
std::vector<typename LRUHashMap::Key> result;
|
||||
result.reserve(map.size());
|
||||
|
||||
for (auto & node: map)
|
||||
result.emplace_back(node.getKey());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void testInsert(size_t elements_to_insert_size, size_t map_size)
|
||||
{
|
||||
using LRUHashMap = LRUHashMap<int, int>;
|
||||
|
||||
LRUHashMap map(map_size);
|
||||
|
||||
std::vector<int> expected;
|
||||
|
||||
for (size_t i = 0; i < elements_to_insert_size; ++i)
|
||||
map.insert(i, i);
|
||||
|
||||
for (size_t i = elements_to_insert_size - map_size; i < elements_to_insert_size; ++i)
|
||||
expected.emplace_back(i);
|
||||
|
||||
std::vector<int> actual = convertToVector(map);
|
||||
ASSERT_EQ(map.size(), actual.size());
|
||||
ASSERT_EQ(actual, expected);
|
||||
}
|
||||
|
||||
TEST(LRUHashMap, Insert)
|
||||
{
|
||||
{
|
||||
using LRUHashMap = LRUHashMap<int, int>;
|
||||
|
||||
LRUHashMap map(3);
|
||||
|
||||
map.emplace(1, 1);
|
||||
map.insert(2, 2);
|
||||
int v = 3;
|
||||
map.insert(3, v);
|
||||
map.emplace(4, 4);
|
||||
|
||||
std::vector<int> expected = { 2, 3, 4 };
|
||||
std::vector<int> actual = convertToVector(map);
|
||||
|
||||
ASSERT_EQ(actual, expected);
|
||||
}
|
||||
|
||||
testInsert(1200000, 1200000);
|
||||
testInsert(10, 5);
|
||||
testInsert(1200000, 2);
|
||||
testInsert(1200000, 1);
|
||||
}
|
||||
|
||||
TEST(LRUHashMap, GetModify)
|
||||
{
|
||||
using LRUHashMap = LRUHashMap<int, int>;
|
||||
|
||||
LRUHashMap map(3);
|
||||
|
||||
map.emplace(1, 1);
|
||||
map.emplace(2, 2);
|
||||
map.emplace(3, 3);
|
||||
|
||||
map.get(3) = 4;
|
||||
|
||||
std::vector<int> expected = { 1, 2, 4 };
|
||||
std::vector<int> actual;
|
||||
actual.reserve(map.size());
|
||||
|
||||
for (auto & node : map)
|
||||
actual.emplace_back(node.getMapped());
|
||||
|
||||
ASSERT_EQ(actual, expected);
|
||||
}
|
||||
|
||||
TEST(LRUHashMap, SetRecentKeyToTop)
|
||||
{
|
||||
using LRUHashMap = LRUHashMap<int, int>;
|
||||
|
||||
LRUHashMap map(3);
|
||||
|
||||
map.emplace(1, 1);
|
||||
map.emplace(2, 2);
|
||||
map.emplace(3, 3);
|
||||
map.emplace(1, 4);
|
||||
|
||||
std::vector<int> expected = { 2, 3, 1 };
|
||||
std::vector<int> actual = convertToVector(map);
|
||||
|
||||
ASSERT_EQ(actual, expected);
|
||||
}
|
||||
|
||||
TEST(LRUHashMap, GetRecentKeyToTop)
|
||||
{
|
||||
using LRUHashMap = LRUHashMap<int, int>;
|
||||
|
||||
LRUHashMap map(3);
|
||||
|
||||
map.emplace(1, 1);
|
||||
map.emplace(2, 2);
|
||||
map.emplace(3, 3);
|
||||
map.get(1);
|
||||
|
||||
std::vector<int> expected = { 2, 3, 1 };
|
||||
std::vector<int> actual = convertToVector(map);
|
||||
|
||||
ASSERT_EQ(actual, expected);
|
||||
}
|
||||
|
||||
TEST(LRUHashMap, Contains)
|
||||
{
|
||||
using LRUHashMap = LRUHashMap<int, int>;
|
||||
|
||||
LRUHashMap map(3);
|
||||
|
||||
map.emplace(1, 1);
|
||||
map.emplace(2, 2);
|
||||
map.emplace(3, 3);
|
||||
|
||||
ASSERT_TRUE(map.contains(1));
|
||||
ASSERT_TRUE(map.contains(2));
|
||||
ASSERT_TRUE(map.contains(3));
|
||||
ASSERT_EQ(map.size(), 3);
|
||||
|
||||
map.erase(1);
|
||||
map.erase(2);
|
||||
map.erase(3);
|
||||
|
||||
ASSERT_EQ(map.size(), 0);
|
||||
ASSERT_FALSE(map.contains(1));
|
||||
ASSERT_FALSE(map.contains(2));
|
||||
ASSERT_FALSE(map.contains(3));
|
||||
}
|
||||
|
||||
TEST(LRUHashMap, Clear)
|
||||
{
|
||||
using LRUHashMap = LRUHashMap<int, int>;
|
||||
|
||||
LRUHashMap map(3);
|
||||
|
||||
map.emplace(1, 1);
|
||||
map.emplace(2, 2);
|
||||
map.emplace(3, 3);
|
||||
map.clear();
|
||||
|
||||
std::vector<int> expected = {};
|
||||
std::vector<int> actual = convertToVector(map);
|
||||
|
||||
ASSERT_EQ(actual, expected);
|
||||
ASSERT_EQ(map.size(), 0);
|
||||
}
|
244
src/Common/tests/lru_hash_map_perf.cpp
Normal file
@ -0,0 +1,244 @@
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <random>
|
||||
#include <pcg_random.hpp>
|
||||
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Common/HashTable/LRUHashMap.h>
|
||||
|
||||
template<class Key, class Value>
|
||||
class LRUHashMapBasic
|
||||
{
|
||||
public:
|
||||
using key_type = Key;
|
||||
using value_type = Value;
|
||||
using list_type = std::list<key_type>;
|
||||
using node = std::pair<value_type, typename list_type::iterator>;
|
||||
using map_type = std::unordered_map<key_type, node, DefaultHash<Key>>;
|
||||
|
||||
LRUHashMapBasic(size_t max_size_, bool preallocated)
|
||||
: hash_map(preallocated ? max_size_ : 32)
|
||||
, max_size(max_size_)
|
||||
{
|
||||
}
|
||||
|
||||
void insert(const Key &key, const Value &value)
|
||||
{
|
||||
auto it = hash_map.find(key);
|
||||
|
||||
if (it == hash_map.end())
|
||||
{
|
||||
if (size() >= max_size)
|
||||
{
|
||||
auto iterator_to_remove = list.begin();
|
||||
|
||||
hash_map.erase(*iterator_to_remove);
|
||||
list.erase(iterator_to_remove);
|
||||
}
|
||||
|
||||
list.push_back(key);
|
||||
hash_map[key] = std::make_pair(value, --list.end());
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & [value_to_update, iterator_in_list_to_update] = it->second;
|
||||
|
||||
list.splice(list.end(), list, iterator_in_list_to_update);
|
||||
|
||||
iterator_in_list_to_update = list.end();
|
||||
value_to_update = value;
|
||||
}
|
||||
}
|
||||
|
||||
value_type & get(const key_type &key)
|
||||
{
|
||||
auto iterator_in_map = hash_map.find(key);
|
||||
assert(iterator_in_map != hash_map.end());
|
||||
|
||||
auto & [value_to_return, iterator_in_list_to_update] = iterator_in_map->second;
|
||||
|
||||
list.splice(list.end(), list, iterator_in_list_to_update);
|
||||
iterator_in_list_to_update = list.end();
|
||||
|
||||
return value_to_return;
|
||||
}
|
||||
|
||||
const value_type & get(const key_type & key) const
|
||||
{
|
||||
return const_cast<std::decay_t<decltype(*this)> *>(this)->get(key);
|
||||
}
|
||||
|
||||
size_t getMaxSize() const
|
||||
{
|
||||
return max_size;
|
||||
}
|
||||
|
||||
size_t size() const
|
||||
{
|
||||
return hash_map.size();
|
||||
}
|
||||
|
||||
bool empty() const
|
||||
{
|
||||
return hash_map.empty();
|
||||
}
|
||||
|
||||
bool contains(const Key & key)
|
||||
{
|
||||
return hash_map.find(key) != hash_map.end();
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
hash_map.clear();
|
||||
list.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
map_type hash_map;
|
||||
list_type list;
|
||||
size_t max_size;
|
||||
};
|
||||
|
||||
std::vector<UInt64> generateNumbersToInsert(size_t numbers_to_insert_size)
|
||||
{
|
||||
std::vector<UInt64> numbers;
|
||||
numbers.reserve(numbers_to_insert_size);
|
||||
|
||||
std::random_device rd;
|
||||
pcg64 gen(rd());
|
||||
|
||||
UInt64 min = std::numeric_limits<UInt64>::min();
|
||||
UInt64 max = std::numeric_limits<UInt64>::max();
|
||||
|
||||
auto distribution = std::uniform_int_distribution<>(min, max);
|
||||
|
||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||
{
|
||||
UInt64 number = distribution(gen);
|
||||
numbers.emplace_back(number);
|
||||
}
|
||||
|
||||
return numbers;
|
||||
}
|
||||
|
||||
void testInsertElementsIntoHashMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
|
||||
{
|
||||
size_t numbers_to_insert_size = numbers_to_insert.size();
|
||||
std::cout << "TestInsertElementsIntoHashMap preallocated map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
|
||||
std::cout << std::endl;
|
||||
|
||||
HashMap<int, int> hash_map(preallocated ? map_size : 32);
|
||||
|
||||
Stopwatch watch;
|
||||
|
||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||
hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
|
||||
|
||||
std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||
|
||||
UInt64 summ = 0;
|
||||
|
||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||
{
|
||||
auto * it = hash_map.find(numbers_to_insert[i]);
|
||||
|
||||
if (it)
|
||||
summ += it->getMapped();
|
||||
}
|
||||
|
||||
std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||
}
|
||||
|
||||
void testInsertElementsIntoStandardMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
|
||||
{
|
||||
size_t numbers_to_insert_size = numbers_to_insert.size();
|
||||
std::cout << "TestInsertElementsIntoStandardMap map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
|
||||
std::cout << std::endl;
|
||||
|
||||
std::unordered_map<int, int> hash_map(preallocated ? map_size : 32);
|
||||
|
||||
Stopwatch watch;
|
||||
|
||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||
hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
|
||||
|
||||
std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||
|
||||
UInt64 summ = 0;
|
||||
|
||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||
{
|
||||
auto it = hash_map.find(numbers_to_insert[i]);
|
||||
|
||||
if (it != hash_map.end())
|
||||
summ += it->second;
|
||||
}
|
||||
|
||||
std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||
}
|
||||
|
||||
template<typename LRUCache>
|
||||
UInt64 testInsertIntoEmptyCache(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
|
||||
{
|
||||
size_t numbers_to_insert_size = numbers_to_insert.size();
|
||||
std::cout << "Test testInsertPreallocated preallocated map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
|
||||
std::cout << std::endl;
|
||||
|
||||
LRUCache cache(map_size, preallocated);
|
||||
Stopwatch watch;
|
||||
|
||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||
{
|
||||
cache.insert(numbers_to_insert[i], numbers_to_insert[i]);
|
||||
}
|
||||
|
||||
std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||
|
||||
UInt64 summ = 0;
|
||||
|
||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||
if (cache.contains(numbers_to_insert[i]))
|
||||
summ += cache.get(numbers_to_insert[i]);
|
||||
|
||||
std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||
|
||||
return summ;
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
{
|
||||
(void)(argc);
|
||||
(void)(argv);
|
||||
|
||||
size_t hash_map_size = 1200000;
|
||||
size_t numbers_to_insert_size = 12000000;
|
||||
std::vector<UInt64> numbers = generateNumbersToInsert(numbers_to_insert_size);
|
||||
|
||||
std::cout << "Test insert into HashMap preallocated=0" << std::endl;
|
||||
testInsertElementsIntoHashMap(hash_map_size, numbers, false);
|
||||
std::cout << std::endl;
|
||||
|
||||
std::cout << "Test insert into HashMap preallocated=1" << std::endl;
|
||||
testInsertElementsIntoHashMap(hash_map_size, numbers, true);
|
||||
std::cout << std::endl;
|
||||
|
||||
std::cout << "Test LRUHashMap preallocated=0" << std::endl;
|
||||
testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, false);
|
||||
std::cout << std::endl;
|
||||
|
||||
std::cout << "Test LRUHashMap preallocated=1" << std::endl;
|
||||
testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, true);
|
||||
std::cout << std::endl;
|
||||
|
||||
std::cout << "Test LRUHashMapBasic preallocated=0" << std::endl;
|
||||
testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, false);
|
||||
std::cout << std::endl;
|
||||
|
||||
std::cout << "Test LRUHashMapBasic preallocated=1" << std::endl;
|
||||
testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, true);
|
||||
std::cout << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,18 +1,24 @@
|
||||
#include <boost/program_options.hpp>
|
||||
#include <DataStreams/IBlockOutputStream.h>
|
||||
#include <DataStreams/AsynchronousBlockInputStream.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <Storages/ConstraintsDescription.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <IO/copyData.h>
|
||||
#include <Interpreters/DatabaseCatalog.h>
|
||||
#include <IO/ReadBufferFromIStream.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/LimitReadBuffer.h>
|
||||
#include <Storages/StorageMemory.h>
|
||||
#include <Processors/Sources/SourceFromInputStream.h>
|
||||
|
||||
#include <Processors/Pipe.h>
|
||||
#include <Processors/Sources/SinkToOutputStream.h>
|
||||
#include <Processors/Executors/PipelineExecutor.h>
|
||||
#include <Processors/Sources/SourceFromInputStream.h>
|
||||
|
||||
#include <Core/ExternalTable.h>
|
||||
#include <Poco/Net/MessageHeader.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <common/find_symbols.h>
|
||||
|
||||
|
||||
@ -39,7 +45,7 @@ ExternalTableDataPtr BaseExternalTable::getData(const Context & context)
|
||||
return data;
|
||||
}
|
||||
|
||||
void BaseExternalTable::clean()
|
||||
void BaseExternalTable::clear()
|
||||
{
|
||||
name.clear();
|
||||
file.clear();
|
||||
@ -49,17 +55,6 @@ void BaseExternalTable::clean()
|
||||
read_buffer.reset();
|
||||
}
|
||||
|
||||
/// Function for debugging information output
|
||||
void BaseExternalTable::write()
|
||||
{
|
||||
std::cerr << "file " << file << std::endl;
|
||||
std::cerr << "name " << name << std::endl;
|
||||
std::cerr << "format " << format << std::endl;
|
||||
std::cerr << "structure: \n";
|
||||
for (const auto & elem : structure)
|
||||
std::cerr << '\t' << elem.first << ' ' << elem.second << std::endl;
|
||||
}
|
||||
|
||||
void BaseExternalTable::parseStructureFromStructureField(const std::string & argument)
|
||||
{
|
||||
std::vector<std::string> vals;
|
||||
@ -182,7 +177,7 @@ void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header,
|
||||
executor->execute(/*num_threads = */ 1);
|
||||
|
||||
/// We are ready to receive the next file, for this we clear all the information received
|
||||
clean();
|
||||
clear();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -61,10 +61,7 @@ public:
|
||||
|
||||
protected:
|
||||
/// Clear all accumulated information
|
||||
void clean();
|
||||
|
||||
/// Function for debugging information output
|
||||
void write();
|
||||
void clear();
|
||||
|
||||
/// Construct the `structure` vector from the text field `structure`
|
||||
virtual void parseStructureFromStructureField(const std::string & argument);
|
||||
|
@ -86,8 +86,6 @@ class IColumn;
|
||||
\
|
||||
M(Bool, optimize_move_to_prewhere, true, "Allows disabling WHERE to PREWHERE optimization in SELECT queries from MergeTree.", 0) \
|
||||
\
|
||||
M(Milliseconds, insert_in_memory_parts_timeout, 600000, "", 0) \
|
||||
\
|
||||
M(UInt64, replication_alter_partitions_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) \
|
||||
M(UInt64, replication_alter_columns_timeout, 60, "Wait for actions to change the table structure within the specified number of seconds. 0 - wait unlimited time.", 0) \
|
||||
\
|
||||
@ -377,6 +375,7 @@ class IColumn;
|
||||
M(Bool, optimize_respect_aliases, true, "If it is set to true, it will respect aliases in WHERE/GROUP BY/ORDER BY, that will help with partition pruning/secondary indexes/optimize_aggregation_in_order/optimize_read_in_order/optimize_trivial_count", 0) \
|
||||
M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
|
||||
M(Bool, optimize_move_functions_out_of_any, false, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
|
||||
M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \
|
||||
M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \
|
||||
M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
|
||||
M(Bool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \
|
||||
@ -420,6 +419,7 @@ class IColumn;
|
||||
M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
|
||||
\
|
||||
M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
|
||||
M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \
|
||||
M(Bool, allow_experimental_query_deduplication, false, "Allow sending parts' UUIDs for a query in order to deduplicate data parts if any", 0) \
|
||||
\
|
||||
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
|
||||
|
@ -1,13 +1,27 @@
|
||||
#include <DataStreams/AddingDefaultBlockOutputStream.h>
|
||||
#include <Interpreters/addMissingDefaults.h>
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
AddingDefaultBlockOutputStream::AddingDefaultBlockOutputStream(
|
||||
const BlockOutputStreamPtr & output_,
|
||||
const Block & header_,
|
||||
const ColumnsDescription & columns_,
|
||||
const Context & context_)
|
||||
: output(output_), header(header_)
|
||||
{
|
||||
auto dag = addMissingDefaults(header_, output->getHeader().getNamesAndTypesList(), columns_, context_);
|
||||
adding_defaults_actions = std::make_shared<ExpressionActions>(std::move(dag));
|
||||
}
|
||||
|
||||
void AddingDefaultBlockOutputStream::write(const Block & block)
|
||||
{
|
||||
output->write(addMissingDefaults(block, output_block.getNamesAndTypesList(), columns, context));
|
||||
auto copy = block;
|
||||
adding_defaults_actions->execute(copy);
|
||||
output->write(copy);
|
||||
}
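The refactoring above moves the planning work into the constructor, where addMissingDefaults() produces a DAG that is compiled once into ExpressionActions, so write() only copies the block and applies the prepared actions. The shape of that change, reduced to a standalone sketch with stand-in types (PreparedActions and Block here are not ClickHouse classes):

#include <functional>
#include <iostream>
#include <vector>

using Block = std::vector<int>;

/// Stand-in for a "prepared" transformation: built once, executed many times.
struct PreparedActions
{
    std::function<void(Block &)> action;
    void execute(Block & block) const { action(block); }
};

struct AddingDefaultsWriter
{
    PreparedActions actions;

    /// The expensive planning happens once, in the constructor.
    AddingDefaultsWriter()
        : actions{[](Block & block) { block.push_back(0); /* fill a "default" column */ }}
    {
    }

    /// Per-block work is just: copy, apply the prepared actions, write out.
    void write(const Block & block)
    {
        Block copy = block;
        actions.execute(copy);
        std::cout << "wrote block of " << copy.size() << " columns\n";
    }
};

int main()
{
    AddingDefaultsWriter writer;
    writer.write({1, 2});
    writer.write({3});
}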
|
||||
|
||||
void AddingDefaultBlockOutputStream::flush()
|
||||
|
@ -8,6 +8,9 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ExpressionActions;
|
||||
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
|
||||
|
||||
class Context;
|
||||
|
||||
/** This stream adds three types of columns into block
|
||||
@ -22,13 +25,8 @@ public:
|
||||
AddingDefaultBlockOutputStream(
|
||||
const BlockOutputStreamPtr & output_,
|
||||
const Block & header_,
|
||||
const Block & output_block_,
|
||||
const ColumnsDescription & columns_,
|
||||
const Context & context_)
|
||||
: output(output_), header(header_), output_block(output_block_),
|
||||
columns(columns_), context(context_)
|
||||
{
|
||||
}
|
||||
const Context & context_);
|
||||
|
||||
Block getHeader() const override { return header; }
|
||||
void write(const Block & block) override;
|
||||
@ -41,10 +39,7 @@ public:
|
||||
private:
|
||||
BlockOutputStreamPtr output;
|
||||
const Block header;
|
||||
/// Blocks after this stream should have this structure
|
||||
const Block output_block;
|
||||
const ColumnsDescription columns;
|
||||
const Context & context;
|
||||
ExpressionActionsPtr adding_defaults_actions;
|
||||
};
|
||||
|
||||
|
||||
|
@ -171,7 +171,12 @@ Block AddingDefaultsBlockInputStream::readImpl()
|
||||
if (!evaluate_block.columns())
|
||||
evaluate_block.insert({ColumnConst::create(ColumnUInt8::create(1, 0), res.rows()), std::make_shared<DataTypeUInt8>(), "_dummy"});
|
||||
|
||||
evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), columns, context, false);
|
||||
auto dag = evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), columns, context, false);
|
||||
if (dag)
|
||||
{
|
||||
auto actions = std::make_shared<ExpressionActions>(std::move(dag));
|
||||
actions->execute(evaluate_block);
|
||||
}
|
||||
|
||||
std::unordered_map<size_t, MutableColumnPtr> mixed_columns;
|
||||
|
||||
|
@ -5,7 +5,6 @@
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <DataStreams/MaterializingBlockOutputStream.h>
|
||||
#include <DataStreams/SquashingBlockOutputStream.h>
|
||||
#include <DataStreams/NativeBlockInputStream.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <Processors/Formats/IRowInputFormat.h>
|
||||
|
@ -116,8 +116,35 @@ public:
|
||||
DataTypes argument_types = {nested_type};
|
||||
Array params_row;
|
||||
AggregateFunctionProperties properties;
|
||||
AggregateFunctionPtr bitmap_function = AggregateFunctionFactory::instance().get(
|
||||
AggregateFunctionGroupBitmapData<UInt32>::name(), argument_types, params_row, properties);
|
||||
AggregateFunctionPtr bitmap_function;
|
||||
WhichDataType which(nested_type);
|
||||
if (which.isUInt8())
|
||||
bitmap_function = AggregateFunctionFactory::instance().get(
|
||||
AggregateFunctionGroupBitmapData<UInt8>::name(), argument_types, params_row, properties);
|
||||
else if (which.isUInt16())
|
||||
bitmap_function = AggregateFunctionFactory::instance().get(
|
||||
AggregateFunctionGroupBitmapData<UInt16>::name(), argument_types, params_row, properties);
|
||||
else if (which.isUInt32())
|
||||
bitmap_function = AggregateFunctionFactory::instance().get(
|
||||
AggregateFunctionGroupBitmapData<UInt32>::name(), argument_types, params_row, properties);
|
||||
else if (which.isUInt64())
|
||||
bitmap_function = AggregateFunctionFactory::instance().get(
|
||||
AggregateFunctionGroupBitmapData<UInt64>::name(), argument_types, params_row, properties);
|
||||
else if (which.isInt8())
|
||||
bitmap_function = AggregateFunctionFactory::instance().get(
|
||||
AggregateFunctionGroupBitmapData<Int8>::name(), argument_types, params_row, properties);
|
||||
else if (which.isInt16())
|
||||
bitmap_function = AggregateFunctionFactory::instance().get(
|
||||
AggregateFunctionGroupBitmapData<Int16>::name(), argument_types, params_row, properties);
|
||||
else if (which.isInt32())
|
||||
bitmap_function = AggregateFunctionFactory::instance().get(
|
||||
AggregateFunctionGroupBitmapData<Int32>::name(), argument_types, params_row, properties);
|
||||
else if (which.isInt64())
|
||||
bitmap_function = AggregateFunctionFactory::instance().get(
|
||||
AggregateFunctionGroupBitmapData<Int64>::name(), argument_types, params_row, properties);
|
||||
else
|
||||
throw Exception(
|
||||
"Unexpected type " + array_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
return std::make_shared<DataTypeAggregateFunction>(bitmap_function, argument_types, params_row);
|
||||
}
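This hunk, like several later ones in the file, selects the template instantiation of the groupBitmap machinery with a chain of which.isUInt8() ... which.isInt64() checks. The dispatch pattern itself, stripped of ClickHouse types, looks roughly like the following sketch (TypeIndex and ElementWidth are illustrative stand-ins, not part of the patch):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <stdexcept>

enum class TypeIndex { UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64 };

struct ElementWidth
{
    template <typename T>
    size_t operator()() const { return sizeof(T); }
};

/// Call f.operator()<T>() with T chosen from the runtime type index,
/// mirroring the if/else chain over WhichDataType in the function above.
template <typename F>
auto dispatchOnIntegerType(TypeIndex type, F && f)
{
    switch (type)
    {
        case TypeIndex::UInt8:  return f.template operator()<uint8_t>();
        case TypeIndex::UInt16: return f.template operator()<uint16_t>();
        case TypeIndex::UInt32: return f.template operator()<uint32_t>();
        case TypeIndex::UInt64: return f.template operator()<uint64_t>();
        case TypeIndex::Int8:   return f.template operator()<int8_t>();
        case TypeIndex::Int16:  return f.template operator()<int16_t>();
        case TypeIndex::Int32:  return f.template operator()<int32_t>();
        case TypeIndex::Int64:  return f.template operator()<int64_t>();
    }
    throw std::runtime_error("Unexpected type of argument");
}

int main()
{
    std::cout << "element width: "
              << dispatchOnIntegerType(TypeIndex::Int32, ElementWidth{}) << " bytes\n";
}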
|
||||
@ -141,6 +168,14 @@ public:
|
||||
return executeBitmapData<UInt32>(argument_types, arguments);
|
||||
else if (which.isUInt64())
|
||||
return executeBitmapData<UInt64>(argument_types, arguments);
|
||||
else if (which.isInt8())
|
||||
return executeBitmapData<Int8>(argument_types, arguments);
|
||||
else if (which.isInt16())
|
||||
return executeBitmapData<Int16>(argument_types, arguments);
|
||||
else if (which.isInt32())
|
||||
return executeBitmapData<Int32>(argument_types, arguments);
|
||||
else if (which.isInt64())
|
||||
return executeBitmapData<Int64>(argument_types, arguments);
|
||||
else
|
||||
throw Exception(
|
||||
"Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
@ -161,7 +196,7 @@ private:
|
||||
Array params_row;
|
||||
AggregateFunctionProperties properties;
|
||||
AggregateFunctionPtr bitmap_function = AggregateFunctionFactory::instance().get(
|
||||
AggregateFunctionGroupBitmapData<UInt32>::name(), argument_types, params_row, properties);
|
||||
AggregateFunctionGroupBitmapData<T>::name(), argument_types, params_row, properties);
|
||||
auto col_to = ColumnAggregateFunction::create(bitmap_function);
|
||||
col_to->reserve(offsets.size());
|
||||
|
||||
@ -197,7 +232,7 @@ public:
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
const DataTypeAggregateFunction * bitmap_type = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
|
||||
if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
|
||||
if (!(bitmap_type && bitmap_type->getFunctionName() =="groupBitmap"))
|
||||
throw Exception(
|
||||
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
@ -230,6 +265,14 @@ public:
|
||||
executeIntType<UInt32>(arguments, input_rows_count, res_data, res_offsets);
|
||||
else if (which.isUInt64())
|
||||
executeIntType<UInt64>(arguments, input_rows_count, res_data, res_offsets);
|
||||
else if (which.isInt8())
|
||||
executeIntType<Int8>(arguments, input_rows_count, res_data, res_offsets);
|
||||
else if (which.isInt16())
|
||||
executeIntType<Int16>(arguments, input_rows_count, res_data, res_offsets);
|
||||
else if (which.isInt32())
|
||||
executeIntType<Int32>(arguments, input_rows_count, res_data, res_offsets);
|
||||
else if (which.isInt64())
|
||||
executeIntType<Int64>(arguments, input_rows_count, res_data, res_offsets);
|
||||
else
|
||||
throw Exception(
|
||||
"Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
@ -279,7 +322,7 @@ public:
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
const DataTypeAggregateFunction * bitmap_type = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
|
||||
if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
|
||||
if (!(bitmap_type && bitmap_type->getFunctionName() == "groupBitmap"))
|
||||
throw Exception(
|
||||
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
@ -312,6 +355,14 @@ public:
|
||||
return executeIntType<UInt32>(arguments, input_rows_count);
|
||||
else if (which.isUInt64())
|
||||
return executeIntType<UInt64>(arguments, input_rows_count);
|
||||
else if (which.isInt8())
|
||||
return executeIntType<Int8>(arguments, input_rows_count);
|
||||
else if (which.isInt16())
|
||||
return executeIntType<Int16>(arguments, input_rows_count);
|
||||
else if (which.isInt32())
|
||||
return executeIntType<Int32>(arguments, input_rows_count);
|
||||
else if (which.isInt64())
|
||||
return executeIntType<Int64>(arguments, input_rows_count);
|
||||
else
|
||||
throw Exception(
|
||||
"Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
@ -384,7 +435,11 @@ struct BitmapSubsetInRangeImpl
|
||||
public:
|
||||
static constexpr auto name = "bitmapSubsetInRange";
|
||||
template <typename T>
|
||||
static void apply(const AggregateFunctionGroupBitmapData<T> & bitmap_data_0, UInt64 range_start, UInt64 range_end, AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
|
||||
static void apply(
|
||||
const AggregateFunctionGroupBitmapData<T> & bitmap_data_0,
|
||||
UInt64 range_start,
|
||||
UInt64 range_end,
|
||||
AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
|
||||
{
|
||||
bitmap_data_0.rbs.rb_range(range_start, range_end, bitmap_data_2.rbs);
|
||||
}
|
||||
@ -395,7 +450,11 @@ struct BitmapSubsetLimitImpl
|
||||
public:
|
||||
static constexpr auto name = "bitmapSubsetLimit";
|
||||
template <typename T>
|
||||
static void apply(const AggregateFunctionGroupBitmapData<T> & bitmap_data_0, UInt64 range_start, UInt64 range_end, AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
|
||||
static void apply(
|
||||
const AggregateFunctionGroupBitmapData<T> & bitmap_data_0,
|
||||
UInt64 range_start,
|
||||
UInt64 range_end,
|
||||
AggregateFunctionGroupBitmapData<T> & bitmap_data_2)
|
||||
{
|
||||
bitmap_data_0.rbs.rb_limit(range_start, range_end, bitmap_data_2.rbs);
|
||||
}
|
||||
@ -421,7 +480,7 @@ public:
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
const DataTypeAggregateFunction * bitmap_type = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
|
||||
if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
|
||||
if (!(bitmap_type && bitmap_type->getFunctionName() == "groupBitmap"))
|
||||
throw Exception(
|
||||
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
@ -456,6 +515,14 @@ public:
|
||||
return executeIntType<UInt32>(arguments, input_rows_count);
|
||||
else if (which.isUInt64())
|
||||
return executeIntType<UInt64>(arguments, input_rows_count);
|
||||
else if (which.isInt8())
|
||||
return executeIntType<Int8>(arguments, input_rows_count);
|
||||
else if (which.isInt16())
|
||||
return executeIntType<Int16>(arguments, input_rows_count);
|
||||
else if (which.isInt32())
|
||||
return executeIntType<Int32>(arguments, input_rows_count);
|
||||
else if (which.isInt64())
|
||||
return executeIntType<Int64>(arguments, input_rows_count);
|
||||
else
|
||||
throw Exception(
|
||||
"Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
@ -579,7 +646,7 @@ public:
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
const auto * bitmap_type = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
|
||||
if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
|
||||
if (!(bitmap_type && bitmap_type->getFunctionName() == "groupBitmap"))
|
||||
throw Exception(
|
||||
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
@ -604,6 +671,14 @@ public:
|
||||
executeIntType<UInt32>(arguments, input_rows_count, vec_to);
|
||||
else if (which.isUInt64())
|
||||
executeIntType<UInt64>(arguments, input_rows_count, vec_to);
|
||||
else if (which.isInt8())
|
||||
executeIntType<Int8>(arguments, input_rows_count, vec_to);
|
||||
else if (which.isInt16())
|
||||
executeIntType<Int16>(arguments, input_rows_count, vec_to);
|
||||
else if (which.isInt32())
|
||||
executeIntType<Int32>(arguments, input_rows_count, vec_to);
|
||||
else if (which.isInt64())
|
||||
executeIntType<Int64>(arguments, input_rows_count, vec_to);
|
||||
else
|
||||
throw Exception(
|
||||
"Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
@ -743,15 +818,15 @@ public:
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
const auto * bitmap_type0 = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
|
||||
if (!(bitmap_type0 && bitmap_type0->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
|
||||
if (!(bitmap_type0 && bitmap_type0->getFunctionName() == "groupBitmap"))
|
||||
throw Exception(
|
||||
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
|
||||
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
WhichDataType which(arguments[1].get());
|
||||
if (!(which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64()))
|
||||
if (!which.isNativeInt() && !which.isNativeUInt())
|
||||
throw Exception(
|
||||
"Second argument for function " + getName() + " must be one of [UInt8, UInt16, UInt32, UInt64] but it has type " + arguments[1]->getName() + ".",
|
||||
"Second argument for function " + getName() + " must be an native integer type but it has type " + arguments[1]->getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
return std::make_shared<DataTypeNumber<UInt8>>();
|
||||
@ -775,6 +850,14 @@ public:
|
||||
executeIntType<UInt32>(arguments, input_rows_count, vec_to);
|
||||
else if (which.isUInt64())
|
||||
executeIntType<UInt64>(arguments, input_rows_count, vec_to);
|
||||
else if (which.isInt8())
|
||||
executeIntType<Int8>(arguments, input_rows_count, vec_to);
|
||||
else if (which.isInt16())
|
||||
executeIntType<Int16>(arguments, input_rows_count, vec_to);
|
||||
else if (which.isInt32())
|
||||
executeIntType<Int32>(arguments, input_rows_count, vec_to);
|
||||
else if (which.isInt64())
|
||||
executeIntType<Int64>(arguments, input_rows_count, vec_to);
|
||||
else
|
||||
throw Exception(
|
||||
"Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
@ -839,15 +922,15 @@ public:
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
const auto * bitmap_type0 = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
|
||||
if (!(bitmap_type0 && bitmap_type0->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
|
||||
if (!(bitmap_type0 && bitmap_type0->getFunctionName() == "groupBitmap"))
|
||||
throw Exception(
|
||||
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
|
||||
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
const auto * bitmap_type1 = typeid_cast<const DataTypeAggregateFunction *>(arguments[1].get());
|
||||
if (!(bitmap_type1 && bitmap_type1->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
|
||||
if (!(bitmap_type1 && bitmap_type1->getFunctionName() == "groupBitmap"))
|
||||
throw Exception(
|
||||
"Second argument for function " + getName() + " must be a bitmap but it has type " + arguments[1]->getName() + ".",
|
||||
"Second argument for function " + getName() + " must be a bitmap but it has type " + arguments[1]->getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
if (bitmap_type0->getArgumentsDataTypes()[0]->getTypeId() != bitmap_type1->getArgumentsDataTypes()[0]->getTypeId())
|
||||
@ -877,6 +960,14 @@ public:
|
||||
executeIntType<UInt32>(arguments, input_rows_count, vec_to);
|
||||
else if (which.isUInt64())
|
||||
executeIntType<UInt64>(arguments, input_rows_count, vec_to);
|
||||
else if (which.isInt8())
|
||||
executeIntType<Int8>(arguments, input_rows_count, vec_to);
|
||||
else if (which.isInt16())
|
||||
executeIntType<Int16>(arguments, input_rows_count, vec_to);
|
||||
else if (which.isInt32())
|
||||
executeIntType<Int32>(arguments, input_rows_count, vec_to);
|
||||
else if (which.isInt64())
|
||||
executeIntType<Int64>(arguments, input_rows_count, vec_to);
|
||||
else
|
||||
throw Exception(
|
||||
"Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
@ -974,15 +1065,15 @@ public:
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
const auto * bitmap_type0 = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
|
||||
if (!(bitmap_type0 && bitmap_type0->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
|
||||
if (!(bitmap_type0 && bitmap_type0->getFunctionName() == "groupBitmap"))
|
||||
throw Exception(
|
||||
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
|
||||
"First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
const auto * bitmap_type1 = typeid_cast<const DataTypeAggregateFunction *>(arguments[1].get());
|
||||
if (!(bitmap_type1 && bitmap_type1->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
|
||||
if (!(bitmap_type1 && bitmap_type1->getFunctionName() == "groupBitmap"))
|
||||
throw Exception(
|
||||
"Second argument for function " + getName() + " must be a bitmap but it has type " + arguments[1]->getName() + ".",
|
||||
"Second argument for function " + getName() + " must be a bitmap but it has type " + arguments[1]->getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
if (bitmap_type0->getArgumentsDataTypes()[0]->getTypeId() != bitmap_type1->getArgumentsDataTypes()[0]->getTypeId())
|
||||
@ -1009,6 +1100,14 @@ public:
|
||||
return executeBitmapData<UInt32>(arguments, input_rows_count);
|
||||
else if (which.isUInt64())
|
||||
return executeBitmapData<UInt64>(arguments, input_rows_count);
|
||||
else if (which.isUInt8())
|
||||
return executeBitmapData<UInt8>(arguments, input_rows_count);
|
||||
else if (which.isUInt16())
|
||||
return executeBitmapData<UInt16>(arguments, input_rows_count);
|
||||
else if (which.isUInt32())
|
||||
return executeBitmapData<UInt32>(arguments, input_rows_count);
|
||||
else if (which.isUInt64())
|
||||
return executeBitmapData<UInt64>(arguments, input_rows_count);
|
||||
else
|
||||
throw Exception(
|
||||
"Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
@ -83,9 +83,9 @@ struct ArrayDifferenceImpl
|
||||
}
|
||||
res_ptr = ColumnArray::create(std::move(res_nested), array.getOffsetsPtr());
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
|
||||
static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
|
||||
{
|
||||
ColumnPtr res;
|
||||
@ -107,7 +107,6 @@ struct ArrayDifferenceImpl
|
||||
else
|
||||
throw Exception("Unexpected column for arrayDifference: " + mapped->getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
struct NameArrayDifference { static constexpr auto name = "arrayDifference"; };
|
||||
|
@ -27,6 +27,8 @@ public:
|
||||
return name;
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
|
||||
|
||||
size_t getNumberOfArguments() const override
|
||||
{
|
||||
return 1;
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include <Functions/replicate.h>
|
||||
#include <Functions/IFunctionImpl.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
@ -11,60 +12,50 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
|
||||
}
|
||||
|
||||
namespace
|
||||
DataTypePtr FunctionReplicate::getReturnTypeImpl(const DataTypes & arguments) const
|
||||
{
|
||||
if (arguments.size() < 2)
|
||||
throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
|
||||
"Function {} expect at least two arguments, got {}", getName(), arguments.size());
|
||||
|
||||
/** Creates an array, multiplying the column (the first argument) by the number of elements in the array (the second argument).
|
||||
*/
|
||||
class FunctionReplicate : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "replicate";
|
||||
|
||||
static FunctionPtr create(const Context &)
|
||||
for (size_t i = 1; i < arguments.size(); ++i)
|
||||
{
|
||||
return std::make_shared<FunctionReplicate>();
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
size_t getNumberOfArguments() const override
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForNulls() const override { return false; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[1].get());
|
||||
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[i].get());
|
||||
if (!array_type)
|
||||
throw Exception("Second argument for function " + getName() + " must be array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
return std::make_shared<DataTypeArray>(arguments[0]);
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Argument {} for function {} must be array.",
|
||||
i + 1, getName());
|
||||
}
|
||||
return std::make_shared<DataTypeArray>(arguments[0]);
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
|
||||
ColumnPtr FunctionReplicate::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const
|
||||
{
|
||||
ColumnPtr first_column = arguments[0].column;
|
||||
ColumnPtr offsets;
|
||||
|
||||
for (size_t i = 1; i < arguments.size(); ++i)
|
||||
{
|
||||
ColumnPtr first_column = arguments[0].column;
|
||||
const ColumnArray * array_column = checkAndGetColumn<ColumnArray>(arguments[1].column.get());
|
||||
const ColumnArray * array_column = checkAndGetColumn<ColumnArray>(arguments[i].column.get());
|
||||
ColumnPtr temp_column;
|
||||
if (!array_column)
|
||||
{
|
||||
const auto * const_array_column = checkAndGetColumnConst<ColumnArray>(arguments[1].column.get());
|
||||
const auto * const_array_column = checkAndGetColumnConst<ColumnArray>(arguments[i].column.get());
|
||||
if (!const_array_column)
|
||||
throw Exception("Unexpected column for replicate", ErrorCodes::ILLEGAL_COLUMN);
|
||||
temp_column = const_array_column->convertToFullColumn();
|
||||
array_column = checkAndGetColumn<ColumnArray>(temp_column.get());
|
||||
}
|
||||
return ColumnArray::create(first_column->replicate(array_column->getOffsets())->convertToFullColumnIfConst(), array_column->getOffsetsPtr());
|
||||
}
|
||||
};
|
||||
|
||||
if (!offsets || offsets->empty())
|
||||
offsets = array_column->getOffsetsPtr();
|
||||
}
|
||||
|
||||
const auto & offsets_data = assert_cast<const ColumnArray::ColumnOffsets &>(*offsets).getData();
|
||||
return ColumnArray::create(first_column->replicate(offsets_data)->convertToFullColumnIfConst(), offsets);
|
||||
}
|
||||
|
||||
void registerFunctionReplicate(FunctionFactory & factory)
|
||||
|
40
src/Functions/replicate.h
Normal file

@ -0,0 +1,40 @@
#pragma once
#include <Functions/IFunctionImpl.h>

namespace DB
{

class Context;

/// Creates an array, multiplying the column (the first argument) by the number of elements in the array (the second argument).
/// Function may accept more then two arguments. If so, the first array with non-empty offsets is chosen.
class FunctionReplicate : public IFunction
{
public:
static constexpr auto name = "replicate";

static FunctionPtr create(const Context &)
{
return std::make_shared<FunctionReplicate>();
}

String getName() const override
{
return name;
}

size_t getNumberOfArguments() const override
{
return 0;
}

bool isVariadic() const override { return true; }

bool useDefaultImplementationForNulls() const override { return false; }

DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;

ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override;
};

}
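The behavioural change above is that replicate becomes variadic: among all array arguments, the first one whose offsets column is non-empty decides the shape of the result. The standalone sketch below (plain C++ with std::vector, not ClickHouse's ColumnArray API; chooseOffsets is a hypothetical helper) mirrors that selection loop under the assumption that offsets are cumulative row counts, as in FunctionReplicate::executeImpl.

#include <cstddef>
#include <iostream>
#include <vector>

// Simplified stand-in for ColumnArray::Offsets: cumulative end positions of each array.
using Offsets = std::vector<size_t>;

// Mirrors the loop in the new FunctionReplicate::executeImpl: keep replacing the
// chosen offsets while they are still null/empty, so the first non-empty one wins.
static const Offsets * chooseOffsets(const std::vector<Offsets> & array_arguments)
{
    const Offsets * chosen = nullptr;
    for (const auto & offsets : array_arguments)
        if (!chosen || chosen->empty())
            chosen = &offsets;
    return chosen;
}

int main()
{
    // Three array arguments; the first has empty offsets, so the second one is used.
    std::vector<Offsets> args = {{}, {2, 5, 6}, {1, 2, 3}};
    const Offsets * offsets = chooseOffsets(args);
    std::cout << "result rows: " << (offsets && !offsets->empty() ? offsets->back() : 0) << '\n'; // prints 6
    return 0;
}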
@ -13,7 +13,7 @@ namespace DB
*
* While using this object, no other allocations in arena are possible.
*/
class WriteBufferFromArena : public WriteBuffer
class WriteBufferFromArena final : public WriteBuffer
{
private:
Arena & arena;

@ -39,16 +39,26 @@ ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_)
for (const auto & input : inputs_)
{
if (input.column && isColumnConst(*input.column))
{
addInput(input, true);

/// Here we also add column.
/// It will allow to remove input which is actually constant (after projection).
/// Also, some transforms from query pipeline may randomly materialize constants,
/// without any respect to header structure. So, it is a way to drop materialized column and use
/// constant value from header.
/// We cannot remove such input right now cause inputs positions are important in some cases.
addColumn(input, true);
}
else
addInput(input.name, input.type, true);
}
}

ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace)
ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace, bool add_to_index)
{
auto it = index.find(node.result_name);
if (it != index.end() && !can_replace)
if (it != index.end() && !can_replace && add_to_index)
throw Exception("Column '" + node.result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN);

auto & res = nodes.emplace_back(std::move(node));

@ -56,7 +66,8 @@ ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace)
if (res.type == ActionType::INPUT)
inputs.emplace_back(&res);

index.replace(&res);
if (add_to_index)
index.replace(&res);
return res;
}

@ -90,7 +101,7 @@ const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column, bool
return addNode(std::move(node), can_replace);
}

const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column, bool can_replace)
const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column, bool can_replace, bool materialize)
{
if (!column.column)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add column {} because it is nullptr", column.name);

@ -101,7 +112,22 @@ const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column, boo
node.result_name = std::move(column.name);
node.column = std::move(column.column);

return addNode(std::move(node), can_replace);
auto * res = &addNode(std::move(node), can_replace, !materialize);

if (materialize)
{
auto & name = res->result_name;

FunctionOverloadResolverPtr func_builder_materialize =
std::make_shared<FunctionOverloadResolverAdaptor>(
std::make_unique<DefaultOverloadResolver>(
std::make_shared<FunctionMaterialize>()));

res = &addFunction(func_builder_materialize, {res}, {}, true, false);
res = &addAlias(*res, name, true);
}

return *res;
}

const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::string alias, bool can_replace)

@ -116,7 +142,6 @@ ActionsDAG::Node & ActionsDAG::addAlias(Node & child, std::string alias, bool ca
node.result_type = child.result_type;
node.result_name = std::move(alias);
node.column = child.column;
node.allow_constant_folding = child.allow_constant_folding;
node.children.emplace_back(&child);

return addNode(std::move(node), can_replace);

@ -143,7 +168,8 @@ const ActionsDAG::Node & ActionsDAG::addFunction(
const FunctionOverloadResolverPtr & function,
const Names & argument_names,
std::string result_name,
const Context & context [[maybe_unused]])
const Context & context [[maybe_unused]],
bool can_replace)
{
const auto & all_settings = context.getSettingsRef();
settings.max_temporary_columns = all_settings.max_temporary_columns;

@ -162,14 +188,15 @@ const ActionsDAG::Node & ActionsDAG::addFunction(
for (const auto & name : argument_names)
children.push_back(&getNode(name));

return addFunction(function, children, std::move(result_name), false);
return addFunction(function, children, std::move(result_name), can_replace);
}

ActionsDAG::Node & ActionsDAG::addFunction(
const FunctionOverloadResolverPtr & function,
Inputs children,
std::string result_name,
bool can_replace)
bool can_replace,
bool add_to_index)
{
size_t num_arguments = children.size();

@ -184,7 +211,6 @@ ActionsDAG::Node & ActionsDAG::addFunction(
for (size_t i = 0; i < num_arguments; ++i)
{
auto & child = *node.children[i];
node.allow_constant_folding = node.allow_constant_folding && child.allow_constant_folding;

ColumnWithTypeAndName argument;
argument.column = child.column;

@ -250,7 +276,7 @@ ActionsDAG::Node & ActionsDAG::addFunction(

node.result_name = std::move(result_name);

return addNode(std::move(node), can_replace);
return addNode(std::move(node), can_replace, add_to_index);
}


@ -349,10 +375,15 @@ void ActionsDAG::removeUnusedActions()
stack.push(node);
}

/// We cannot remove arrayJoin because it changes the number of rows.
for (auto & node : nodes)
{
if (node.type == ActionType::ARRAY_JOIN && visited_nodes.count(&node) == 0)
/// We cannot remove function with side effects even if it returns constant (e.g. ignore(...)).
bool prevent_constant_folding = node.column && isColumnConst(*node.column) && !node.allow_constant_folding;
/// We cannot remove arrayJoin because it changes the number of rows.
bool is_array_join = node.type == ActionType::ARRAY_JOIN;

bool must_keep_node = is_array_join || prevent_constant_folding;
if (must_keep_node && visited_nodes.count(&node) == 0)
{
visited_nodes.insert(&node);
stack.push(&node);

@ -410,7 +441,6 @@ void ActionsDAG::addAliases(const NamesWithAliases & aliases, std::vector<Node *
node.result_type = child->result_type;
node.result_name = std::move(item.second);
node.column = child->column;
node.allow_constant_folding = child->allow_constant_folding;
node.children.emplace_back(child);

auto & alias = addNode(std::move(node), true);

@ -625,6 +655,26 @@ bool ActionsDAG::trivial() const
return true;
}

void ActionsDAG::addMaterializingOutputActions()
{
FunctionOverloadResolverPtr func_builder_materialize =
std::make_shared<FunctionOverloadResolverAdaptor>(
std::make_unique<DefaultOverloadResolver>(
std::make_shared<FunctionMaterialize>()));

Index new_index;
std::vector<Node *> index_nodes(index.begin(), index.end());
for (auto * node : index_nodes)
{
auto & name = node->result_name;
node = &addFunction(func_builder_materialize, {node}, {}, true, false);
node = &addAlias(*node, name, true);
new_index.insert(node);
}

index.swap(new_index);
}

ActionsDAGPtr ActionsDAG::makeConvertingActions(
const ColumnsWithTypeAndName & source,
const ColumnsWithTypeAndName & result,

@ -734,6 +784,23 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
return actions_dag;
}

ActionsDAGPtr ActionsDAG::makeAddingColumnActions(ColumnWithTypeAndName column)
{
auto adding_column_action = std::make_shared<ActionsDAG>();
FunctionOverloadResolverPtr func_builder_materialize =
std::make_shared<FunctionOverloadResolverAdaptor>(
std::make_unique<DefaultOverloadResolver>(
std::make_shared<FunctionMaterialize>()));

auto column_name = column.name;
const auto & column_node = adding_column_action->addColumn(std::move(column));
Inputs inputs = {const_cast<Node *>(&column_node)};
auto & function_node = adding_column_action->addFunction(func_builder_materialize, std::move(inputs), {}, true);
adding_column_action->addAlias(function_node, std::move(column_name), true);

return adding_column_action;
}

ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
{
/// first: x (1), x (2), y ==> x (2), z, x (3)
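The recurring pattern in addColumn(..., materialize = true), addMaterializingOutputActions() and makeAddingColumnActions() above is the same: wrap a node into a call of the materialize function, then alias the wrapped node back to the original name, so downstream consumers keep the same column name but no longer see a constant. The toy sketch below (a hypothetical MiniDag in plain C++, not the real ActionsDAG API) only illustrates that wrap-and-alias idea.

#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Minimal stand-in for an actions-DAG node: a result name, an expression and child links.
struct Node
{
    std::string name;        // result column name
    std::string expression;  // what the node computes (for printing only)
    std::vector<Node *> children;
};

struct MiniDag
{
    std::vector<std::unique_ptr<Node>> nodes;
    std::vector<Node *> outputs;  // rough analogue of ActionsDAG's index

    Node & add(std::string name, std::string expression, std::vector<Node *> children = {})
    {
        nodes.push_back(std::make_unique<Node>(Node{std::move(name), std::move(expression), std::move(children)}));
        return *nodes.back();
    }

    // The wrap-and-alias pattern: output -> materialize(output) -> alias with the original name.
    void materializeOutputs()
    {
        std::vector<Node *> new_outputs;
        for (Node * out : outputs)
        {
            Node & wrapped = add(out->name + "_materialized", "materialize(" + out->expression + ")", {out});
            Node & alias = add(out->name, wrapped.expression, {&wrapped});  // keep the original column name
            new_outputs.push_back(&alias);
        }
        outputs.swap(new_outputs);
    }
};

int main()
{
    MiniDag dag;
    Node & c = dag.add("c", "CONST 42");
    dag.outputs.push_back(&c);

    dag.materializeOutputs();
    for (const Node * out : dag.outputs)
        std::cout << out->name << " = " << out->expression << '\n';  // c = materialize(CONST 42)
    return 0;
}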
@ -198,14 +198,15 @@ public:

const Node & addInput(std::string name, DataTypePtr type, bool can_replace = false);
const Node & addInput(ColumnWithTypeAndName column, bool can_replace = false);
const Node & addColumn(ColumnWithTypeAndName column, bool can_replace = false);
const Node & addColumn(ColumnWithTypeAndName column, bool can_replace = false, bool materialize = false);
const Node & addAlias(const std::string & name, std::string alias, bool can_replace = false);
const Node & addArrayJoin(const std::string & source_name, std::string result_name);
const Node & addFunction(
const FunctionOverloadResolverPtr & function,
const Names & argument_names,
std::string result_name,
const Context & context);
const Context & context,
bool can_replace = false);

/// Call addAlias several times.
void addAliases(const NamesWithAliases & aliases);

@ -232,6 +233,9 @@ public:

ActionsDAGPtr clone() const;

/// For apply materialize() function for every output.
/// Also add aliases so the result names remain unchanged.
void addMaterializingOutputActions();

enum class MatchColumnsMode
{

@ -250,6 +254,9 @@ public:
MatchColumnsMode mode,
bool ignore_constant_values = false); /// Do not check that constants are same. Use value from result_header.

/// Create expression which add const column and then materialize it.
static ActionsDAGPtr makeAddingColumnActions(ColumnWithTypeAndName column);

/// Create ActionsDAG which represents expression equivalent to applying first and second actions consequently.
/// Is used to replace `(first -> second)` expression chain to single `merge(first, second)` expression.
/// If first.settings.project_input is set, then outputs of `first` must include inputs of `second`.

@ -272,7 +279,7 @@ public:
SplitResult splitActionsForFilter(const std::string & column_name) const;

private:
Node & addNode(Node node, bool can_replace = false);
Node & addNode(Node node, bool can_replace = false, bool add_to_index = true);
Node & getNode(const std::string & name);

Node & addAlias(Node & child, std::string alias, bool can_replace);

@ -280,7 +287,8 @@ private:
const FunctionOverloadResolverPtr & function,
Inputs children,
std::string result_name,
bool can_replace);
bool can_replace,
bool add_to_index = true);

ActionsDAGPtr cloneEmpty() const
{
@ -7,6 +7,8 @@
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Interpreters/ArithmeticOperationsInAgrFuncOptimize.h>

#include <Poco/String.h>

namespace DB
{

@ -89,15 +91,18 @@ const String & changeNameIfNeeded(const String & func_name, const String & child

ASTPtr tryExchangeFunctions(const ASTFunction & func)
{
static const std::unordered_map<String, std::unordered_set<String>> supported = {
{ "sum", { "multiply", "divide" } },
{ "min", { "multiply", "divide", "plus", "minus" } },
{ "max", { "multiply", "divide", "plus", "minus" } }
};
static const std::unordered_map<String, std::unordered_set<String>> supported
= {{"sum", {"multiply", "divide"}},
{"min", {"multiply", "divide", "plus", "minus"}},
{"max", {"multiply", "divide", "plus", "minus"}},
{"avg", {"multiply", "divide", "plus", "minus"}}};

/// Aggregate functions[sum|min|max|avg] is case-insensitive, so we use lower cases name
auto lower_name = Poco::toLower(func.name);

const ASTFunction * child_func = getInternalFunction(func);
if (!child_func || !child_func->arguments || child_func->arguments->children.size() != 2 ||
!supported.count(func.name) || !supported.find(func.name)->second.count(child_func->name))
if (!child_func || !child_func->arguments || child_func->arguments->children.size() != 2 || !supported.count(lower_name)
|| !supported.find(lower_name)->second.count(child_func->name))
return {};

/// Cannot rewrite function with alias cause alias could become undefined

@ -116,12 +121,12 @@ ASTPtr tryExchangeFunctions(const ASTFunction & func)
if (child_func->name == "divide")
return {};

const String & new_name = changeNameIfNeeded(func.name, child_func->name, *first_literal);
const String & new_name = changeNameIfNeeded(lower_name, child_func->name, *first_literal);
optimized_ast = exchangeExtractFirstArgument(new_name, *child_func);
}
else if (second_literal) /// second or both are consts
{
const String & new_name = changeNameIfNeeded(func.name, child_func->name, *second_literal);
const String & new_name = changeNameIfNeeded(lower_name, child_func->name, *second_literal);
optimized_ast = exchangeExtractSecondArgument(new_name, *child_func);
}

@ -11,7 +11,7 @@ class ASTFunction;
/// Extract constant arguments out of aggregate functions from child functions
/// 'sum(a * 2)' -> 'sum(a) * 2'
/// Rewrites: sum([multiply|divide]) -> [multiply|divide](sum)
/// [min|max]([multiply|divide|plus|minus]) -> [multiply|divide|plus|minus]([min|max])
/// [min|max|avg]([multiply|divide|plus|minus]) -> [multiply|divide|plus|minus]([min|max|avg])
/// TODO: groupBitAnd, groupBitOr, groupBitXor
/// TODO: better constant detection: f(const) is not detected as const.
/// TODO: 'f((2 * n) * n)' -> '2 * f(n * n)'
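The functional change in tryExchangeFunctions is twofold: avg joins the set of rewritable aggregates, and the lookup now goes through Poco::toLower(func.name), so spellings like SUM or aVg hit the map as well. A minimal sketch of that case-insensitive lookup (plain standard C++, no Poco; isRewritable is a hypothetical helper, not a ClickHouse function):

#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include <unordered_map>
#include <unordered_set>

// Same shape as the 'supported' table in tryExchangeFunctions: aggregate -> arithmetic
// child functions whose constant argument may be pulled out of the aggregate.
static const std::unordered_map<std::string, std::unordered_set<std::string>> supported
    = {{"sum", {"multiply", "divide"}},
       {"min", {"multiply", "divide", "plus", "minus"}},
       {"max", {"multiply", "divide", "plus", "minus"}},
       {"avg", {"multiply", "divide", "plus", "minus"}}};

static std::string toLower(std::string s)
{
    std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::tolower(c); });
    return s;
}

// Would the rewrite aggregate(child(x, const)) -> child(aggregate(x), const) apply?
static bool isRewritable(const std::string & aggregate_name, const std::string & child_name)
{
    auto it = supported.find(toLower(aggregate_name));  // aggregate names are case-insensitive
    return it != supported.end() && it->second.count(child_name) != 0;
}

int main()
{
    std::cout << std::boolalpha;
    std::cout << isRewritable("SUM", "multiply") << '\n';  // true: SUM(a * 2) -> SUM(a) * 2
    std::cout << isRewritable("aVg", "plus") << '\n';      // true: avg is now in the table
    std::cout << isRewritable("sum", "plus") << '\n';      // false: sum(a + 2) is not rewritten
    return 0;
}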
@ -331,7 +331,7 @@ struct ContextShared
mutable std::optional<ExternalModelsLoader> external_models_loader;
String default_profile_name; /// Default profile name used for default values.
String system_profile_name; /// Profile used by system processes
AccessControlManager access_control_manager;
std::unique_ptr<AccessControlManager> access_control_manager;
mutable UncompressedCachePtr uncompressed_cache; /// The cache of decompressed blocks.
mutable MarkCachePtr mark_cache; /// Cache of marks in compressed files.
ProcessList process_list; /// Executing queries at the moment.

@ -388,7 +388,8 @@ struct ContextShared
Context::ConfigReloadCallback config_reload_callback;

ContextShared()
: macros(std::make_unique<Macros>())
: access_control_manager(std::make_unique<AccessControlManager>())
, macros(std::make_unique<Macros>())
{
/// TODO: make it singleton (?)
static std::atomic<size_t> num_calls{0};

@ -434,6 +435,7 @@ struct ContextShared
/// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference).
/// TODO: Get rid of this.

access_control_manager.reset();
system_logs.reset();
embedded_dictionaries.reset();
external_dictionaries_loader.reset();

@ -640,7 +642,7 @@ void Context::setConfig(const ConfigurationPtr & config)
{
auto lock = getLock();
shared->config = config;
shared->access_control_manager.setExternalAuthenticatorsConfig(*shared->config);
shared->access_control_manager->setExternalAuthenticatorsConfig(*shared->config);
}

const Poco::Util::AbstractConfiguration & Context::getConfigRef() const

@ -652,25 +654,25 @@ const Poco::Util::AbstractConfiguration & Context::getConfigRef() const

AccessControlManager & Context::getAccessControlManager()
{
return shared->access_control_manager;
return *shared->access_control_manager;
}

const AccessControlManager & Context::getAccessControlManager() const
{
return shared->access_control_manager;
return *shared->access_control_manager;
}

void Context::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config)
{
auto lock = getLock();
shared->access_control_manager.setExternalAuthenticatorsConfig(config);
shared->access_control_manager->setExternalAuthenticatorsConfig(config);
}

void Context::setUsersConfig(const ConfigurationPtr & config)
{
auto lock = getLock();
shared->users_config = config;
shared->access_control_manager.setUsersConfig(*shared->users_config);
shared->access_control_manager->setUsersConfig(*shared->users_config);
}

ConfigurationPtr Context::getUsersConfig()

@ -1138,12 +1140,6 @@ String Context::getCurrentDatabase() const
}


String Context::getCurrentQueryId() const
{
return client_info.current_query_id;
}


String Context::getInitialQueryId() const
{
return client_info.initial_query_id;
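Switching ContextShared::access_control_manager from a plain member to a std::unique_ptr lets shutdown code reset() it explicitly, in line with the existing comment about preemptive destruction of members that may hold cyclic references back to ContextShared. A small self-contained sketch of that pattern (the names SharedState and Manager are illustrative, not ClickHouse types):

#include <iostream>
#include <memory>

struct Manager
{
    ~Manager() { std::cout << "Manager destroyed\n"; }
};

struct SharedState
{
    // Held through a unique_ptr so it can be destroyed *before* the remaining members,
    // mirroring ContextShared::access_control_manager after this change.
    std::unique_ptr<Manager> manager = std::make_unique<Manager>();

    void shutdown()
    {
        // Preemptive destruction: release objects that may reference SharedState
        // before the normal reverse-declaration-order member destruction runs.
        manager.reset();
        std::cout << "remaining members released later\n";
    }

    Manager & getManager() { return *manager; }  // callers now dereference, as in getAccessControlManager()
};

int main()
{
    SharedState state;
    state.shutdown();  // prints "Manager destroyed" first, then the follow-up line
    return 0;
}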
@ -441,7 +441,7 @@ public:
StoragePtr getViewSource();

String getCurrentDatabase() const;
String getCurrentQueryId() const;
String getCurrentQueryId() const { return client_info.current_query_id; }

/// Id of initiating query for distributed queries; or current query id if it's not a distributed query.
String getInitialQueryId() const;
@ -610,12 +610,14 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, Exec
ReadBufferFromString istr(query_to_execute);
String dummy_string;
WriteBufferFromString ostr(dummy_string);
std::optional<CurrentThread::QueryScope> query_scope;

try
{
auto current_context = std::make_unique<Context>(context);
current_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
current_context->setCurrentQueryId(""); // generate random query_id
query_scope.emplace(*current_context);
executeQuery(istr, ostr, false, *current_context, {});
}
catch (...)

@ -632,20 +634,6 @@ bool DDLWorker::tryExecuteQuery(const String & query, const DDLTask & task, Exec
return true;
}

void DDLWorker::attachToThreadGroup()
{
if (thread_group)
{
/// Put all threads to one thread pool
CurrentThread::attachToIfDetached(thread_group);
}
else
{
CurrentThread::initializeQuery();
thread_group = CurrentThread::getGroup();
}
}


void DDLWorker::enqueueTask(DDLTaskPtr task_ptr)
{

@ -1148,8 +1136,6 @@ void DDLWorker::runMainThread()
{
try
{
attachToThreadGroup();

cleanup_event->set();
scheduleTasks();

@ -1217,7 +1203,7 @@ void DDLWorker::runCleanupThread()
}


class DDLQueryStatusInputStream : public IBlockInputStream
class DDLQueryStatusInputStream final : public IBlockInputStream
{
public:

@ -162,8 +162,6 @@ private:
void runMainThread();
void runCleanupThread();

void attachToThreadGroup();

private:
Context context;
Poco::Logger * log;

@ -196,8 +194,6 @@ private:
/// How many tasks could be in the queue
size_t max_tasks_in_queue = 1000;

ThreadGroupStatusPtr thread_group;

std::atomic<UInt64> max_id = 0;

friend class DDLQueryStatusInputStream;
@ -37,7 +37,7 @@ void DNSCacheUpdater::run()
* - automatically throttle when DNS requests take longer time;
* - add natural randomization on huge clusters - avoid sending all requests at the same moment of time from different servers.
*/
task_handle->scheduleAfter(update_period_seconds * 1000);
task_handle->scheduleAfter(size_t(update_period_seconds) * 1000);
}

void DNSCacheUpdater::start()
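The only change here is the size_t(update_period_seconds) * 1000 cast, presumably so the multiplication is carried out in a wide unsigned type rather than a narrower signed one, where a large reload period could overflow. A tiny illustration of the difference (the variable name mirrors the call site in the diff; the Int32 width is an assumption about its declared type):

#include <cstddef>
#include <cstdint>
#include <iostream>

int main()
{
    int32_t update_period_seconds = 3000000;  // ~34 days; large but representable as Int32

    // 32-bit multiplication: 3'000'000 * 1000 does not fit into int32_t (signed overflow, UB in C++).
    // int32_t risky = update_period_seconds * 1000;

    // Widening first, as in the patched call, keeps the arithmetic well-defined.
    size_t safe = size_t(update_period_seconds) * 1000;
    std::cout << safe << " ms\n";  // 3000000000 ms
    return 0;
}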
@ -96,7 +96,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr

ASTPtr subquery_select = subquery.children.at(0);

auto options = SelectQueryOptions(QueryProcessingStage::Complete, data.subquery_depth + 1);
auto options = SelectQueryOptions(QueryProcessingStage::Complete, data.subquery_depth + 1, true);
options.analyze(data.only_analyze);

auto interpreter = InterpreterSelectWithUnionQuery(subquery_select, subquery_context, options);

@ -12,7 +12,6 @@
#include <Core/Block.h>
#include <Core/NamesAndTypes.h>
#include <Databases/IDatabase.h>
#include <Storages/StorageMemory.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/IdentifierSemantic.h>
Some files were not shown because too many files have changed in this diff.