Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-23 08:02:02 +00:00)

Commit 0aca40d8cb: Merge branch 'master' into in_memory_raft

.gitmodules (vendored), 2 changes
@@ -184,7 +184,7 @@
 	url = https://github.com/ClickHouse-Extras/krb5
 [submodule "contrib/cyrus-sasl"]
 	path = contrib/cyrus-sasl
-	url = https://github.com/cyrusimap/cyrus-sasl
+	url = https://github.com/ClickHouse-Extras/cyrus-sasl
 	branch = cyrus-sasl-2.1
 [submodule "contrib/croaring"]
 	path = contrib/croaring
@@ -31,7 +31,7 @@ static void *volatile vdso_func = (void *)getcpu_init;
 int sched_getcpu(void)
 {
 	int r;
-	unsigned cpu;
+	unsigned cpu = 0;
 
 #ifdef VDSO_GETCPU_SYM
 	getcpu_f f = (getcpu_f)vdso_func;
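The only functional change above is initializing `cpu`. A tiny standalone sketch (hypothetical code, not the compat source itself) of the uninitialized read this guards against when the vDSO `getcpu` path is unavailable or fails:

``` cpp
#include <cstdio>

using getcpu_f = int (*)(unsigned * cpu, unsigned * node, void * unused);

/// Sketch of sched_getcpu(): with `cpu` left uninitialized, a missing or
/// failing vDSO call could let the function hand back stack garbage.
static int sched_getcpu_sketch(getcpu_f vdso_getcpu)
{
    unsigned cpu = 0;                                   /// safe default if nothing fills it in
    if (vdso_getcpu)
        (void)vdso_getcpu(&cpu, nullptr, nullptr);      /// may fail and leave cpu untouched
    return static_cast<int>(cpu);
}

int main()
{
    std::printf("%d\n", sched_getcpu_sketch(nullptr));  /// prints 0, not garbage
}
```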
@@ -116,8 +116,8 @@ void Connection::connect(const char* db,
     if (!mysql_real_connect(driver.get(), server, user, password, db, port, ifNotEmpty(socket), driver->client_flag))
         throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
 
-    /// Sets UTF-8 as default encoding.
-    if (mysql_set_character_set(driver.get(), "UTF8"))
+    /// Sets UTF-8 as default encoding. See https://mariadb.com/kb/en/mysql_set_character_set/
+    if (mysql_set_character_set(driver.get(), "utf8mb4"))
         throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
 
     is_connected = true;
@@ -1,9 +1,9 @@
 # This strings autochanged from release_lib.sh:
-SET(VERSION_REVISION 54447)
+SET(VERSION_REVISION 54448)
 SET(VERSION_MAJOR 21)
-SET(VERSION_MINOR 2)
+SET(VERSION_MINOR 3)
 SET(VERSION_PATCH 1)
-SET(VERSION_GITHASH 53d0c9fa7255aa1dc48991d19f4246ff71cc2fd7)
-SET(VERSION_DESCRIBE v21.2.1.1-prestable)
-SET(VERSION_STRING 21.2.1.1)
+SET(VERSION_GITHASH ef72ba7349f230321750c13ee63b49a11a7c0adc)
+SET(VERSION_DESCRIBE v21.3.1.1-prestable)
+SET(VERSION_STRING 21.3.1.1)
 # end of autochange
contrib/hyperscan (vendored), 2 changes
@@ -1 +1 @@
-Subproject commit 3907fd00ee8b2538739768fa9533f8635a276531
+Subproject commit e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa
contrib/poco (vendored), 2 changes
@@ -1 +1 @@
-Subproject commit 2c32e17c7dfee1f8bf24227b697cdef5fddf0823
+Subproject commit e11f3c971570cf6a31006cd21cadf41a259c360a
debian/changelog (vendored), 4 changes
@@ -1,5 +1,5 @@
-clickhouse (21.2.1.1) unstable; urgency=low
+clickhouse (21.3.1.1) unstable; urgency=low
 
   * Modified source code
 
- -- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 11 Jan 2021 11:12:08 +0300
+ -- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 01 Feb 2021 12:50:53 +0300
@@ -1,7 +1,7 @@
 FROM ubuntu:18.04
 
 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
-ARG version=21.2.1.*
+ARG version=21.3.1.*
 
 RUN apt-get update \
     && apt-get install --yes --no-install-recommends \
@@ -1,7 +1,7 @@
 FROM ubuntu:20.04
 
 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
-ARG version=21.2.1.*
+ARG version=21.3.1.*
 ARG gosu_ver=1.10
 
 # user/group precreated explicitly with fixed uid/gid on purpose.
@@ -1,7 +1,7 @@
 FROM ubuntu:18.04
 
 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
-ARG version=21.2.1.*
+ARG version=21.3.1.*
 
 RUN apt-get update && \
     apt-get install -y apt-transport-https dirmngr && \
@@ -253,8 +253,12 @@ function run_tests
             00701_rollup
             00834_cancel_http_readonly_queries_on_client_close
             00911_tautological_compare
+
+            # Hyperscan
             00926_multimatch
             00929_multi_match_edit_distance
+            01681_hyperscan_debug_assertion
+
             01031_mutations_interpreter_and_context
             01053_ssd_dictionary # this test mistakenly requires acces to /var/lib/clickhouse -- can't run this locally, disabled
             01083_expressions_in_engine_arguments
@@ -1,12 +1,16 @@
 # docker build -t yandex/clickhouse-style-test .
 FROM ubuntu:20.04
 
-RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip python3-pytest && pip3 install codespell
+RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip pylint && pip3 install codespell
 
 
+# For |& syntax
+SHELL ["bash", "-c"]
+
 CMD cd /ClickHouse/utils/check-style && \
-    ./check-style -n | tee /test_output/style_output.txt && \
-    ./check-typos | tee /test_output/typos_output.txt && \
-    ./check-whitespaces -n | tee /test_output/whitespaces_output.txt && \
-    ./check-duplicate-includes.sh | tee /test_output/duplicate_output.txt && \
-    ./shellcheck-run.sh | tee /test_output/shellcheck_output.txt
+    ./check-style -n |& tee /test_output/style_output.txt && \
+    ./check-typos |& tee /test_output/typos_output.txt && \
+    ./check-whitespaces -n |& tee /test_output/whitespaces_output.txt && \
+    ./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt && \
+    ./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt && \
+    true
@@ -1944,6 +1944,21 @@ Possible values:
 
 Default value: 16.
 
+## background_message_broker_schedule_pool_size {#background_message_broker_schedule_pool_size}
+
+Sets the number of threads performing background tasks for message streaming. This setting is applied at the ClickHouse server start and can’t be changed in a user session.
+
+Possible values:
+
+- Any positive integer.
+
+Default value: 16.
+
+**See Also**
+
+- [Kafka](../../engines/table-engines/integrations/kafka.md#kafka) engine
+- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine) engine
+
 ## validate_polygons {#validate_polygons}
 
 Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo/index.md#pointinpolygon) function, if the polygon is self-intersecting or self-tangent.
@@ -241,7 +241,7 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN)
 
 **Parameters**
 
-- `window` — Length of the sliding window in seconds.
+- `window` — Length of the sliding window. The unit of `window` depends on the timestamp itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`.
 - `mode` - It is an optional argument.
     - `'strict'` - When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values.
 - `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, it’s value can’t exceed the Int64 maximum, which is 2^63 - 1).
@ -11,7 +11,7 @@ Key length depends on encryption mode. It is 16, 24, and 32 bytes long for `-128
|
||||
|
||||
Initialization vector length is always 16 bytes (bytes in excess of 16 are ignored).
|
||||
|
||||
Note that these functions work slowly.
|
||||
Note that these functions work slowly until ClickHouse 21.1.
|
||||
|
||||
## encrypt {#encrypt}
|
||||
|
||||
@ -41,7 +41,7 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad])
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
|
||||
- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Examples**
|
||||
|
||||
@ -52,57 +52,38 @@ Query:
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
`comment` String,
|
||||
`secret` String
|
||||
)
|
||||
ENGINE = Memory
|
||||
```
|
||||
|
||||
Insert this data:
|
||||
Insert some data (avoid storing keys or IVs in the database, as this undermines the whole concept of encryption); storing 'hints' is also unsafe and is done here only for illustrative purposes:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
INSERT INTO encryption_test VALUES('aes-256-cfb128 no IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212')),\
|
||||
('aes-256-cfb128 no IV, different key', encrypt('aes-256-cfb128', 'Secret', 'keykeykeykeykeykeykeykeykeykeyke')),\
|
||||
('aes-256-cfb128 with IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')),\
|
||||
('aes-256-cbc no IV', encrypt('aes-256-cbc', 'Secret', '12345678910121314151617181920212'));
|
||||
```
|
||||
|
||||
Example without `iv`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test;
|
||||
SELECT comment, hex(secret) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐
|
||||
│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │
|
||||
│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │
|
||||
│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │
|
||||
└─────────────┴──────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Example with `iv`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐
|
||||
│ aes-256-ctr │ │
|
||||
│ aes-256-ctr │ 7FB039F7 │
|
||||
│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │
|
||||
└─────────────┴───────────────────────────────────────────────┘
|
||||
┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
|
||||
│ aes-256-cfb128 no IV │ B4972BDC4459 │
|
||||
│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │
|
||||
│ aes-256-cfb128 with IV │ 5E6CB398F653 │
|
||||
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
|
||||
└─────────────────────────────────────┴──────────────────────────────────┘
|
||||
```
|
||||
|
||||
Example with `-gcm`:
|
||||
@ -110,40 +91,26 @@ Example with `-gcm`:
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
|
||||
INSERT INTO encryption_test VALUES('aes-256-gcm', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')), \
|
||||
('aes-256-gcm with AAD', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv', 'aad'));
|
||||
|
||||
SELECT comment, hex(secret) FROM encryption_test WHERE comment LIKE '%gcm%';
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐
|
||||
│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │
|
||||
│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │
|
||||
│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │
|
||||
└─────────────┴────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Example with `-gcm` mode and with `aad`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐
|
||||
│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB │
|
||||
│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447 │
|
||||
│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │
|
||||
└─────────────┴────────────────────────────────────────────────────────────────────────┘
|
||||
┌─comment──────────────┬─hex(secret)──────────────────────────────────┐
|
||||
│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
|
||||
│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
|
||||
└──────────────────────┴──────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## aes_encrypt_mysql {#aes_encrypt_mysql}
|
||||
|
||||
Compatible with mysql encryption and can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
|
||||
Compatible with mysql encryption and resulting ciphertext can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
|
||||
|
||||
Will produce same ciphertext as `encrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_encrypt_mysql` will stick to what MySQL's `aes_encrypt` does: 'fold' `key` and ignore excess bits of `IV`.
|
||||
|
||||
Supported encryption modes:
|
||||
|
||||
@ -156,7 +123,7 @@ Supported encryption modes:
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
``` sql
|
||||
aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
|
||||
```
|
||||
|
||||
@ -164,78 +131,98 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
|
||||
|
||||
- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Optinal. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Optional, only the first 16 bytes are taken into account. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
|
||||
- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
|
||||
**Examples**
|
||||
|
||||
Create this table:
|
||||
Given equal input `encrypt` and `aes_encrypt_mysql` produce the same ciphertext:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
SELECT encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') = aes_encrypt_mysql('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') AS ciphertexts_equal;
|
||||
```
|
||||
|
||||
Insert this data:
|
||||
Result:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
┌─ciphertexts_equal─┐
|
||||
│ 1 │
|
||||
└───────────────────┘
|
||||
```
|
||||
|
||||
Example without `iv`:
|
||||
|
||||
But `encrypt` fails when `key` or `iv` is longer than expected:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test;
|
||||
SELECT encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐
|
||||
│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │
|
||||
│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │
|
||||
│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │
|
||||
└─────────────┴──────────────────────────────────────────────────────────────────┘
|
||||
Received exception from server (version 21.1.2):
|
||||
Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123').
|
||||
```
|
||||
|
||||
Example with `iv`:
|
||||
While `aes_encrypt_mysql` produces MySQL-compatible output:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test;
|
||||
SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123')) AS ciphertext;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─ciphertext───┐
|
||||
│ 24E9E4966469 │
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
Notice how supplying an even longer `IV` produces the same result:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456')) AS ciphertext
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐
|
||||
│ aes-256-cfb128 │ │
|
||||
│ aes-256-cfb128 │ 7FB039F7 │
|
||||
│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F │
|
||||
└────────────────┴────────────────────────────────────────────────────────────┘
|
||||
┌─ciphertext───┐
|
||||
│ 24E9E4966469 │
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
Which is binary equal to what MySQL produces on same inputs:
|
||||
|
||||
``` sql
|
||||
mysql> SET block_encryption_mode='aes-256-cfb128';
|
||||
Query OK, 0 rows affected (0.00 sec)
|
||||
|
||||
mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
|
||||
+------------------------+
|
||||
| ciphertext |
|
||||
+------------------------+
|
||||
| 0x24E9E4966469 |
|
||||
+------------------------+
|
||||
1 row in set (0.00 sec)
|
||||
```
|
||||
|
||||
## decrypt {#decrypt}
|
||||
|
||||
This function decrypts data using these modes:
|
||||
This function decrypts ciphertext into a plaintext using these modes:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
- aes-128-cbc, aes-192-cbc, aes-256-cbc
|
||||
@ -247,7 +234,7 @@ This function decrypts data using these modes:
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
``` sql
|
||||
decrypt('mode', 'ciphertext', 'key' [, iv, aad])
|
||||
```
|
||||
|
||||
@ -265,51 +252,56 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad])
|
||||
|
||||
**Examples**
|
||||
|
||||
Create this table:
|
||||
Re-using table from [encrypt](./encryption-functions.md#encrypt).
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
|
||||
Insert this data:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
|
||||
SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test;
|
||||
SELECT comment, hex(secret) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐
|
||||
│ aes-128-ecb │ │
|
||||
│ aes-128-ecb │ text │
|
||||
│ aes-128-ecb │ What Is ClickHouse? │
|
||||
└─────────────┴─────────────────────────────────────────────────────────────────────┘
|
||||
``` text
|
||||
┌─comment──────────────┬─hex(secret)──────────────────────────────────┐
|
||||
│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
|
||||
│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
|
||||
└──────────────────────┴──────────────────────────────────────────────┘
|
||||
┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
|
||||
│ aes-256-cfb128 no IV │ B4972BDC4459 │
|
||||
│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │
|
||||
│ aes-256-cfb128 with IV │ 5E6CB398F653 │
|
||||
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
|
||||
└─────────────────────────────────────┴──────────────────────────────────┘
|
||||
```
|
||||
|
||||
Now let's try to decrypt all that data.
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920212') as plaintext FROM encryption_test
|
||||
```
|
||||
|
||||
Result:
|
||||
``` text
┌─comment─────────────────────────────┬─plaintext──────┐
│ aes-256-cfb128 no IV                │ Secret         │
│ aes-256-cfb128 no IV, different key │ �4�            │
│ aes-256-cfb128 with IV              │ ���6�~         │
│ aes-256-cbc no IV                   │ �2*4�h3c�4w��@ │
└─────────────────────────────────────┴────────────────┘
```
|
||||
|
||||
Notice how only a portion of the data was properly decrypted; the rest is gibberish because the `mode`, `key`, or `iv` was different at encryption time.
|
||||
|
||||
## aes_decrypt_mysql {#aes_decrypt_mysql}
|
||||
|
||||
Compatible with mysql encryption and decrypts data encrypted with [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function.
|
||||
|
||||
Will produce same plaintext as `decrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_decrypt_mysql` will stick to what MySQL's `aes_decrypt` does: 'fold' `key` and ignore excess bits of `IV`.
|
||||
|
||||
Supported decryption modes:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
@ -321,7 +313,7 @@ Supported decryption modes:
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
``` sql
|
||||
aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
|
||||
```
|
||||
|
||||
@ -338,44 +330,30 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
|
||||
|
||||
**Examples**
|
||||
|
||||
Create this table:
|
||||
|
||||
Query:
|
||||
|
||||
Let's decrypt data we've previously encrypted with MySQL:
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
mysql> SET block_encryption_mode='aes-256-cfb128';
|
||||
Query OK, 0 rows affected (0.00 sec)
|
||||
|
||||
Insert this data:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
|
||||
+------------------------+
|
||||
| ciphertext |
|
||||
+------------------------+
|
||||
| 0x24E9E4966469 |
|
||||
+------------------------+
|
||||
1 row in set (0.00 sec)
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test;
|
||||
SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐
|
||||
│ aes-128-cbc │ │
|
||||
│ aes-128-cbc │ text │
|
||||
│ aes-128-cbc │ What Is ClickHouse? │
|
||||
└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘
|
||||
┌─plaintext─┐
|
||||
│ Secret │
|
||||
└───────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/encryption_functions/) <!--hide-->
|
||||
|
@ -116,8 +116,19 @@ LIMIT 10
|
||||
## IPv6StringToNum(s) {#ipv6stringtonums}
|
||||
|
||||
The reverse function of IPv6NumToString. If the IPv6 address has an invalid format, it returns a string of null bytes.
|
||||
If the IP address is a valid IPv4 address then the IPv6 equivalent of the IPv4 address is returned.
|
||||
HEX can be uppercase or lowercase.
|
||||
|
||||
``` sql
|
||||
SELECT cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0);
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0)─┐
|
||||
│ ::ffff:127.0.0.1 │
|
||||
└─────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## IPv4ToIPv6(x) {#ipv4toipv6x}
|
||||
|
||||
Takes a `UInt32` number. Interprets it as an IPv4 address in [big endian](https://en.wikipedia.org/wiki/Endianness). Returns a `FixedString(16)` value containing the IPv6 address in binary format. Examples:
|
||||
@ -214,6 +225,7 @@ SELECT
|
||||
## toIPv6(string) {#toipv6string}
|
||||
|
||||
An alias to `IPv6StringToNum()` that takes a string form of IPv6 address and returns value of [IPv6](../../sql-reference/data-types/domains/ipv6.md) type, which is binary equal to value returned by `IPv6StringToNum()`.
|
||||
If the IP address is a valid IPv4 address then the IPv6 equivalent of the IPv4 address is returned.
|
||||
|
||||
``` sql
|
||||
WITH
|
||||
@ -243,6 +255,15 @@ SELECT
|
||||
└───────────────────────────────────┴──────────────────────────────────┘
|
||||
```
|
||||
|
||||
``` sql
|
||||
SELECT toIPv6('127.0.0.1')
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─toIPv6('127.0.0.1')─┐
|
||||
│ ::ffff:127.0.0.1 │
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
## isIPv4String
|
||||
|
||||
|
@ -62,12 +62,12 @@ public:
|
||||
bool randomize_, size_t max_iterations_, double max_time_,
|
||||
const String & json_path_, size_t confidence_,
|
||||
const String & query_id_, const String & query_to_execute_, bool continue_on_errors_,
|
||||
bool print_stacktrace_, const Settings & settings_)
|
||||
bool reconnect_, bool print_stacktrace_, const Settings & settings_)
|
||||
:
|
||||
concurrency(concurrency_), delay(delay_), queue(concurrency), randomize(randomize_),
|
||||
cumulative(cumulative_), max_iterations(max_iterations_), max_time(max_time_),
|
||||
json_path(json_path_), confidence(confidence_), query_id(query_id_),
|
||||
query_to_execute(query_to_execute_), continue_on_errors(continue_on_errors_),
|
||||
query_to_execute(query_to_execute_), continue_on_errors(continue_on_errors_), reconnect(reconnect_),
|
||||
print_stacktrace(print_stacktrace_), settings(settings_),
|
||||
shared_context(Context::createShared()), global_context(Context::createGlobal(shared_context.get())),
|
||||
pool(concurrency)
|
||||
@ -155,6 +155,7 @@ private:
|
||||
String query_id;
|
||||
String query_to_execute;
|
||||
bool continue_on_errors;
|
||||
bool reconnect;
|
||||
bool print_stacktrace;
|
||||
const Settings & settings;
|
||||
SharedContextHolder shared_context;
|
||||
@ -404,9 +405,14 @@ private:
|
||||
void execute(EntryPtrs & connection_entries, Query & query, size_t connection_index)
|
||||
{
|
||||
Stopwatch watch;
|
||||
|
||||
Connection & connection = **connection_entries[connection_index];
|
||||
|
||||
if (reconnect)
|
||||
connection.disconnect();
|
||||
|
||||
RemoteBlockInputStream stream(
|
||||
*(*connection_entries[connection_index]),
|
||||
query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage);
|
||||
connection, query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage);
|
||||
if (!query_id.empty())
|
||||
stream.setQueryId(query_id);
|
||||
|
||||
@ -589,6 +595,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
|
||||
("confidence", value<size_t>()->default_value(5), "set the level of confidence for T-test [0=80%, 1=90%, 2=95%, 3=98%, 4=99%, 5=99.5%(default)")
|
||||
("query_id", value<std::string>()->default_value(""), "")
|
||||
("continue_on_errors", "continue testing even if a query fails")
|
||||
("reconnect", "establish new connection for every query")
|
||||
;
|
||||
|
||||
Settings settings;
|
||||
@ -638,7 +645,8 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
|
||||
options["confidence"].as<size_t>(),
|
||||
options["query_id"].as<std::string>(),
|
||||
options["query"].as<std::string>(),
|
||||
options.count("continue_on_errors") > 0,
|
||||
options.count("continue_on_errors"),
|
||||
options.count("reconnect"),
|
||||
print_stacktrace,
|
||||
settings);
|
||||
return benchmark.run();
|
||||
|
@ -513,7 +513,7 @@ private:
|
||||
}
|
||||
|
||||
protected:
|
||||
void extractColumns(const IColumn ** columns, const IColumn ** aggr_columns) const
|
||||
ssize_t extractColumns(const IColumn ** columns, const IColumn ** aggr_columns, ssize_t if_argument_pos) const
|
||||
{
|
||||
if (tuple_argument)
|
||||
{
|
||||
@ -526,6 +526,13 @@ protected:
|
||||
for (size_t i = 0; i < args_count; ++i)
|
||||
columns[i] = aggr_columns[i];
|
||||
}
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
columns[args_count] = aggr_columns[if_argument_pos];
|
||||
return args_count;
|
||||
}
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
bool tuple_argument;
|
||||
@ -551,8 +558,8 @@ public:
|
||||
Arena * arena,
|
||||
ssize_t if_argument_pos = -1) const override
|
||||
{
|
||||
const IColumn * ex_columns[args_count];
|
||||
extractColumns(ex_columns, columns);
|
||||
const IColumn * ex_columns[args_count + (if_argument_pos >= 0)];
|
||||
if_argument_pos = extractColumns(ex_columns, columns, if_argument_pos);
|
||||
|
||||
Base::addBatch(batch_size, places, place_offset, ex_columns, arena, if_argument_pos);
|
||||
}
|
||||
@ -560,8 +567,8 @@ public:
|
||||
void addBatchSinglePlace(
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1) const override
|
||||
{
|
||||
const IColumn * ex_columns[args_count];
|
||||
extractColumns(ex_columns, columns);
|
||||
const IColumn * ex_columns[args_count + (if_argument_pos >= 0)];
|
||||
if_argument_pos = extractColumns(ex_columns, columns, if_argument_pos);
|
||||
|
||||
Base::addBatchSinglePlace(batch_size, place, ex_columns, arena, if_argument_pos);
|
||||
}
|
||||
@ -574,8 +581,8 @@ public:
|
||||
Arena * arena,
|
||||
ssize_t if_argument_pos = -1) const override
|
||||
{
|
||||
const IColumn * ex_columns[args_count];
|
||||
extractColumns(ex_columns, columns);
|
||||
const IColumn * ex_columns[args_count + (if_argument_pos >= 0)];
|
||||
if_argument_pos = extractColumns(ex_columns, columns, if_argument_pos);
|
||||
|
||||
Base::addBatchSinglePlaceNotNull(batch_size, place, ex_columns, null_map, arena, if_argument_pos);
|
||||
}
|
||||
@ -584,8 +591,8 @@ public:
|
||||
size_t batch_begin, size_t batch_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1)
|
||||
const override
|
||||
{
|
||||
const IColumn * ex_columns[args_count];
|
||||
extractColumns(ex_columns, columns);
|
||||
const IColumn * ex_columns[args_count + (if_argument_pos >= 0)];
|
||||
if_argument_pos = extractColumns(ex_columns, columns, if_argument_pos);
|
||||
|
||||
Base::addBatchSinglePlaceFromInterval(batch_begin, batch_end, place, ex_columns, arena, if_argument_pos);
|
||||
}
|
||||
@ -595,7 +602,7 @@ public:
|
||||
const override
|
||||
{
|
||||
const IColumn * ex_columns[args_count];
|
||||
extractColumns(ex_columns, columns);
|
||||
extractColumns(ex_columns, columns, -1);
|
||||
|
||||
Base::addBatchArray(batch_size, places, place_offset, ex_columns, offsets, arena);
|
||||
}
|
||||
@ -610,7 +617,7 @@ public:
|
||||
Arena * arena) const override
|
||||
{
|
||||
const IColumn * ex_columns[args_count];
|
||||
extractColumns(ex_columns, columns);
|
||||
extractColumns(ex_columns, columns, -1);
|
||||
|
||||
Base::addBatchLookupTable8(batch_size, map, place_offset, init, key, ex_columns, arena);
|
||||
}
|
||||
|
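The hunks above change `extractColumns` so that, for this combinator's batch methods, the optional condition column referenced by `if_argument_pos` is appended after the regular argument columns and its new index is returned to the caller. A condensed standalone sketch (hypothetical stand-in types, not the real IAggregateFunction interface) of that contract:

``` cpp
#include <cassert>
#include <cstddef>
#include <sys/types.h>  // ssize_t (POSIX)
#include <vector>

/// Hypothetical stand-in for IColumn; only object identity matters here.
struct Column {};

/// Copy the argument columns and, if a condition column is present
/// (if_argument_pos >= 0), append it and return its new position so the
/// nested addBatch* call can find it; return -1 when there is no condition.
ssize_t extract_columns(std::vector<const Column *> & out,
                        const Column * const * aggr_columns, size_t args_count,
                        ssize_t if_argument_pos)
{
    out.assign(aggr_columns, aggr_columns + args_count);
    if (if_argument_pos >= 0)
    {
        out.push_back(aggr_columns[if_argument_pos]);
        return static_cast<ssize_t>(args_count);
    }
    return -1;
}

int main()
{
    Column a, b, cond;
    const Column * columns[] = {&a, &b, &cond};

    std::vector<const Column *> ex;
    ssize_t new_pos = extract_columns(ex, columns, 2, 2);  /// 2 arguments + condition at index 2

    assert(new_pos == 2 && ex.size() == 3 && ex[2] == &cond);
    return 0;
}
```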
@ -191,6 +191,7 @@ add_object_library(clickhouse_processors_sources Processors/Sources)
|
||||
add_object_library(clickhouse_processors_merges Processors/Merges)
|
||||
add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Algorithms)
|
||||
add_object_library(clickhouse_processors_queryplan Processors/QueryPlan)
|
||||
add_object_library(clickhouse_processors_queryplan_optimizations Processors/QueryPlan/Optimizations)
|
||||
add_object_library(clickhouse_coordination Coordination)
|
||||
|
||||
set (DBMS_COMMON_LIBRARIES)
|
||||
|
@ -7,8 +7,10 @@
|
||||
#include <atomic>
|
||||
#include <Poco/Net/StreamSocket.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Common/ShellCommand.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteBufferFromFileDescriptor.h>
|
||||
#include <IO/copyData.h>
|
||||
|
||||
|
||||
/** In a loop it connects to the server and immediately breaks the connection.
|
||||
@ -18,22 +20,26 @@
|
||||
int main(int argc, char ** argv)
|
||||
try
|
||||
{
|
||||
using namespace DB;
|
||||
|
||||
size_t num_iterations = 1;
|
||||
size_t num_threads = 1;
|
||||
std::string host = "localhost";
|
||||
uint16_t port = 9000;
|
||||
|
||||
if (argc >= 2)
|
||||
num_iterations = DB::parse<size_t>(argv[1]);
|
||||
num_iterations = parse<size_t>(argv[1]);
|
||||
|
||||
if (argc >= 3)
|
||||
num_threads = DB::parse<size_t>(argv[2]);
|
||||
num_threads = parse<size_t>(argv[2]);
|
||||
|
||||
if (argc >= 4)
|
||||
host = argv[3];
|
||||
|
||||
if (argc >= 5)
|
||||
port = DB::parse<uint16_t>(argv[4]);
|
||||
port = parse<uint16_t>(argv[4]);
|
||||
|
||||
WriteBufferFromFileDescriptor out(STDERR_FILENO);
|
||||
|
||||
std::atomic_bool cancel{false};
|
||||
std::vector<std::thread> threads(num_threads);
|
||||
@ -45,44 +51,32 @@ try
|
||||
{
|
||||
std::cerr << ".";
|
||||
|
||||
Poco::Net::SocketAddress address(host, port);
|
||||
|
||||
int fd = socket(PF_INET, SOCK_STREAM, IPPROTO_IP);
|
||||
|
||||
if (fd < 0)
|
||||
DB::throwFromErrno("Cannot create socket", 0);
|
||||
|
||||
linger linger_value;
|
||||
linger_value.l_onoff = 1;
|
||||
linger_value.l_linger = 0;
|
||||
|
||||
if (0 != setsockopt(fd, SOL_SOCKET, SO_LINGER, &linger_value, sizeof(linger_value)))
|
||||
DB::throwFromErrno("Cannot set linger", 0);
|
||||
|
||||
try
|
||||
{
|
||||
Stopwatch watch;
|
||||
Poco::Net::SocketAddress address(host, port);
|
||||
Poco::Net::StreamSocket socket;
|
||||
//socket.setLinger(1, 0);
|
||||
|
||||
int res = connect(fd, address.addr(), address.length());
|
||||
|
||||
if (res != 0 && errno != EINPROGRESS && errno != EWOULDBLOCK)
|
||||
socket.connectNB(address);
|
||||
if (!socket.poll(Poco::Timespan(1000000),
|
||||
Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_WRITE | Poco::Net::Socket::SELECT_ERROR))
|
||||
{
|
||||
close(fd);
|
||||
DB::throwFromErrno("Cannot connect", 0);
|
||||
}
|
||||
/// Allow to debug the server.
|
||||
/* auto command = ShellCommand::execute("kill -STOP $(pidof clickhouse-server)");
|
||||
copyData(command->err, out);
|
||||
copyData(command->out, out);
|
||||
command->wait();*/
|
||||
|
||||
close(fd);
|
||||
|
||||
if (watch.elapsedSeconds() > 0.1)
|
||||
{
|
||||
std::cerr << watch.elapsedSeconds() << "\n";
|
||||
cancel = true;
|
||||
break;
|
||||
std::cerr << "Timeout\n";
|
||||
/* cancel = true;
|
||||
break;*/
|
||||
}
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
std::cerr << e.displayText() << "\n";
|
||||
cancel = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
@ -289,7 +289,8 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
|
||||
|
||||
while (filt_pos < filt_end_sse)
|
||||
{
|
||||
int mask = _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
|
||||
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
|
||||
mask = ~mask;
|
||||
|
||||
if (0 == mask)
|
||||
{
|
||||
|
@ -356,7 +356,8 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
|
||||
|
||||
while (filt_pos < filt_end_sse)
|
||||
{
|
||||
int mask = _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
|
||||
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
|
||||
mask = ~mask;
|
||||
|
||||
if (0 == mask)
|
||||
{
|
||||
|
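Both filter hunks above (and the `toBits64`/`countBytesInFilter` changes that follow) replace the signed `_mm_cmpgt_epi8(x, 0)` test with `_mm_cmpeq_epi8(x, 0)` followed by negation, so filter bytes with the high bit set (e.g. 0xFF) are treated as "keep the row" instead of being silently dropped. A minimal standalone sketch of the new mask computation (SSE2 only, not the ClickHouse code itself):

``` cpp
#include <emmintrin.h>
#include <cstdint>
#include <cstdio>

/// Bit i of the result is set iff filt_pos[i] != 0, for 16 filter bytes.
static uint16_t filter_mask(const uint8_t * filt_pos)
{
    const __m128i zero16 = _mm_setzero_si128();
    uint16_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
    return ~mask;  /// invert "equal to zero" into "not equal to zero"
}

int main()
{
    /// 0xFF is a non-zero filter byte; the old signed `> 0` test would skip it.
    uint8_t filt[16] = {1, 0, 0xFF, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
    std::printf("0x%04x\n", filter_mask(filt));  /// prints 0x8015 (bits 0, 2, 4, 15)
}
```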
@ -17,13 +17,17 @@ namespace DB
|
||||
static UInt64 toBits64(const Int8 * bytes64)
|
||||
{
|
||||
static const __m128i zero16 = _mm_setzero_si128();
|
||||
return static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64)), zero16)))
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 16)), zero16)))
|
||||
<< 16)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 32)), zero16)))
|
||||
<< 32)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 48)), zero16)))
|
||||
<< 48);
|
||||
UInt64 res =
|
||||
static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64)), zero16)))
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 16)), zero16))) << 16)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 32)), zero16))) << 32)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 48)), zero16))) << 48);
|
||||
|
||||
return ~res;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -49,7 +53,7 @@ size_t countBytesInFilter(const UInt8 * filt, size_t sz)
|
||||
#endif
|
||||
|
||||
for (; pos < end; ++pos)
|
||||
count += *pos > 0;
|
||||
count += *pos != 0;
|
||||
|
||||
return count;
|
||||
}
|
||||
@ -82,7 +86,7 @@ size_t countBytesInFilterWithNull(const IColumn::Filter & filt, const UInt8 * nu
|
||||
#endif
|
||||
|
||||
for (; pos < end; ++pos)
|
||||
count += (*pos & ~*pos2) > 0;
|
||||
count += (*pos & ~*pos2) != 0;
|
||||
|
||||
return count;
|
||||
}
|
||||
@ -232,9 +236,10 @@ namespace
|
||||
|
||||
while (filt_pos < filt_end_aligned)
|
||||
{
|
||||
const auto mask = _mm_movemask_epi8(_mm_cmpgt_epi8(
|
||||
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)),
|
||||
zero_vec));
|
||||
mask = ~mask;
|
||||
|
||||
if (mask == 0)
|
||||
{
|
||||
|
@ -119,6 +119,13 @@ void tryLogCurrentException(const char * log_name, const std::string & start_of_
|
||||
|
||||
void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_message)
|
||||
{
|
||||
/// Under high memory pressure, any new allocation will definitely lead
/// to a MEMORY_LIMIT_EXCEEDED exception.
///
/// In that case the exception would not be logged, so block the
/// MemoryTracker until the exception has been logged.
|
||||
MemoryTracker::LockExceptionInThread lock_memory_tracker;
|
||||
|
||||
try
|
||||
{
|
||||
if (start_of_message.empty())
|
||||
|
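The comment added above explains the intent; the mechanism is an RAII guard that suppresses memory-limit checks on the current thread for as long as it is alive, so allocations made while formatting and logging the exception cannot themselves throw. A schematic sketch (hypothetical class names, not the real MemoryTracker):

``` cpp
#include <cstdio>

/// Hypothetical thread-local flag plus RAII guard, mirroring the role of
/// MemoryTracker::LockExceptionInThread in the hunk above.
thread_local bool memory_limit_checks_blocked = false;

struct BlockMemoryLimitChecksInThread
{
    BlockMemoryLimitChecksInThread() { memory_limit_checks_blocked = true; }
    ~BlockMemoryLimitChecksInThread() { memory_limit_checks_blocked = false; }
};

void try_log_current_exception()
{
    /// While the guard lives, an allocator would consult the flag and skip
    /// throwing MEMORY_LIMIT_EXCEEDED, so the log line below always gets out.
    BlockMemoryLimitChecksInThread lock_memory_tracker;
    std::fprintf(stderr, "logging exception (checks blocked: %d)\n",
                 memory_limit_checks_blocked);
}

int main()
{
    try_log_current_exception();
    std::fprintf(stderr, "after logging (checks blocked: %d)\n",
                 memory_limit_checks_blocked);
}
```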
@ -53,7 +53,7 @@ public:
|
||||
if constexpr (std::is_arithmetic_v<U>)
|
||||
{
|
||||
ReadBufferFromString in(l);
|
||||
T parsed;
|
||||
U parsed;
|
||||
readText(parsed, in);
|
||||
return operator()(parsed, r);
|
||||
}
|
||||
@ -113,7 +113,7 @@ public:
|
||||
if constexpr (std::is_arithmetic_v<U>)
|
||||
{
|
||||
ReadBufferFromString in(l);
|
||||
T parsed;
|
||||
U parsed;
|
||||
readText(parsed, in);
|
||||
return operator()(parsed, r);
|
||||
}
|
||||
|
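Both hunks in this file change the type of the parsed temporary from `T` to `U`: when a string field is compared with an arithmetic value, the string has to be parsed as the arithmetic operand's type, not as the visitor's own template parameter. A minimal illustration of the bug class (plain standard-library code, not the actual FieldVisitor):

``` cpp
#include <iostream>
#include <sstream>
#include <string>

/// Compare a string field with an arithmetic right-hand side by parsing the
/// string as U (the right-hand side's type), as the fixed code does.
template <typename U>
bool string_less_than(const std::string & l, U r)
{
    std::istringstream in(l);
    U parsed{};
    in >> parsed;
    return parsed < r;
}

int main()
{
    /// Parsing "3.9" as the wrong type (e.g. an integer) would truncate it
    /// to 3 and flip the result of the first comparison below.
    std::cout << string_less_than<double>("3.9", 3.5) << '\n';   /// 0
    std::cout << string_less_than<long long>("10", 9LL) << '\n'; /// 0
}
```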
@ -98,14 +98,31 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto first_u32 = UTF8::convert(needle);
|
||||
const auto first_l_u32 = Poco::Unicode::toLower(first_u32);
|
||||
const auto first_u_u32 = Poco::Unicode::toUpper(first_u32);
|
||||
auto first_u32 = UTF8::convertUTF8ToCodePoint(needle, needle_size);
|
||||
|
||||
/// Invalid UTF-8
|
||||
if (!first_u32)
|
||||
{
|
||||
/// Process it verbatim as a sequence of bytes.
|
||||
size_t src_len = UTF8::seqLength(*needle);
|
||||
|
||||
memcpy(l_seq, needle, src_len);
|
||||
memcpy(u_seq, needle, src_len);
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32_t first_l_u32 = Poco::Unicode::toLower(*first_u32);
|
||||
uint32_t first_u_u32 = Poco::Unicode::toUpper(*first_u32);
|
||||
|
||||
/// lower and uppercase variants of the first octet of the first character in `needle`
|
||||
UTF8::convert(first_l_u32, l_seq, sizeof(l_seq));
|
||||
size_t length_l = UTF8::convertCodePointToUTF8(first_l_u32, l_seq, sizeof(l_seq));
|
||||
size_t length_r = UTF8::convertCodePointToUTF8(first_u_u32, u_seq, sizeof(u_seq));
|
||||
|
||||
if (length_l != length_r)
|
||||
throw Exception{"UTF8 sequences with different lowercase and uppercase lengths are not supported", ErrorCodes::UNSUPPORTED_PARAMETER};
|
||||
}
|
||||
|
||||
l = l_seq[0];
|
||||
UTF8::convert(first_u_u32, u_seq, sizeof(u_seq));
|
||||
u = u_seq[0];
|
||||
}
|
||||
|
||||
@ -128,18 +145,21 @@ public:
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto src_len = UTF8::seqLength(*needle_pos);
|
||||
const auto c_u32 = UTF8::convert(needle_pos);
|
||||
size_t src_len = std::min<size_t>(needle_end - needle_pos, UTF8::seqLength(*needle_pos));
|
||||
auto c_u32 = UTF8::convertUTF8ToCodePoint(needle_pos, src_len);
|
||||
|
||||
const auto c_l_u32 = Poco::Unicode::toLower(c_u32);
|
||||
const auto c_u_u32 = Poco::Unicode::toUpper(c_u32);
|
||||
if (c_u32)
|
||||
{
|
||||
int c_l_u32 = Poco::Unicode::toLower(*c_u32);
|
||||
int c_u_u32 = Poco::Unicode::toUpper(*c_u32);
|
||||
|
||||
const auto dst_l_len = static_cast<uint8_t>(UTF8::convert(c_l_u32, l_seq, sizeof(l_seq)));
|
||||
const auto dst_u_len = static_cast<uint8_t>(UTF8::convert(c_u_u32, u_seq, sizeof(u_seq)));
|
||||
uint8_t dst_l_len = static_cast<uint8_t>(UTF8::convertCodePointToUTF8(c_l_u32, l_seq, sizeof(l_seq)));
|
||||
uint8_t dst_u_len = static_cast<uint8_t>(UTF8::convertCodePointToUTF8(c_u_u32, u_seq, sizeof(u_seq)));
|
||||
|
||||
/// @note Unicode standard states it is a rare but possible occasion
|
||||
if (!(dst_l_len == dst_u_len && dst_u_len == src_len))
|
||||
throw Exception{"UTF8 sequences with different lowercase and uppercase lengths are not supported", ErrorCodes::UNSUPPORTED_PARAMETER};
|
||||
}
|
||||
|
||||
cache_actual_len += src_len;
|
||||
if (cache_actual_len < n)
|
||||
@ -164,7 +184,7 @@ public:
|
||||
}
|
||||
|
||||
template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
|
||||
ALWAYS_INLINE bool compare(const CharT * /*haystack*/, const CharT * /*haystack_end*/, const CharT * pos) const
|
||||
ALWAYS_INLINE bool compare(const CharT * /*haystack*/, const CharT * haystack_end, const CharT * pos) const
|
||||
{
|
||||
|
||||
#ifdef __SSE4_1__
|
||||
@ -183,11 +203,20 @@ public:
|
||||
pos += cache_valid_len;
|
||||
auto needle_pos = needle + cache_valid_len;
|
||||
|
||||
while (needle_pos < needle_end &&
|
||||
Poco::Unicode::toLower(UTF8::convert(pos)) ==
|
||||
Poco::Unicode::toLower(UTF8::convert(needle_pos)))
|
||||
while (needle_pos < needle_end)
|
||||
{
|
||||
/// @note assuming sequences for lowercase and uppercase have exact same length
|
||||
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(pos, haystack_end - pos);
|
||||
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
|
||||
|
||||
/// Invalid UTF-8, should not compare equals
|
||||
if (!haystack_code_point || !needle_code_point)
|
||||
break;
|
||||
|
||||
/// Not equals case insensitive.
|
||||
if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
|
||||
break;
|
||||
|
||||
/// @note assuming sequences for lowercase and uppercase have exact same length (that is not always true)
|
||||
const auto len = UTF8::seqLength(*pos);
|
||||
pos += len;
|
||||
needle_pos += len;
|
||||
@ -209,10 +238,19 @@ public:
|
||||
pos += first_needle_symbol_is_ascii;
|
||||
auto needle_pos = needle + first_needle_symbol_is_ascii;
|
||||
|
||||
while (needle_pos < needle_end &&
|
||||
Poco::Unicode::toLower(UTF8::convert(pos)) ==
|
||||
Poco::Unicode::toLower(UTF8::convert(needle_pos)))
|
||||
while (needle_pos < needle_end)
|
||||
{
|
||||
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(pos, haystack_end - pos);
|
||||
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
|
||||
|
||||
/// Invalid UTF-8, should not compare equals
|
||||
if (!haystack_code_point || !needle_code_point)
|
||||
break;
|
||||
|
||||
/// Not equals case insensitive.
|
||||
if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
|
||||
break;
|
||||
|
||||
const auto len = UTF8::seqLength(*pos);
|
||||
pos += len;
|
||||
needle_pos += len;
|
||||
@ -270,11 +308,20 @@ public:
|
||||
auto haystack_pos = haystack + cache_valid_len;
|
||||
auto needle_pos = needle + cache_valid_len;
|
||||
|
||||
while (haystack_pos < haystack_end && needle_pos < needle_end &&
|
||||
Poco::Unicode::toLower(UTF8::convert(haystack_pos)) ==
|
||||
Poco::Unicode::toLower(UTF8::convert(needle_pos)))
|
||||
while (haystack_pos < haystack_end && needle_pos < needle_end)
|
||||
{
|
||||
/// @note assuming sequences for lowercase and uppercase have exact same length
|
||||
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
|
||||
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
|
||||
|
||||
/// Invalid UTF-8, should not compare equals
|
||||
if (!haystack_code_point || !needle_code_point)
|
||||
break;
|
||||
|
||||
/// Not equals case insensitive.
|
||||
if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
|
||||
break;
|
||||
|
||||
/// @note assuming sequences for lowercase and uppercase have exact same length (that is not always true)
|
||||
const auto len = UTF8::seqLength(*haystack_pos);
|
||||
haystack_pos += len;
|
||||
needle_pos += len;
|
||||
@ -302,10 +349,19 @@ public:
|
||||
auto haystack_pos = haystack + first_needle_symbol_is_ascii;
|
||||
auto needle_pos = needle + first_needle_symbol_is_ascii;
|
||||
|
||||
while (haystack_pos < haystack_end && needle_pos < needle_end &&
|
||||
Poco::Unicode::toLower(UTF8::convert(haystack_pos)) ==
|
||||
Poco::Unicode::toLower(UTF8::convert(needle_pos)))
|
||||
while (haystack_pos < haystack_end && needle_pos < needle_end)
|
||||
{
|
||||
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
|
||||
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
|
||||
|
||||
/// Invalid UTF-8, should not compare equals
|
||||
if (!haystack_code_point || !needle_code_point)
|
||||
break;
|
||||
|
||||
/// Not equals case insensitive.
|
||||
if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
|
||||
break;
|
||||
|
||||
const auto len = UTF8::seqLength(*haystack_pos);
|
||||
haystack_pos += len;
|
||||
needle_pos += len;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <common/types.h>
|
||||
#include <Common/BitHelpers.h>
|
||||
#include <Poco/UTF8Encoding.h>
|
||||
@ -73,26 +74,27 @@ inline size_t countCodePoints(const UInt8 * data, size_t size)
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
|
||||
int convert(const CharT * bytes)
|
||||
size_t convertCodePointToUTF8(uint32_t code_point, CharT * out_bytes, size_t out_length)
|
||||
{
|
||||
static const Poco::UTF8Encoding utf8;
|
||||
return utf8.convert(reinterpret_cast<const uint8_t *>(bytes));
|
||||
int res = utf8.convert(code_point, reinterpret_cast<uint8_t *>(out_bytes), out_length);
|
||||
assert(res >= 0);
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
|
||||
int convert(int ch, CharT * bytes, int length)
|
||||
std::optional<uint32_t> convertUTF8ToCodePoint(const CharT * in_bytes, size_t in_length)
|
||||
{
|
||||
static const Poco::UTF8Encoding utf8;
|
||||
return utf8.convert(ch, reinterpret_cast<uint8_t *>(bytes), length);
|
||||
int res = utf8.queryConvert(reinterpret_cast<const uint8_t *>(in_bytes), in_length);
|
||||
|
||||
if (res >= 0)
|
||||
return res;
|
||||
return {};
|
||||
}
|
||||
|
||||
template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
|
||||
int queryConvert(const CharT * bytes, int length)
|
||||
{
|
||||
static const Poco::UTF8Encoding utf8;
|
||||
return utf8.queryConvert(reinterpret_cast<const uint8_t *>(bytes), length);
|
||||
}
|
||||
|
||||
/// returns UTF-8 wcswidth. Invalid sequence is treated as zero width character.
|
||||
/// `prefix` is used to compute the `\t` width which extends the string before
|
||||
|
@ -60,7 +60,7 @@ namespace VolnitskyTraits
|
||||
static inline Ngram toNGram(const UInt8 * const pos) { return unalignedLoad<Ngram>(pos); }
|
||||
|
||||
template <typename Callback>
|
||||
static inline void putNGramASCIICaseInsensitive(const UInt8 * const pos, const int offset, const Callback & putNGramBase)
|
||||
static inline void putNGramASCIICaseInsensitive(const UInt8 * pos, int offset, Callback && putNGramBase)
|
||||
{
|
||||
struct Chars
|
||||
{
|
||||
@ -109,21 +109,12 @@ namespace VolnitskyTraits
|
||||
putNGramBase(n, offset);
|
||||
}
|
||||
|
||||
template <bool CaseSensitive, bool ASCII, typename Callback>
|
||||
static inline void putNGram(const UInt8 * const pos, const int offset, [[maybe_unused]] const UInt8 * const begin, const Callback & putNGramBase)
|
||||
{
|
||||
if constexpr (CaseSensitive)
|
||||
{
|
||||
putNGramBase(toNGram(pos), offset);
|
||||
}
|
||||
else
|
||||
{
|
||||
if constexpr (ASCII)
|
||||
{
|
||||
putNGramASCIICaseInsensitive(pos, offset, putNGramBase);
|
||||
}
|
||||
else
|
||||
template <typename Callback>
|
||||
static inline void putNGramUTF8CaseInsensitive(
|
||||
const UInt8 * pos, int offset, const UInt8 * begin, size_t size, Callback && putNGramBase)
|
||||
{
|
||||
const UInt8 * end = begin + size;
|
||||
|
||||
struct Chars
|
||||
{
|
||||
UInt8 c0;
|
||||
@ -139,7 +130,9 @@ namespace VolnitskyTraits
|
||||
n = toNGram(pos);
|
||||
|
||||
if (isascii(chars.c0) && isascii(chars.c1))
|
||||
{
|
||||
putNGramASCIICaseInsensitive(pos, offset, putNGramBase);
|
||||
}
|
||||
else
|
||||
{
|
||||
/** n-gram (in the case of n = 2)
|
||||
@ -163,33 +156,45 @@ namespace VolnitskyTraits
|
||||
auto seq_pos = pos;
|
||||
UTF8::syncBackward(seq_pos, begin);
|
||||
|
||||
const auto u32 = UTF8::convert(seq_pos);
|
||||
const auto l_u32 = Poco::Unicode::toLower(u32);
|
||||
const auto u_u32 = Poco::Unicode::toUpper(u32);
|
||||
auto u32 = UTF8::convertUTF8ToCodePoint(seq_pos, end - seq_pos);
|
||||
/// Invalid UTF-8
|
||||
if (!u32)
|
||||
{
|
||||
putNGramBase(n, offset);
|
||||
}
|
||||
else
|
||||
{
|
||||
int l_u32 = Poco::Unicode::toLower(*u32);
|
||||
int u_u32 = Poco::Unicode::toUpper(*u32);
|
||||
|
||||
/// symbol is case-independent
|
||||
if (l_u32 == u_u32)
|
||||
{
|
||||
putNGramBase(n, offset);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// where is the given ngram in respect to the start of UTF-8 sequence?
|
||||
const auto seq_ngram_offset = pos - seq_pos;
|
||||
size_t seq_ngram_offset = pos - seq_pos;
|
||||
|
||||
Seq seq;
|
||||
|
||||
/// put ngram for lowercase
|
||||
UTF8::convert(l_u32, seq, sizeof(seq));
|
||||
size_t length_l [[maybe_unused]] = UTF8::convertCodePointToUTF8(l_u32, seq, sizeof(seq));
|
||||
assert(length_l >= 2);
|
||||
chars.c0 = seq[seq_ngram_offset];
|
||||
chars.c1 = seq[seq_ngram_offset + 1];
|
||||
putNGramBase(n, offset);
|
||||
|
||||
/// put ngram for uppercase
|
||||
UTF8::convert(u_u32, seq, sizeof(seq));
|
||||
size_t length_r [[maybe_unused]] = UTF8::convertCodePointToUTF8(u_u32, seq, sizeof(seq));
|
||||
assert(length_r >= 2);
|
||||
chars.c0 = seq[seq_ngram_offset]; //-V519
|
||||
chars.c1 = seq[seq_ngram_offset + 1]; //-V519
|
||||
putNGramBase(n, offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/// ngram is on the boundary of two sequences
|
||||
@ -197,18 +202,30 @@ namespace VolnitskyTraits
|
||||
auto first_seq_pos = pos;
|
||||
UTF8::syncBackward(first_seq_pos, begin);
|
||||
/// where is the given ngram in respect to the start of first UTF-8 sequence?
|
||||
const auto seq_ngram_offset = pos - first_seq_pos;
|
||||
size_t seq_ngram_offset = pos - first_seq_pos;
|
||||
|
||||
const auto first_u32 = UTF8::convert(first_seq_pos);
|
||||
const auto first_l_u32 = Poco::Unicode::toLower(first_u32);
|
||||
const auto first_u_u32 = Poco::Unicode::toUpper(first_u32);
|
||||
auto first_u32 = UTF8::convertUTF8ToCodePoint(first_seq_pos, end - first_seq_pos);
|
||||
int first_l_u32 = 0;
|
||||
int first_u_u32 = 0;
|
||||
|
||||
if (first_u32)
|
||||
{
|
||||
first_l_u32 = Poco::Unicode::toLower(*first_u32);
|
||||
first_u_u32 = Poco::Unicode::toUpper(*first_u32);
|
||||
}
|
||||
|
||||
/// second sequence always start immediately after u_pos
|
||||
auto second_seq_pos = pos + 1;
|
||||
|
||||
const auto second_u32 = UTF8::convert(second_seq_pos); /// TODO This assumes valid UTF-8 or zero byte after needle.
|
||||
const auto second_l_u32 = Poco::Unicode::toLower(second_u32);
|
||||
const auto second_u_u32 = Poco::Unicode::toUpper(second_u32);
|
||||
auto second_u32 = UTF8::convertUTF8ToCodePoint(second_seq_pos, end - second_seq_pos);
|
||||
int second_l_u32 = 0;
|
||||
int second_u_u32 = 0;
|
||||
|
||||
if (second_u32)
|
||||
{
|
||||
second_l_u32 = Poco::Unicode::toLower(*second_u32);
|
||||
second_u_u32 = Poco::Unicode::toUpper(*second_u32);
|
||||
}
|
||||
|
||||
/// both symbols are case-independent
|
||||
if (first_l_u32 == first_u_u32 && second_l_u32 == second_u_u32)
|
||||
@ -221,12 +238,14 @@ namespace VolnitskyTraits
|
||||
Seq seq;
|
||||
|
||||
/// put ngram for lowercase
|
||||
UTF8::convert(second_l_u32, seq, sizeof(seq));
|
||||
size_t size_l [[maybe_unused]] = UTF8::convertCodePointToUTF8(second_l_u32, seq, sizeof(seq));
|
||||
assert(size_l >= 1);
|
||||
chars.c1 = seq[0];
|
||||
putNGramBase(n, offset);
|
||||
|
||||
/// put ngram from uppercase, if it is different
|
||||
UTF8::convert(second_u_u32, seq, sizeof(seq));
|
||||
size_t size_u [[maybe_unused]] = UTF8::convertCodePointToUTF8(second_u_u32, seq, sizeof(seq));
|
||||
assert(size_u >= 1);
|
||||
if (chars.c1 != seq[0])
|
||||
{
|
||||
chars.c1 = seq[0];
|
||||
@ -239,12 +258,14 @@ namespace VolnitskyTraits
|
||||
Seq seq;
|
||||
|
||||
/// put ngram for lowercase
|
||||
UTF8::convert(first_l_u32, seq, sizeof(seq));
|
||||
size_t size_l [[maybe_unused]] = UTF8::convertCodePointToUTF8(first_l_u32, seq, sizeof(seq));
|
||||
assert(size_l > seq_ngram_offset);
|
||||
chars.c0 = seq[seq_ngram_offset];
|
||||
putNGramBase(n, offset);
|
||||
|
||||
/// put ngram for uppercase, if it is different
|
||||
UTF8::convert(first_u_u32, seq, sizeof(seq));
|
||||
size_t size_u [[maybe_unused]] = UTF8::convertCodePointToUTF8(first_u_u32, seq, sizeof(seq));
|
||||
assert(size_u > seq_ngram_offset);
|
||||
if (chars.c0 != seq[seq_ngram_offset])
|
||||
{
|
||||
chars.c0 = seq[seq_ngram_offset];
|
||||
@ -258,10 +279,15 @@ namespace VolnitskyTraits
|
||||
Seq second_l_seq;
|
||||
Seq second_u_seq;
|
||||
|
||||
UTF8::convert(first_l_u32, first_l_seq, sizeof(first_l_seq));
|
||||
UTF8::convert(first_u_u32, first_u_seq, sizeof(first_u_seq));
|
||||
UTF8::convert(second_l_u32, second_l_seq, sizeof(second_l_seq));
|
||||
UTF8::convert(second_u_u32, second_u_seq, sizeof(second_u_seq));
|
||||
size_t size_first_l [[maybe_unused]] = UTF8::convertCodePointToUTF8(first_l_u32, first_l_seq, sizeof(first_l_seq));
|
||||
size_t size_first_u [[maybe_unused]] = UTF8::convertCodePointToUTF8(first_u_u32, first_u_seq, sizeof(first_u_seq));
|
||||
size_t size_second_l [[maybe_unused]] = UTF8::convertCodePointToUTF8(second_l_u32, second_l_seq, sizeof(second_l_seq));
|
||||
size_t size_second_u [[maybe_unused]] = UTF8::convertCodePointToUTF8(second_u_u32, second_u_seq, sizeof(second_u_seq));
|
||||
|
||||
assert(size_first_l > seq_ngram_offset);
|
||||
assert(size_first_u > seq_ngram_offset);
|
||||
assert(size_second_l > 0);
|
||||
assert(size_second_u > 0);
|
||||
|
||||
auto c0l = first_l_seq[seq_ngram_offset];
|
||||
auto c0u = first_u_seq[seq_ngram_offset];
|
||||
@ -300,7 +326,16 @@ namespace VolnitskyTraits
|
||||
}
|
||||
}
|
||||
}
|
||||
}
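The calls above rely on UTF8::convertUTF8ToCodePoint / UTF8::convertCodePointToUTF8, which make failure explicit: an empty optional for malformed input and a byte count from the encoder. A minimal self-contained sketch of such a pair, assuming the signatures implied by the call sites (the real helpers in Common/UTF8Helpers.h may differ, and a production decoder would also reject overlong forms and surrogates):

#include <cstdint>
#include <cstddef>
#include <optional>

/// Decode the first code point of a UTF-8 sequence; empty optional on malformed input.
std::optional<uint32_t> convertUTF8ToCodePoint(const uint8_t * s, size_t len)
{
    if (len == 0)
        return {};
    if (s[0] < 0x80)
        return s[0];
    if ((s[0] & 0xE0) == 0xC0 && len >= 2 && (s[1] & 0xC0) == 0x80)
        return ((s[0] & 0x1Fu) << 6) | (s[1] & 0x3Fu);
    if ((s[0] & 0xF0) == 0xE0 && len >= 3 && (s[1] & 0xC0) == 0x80 && (s[2] & 0xC0) == 0x80)
        return ((s[0] & 0x0Fu) << 12) | ((s[1] & 0x3Fu) << 6) | (s[2] & 0x3Fu);
    if ((s[0] & 0xF8) == 0xF0 && len >= 4 && (s[1] & 0xC0) == 0x80 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80)
        return ((s[0] & 0x07u) << 18) | ((s[1] & 0x3Fu) << 12) | ((s[2] & 0x3Fu) << 6) | (s[3] & 0x3Fu);
    return {};
}

/// Encode a code point; returns the number of bytes written (0 if it does not fit).
size_t convertCodePointToUTF8(uint32_t cp, uint8_t * out, size_t capacity)
{
    if (cp < 0x80 && capacity >= 1)      { out[0] = uint8_t(cp); return 1; }
    if (cp < 0x800 && capacity >= 2)     { out[0] = uint8_t(0xC0 | (cp >> 6)); out[1] = uint8_t(0x80 | (cp & 0x3F)); return 2; }
    if (cp < 0x10000 && capacity >= 3)   { out[0] = uint8_t(0xE0 | (cp >> 12)); out[1] = uint8_t(0x80 | ((cp >> 6) & 0x3F)); out[2] = uint8_t(0x80 | (cp & 0x3F)); return 3; }
    if (cp <= 0x10FFFF && capacity >= 4) { out[0] = uint8_t(0xF0 | (cp >> 18)); out[1] = uint8_t(0x80 | ((cp >> 12) & 0x3F)); out[2] = uint8_t(0x80 | ((cp >> 6) & 0x3F)); out[3] = uint8_t(0x80 | (cp & 0x3F)); return 4; }
    return 0;
}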
|
||||
|
||||
template <bool CaseSensitive, bool ASCII, typename Callback>
|
||||
static inline void putNGram(const UInt8 * pos, int offset, [[maybe_unused]] const UInt8 * begin, size_t size, Callback && putNGramBase)
|
||||
{
|
||||
if constexpr (CaseSensitive)
|
||||
putNGramBase(toNGram(pos), offset);
|
||||
else if constexpr (ASCII)
|
||||
putNGramASCIICaseInsensitive(pos, offset, std::forward<Callback>(putNGramBase));
|
||||
else
|
||||
putNGramUTF8CaseInsensitive(pos, offset, begin, size, std::forward<Callback>(putNGramBase));
|
||||
}
|
||||
}
|
||||
|
||||
@ -310,17 +345,17 @@ template <bool CaseSensitive, bool ASCII, typename FallbackSearcher>
|
||||
class VolnitskyBase
|
||||
{
|
||||
protected:
|
||||
const UInt8 * const needle;
|
||||
const size_t needle_size;
|
||||
const UInt8 * const needle_end = needle + needle_size;
|
||||
const UInt8 * needle;
|
||||
size_t needle_size;
|
||||
const UInt8 * needle_end = needle + needle_size;
|
||||
/// For how long we move, if the n-gram from haystack is not found in the hash table.
|
||||
const size_t step = needle_size - sizeof(VolnitskyTraits::Ngram) + 1;
|
||||
size_t step = needle_size - sizeof(VolnitskyTraits::Ngram) + 1;
|
||||
|
||||
/** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1)
|
||||
* storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
|
||||
std::unique_ptr<VolnitskyTraits::Offset[]> hash; /// Hash table.
|
||||
|
||||
const bool fallback; /// Do we need to use the fallback algorithm.
|
||||
bool fallback; /// Do we need to use the fallback algorithm.
|
||||
|
||||
FallbackSearcher fallback_searcher;
|
||||
|
||||
@ -346,7 +381,7 @@ public:
|
||||
/// ssize_t is used here because unsigned can't be used with condition like `i >= 0`, unsigned always >= 0
|
||||
/// And also adding from the end guarantees that we will find first occurrence because we will lookup bigger offsets first.
|
||||
for (auto i = static_cast<ssize_t>(needle_size - sizeof(VolnitskyTraits::Ngram)); i >= 0; --i)
|
||||
VolnitskyTraits::putNGram<CaseSensitive, ASCII>(this->needle + i, i + 1, this->needle, callback);
|
||||
VolnitskyTraits::putNGram<CaseSensitive, ASCII>(needle + i, i + 1, needle, needle_size, callback);
|
||||
}
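For context, the loop above fills the Volnitsky hash table: every 2-byte ngram of the needle is mapped to its offset, iterating from the end so that smaller offsets overwrite larger ones and the first occurrence in the haystack is reported first. A toy stand-alone sketch of that table construction (the real searcher resolves collisions with open addressing and keeps several offsets per ngram; the name buildNGramTable is illustrative):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

/// Map every 2-byte ngram of the needle to its 1-based offset; 0 means "ngram absent".
/// During the search, the haystack is scanned with a stride of (needle_size - 1) and
/// only positions whose ngram appears in this table are verified.
std::vector<uint16_t> buildNGramTable(const std::string & needle)
{
    std::vector<uint16_t> hash(65536, 0);
    for (std::ptrdiff_t i = static_cast<std::ptrdiff_t>(needle.size()) - 2; i >= 0; --i)
    {
        uint16_t ngram;
        std::memcpy(&ngram, needle.data() + i, 2);
        hash[ngram] = static_cast<uint16_t>(i + 1);
    }
    return hash;
}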
|
||||
|
||||
|
||||
@ -493,6 +528,7 @@ public:
|
||||
reinterpret_cast<const UInt8 *>(cur_needle_data) + i,
|
||||
i + 1,
|
||||
reinterpret_cast<const UInt8 *>(cur_needle_data),
|
||||
cur_needle_size,
|
||||
callback);
|
||||
}
|
||||
}
|
||||
|
@ -120,9 +120,10 @@ inline int memcmpSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_siz
|
||||
|
||||
for (size_t offset = min_size; offset < max_size; offset += 16)
|
||||
{
|
||||
uint16_t mask = _mm_movemask_epi8(_mm_cmpgt_epi8(
|
||||
uint16_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(longest + offset)),
|
||||
zero16));
|
||||
mask = ~mask;
|
||||
|
||||
if (mask)
|
||||
{
|
||||
|
@ -36,6 +36,7 @@
|
||||
#define DEFAULT_MERGE_BLOCK_SIZE 8192
|
||||
|
||||
#define DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC 5
|
||||
#define DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC 60
|
||||
#define SHOW_CHARS_ON_SYNTAX_ERROR ptrdiff_t(160)
|
||||
#define DEFAULT_LIVE_VIEW_HEARTBEAT_INTERVAL_SEC 15
|
||||
#define DBMS_DEFAULT_DISTRIBUTED_CONNECTIONS_POOL_SIZE 1024
|
||||
|
@ -391,6 +391,7 @@ class IColumn;
|
||||
M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \
|
||||
M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \
|
||||
M(Seconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \
|
||||
M(Seconds, periodic_live_view_refresh, DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC, "Interval after which periodically refreshed live view is forced to refresh.", 0) \
|
||||
M(Bool, transform_null_in, false, "If enabled, NULL values will be matched with 'IN' operator as if they are considered equal.", 0) \
|
||||
M(Bool, allow_nondeterministic_mutations, false, "Allow non-deterministic functions in ALTER UPDATE/ALTER DELETE statements", 0) \
|
||||
M(Seconds, lock_acquire_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "How long locking request should wait before failing", 0) \
|
||||
|
@ -504,7 +504,7 @@ private:
|
||||
using namespace traits_;
|
||||
using namespace impl_;
|
||||
|
||||
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
|
||||
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
|
||||
class FunctionBinaryArithmetic : public IFunction
|
||||
{
|
||||
static constexpr const bool is_plus = IsOperation<Op>::plus;
|
||||
@ -542,8 +542,35 @@ class FunctionBinaryArithmetic : public IFunction
|
||||
>(type, std::forward<F>(f));
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
static bool castTypeNoFloats(const IDataType * type, F && f)
|
||||
{
|
||||
return castTypeToEither<
|
||||
DataTypeUInt8,
|
||||
DataTypeUInt16,
|
||||
DataTypeUInt32,
|
||||
DataTypeUInt64,
|
||||
DataTypeUInt256,
|
||||
DataTypeInt8,
|
||||
DataTypeInt16,
|
||||
DataTypeInt32,
|
||||
DataTypeInt64,
|
||||
DataTypeInt128,
|
||||
DataTypeInt256,
|
||||
DataTypeDate,
|
||||
DataTypeDateTime,
|
||||
DataTypeDecimal<Decimal32>,
|
||||
DataTypeDecimal<Decimal64>,
|
||||
DataTypeDecimal<Decimal128>,
|
||||
DataTypeDecimal<Decimal256>,
|
||||
DataTypeFixedString
|
||||
>(type, std::forward<F>(f));
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
static bool castBothTypes(const IDataType * left, const IDataType * right, F && f)
|
||||
{
|
||||
if constexpr (valid_on_float_arguments)
|
||||
{
|
||||
return castType(left, [&](const auto & left_)
|
||||
{
|
||||
@ -553,6 +580,17 @@ class FunctionBinaryArithmetic : public IFunction
|
||||
});
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
return castTypeNoFloats(left, [&](const auto & left_)
|
||||
{
|
||||
return castTypeNoFloats(right, [&](const auto & right_)
|
||||
{
|
||||
return f(left_, right_);
|
||||
});
|
||||
});
|
||||
}
|
||||
}
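castTypeNoFloats is the same dispatcher as castType with the floating-point types simply left out of the list, so a function instantiated with valid_on_float_arguments = false never matches Float32/Float64 arguments. A self-contained sketch of the underlying castTypeToEither pattern, using toy stand-in types rather than the real IDataType hierarchy:

#include <cstdio>
#include <type_traits>

struct IDataType { virtual ~IDataType() = default; };
struct DataTypeInt32 : IDataType {};
struct DataTypeFloat64 : IDataType {};

/// Try a dynamic_cast to each listed type and call the functor on the first match.
/// Leaving a type out of the template argument list makes it "unsupported".
template <typename... Ts, typename F>
bool castTypeToEither(const IDataType * type, F && f)
{
    auto try_one = [&](auto * tag) -> bool
    {
        using T = std::remove_pointer_t<decltype(tag)>;
        if (const auto * concrete = dynamic_cast<const T *>(type))
            return f(*concrete);
        return false;
    };
    return (try_one(static_cast<Ts *>(nullptr)) || ...);
}

int main()
{
    DataTypeInt32 t;
    bool matched = castTypeToEither<DataTypeInt32, DataTypeFloat64>(&t,
        [](const auto &) { std::puts("matched"); return true; });
    return matched ? 0 : 1;
}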
|
||||
|
||||
static FunctionOverloadResolverPtr
|
||||
getFunctionForIntervalArithmetic(const DataTypePtr & type0, const DataTypePtr & type1, const Context & context)
|
||||
@ -1319,11 +1357,11 @@ public:
|
||||
};
|
||||
|
||||
|
||||
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
|
||||
class FunctionBinaryArithmeticWithConstants : public FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>
|
||||
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
|
||||
class FunctionBinaryArithmeticWithConstants : public FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>
|
||||
{
|
||||
public:
|
||||
using Base = FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>;
|
||||
using Base = FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>;
|
||||
using Monotonicity = typename Base::Monotonicity;
|
||||
|
||||
static FunctionPtr create(
|
||||
@ -1488,7 +1526,7 @@ private:
|
||||
DataTypePtr return_type;
|
||||
};
|
||||
|
||||
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
|
||||
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
|
||||
class BinaryArithmeticOverloadResolver : public IFunctionOverloadResolverImpl
|
||||
{
|
||||
public:
|
||||
@ -1512,14 +1550,14 @@ public:
|
||||
|| (arguments[1].column && isColumnConst(*arguments[1].column))))
|
||||
{
|
||||
return std::make_unique<DefaultFunction>(
|
||||
FunctionBinaryArithmeticWithConstants<Op, Name, valid_on_default_arguments>::create(
|
||||
FunctionBinaryArithmeticWithConstants<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::create(
|
||||
arguments[0], arguments[1], return_type, context),
|
||||
ext::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }),
|
||||
return_type);
|
||||
}
|
||||
|
||||
return std::make_unique<DefaultFunction>(
|
||||
FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>::create(context),
|
||||
FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::create(context),
|
||||
ext::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }),
|
||||
return_type);
|
||||
}
|
||||
@ -1530,7 +1568,7 @@ public:
|
||||
throw Exception(
|
||||
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + ", should be 2",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
return FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>::getReturnTypeImplStatic(arguments, context);
|
||||
return FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::getReturnTypeImplStatic(arguments, context);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -68,12 +68,12 @@ struct AddSecondsImpl : public AddOnDateTime64DefaultImpl<AddSecondsImpl>
|
||||
|
||||
static constexpr auto name = "addSeconds";
|
||||
|
||||
static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &)
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &)
|
||||
{
|
||||
return t + delta;
|
||||
}
|
||||
|
||||
static inline UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.fromDayNum(DayNum(d)) + delta;
|
||||
}
|
||||
@ -92,7 +92,7 @@ struct AddMinutesImpl : public AddOnDateTime64DefaultImpl<AddMinutesImpl>
|
||||
return t + delta * 60;
|
||||
}
|
||||
|
||||
static inline UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.fromDayNum(DayNum(d)) + delta * 60;
|
||||
}
|
||||
@ -111,7 +111,7 @@ struct AddHoursImpl : public AddOnDateTime64DefaultImpl<AddHoursImpl>
|
||||
return t + delta * 3600;
|
||||
}
|
||||
|
||||
static inline UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.fromDayNum(DayNum(d)) + delta * 3600;
|
||||
}
|
||||
@ -125,18 +125,12 @@ struct AddDaysImpl : public AddOnDateTime64DefaultImpl<AddDaysImpl>
|
||||
|
||||
static constexpr auto name = "addDays";
|
||||
|
||||
// static inline UInt32 execute(UInt64 t, Int64 delta, const DateLUTImpl & time_zone)
|
||||
// {
|
||||
// // TODO (nemkov): LUT does not support out-of range date values for now.
|
||||
// return time_zone.addDays(t, delta);
|
||||
// }
|
||||
|
||||
static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.addDays(t, delta);
|
||||
}
|
||||
|
||||
static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &)
|
||||
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &)
|
||||
{
|
||||
return d + delta;
|
||||
}
|
||||
@ -155,7 +149,7 @@ struct AddWeeksImpl : public AddOnDateTime64DefaultImpl<AddWeeksImpl>
|
||||
return time_zone.addWeeks(t, delta);
|
||||
}
|
||||
|
||||
static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &)
|
||||
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &)
|
||||
{
|
||||
return d + delta * 7;
|
||||
}
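These helpers work on two representations: Date is days since 1970-01-01 (UInt16) and DateTime is seconds since the epoch (UInt32), so adding days is plain integer arithmetic while adding seconds first needs the start of that day from the time zone. The NO_SANITIZE_UNDEFINED annotations presumably acknowledge that for absurd deltas these additions are allowed to wrap rather than being range-checked. A toy model of the arithmetic (UTC only, unlike the real DateLUTImpl):

#include <cstdint>

/// Adding days to a Date is plain integer arithmetic on the day number.
uint16_t addDaysToDate(uint16_t days_since_epoch, int64_t delta)
{
    return static_cast<uint16_t>(days_since_epoch + delta);   /// may wrap for out-of-range deltas
}

/// Adding seconds to a Date first turns the day number into a timestamp
/// (here naively assuming UTC, i.e. 86400 seconds per day).
uint32_t addSecondsToDate(uint16_t days_since_epoch, int64_t delta)
{
    const int64_t seconds_per_day = 86400;
    return static_cast<uint32_t>(days_since_epoch * seconds_per_day + delta);
}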
|
||||
|
@ -263,6 +263,12 @@ public:
|
||||
static constexpr auto name = "IPv6StringToNum";
|
||||
static FunctionPtr create(const Context &) { return std::make_shared<FunctionIPv6StringToNum>(); }
|
||||
|
||||
static inline bool tryParseIPv4(const char * pos)
|
||||
{
|
||||
UInt32 result = 0;
|
||||
return DB::parseIPv4(pos, reinterpret_cast<unsigned char *>(&result));
|
||||
}
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 1; }
|
||||
@ -270,8 +276,8 @@ public:
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
throw Exception(
|
||||
"Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
return std::make_shared<DataTypeFixedString>(IPV6_BINARY_LENGTH);
|
||||
}
|
||||
@ -292,13 +298,27 @@ public:
|
||||
const ColumnString::Chars & vec_src = col_in->getChars();
|
||||
const ColumnString::Offsets & offsets_src = col_in->getOffsets();
|
||||
size_t src_offset = 0;
|
||||
char src_ipv4_buf[sizeof("::ffff:") + IPV4_MAX_TEXT_LENGTH + 1] = "::ffff:";
|
||||
|
||||
for (size_t out_offset = 0, i = 0;
|
||||
out_offset < vec_res.size();
|
||||
out_offset += IPV6_BINARY_LENGTH, ++i)
|
||||
for (size_t out_offset = 0, i = 0; out_offset < vec_res.size(); out_offset += IPV6_BINARY_LENGTH, ++i)
|
||||
{
|
||||
/// In case of failure, the function fills vec_res with zero bytes.
|
||||
parseIPv6(reinterpret_cast<const char *>(&vec_src[src_offset]), reinterpret_cast<unsigned char *>(&vec_res[out_offset]));
|
||||
/// For both cases below: In case of failure, the function parseIPv6 fills vec_res with zero bytes.
|
||||
|
||||
/// If the source IP address is parsable as an IPv4 address, then transform it into a valid IPv6 address.
|
||||
/// Keeping it simple by just prefixing `::ffff:` to the IPv4 address to represent it as a valid IPv6 address.
|
||||
if (tryParseIPv4(reinterpret_cast<const char *>(&vec_src[src_offset])))
|
||||
{
|
||||
std::memcpy(
|
||||
src_ipv4_buf + std::strlen("::ffff:"),
|
||||
reinterpret_cast<const char *>(&vec_src[src_offset]),
|
||||
std::min<UInt64>(offsets_src[i] - src_offset, IPV4_MAX_TEXT_LENGTH + 1));
|
||||
parseIPv6(src_ipv4_buf, reinterpret_cast<unsigned char *>(&vec_res[out_offset]));
|
||||
}
|
||||
else
|
||||
{
|
||||
parseIPv6(
|
||||
reinterpret_cast<const char *>(&vec_src[src_offset]), reinterpret_cast<unsigned char *>(&vec_res[out_offset]));
|
||||
}
|
||||
src_offset = offsets_src[i];
|
||||
}
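The branch above lets IPv6StringToNum accept dotted-quad input by prefixing `::ffff:`, i.e. it stores the standard IPv4-mapped IPv6 form. The same equivalence can be checked with the POSIX parser; this is only an illustration, not the DB::parseIPv6 used here:

#include <arpa/inet.h>
#include <cstdio>
#include <cstring>

int main()
{
    unsigned char buf[16];
    char mapped[64] = "::ffff:";
    std::strcat(mapped, "1.2.3.4");

    /// "::ffff:1.2.3.4" parses as a 16-byte IPv4-mapped IPv6 address.
    if (inet_pton(AF_INET6, mapped, buf) == 1)
    {
        /// The last four bytes hold the original IPv4 address: 01 02 03 04.
        std::printf("%02x %02x %02x %02x\n",
                    (unsigned) buf[12], (unsigned) buf[13], (unsigned) buf[14], (unsigned) buf[15]);
    }
    return 0;
}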
|
||||
|
||||
|
@ -21,7 +21,7 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
template <class T>
|
||||
inline constexpr bool is_gcd_lcm_implemeted = !(is_big_int_v<T> || std::is_floating_point_v<T>);
|
||||
inline constexpr bool is_gcd_lcm_implemeted = !is_big_int_v<T>;
|
||||
|
||||
template <typename A, typename B, typename Impl, typename Name>
|
||||
struct GCDLCMImpl
|
||||
@ -33,7 +33,7 @@ struct GCDLCMImpl
|
||||
static inline std::enable_if_t<!is_gcd_lcm_implemeted<Result>, Result>
|
||||
apply(A, B)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not implemented for big integers and floats", Name::name);
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not implemented for big integers", Name::name);
|
||||
}
|
||||
|
||||
template <typename Result = ResultType>
|
||||
|
@ -465,7 +465,7 @@ std::vector<size_t> buildKMPPrefixFunction(const SliceType & pattern, const Equa
|
||||
for (size_t i = 1; i < pattern.size; ++i)
|
||||
{
|
||||
result[i] = 0;
|
||||
for (auto length = i; length > 0;)
|
||||
for (size_t length = i; length > 0;)
|
||||
{
|
||||
length = result[length - 1];
|
||||
if (isEqualFunc(pattern, i, length))
|
||||
@ -695,7 +695,7 @@ void resizeDynamicSize(ArraySource && array_source, ValueSource && value_source,
|
||||
|
||||
if (size >= 0)
|
||||
{
|
||||
auto length = static_cast<size_t>(size);
|
||||
size_t length = static_cast<size_t>(size);
|
||||
if (length > MAX_ARRAY_SIZE)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}, maximum: {}",
|
||||
length, MAX_ARRAY_SIZE);
|
||||
@ -711,7 +711,7 @@ void resizeDynamicSize(ArraySource && array_source, ValueSource && value_source,
|
||||
}
|
||||
else
|
||||
{
|
||||
auto length = static_cast<size_t>(-size);
|
||||
size_t length = -static_cast<size_t>(size);
|
||||
if (length > MAX_ARRAY_SIZE)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}, maximum: {}",
|
||||
length, MAX_ARRAY_SIZE);
|
||||
@ -744,7 +744,7 @@ void resizeConstantSize(ArraySource && array_source, ValueSource && value_source
|
||||
|
||||
if (size >= 0)
|
||||
{
|
||||
auto length = static_cast<size_t>(size);
|
||||
size_t length = static_cast<size_t>(size);
|
||||
if (length > MAX_ARRAY_SIZE)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}, maximum: {}",
|
||||
length, MAX_ARRAY_SIZE);
|
||||
@ -760,7 +760,7 @@ void resizeConstantSize(ArraySource && array_source, ValueSource && value_source
|
||||
}
|
||||
else
|
||||
{
|
||||
auto length = static_cast<size_t>(-size);
|
||||
size_t length = -static_cast<size_t>(size);
|
||||
if (length > MAX_ARRAY_SIZE)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}, maximum: {}",
|
||||
length, MAX_ARRAY_SIZE);
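The switch to `-static_cast<size_t>(size)` is presumably about the edge case `size == INT64_MIN`: negating it as a signed value is undefined behaviour, while converting to an unsigned type first and negating in unsigned arithmetic is well defined (modulo 2^64). A small check:

#include <cstdint>
#include <cstdio>
#include <limits>

int main()
{
    int64_t size = std::numeric_limits<int64_t>::min();
    /// static_cast<size_t>(-size) would negate INT64_MIN as a signed value: undefined behaviour.
    size_t length = -static_cast<size_t>(size);   /// well defined: 9223372036854775808
    std::printf("%zu\n", length);
    return 0;
}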
|
||||
|
@ -135,15 +135,16 @@ struct LowerUpperUTF8Impl
|
||||
{
|
||||
static const Poco::UTF8Encoding utf8;
|
||||
|
||||
int src_sequence_length = UTF8::seqLength(*src);
|
||||
size_t src_sequence_length = UTF8::seqLength(*src);
|
||||
|
||||
int src_code_point = UTF8::queryConvert(src, src_end - src);
|
||||
if (src_code_point > 0)
|
||||
auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src);
|
||||
if (src_code_point)
|
||||
{
|
||||
int dst_code_point = to_case(src_code_point);
|
||||
int dst_code_point = to_case(*src_code_point);
|
||||
if (dst_code_point > 0)
|
||||
{
|
||||
int dst_sequence_length = UTF8::convert(dst_code_point, dst, src_end - src);
|
||||
size_t dst_sequence_length = UTF8::convertCodePointToUTF8(dst_code_point, dst, src_end - src);
|
||||
assert(dst_sequence_length <= 4);
|
||||
|
||||
/// We don't support cases when lowercase and uppercase characters occupy different number of bytes in UTF-8.
|
||||
/// As an example, this happens for ß and ẞ.
|
||||
@ -156,7 +157,9 @@ struct LowerUpperUTF8Impl
|
||||
}
|
||||
}
|
||||
|
||||
*dst++ = *src++;
|
||||
*dst = *src;
|
||||
++dst;
|
||||
++src;
|
||||
}
|
||||
}
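A worked example for the limitation mentioned in this hunk: U+00DF (ß) takes 2 bytes in UTF-8 while its uppercase counterpart U+1E9E (ẞ) takes 3, so an in-place case conversion that keeps src and dst offsets in lock-step cannot handle that pair. The length rule is easy to verify:

#include <cstdio>

/// Encoded length of a code point in UTF-8 (same rule the converters above follow).
constexpr int utf8Length(unsigned cp)
{
    return cp < 0x80 ? 1 : cp < 0x800 ? 2 : cp < 0x10000 ? 3 : 4;
}

static_assert(utf8Length(0x00DF) == 2);   /// ß
static_assert(utf8Length(0x1E9E) == 3);   /// ẞ

int main() { std::puts("ok"); return 0; }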
|
||||
|
||||
|
@ -168,7 +168,6 @@ namespace MultiRegexps
|
||||
hs_database_t * db = nullptr;
|
||||
hs_compile_error_t * compile_error;
|
||||
|
||||
|
||||
std::unique_ptr<unsigned int[]> ids;
|
||||
|
||||
/// We mark the patterns to provide the callback results.
|
||||
|
@ -37,7 +37,7 @@ struct BitAndImpl
|
||||
};
|
||||
|
||||
struct NameBitAnd { static constexpr auto name = "bitAnd"; };
|
||||
using FunctionBitAnd = BinaryArithmeticOverloadResolver<BitAndImpl, NameBitAnd, true>;
|
||||
using FunctionBitAnd = BinaryArithmeticOverloadResolver<BitAndImpl, NameBitAnd, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -36,7 +36,7 @@ struct BitOrImpl
|
||||
};
|
||||
|
||||
struct NameBitOr { static constexpr auto name = "bitOr"; };
|
||||
using FunctionBitOr = BinaryArithmeticOverloadResolver<BitOrImpl, NameBitOr, true>;
|
||||
using FunctionBitOr = BinaryArithmeticOverloadResolver<BitOrImpl, NameBitOr, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -43,7 +43,7 @@ struct BitRotateLeftImpl
|
||||
};
|
||||
|
||||
struct NameBitRotateLeft { static constexpr auto name = "bitRotateLeft"; };
|
||||
using FunctionBitRotateLeft = BinaryArithmeticOverloadResolver<BitRotateLeftImpl, NameBitRotateLeft>;
|
||||
using FunctionBitRotateLeft = BinaryArithmeticOverloadResolver<BitRotateLeftImpl, NameBitRotateLeft, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -42,7 +42,7 @@ struct BitRotateRightImpl
|
||||
};
|
||||
|
||||
struct NameBitRotateRight { static constexpr auto name = "bitRotateRight"; };
|
||||
using FunctionBitRotateRight = BinaryArithmeticOverloadResolver<BitRotateRightImpl, NameBitRotateRight>;
|
||||
using FunctionBitRotateRight = BinaryArithmeticOverloadResolver<BitRotateRightImpl, NameBitRotateRight, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -42,7 +42,7 @@ struct BitShiftLeftImpl
|
||||
};
|
||||
|
||||
struct NameBitShiftLeft { static constexpr auto name = "bitShiftLeft"; };
|
||||
using FunctionBitShiftLeft = BinaryArithmeticOverloadResolver<BitShiftLeftImpl, NameBitShiftLeft>;
|
||||
using FunctionBitShiftLeft = BinaryArithmeticOverloadResolver<BitShiftLeftImpl, NameBitShiftLeft, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -42,7 +42,7 @@ struct BitShiftRightImpl
|
||||
};
|
||||
|
||||
struct NameBitShiftRight { static constexpr auto name = "bitShiftRight"; };
|
||||
using FunctionBitShiftRight = BinaryArithmeticOverloadResolver<BitShiftRightImpl, NameBitShiftRight>;
|
||||
using FunctionBitShiftRight = BinaryArithmeticOverloadResolver<BitShiftRightImpl, NameBitShiftRight, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -34,7 +34,7 @@ struct BitTestImpl
|
||||
};
|
||||
|
||||
struct NameBitTest { static constexpr auto name = "bitTest"; };
|
||||
using FunctionBitTest = BinaryArithmeticOverloadResolver<BitTestImpl, NameBitTest>;
|
||||
using FunctionBitTest = BinaryArithmeticOverloadResolver<BitTestImpl, NameBitTest, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -36,7 +36,7 @@ struct BitXorImpl
|
||||
};
|
||||
|
||||
struct NameBitXor { static constexpr auto name = "bitXor"; };
|
||||
using FunctionBitXor = BinaryArithmeticOverloadResolver<BitXorImpl, NameBitXor, true>;
|
||||
using FunctionBitXor = BinaryArithmeticOverloadResolver<BitXorImpl, NameBitXor, true, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Common/NaNUtils.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <IO/WriteBufferFromVector.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
@ -134,6 +135,13 @@ public:
|
||||
/// Virtual call is Ok (negligible comparing to the rest of calculations).
|
||||
Float64 value = arguments[0].column->getFloat64(i);
|
||||
|
||||
if (!isFinite(value))
|
||||
{
|
||||
/// Cannot decide what unit it is (years, months), so just write inf or nan.
|
||||
writeFloatText(value, buf_to);
|
||||
}
|
||||
else
|
||||
{
|
||||
bool is_negative = value < 0;
|
||||
if (is_negative)
|
||||
{
|
||||
@ -153,6 +161,7 @@ public:
|
||||
case Minutes: processUnit(60, " minute", 7, value, buf_to, has_output); [[fallthrough]];
|
||||
case Seconds: processUnit(1, " second", 7, value, buf_to, has_output);
|
||||
}
|
||||
}
|
||||
|
||||
writeChar(0, buf_to);
|
||||
offsets_to[i] = buf_to.count();
|
||||
|
@ -23,7 +23,7 @@ struct GCDImpl : public GCDLCMImpl<A, B, GCDImpl<A, B>, NameGCD>
|
||||
}
|
||||
};
|
||||
|
||||
using FunctionGCD = BinaryArithmeticOverloadResolver<GCDImpl, NameGCD, false>;
|
||||
using FunctionGCD = BinaryArithmeticOverloadResolver<GCDImpl, NameGCD, false, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -95,7 +95,7 @@ void geodistInit()
|
||||
|
||||
sphere_metric_meters_lut[i] = static_cast<float>(sqr((EARTH_DIAMETER * PI / 360) * cos(latitude)));
|
||||
|
||||
sphere_metric_lut[i] = cosf(latitude);
|
||||
sphere_metric_lut[i] = sqrf(cosf(latitude));
|
||||
}
|
||||
}
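The LUT above caches the per-latitude factor for the equirectangular approximation: the squared angular distance is (Δlon · cos φ)² + Δlat², so the value multiplied with the squared longitude difference should be cos²φ, which is presumably why the entry becomes sqrf(cosf(latitude)). A stand-alone sketch of that approximation (degrees in, squared degrees out; illustrative only, not the table-driven code above):

#include <cmath>

float approxAngleSquaredDegrees(float lon1, float lat1, float lon2, float lat2)
{
    float lat_mid = (lat1 + lat2) * 0.5f * 3.14159265f / 180.0f;
    float k = std::cos(lat_mid);   /// the LUT caches k * k per latitude bucket
    float dlon = lon2 - lon1;
    float dlat = lat2 - lat1;
    return k * k * dlon * dlon + dlat * dlat;
}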
|
||||
|
||||
@ -182,7 +182,7 @@ float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg)
|
||||
/// (Remember how a plane flies from Moscow to New York)
|
||||
/// But if longitude is close but latitude is different enough, there is no difference between meridian and great circle line.
|
||||
|
||||
float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, KTABLE] indexes
|
||||
float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, METRIC_LUT_SIZE] indexes
|
||||
size_t latitude_midpoint_index = floatToIndex(latitude_midpoint) & (METRIC_LUT_SIZE - 1);
|
||||
|
||||
/// This is linear interpolation between two table items at index "latitude_midpoint_index" and "latitude_midpoint_index + 1".
|
||||
|
@ -54,7 +54,7 @@ struct LCMImpl : public GCDLCMImpl<A, B, LCMImpl<A, B>, NameLCM>
|
||||
}
|
||||
};
|
||||
|
||||
using FunctionLCM = BinaryArithmeticOverloadResolver<LCMImpl, NameLCM, false>;
|
||||
using FunctionLCM = BinaryArithmeticOverloadResolver<LCMImpl, NameLCM, false, false>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -119,8 +119,13 @@ public:
|
||||
UInt32 code_point2 = generate_code_point(rand >> 32);
|
||||
|
||||
/// We have padding in column buffers that we can overwrite.
|
||||
pos += UTF8::convert(code_point1, pos, sizeof(int));
|
||||
last_writen_bytes = UTF8::convert(code_point2, pos, sizeof(int));
|
||||
size_t length1 = UTF8::convertCodePointToUTF8(code_point1, pos, sizeof(int));
|
||||
assert(length1 <= 4);
|
||||
pos += length1;
|
||||
|
||||
size_t length2 = UTF8::convertCodePointToUTF8(code_point2, pos, sizeof(int));
|
||||
assert(length2 <= 4);
|
||||
last_writen_bytes = length2;
|
||||
pos += last_writen_bytes;
|
||||
}
|
||||
offset = pos - data_to.data() + 1;
|
||||
|
@ -683,7 +683,7 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV &
|
||||
|
||||
/** CSV format can contain insignificant spaces and tabs.
|
||||
* Usually the task of skipping them is for the calling code.
|
||||
* But in this case, it will be difficult to do this, so remove the trailing whitespace by yourself.
|
||||
* But in this case, it will be difficult to do this, so remove the trailing whitespace ourselves.
|
||||
*/
|
||||
size_t size = s.size();
|
||||
while (size > 0
|
||||
|
@ -69,6 +69,10 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion()
|
||||
boost::algorithm::to_lower(matched_region);
|
||||
region = matched_region;
|
||||
}
|
||||
else
|
||||
{
|
||||
region = Aws::Region::AWS_GLOBAL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -609,10 +609,10 @@ bool ActionsDAG::hasStatefulFunctions() const
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ActionsDAG::empty() const
|
||||
bool ActionsDAG::trivial() const
|
||||
{
|
||||
for (const auto & node : nodes)
|
||||
if (node.type != ActionType::INPUT)
|
||||
if (node.type == ActionType::FUNCTION || node.type == ActionType::ARRAY_JOIN)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
@ -223,7 +223,7 @@ public:
|
||||
|
||||
bool hasArrayJoin() const;
|
||||
bool hasStatefulFunctions() const;
|
||||
bool empty() const; /// If actions only contain inputs.
|
||||
bool trivial() const; /// If the actions have no functions or array joins.
|
||||
|
||||
const ActionsSettings & getSettings() const { return settings; }
|
||||
|
||||
|
@ -341,6 +341,7 @@ struct ContextShared
|
||||
mutable std::optional<BackgroundSchedulePool> buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables.
|
||||
mutable std::optional<BackgroundSchedulePool> schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables)
|
||||
mutable std::optional<BackgroundSchedulePool> distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends)
|
||||
mutable std::optional<BackgroundSchedulePool> message_broker_schedule_pool; /// A thread pool that can run different jobs in background (used for message brokers, like RabbitMQ and Kafka)
|
||||
MultiVersion<Macros> macros; /// Substitutions extracted from config.
|
||||
std::unique_ptr<DDLWorker> ddl_worker; /// Process ddl commands from zk.
|
||||
/// Rules for selecting the compression settings, depending on the size of the part.
|
||||
@ -439,6 +440,7 @@ struct ContextShared
|
||||
buffer_flush_schedule_pool.reset();
|
||||
schedule_pool.reset();
|
||||
distributed_schedule_pool.reset();
|
||||
message_broker_schedule_pool.reset();
|
||||
ddl_worker.reset();
|
||||
|
||||
/// Stop trace collector if any
|
||||
@ -1524,6 +1526,17 @@ BackgroundSchedulePool & Context::getDistributedSchedulePool() const
|
||||
return *shared->distributed_schedule_pool;
|
||||
}
|
||||
|
||||
BackgroundSchedulePool & Context::getMessageBrokerSchedulePool() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
if (!shared->message_broker_schedule_pool)
|
||||
shared->message_broker_schedule_pool.emplace(
|
||||
settings.background_message_broker_schedule_pool_size,
|
||||
CurrentMetrics::BackgroundDistributedSchedulePoolTask,
|
||||
"BgMsgBrkSchPool");
|
||||
return *shared->message_broker_schedule_pool;
|
||||
}
|
||||
|
||||
bool Context::hasDistributedDDL() const
|
||||
{
|
||||
return getConfigRef().has("distributed_ddl");
|
||||
|
@ -613,6 +613,7 @@ public:
|
||||
|
||||
BackgroundSchedulePool & getBufferFlushSchedulePool() const;
|
||||
BackgroundSchedulePool & getSchedulePool() const;
|
||||
BackgroundSchedulePool & getMessageBrokerSchedulePool() const;
|
||||
BackgroundSchedulePool & getDistributedSchedulePool() const;
|
||||
|
||||
/// Has distributed_ddl configuration or not.
|
||||
|
@ -26,11 +26,18 @@ void PredicateRewriteVisitorData::visit(ASTSelectWithUnionQuery & union_select_q
|
||||
{
|
||||
auto & internal_select_list = union_select_query.list_of_selects->children;
|
||||
|
||||
if (!internal_select_list.empty())
|
||||
for (size_t index = 0; index < internal_select_list.size(); ++index)
|
||||
{
|
||||
if (auto * child_union = internal_select_list[index]->as<ASTSelectWithUnionQuery>())
|
||||
visit(*child_union, internal_select_list[index]);
|
||||
else
|
||||
{
|
||||
if (index == 0)
|
||||
visitFirstInternalSelect(*internal_select_list[0]->as<ASTSelectQuery>(), internal_select_list[0]);
|
||||
|
||||
for (size_t index = 1; index < internal_select_list.size(); ++index)
|
||||
else
|
||||
visitOtherInternalSelect(*internal_select_list[index]->as<ASTSelectQuery>(), internal_select_list[index]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PredicateRewriteVisitorData::visitFirstInternalSelect(ASTSelectQuery & select_query, ASTPtr &)
|
||||
|
@ -269,6 +269,18 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat
|
||||
if (live_view_timeout)
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH TIMEOUT " << (settings.hilite ? hilite_none : "")
|
||||
<< *live_view_timeout;
|
||||
|
||||
if (live_view_periodic_refresh)
|
||||
{
|
||||
if (live_view_timeout)
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << " AND" << (settings.hilite ? hilite_none : "");
|
||||
else
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH" << (settings.hilite ? hilite_none : "");
|
||||
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << " PERIODIC REFRESH " << (settings.hilite ? hilite_none : "")
|
||||
<< *live_view_periodic_refresh;
|
||||
}
|
||||
|
||||
formatOnCluster(settings);
|
||||
}
|
||||
else
|
||||
|
@ -77,6 +77,8 @@ public:
|
||||
ASTDictionary * dictionary = nullptr; /// dictionary definition (layout, primary key, etc.)
|
||||
|
||||
std::optional<UInt64> live_view_timeout; /// For CREATE LIVE VIEW ... WITH TIMEOUT ...
|
||||
std::optional<UInt64> live_view_periodic_refresh; /// For CREATE LIVE VIEW ... WITH [PERIODIC] REFRESH ...
|
||||
|
||||
bool attach_short_syntax{false};
|
||||
|
||||
std::optional<String> attach_from_path = std::nullopt;
|
||||
|
@ -307,10 +307,11 @@ bool ASTSelectQuery::final() const
|
||||
|
||||
bool ASTSelectQuery::withFill() const
|
||||
{
|
||||
if (!orderBy())
|
||||
const ASTPtr order_by = orderBy();
|
||||
if (!order_by)
|
||||
return false;
|
||||
|
||||
for (const auto & order_expression_element : orderBy()->children)
|
||||
for (const auto & order_expression_element : order_by->children)
|
||||
if (order_expression_element->as<ASTOrderByElement &>().with_fill)
|
||||
return true;
|
||||
|
||||
|
@ -569,10 +569,14 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
|
||||
ASTPtr as_table;
|
||||
ASTPtr select;
|
||||
ASTPtr live_view_timeout;
|
||||
ASTPtr live_view_periodic_refresh;
|
||||
|
||||
String cluster_str;
|
||||
bool attach = false;
|
||||
bool if_not_exists = false;
|
||||
bool with_and = false;
|
||||
bool with_timeout = false;
|
||||
bool with_periodic_refresh = false;
|
||||
|
||||
if (!s_create.ignore(pos, expected))
|
||||
{
|
||||
@ -594,12 +598,37 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
|
||||
if (!table_name_p.parse(pos, table, expected))
|
||||
return false;
|
||||
|
||||
if (ParserKeyword{"WITH TIMEOUT"}.ignore(pos, expected))
|
||||
if (ParserKeyword{"WITH"}.ignore(pos, expected))
|
||||
{
|
||||
if (ParserKeyword{"TIMEOUT"}.ignore(pos, expected))
|
||||
{
|
||||
if (!ParserNumber{}.parse(pos, live_view_timeout, expected))
|
||||
{
|
||||
live_view_timeout = std::make_shared<ASTLiteral>(static_cast<UInt64>(DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC));
|
||||
}
|
||||
|
||||
/// Optional - AND
|
||||
if (ParserKeyword{"AND"}.ignore(pos, expected))
|
||||
with_and = true;
|
||||
|
||||
with_timeout = true;
|
||||
}
|
||||
|
||||
if (ParserKeyword{"REFRESH"}.ignore(pos, expected) || ParserKeyword{"PERIODIC REFRESH"}.ignore(pos, expected))
|
||||
{
|
||||
if (!ParserNumber{}.parse(pos, live_view_periodic_refresh, expected))
|
||||
live_view_periodic_refresh = std::make_shared<ASTLiteral>(static_cast<UInt64>(DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC));
|
||||
|
||||
with_periodic_refresh = true;
|
||||
}
|
||||
|
||||
else if (with_and)
|
||||
return false;
|
||||
|
||||
if (!with_timeout && !with_periodic_refresh)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ParserKeyword{"ON"}.ignore(pos, expected))
|
||||
{
|
||||
if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
|
||||
@ -656,6 +685,9 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
|
||||
if (live_view_timeout)
|
||||
query->live_view_timeout.emplace(live_view_timeout->as<ASTLiteral &>().value.safeGet<UInt64>());
|
||||
|
||||
if (live_view_periodic_refresh)
|
||||
query->live_view_periodic_refresh.emplace(live_view_periodic_refresh->as<ASTLiteral &>().value.safeGet<UInt64>());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -4,13 +4,14 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin, Token last)
|
||||
UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin)
|
||||
{
|
||||
/// We have just two kinds of parentheses: () and [].
|
||||
UnmatchedParentheses stack;
|
||||
|
||||
for (TokenIterator it = begin;
|
||||
it.isValid() && it->begin <= last.begin; ++it)
|
||||
/// We have to iterate through all tokens until the end to avoid false positive "Unmatched parentheses" error
|
||||
/// when the parser failed in the middle of the query.
|
||||
for (TokenIterator it = begin; it.isValid(); ++it)
|
||||
{
|
||||
if (it->type == TokenType::OpeningRoundBracket || it->type == TokenType::OpeningSquareBracket)
|
||||
{
|
||||
|
@ -80,6 +80,6 @@ public:
|
||||
|
||||
/// Returns positions of unmatched parentheses.
|
||||
using UnmatchedParentheses = std::vector<Token>;
|
||||
UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin, Token last);
|
||||
UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin);
|
||||
|
||||
}
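The checker above walks the token stream to its very end (hence the dropped `last` argument) and records every bracket left unbalanced. A minimal stand-alone version of the same stack discipline over a plain string, not the real TokenIterator:

#include <string>
#include <vector>

/// Push positions of opening brackets, pop on a matching closer,
/// and report every position left unbalanced.
std::vector<size_t> unmatchedPositions(const std::string & query)
{
    std::vector<size_t> stack;
    std::vector<size_t> result;
    for (size_t i = 0; i < query.size(); ++i)
    {
        char c = query[i];
        if (c == '(' || c == '[')
            stack.push_back(i);
        else if (c == ')' || c == ']')
        {
            bool matches = !stack.empty()
                && ((c == ')' && query[stack.back()] == '(') || (c == ']' && query[stack.back()] == '['));
            if (matches)
                stack.pop_back();
            else
                result.push_back(i);
        }
    }
    result.insert(result.end(), stack.begin(), stack.end());
    return result;
}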
|
||||
|
@ -79,7 +79,7 @@ void writeQueryWithHighlightedErrorPositions(
|
||||
{
|
||||
const char * current_position_to_hilite = positions_to_hilite[position_to_hilite_idx].begin;
|
||||
|
||||
assert(current_position_to_hilite < end);
|
||||
assert(current_position_to_hilite <= end);
|
||||
assert(current_position_to_hilite >= begin);
|
||||
|
||||
out.write(pos, current_position_to_hilite - pos);
|
||||
@ -290,7 +290,7 @@ ASTPtr tryParseQuery(
|
||||
}
|
||||
|
||||
/// Unmatched parentheses
|
||||
UnmatchedParentheses unmatched_parens = checkUnmatchedParentheses(TokenIterator(tokens), last_token);
|
||||
UnmatchedParentheses unmatched_parens = checkUnmatchedParentheses(TokenIterator(tokens));
|
||||
if (!unmatched_parens.empty())
|
||||
{
|
||||
out_error_message = getUnmatchedParenthesesErrorMessage(query_begin,
|
||||
|
56
src/Processors/QueryPlan/Optimizations/Optimizations.h
Normal file
@ -0,0 +1,56 @@
|
||||
#pragma once
|
||||
#include <Processors/QueryPlan/QueryPlan.h>
|
||||
#include <array>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace QueryPlanOptimizations
|
||||
{
|
||||
|
||||
/// This is the main function which optimizes the whole QueryPlan tree.
|
||||
void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes);
|
||||
|
||||
/// Optimization is a function applied to QueryPlan::Node.
|
||||
/// It can read and update subtree of specified node.
|
||||
/// It returns the number of updated layers of the subtree if some change happened.
|
||||
/// It must guarantee that the structure of tree is correct.
|
||||
///
|
||||
/// New nodes should be added to QueryPlan::Nodes list.
|
||||
/// It is not needed to remove old nodes from the list.
|
||||
struct Optimization
|
||||
{
|
||||
using Function = size_t (*)(QueryPlan::Node *, QueryPlan::Nodes &);
|
||||
const Function apply = nullptr;
|
||||
const char * name;
|
||||
};
|
||||
|
||||
/// Move ARRAY JOIN up if possible.
|
||||
size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes);
|
||||
|
||||
/// Move LimitStep down if possible.
|
||||
size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
|
||||
|
||||
/// Split FilterStep into chain `ExpressionStep -> FilterStep`, where FilterStep contains minimal number of nodes.
|
||||
size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes);
|
||||
|
||||
/// Replace chain `ExpressionStep -> ExpressionStep` to single ExpressionStep
|
||||
/// Replace chain `FilterStep -> ExpressionStep` to single FilterStep
|
||||
size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
|
||||
|
||||
inline const auto & getOptimizations()
|
||||
{
|
||||
static const std::array<Optimization, 4> optimizations =
|
||||
{{
|
||||
{tryLiftUpArrayJoin, "liftUpArrayJoin"},
|
||||
{tryPushDownLimit, "pushDownLimit"},
|
||||
{trySplitFilter, "splitFilter"},
|
||||
{tryMergeExpressions, "mergeExpressions"},
|
||||
}};
|
||||
|
||||
return optimizations;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
85
src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
Normal file
@ -0,0 +1,85 @@
|
||||
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
|
||||
#include <Processors/QueryPlan/FilterStep.h>
|
||||
#include <Processors/QueryPlan/ExpressionStep.h>
|
||||
#include <Processors/QueryPlan/ArrayJoinStep.h>
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
#include <Interpreters/ArrayJoinAction.h>
|
||||
|
||||
namespace DB::QueryPlanOptimizations
|
||||
{
|
||||
|
||||
size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
|
||||
{
|
||||
if (parent_node->children.size() != 1)
|
||||
return 0;
|
||||
|
||||
QueryPlan::Node * child_node = parent_node->children.front();
|
||||
|
||||
auto & parent = parent_node->step;
|
||||
auto & child = child_node->step;
|
||||
auto * expression_step = typeid_cast<ExpressionStep *>(parent.get());
|
||||
auto * filter_step = typeid_cast<FilterStep *>(parent.get());
|
||||
auto * array_join_step = typeid_cast<ArrayJoinStep *>(child.get());
|
||||
|
||||
if (!(expression_step || filter_step) || !array_join_step)
|
||||
return 0;
|
||||
|
||||
const auto & array_join = array_join_step->arrayJoin();
|
||||
const auto & expression = expression_step ? expression_step->getExpression()
|
||||
: filter_step->getExpression();
|
||||
|
||||
auto split_actions = expression->splitActionsBeforeArrayJoin(array_join->columns);
|
||||
|
||||
/// No actions can be moved before ARRAY JOIN.
|
||||
if (split_actions.first->trivial())
|
||||
return 0;
|
||||
|
||||
auto description = parent->getStepDescription();
|
||||
|
||||
/// All actions were moved before ARRAY JOIN. Swap Expression and ArrayJoin.
|
||||
if (split_actions.second->trivial())
|
||||
{
|
||||
auto expected_header = parent->getOutputStream().header;
|
||||
|
||||
/// Expression/Filter -> ArrayJoin
|
||||
std::swap(parent, child);
|
||||
/// ArrayJoin -> Expression/Filter
|
||||
|
||||
if (expression_step)
|
||||
child = std::make_unique<ExpressionStep>(child_node->children.at(0)->step->getOutputStream(),
|
||||
std::move(split_actions.first));
|
||||
else
|
||||
child = std::make_unique<FilterStep>(child_node->children.at(0)->step->getOutputStream(),
|
||||
std::move(split_actions.first),
|
||||
filter_step->getFilterColumnName(),
|
||||
filter_step->removesFilterColumn());
|
||||
|
||||
child->setStepDescription(std::move(description));
|
||||
|
||||
array_join_step->updateInputStream(child->getOutputStream(), expected_header);
|
||||
return 2;
|
||||
}
|
||||
|
||||
/// Add new expression step before ARRAY JOIN.
|
||||
/// Expression/Filter -> ArrayJoin -> Something
|
||||
auto & node = nodes.emplace_back();
|
||||
node.children.swap(child_node->children);
|
||||
child_node->children.emplace_back(&node);
|
||||
/// Expression/Filter -> ArrayJoin -> node -> Something
|
||||
|
||||
node.step = std::make_unique<ExpressionStep>(node.children.at(0)->step->getOutputStream(),
|
||||
std::move(split_actions.first));
|
||||
node.step->setStepDescription(description);
|
||||
array_join_step->updateInputStream(node.step->getOutputStream(), {});
|
||||
|
||||
if (expression_step)
|
||||
parent = std::make_unique<ExpressionStep>(array_join_step->getOutputStream(), split_actions.second);
|
||||
else
|
||||
parent = std::make_unique<FilterStep>(array_join_step->getOutputStream(), split_actions.second,
|
||||
filter_step->getFilterColumnName(), filter_step->removesFilterColumn());
|
||||
|
||||
parent->setStepDescription(description + " [split]");
|
||||
return 3;
|
||||
}
|
||||
|
||||
}
|
114
src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
Normal file
@ -0,0 +1,114 @@
|
||||
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
|
||||
#include <Processors/QueryPlan/ITransformingStep.h>
|
||||
#include <Processors/QueryPlan/LimitStep.h>
|
||||
#include <Processors/QueryPlan/TotalsHavingStep.h>
|
||||
#include <Processors/QueryPlan/MergingSortedStep.h>
|
||||
#include <Processors/QueryPlan/FinishSortingStep.h>
|
||||
#include <Processors/QueryPlan/MergeSortingStep.h>
|
||||
#include <Processors/QueryPlan/PartialSortingStep.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
namespace DB::QueryPlanOptimizations
|
||||
{
|
||||
|
||||
/// If plan looks like Limit -> Sorting, update limit for Sorting
|
||||
static bool tryUpdateLimitForSortingSteps(QueryPlan::Node * node, size_t limit)
|
||||
{
|
||||
if (limit == 0)
|
||||
return false;
|
||||
|
||||
QueryPlanStepPtr & step = node->step;
|
||||
QueryPlan::Node * child = nullptr;
|
||||
bool updated = false;
|
||||
|
||||
if (auto * merging_sorted = typeid_cast<MergingSortedStep *>(step.get()))
|
||||
{
|
||||
/// TODO: remove LimitStep here.
|
||||
merging_sorted->updateLimit(limit);
|
||||
updated = true;
|
||||
child = node->children.front();
|
||||
}
|
||||
else if (auto * finish_sorting = typeid_cast<FinishSortingStep *>(step.get()))
|
||||
{
|
||||
/// TODO: remove LimitStep here.
|
||||
finish_sorting->updateLimit(limit);
|
||||
updated = true;
|
||||
}
|
||||
else if (auto * merge_sorting = typeid_cast<MergeSortingStep *>(step.get()))
|
||||
{
|
||||
merge_sorting->updateLimit(limit);
|
||||
updated = true;
|
||||
child = node->children.front();
|
||||
}
|
||||
else if (auto * partial_sorting = typeid_cast<PartialSortingStep *>(step.get()))
|
||||
{
|
||||
partial_sorting->updateLimit(limit);
|
||||
updated = true;
|
||||
}
|
||||
|
||||
/// We often have chain PartialSorting -> MergeSorting -> MergingSorted
|
||||
/// Try to update the limit for them as well, if possible.
|
||||
if (child)
|
||||
tryUpdateLimitForSortingSteps(child, limit);
|
||||
|
||||
return updated;
|
||||
}
|
||||
|
||||
size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
|
||||
{
|
||||
if (parent_node->children.size() != 1)
|
||||
return 0;
|
||||
|
||||
QueryPlan::Node * child_node = parent_node->children.front();
|
||||
|
||||
auto & parent = parent_node->step;
|
||||
auto & child = child_node->step;
|
||||
auto * limit = typeid_cast<LimitStep *>(parent.get());
|
||||
|
||||
if (!limit)
|
||||
return 0;
|
||||
|
||||
/// Skip LIMIT WITH TIES for now.
|
||||
if (limit->withTies())
|
||||
return 0;
|
||||
|
||||
const auto * transforming = dynamic_cast<const ITransformingStep *>(child.get());
|
||||
|
||||
/// Skip everything which is not transform.
|
||||
if (!transforming)
|
||||
return 0;
|
||||
|
||||
/// Special cases for sorting steps.
|
||||
if (tryUpdateLimitForSortingSteps(child_node, limit->getLimitForSorting()))
|
||||
return 0;
|
||||
|
||||
/// Special case for TotalsHaving. Totals may be incorrect if we push down limit.
|
||||
if (typeid_cast<const TotalsHavingStep *>(child.get()))
|
||||
return 0;
|
||||
|
||||
/// Now we should decide if pushing down limit possible for this step.
|
||||
|
||||
const auto & transform_traits = transforming->getTransformTraits();
|
||||
const auto & data_stream_traits = transforming->getDataStreamTraits();
|
||||
|
||||
/// Cannot push down if child changes the number of rows.
|
||||
if (!transform_traits.preserves_number_of_rows)
|
||||
return 0;
|
||||
|
||||
/// Cannot push down if data was sorted exactly by child stream.
|
||||
if (!child->getOutputStream().sort_description.empty() && !data_stream_traits.preserves_sorting)
|
||||
return 0;
|
||||
|
||||
/// Now we push down limit only if it doesn't change any stream properties.
|
||||
/// TODO: some of them may be changed and, probably, not important for following streams. We may add such info.
|
||||
if (!limit->getOutputStream().hasEqualPropertiesWith(transforming->getOutputStream()))
|
||||
return 0;
|
||||
|
||||
/// Input stream for Limit has changed.
|
||||
limit->updateInputStream(transforming->getInputStreams().front());
|
||||
|
||||
parent.swap(child);
|
||||
return 2;
|
||||
}
|
||||
|
||||
}
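Both new passes follow the contract declared in Optimizations.h: a plain function that takes a node plus the arena for new nodes and returns how many layers of the subtree were rewritten, registered by listing it in the array returned by getOptimizations(). A self-contained mirror of that registration pattern with toy stand-ins (Node, the pass names and bodies below are illustrative, not part of the commit):

#include <array>
#include <cstdio>
#include <vector>

/// Toy stand-ins for QueryPlan::Node and QueryPlan::Nodes.
struct Node { std::vector<Node *> children; };
using Nodes = std::vector<Node>;

/// Same shape as the Optimization struct in Optimizations.h above.
struct Optimization
{
    using Function = size_t (*)(Node *, Nodes &);
    Function apply = nullptr;
    const char * name;
};

size_t tryDoNothing(Node *, Nodes &) { return 0; }        /// a pass that never fires

size_t tryCollapseSingleChild(Node * node, Nodes &)       /// hypothetical extra pass
{
    if (node->children.size() != 1)
        return 0;
    /// a real pass would rewrite the subtree here and return the number of updated layers
    return 0;
}

const std::array<Optimization, 2> & getOptimizations()
{
    static const std::array<Optimization, 2> optimizations = {{
        {tryDoNothing, "doNothing"},
        {tryCollapseSingleChild, "collapseSingleChild"},
    }};
    return optimizations;
}

int main()
{
    Node root;
    Nodes arena;
    for (const auto & opt : getOptimizations())
        std::printf("%s -> %zu\n", opt.name, opt.apply(&root, arena));
    return 0;
}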
|
65
src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp
Normal file
@ -0,0 +1,65 @@
|
||||
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
|
||||
#include <Processors/QueryPlan/FilterStep.h>
|
||||
#include <Processors/QueryPlan/ExpressionStep.h>
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
|
||||
namespace DB::QueryPlanOptimizations
|
||||
{
|
||||
|
||||
size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
|
||||
{
|
||||
if (parent_node->children.size() != 1)
|
||||
return false;
|
||||
|
||||
QueryPlan::Node * child_node = parent_node->children.front();
|
||||
|
||||
auto & parent = parent_node->step;
|
||||
auto & child = child_node->step;
|
||||
|
||||
auto * parent_expr = typeid_cast<ExpressionStep *>(parent.get());
|
||||
auto * parent_filter = typeid_cast<FilterStep *>(parent.get());
|
||||
auto * child_expr = typeid_cast<ExpressionStep *>(child.get());
|
||||
|
||||
if (parent_expr && child_expr)
|
||||
{
|
||||
const auto & child_actions = child_expr->getExpression();
|
||||
const auto & parent_actions = parent_expr->getExpression();
|
||||
|
||||
/// We cannot combine actions with arrayJoin and a stateful function because we cannot always reorder them.
|
||||
/// Example: select rowNumberInBlock() from (select arrayJoin([1, 2]))
|
||||
/// Such a query will return two zeroes if we combine actions together.
|
||||
if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
|
||||
return 0;
|
||||
|
||||
auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
|
||||
|
||||
auto expr = std::make_unique<ExpressionStep>(child_expr->getInputStreams().front(), merged);
|
||||
expr->setStepDescription("(" + parent_expr->getStepDescription() + " + " + child_expr->getStepDescription() + ")");
|
||||
|
||||
parent_node->step = std::move(expr);
|
||||
parent_node->children.swap(child_node->children);
|
||||
return 1;
|
||||
}
|
||||
else if (parent_filter && child_expr)
|
||||
{
|
||||
const auto & child_actions = child_expr->getExpression();
|
||||
const auto & parent_actions = parent_filter->getExpression();
|
||||
|
||||
if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
|
||||
return 0;
|
||||
|
||||
auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
|
||||
|
||||
auto filter = std::make_unique<FilterStep>(child_expr->getInputStreams().front(), merged,
|
||||
parent_filter->getFilterColumnName(), parent_filter->removesFilterColumn());
|
||||
filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_expr->getStepDescription() + ")");
|
||||
|
||||
parent_node->step = std::move(filter);
|
||||
parent_node->children.swap(child_node->children);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
75
src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
Normal file
@ -0,0 +1,75 @@
|
||||
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
|
||||
#include <stack>
|
||||
|
||||
namespace DB::QueryPlanOptimizations
|
||||
{
|
||||
|
||||
void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
|
||||
{
|
||||
const auto & optimizations = getOptimizations();
|
||||
|
||||
struct Frame
|
||||
{
|
||||
QueryPlan::Node * node;
|
||||
|
||||
/// If not zero, traverse only depth_limit layers of tree (if no other optimizations happen).
|
||||
/// Otherwise, traverse all children.
|
||||
size_t depth_limit = 0;
|
||||
|
||||
/// Next child to process.
|
||||
size_t next_child = 0;
|
||||
};
|
||||
|
||||
std::stack<Frame> stack;
|
||||
stack.push(Frame{.node = &root});
|
||||
|
||||
while (!stack.empty())
|
||||
{
|
||||
auto & frame = stack.top();
|
||||
|
||||
/// If depth_limit == 0, then traverse without limit (first entrance).
|
||||
/// If depth_limit > 1, then traverse with (depth_limit - 1).
|
||||
if (frame.depth_limit != 1)
|
||||
{
|
||||
/// Traverse all children first.
|
||||
if (frame.next_child < frame.node->children.size())
|
||||
{
|
||||
stack.push(Frame
|
||||
{
|
||||
.node = frame.node->children[frame.next_child],
|
||||
.depth_limit = frame.depth_limit ? (frame.depth_limit - 1) : 0,
|
||||
});
|
||||
|
||||
++frame.next_child;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
size_t max_update_depth = 0;
|
||||
|
||||
/// Apply all optimizations.
|
||||
for (const auto & optimization : optimizations)
|
||||
{
|
||||
/// Just in case, skip optimization if it is not initialized.
|
||||
if (!optimization.apply)
|
||||
continue;
|
||||
|
||||
/// Try to apply optimization.
|
||||
auto update_depth = optimization.apply(frame.node, nodes);
|
||||
max_update_depth = std::max<size_t>(max_update_depth, update_depth);
|
||||
}
|
||||
|
||||
/// Traverse `max_update_depth` layers of tree again.
|
||||
if (max_update_depth)
|
||||
{
|
||||
frame.depth_limit = max_update_depth;
|
||||
frame.next_child = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
/// Nothing was applied.
|
||||
stack.pop();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
50
src/Processors/QueryPlan/Optimizations/splitFilter.cpp
Normal file
@ -0,0 +1,50 @@
|
||||
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
|
||||
#include <Processors/QueryPlan/FilterStep.h>
|
||||
#include <Processors/QueryPlan/ExpressionStep.h>
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
|
||||
namespace DB::QueryPlanOptimizations
|
||||
{
|
||||
|
||||
/// Split FilterStep into chain `ExpressionStep -> FilterStep`, where FilterStep contains minimal number of nodes.
|
||||
size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
|
||||
{
|
||||
auto * filter_step = typeid_cast<FilterStep *>(node->step.get());
|
||||
if (!filter_step)
|
||||
return 0;
|
||||
|
||||
const auto & expr = filter_step->getExpression();
|
||||
|
||||
/// Do not split if there are functions like runningDifference.
|
||||
if (expr->hasStatefulFunctions())
|
||||
return 0;
|
||||
|
||||
auto split = expr->splitActionsForFilter(filter_step->getFilterColumnName());
|
||||
|
||||
if (split.second->trivial())
|
||||
return 0;
|
||||
|
||||
if (filter_step->removesFilterColumn())
|
||||
split.second->removeUnusedInput(filter_step->getFilterColumnName());
|
||||
|
||||
auto description = filter_step->getStepDescription();
|
||||
|
||||
auto & filter_node = nodes.emplace_back();
|
||||
node->children.swap(filter_node.children);
|
||||
node->children.push_back(&filter_node);
|
||||
|
||||
filter_node.step = std::make_unique<FilterStep>(
|
||||
filter_node.children.at(0)->step->getOutputStream(),
|
||||
std::move(split.first),
|
||||
filter_step->getFilterColumnName(),
|
||||
filter_step->removesFilterColumn());
|
||||
|
||||
node->step = std::make_unique<ExpressionStep>(filter_node.step->getOutputStream(), std::move(split.second));
|
||||
|
||||
filter_node.step->setStepDescription("(" + description + ")[split]");
|
||||
node->step->setStepDescription(description);
|
||||
|
||||
return 2;
|
||||
}
|
||||
|
||||
}
|
@ -6,15 +6,7 @@
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
#include <Interpreters/ArrayJoinAction.h>
|
||||
#include <stack>
|
||||
#include <Processors/QueryPlan/LimitStep.h>
|
||||
#include "MergingSortedStep.h"
|
||||
#include "FinishSortingStep.h"
|
||||
#include "MergeSortingStep.h"
|
||||
#include "PartialSortingStep.h"
|
||||
#include "TotalsHavingStep.h"
|
||||
#include "ExpressionStep.h"
|
||||
#include "ArrayJoinStep.h"
|
||||
#include "FilterStep.h"
|
||||
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -341,318 +333,9 @@ void QueryPlan::explainPipeline(WriteBuffer & buffer, const ExplainPipelineOptio
|
||||
}
|
||||
}
|
||||
|
||||
/// If plan looks like Limit -> Sorting, update limit for Sorting
|
||||
bool tryUpdateLimitForSortingSteps(QueryPlan::Node * node, size_t limit)
|
||||
{
|
||||
if (limit == 0)
|
||||
return false;
|
||||
|
||||
QueryPlanStepPtr & step = node->step;
|
||||
QueryPlan::Node * child = nullptr;
|
||||
bool updated = false;
|
||||
|
||||
if (auto * merging_sorted = typeid_cast<MergingSortedStep *>(step.get()))
|
||||
{
|
||||
/// TODO: remove LimitStep here.
|
||||
merging_sorted->updateLimit(limit);
|
||||
updated = true;
|
||||
child = node->children.front();
|
||||
}
|
||||
else if (auto * finish_sorting = typeid_cast<FinishSortingStep *>(step.get()))
|
||||
{
|
||||
/// TODO: remove LimitStep here.
|
||||
finish_sorting->updateLimit(limit);
|
||||
updated = true;
|
||||
}
|
||||
else if (auto * merge_sorting = typeid_cast<MergeSortingStep *>(step.get()))
|
||||
{
|
||||
merge_sorting->updateLimit(limit);
|
||||
updated = true;
|
||||
child = node->children.front();
|
||||
}
|
||||
else if (auto * partial_sorting = typeid_cast<PartialSortingStep *>(step.get()))
|
||||
{
|
||||
partial_sorting->updateLimit(limit);
|
||||
updated = true;
|
||||
}
|
||||
|
||||
/// We often have the chain PartialSorting -> MergeSorting -> MergingSorted.
/// Try to update the limit for them as well, if possible.
|
||||
if (child)
|
||||
tryUpdateLimitForSortingSteps(child, limit);
|
||||
|
||||
return updated;
|
||||
}
|
||||
|
||||
/// Move LimitStep down if possible.
|
||||
static void tryPushDownLimit(QueryPlanStepPtr & parent, QueryPlan::Node * child_node)
|
||||
{
|
||||
auto & child = child_node->step;
|
||||
auto * limit = typeid_cast<LimitStep *>(parent.get());
|
||||
|
||||
if (!limit)
|
||||
return;
|
||||
|
||||
/// Skip LIMIT WITH TIES for now.
|
||||
if (limit->withTies())
|
||||
return;
|
||||
|
||||
const auto * transforming = dynamic_cast<const ITransformingStep *>(child.get());
|
||||
|
||||
/// Skip everything which is not a transforming step.
|
||||
if (!transforming)
|
||||
return;
|
||||
|
||||
/// Special cases for sorting steps.
|
||||
if (tryUpdateLimitForSortingSteps(child_node, limit->getLimitForSorting()))
|
||||
return;
|
||||
|
||||
/// Special case for TotalsHaving. Totals may be incorrect if we push down limit.
|
||||
if (typeid_cast<const TotalsHavingStep *>(child.get()))
|
||||
return;
|
||||
|
||||
/// Now we should decide if pushing down limit possible for this step.
|
||||
|
||||
const auto & transform_traits = transforming->getTransformTraits();
|
||||
const auto & data_stream_traits = transforming->getDataStreamTraits();
|
||||
|
||||
/// Cannot push down if child changes the number of rows.
|
||||
if (!transform_traits.preserves_number_of_rows)
|
||||
return;
|
||||
|
||||
/// Cannot push down if data was sorted exactly by child stream.
|
||||
if (!child->getOutputStream().sort_description.empty() && !data_stream_traits.preserves_sorting)
|
||||
return;
|
||||
|
||||
/// Now we push down limit only if it doesn't change any stream properties.
|
||||
/// TODO: some of them may be changed and are probably not important for the following streams. We may add such info.
|
||||
if (!limit->getOutputStream().hasEqualPropertiesWith(transforming->getOutputStream()))
|
||||
return;
|
||||
|
||||
/// Input stream for Limit has changed.
|
||||
limit->updateInputStream(transforming->getInputStreams().front());
|
||||
|
||||
parent.swap(child);
|
||||
}
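A small sketch of the two trait checks that gate the push-down above (the struct and function names are illustrative stand-ins, not the real ITransformingStep interfaces):

#include <iostream>

struct TransformTraits
{
    bool preserves_number_of_rows = false;
};

struct DataStreamTraits
{
    bool preserves_sorting = false;
};

static bool canPushLimitBelow(const TransformTraits & transform, const DataStreamTraits & stream, bool output_is_sorted)
{
    /// Cannot push down if the child changes the number of rows.
    if (!transform.preserves_number_of_rows)
        return false;

    /// Cannot push down if the data was sorted by the child step and that order is not preserved.
    if (output_is_sorted && !stream.preserves_sorting)
        return false;

    return true;
}

int main()
{
    std::cout << canPushLimitBelow({.preserves_number_of_rows = true}, {.preserves_sorting = true}, true) << '\n';   /// 1
    std::cout << canPushLimitBelow({.preserves_number_of_rows = false}, {.preserves_sorting = true}, false) << '\n'; /// 0
    return 0;
}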
|
||||
|
||||
/// Move ARRAY JOIN up if possible.
|
||||
static void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * child_node, QueryPlan::Nodes & nodes)
|
||||
{
|
||||
auto & parent = parent_node->step;
|
||||
auto & child = child_node->step;
|
||||
auto * expression_step = typeid_cast<ExpressionStep *>(parent.get());
|
||||
auto * filter_step = typeid_cast<FilterStep *>(parent.get());
|
||||
auto * array_join_step = typeid_cast<ArrayJoinStep *>(child.get());
|
||||
|
||||
if (!(expression_step || filter_step) || !array_join_step)
|
||||
return;
|
||||
|
||||
const auto & array_join = array_join_step->arrayJoin();
|
||||
const auto & expression = expression_step ? expression_step->getExpression()
|
||||
: filter_step->getExpression();
|
||||
|
||||
auto split_actions = expression->splitActionsBeforeArrayJoin(array_join->columns);
|
||||
|
||||
/// No actions can be moved before ARRAY JOIN.
|
||||
if (split_actions.first->empty())
|
||||
return;
|
||||
|
||||
auto description = parent->getStepDescription();
|
||||
|
||||
/// All actions were moved before ARRAY JOIN. Swap Expression and ArrayJoin.
|
||||
if (split_actions.second->empty())
|
||||
{
|
||||
auto expected_header = parent->getOutputStream().header;
|
||||
|
||||
/// Expression/Filter -> ArrayJoin
|
||||
std::swap(parent, child);
|
||||
/// ArrayJoin -> Expression/Filter
|
||||
|
||||
if (expression_step)
|
||||
child = std::make_unique<ExpressionStep>(child_node->children.at(0)->step->getOutputStream(),
|
||||
std::move(split_actions.first));
|
||||
else
|
||||
child = std::make_unique<FilterStep>(child_node->children.at(0)->step->getOutputStream(),
|
||||
std::move(split_actions.first),
|
||||
filter_step->getFilterColumnName(),
|
||||
filter_step->removesFilterColumn());
|
||||
|
||||
child->setStepDescription(std::move(description));
|
||||
|
||||
array_join_step->updateInputStream(child->getOutputStream(), expected_header);
|
||||
return;
|
||||
}
|
||||
|
||||
/// Add new expression step before ARRAY JOIN.
|
||||
/// Expression/Filter -> ArrayJoin -> Something
|
||||
auto & node = nodes.emplace_back();
|
||||
node.children.swap(child_node->children);
|
||||
child_node->children.emplace_back(&node);
|
||||
/// Expression/Filter -> ArrayJoin -> node -> Something
|
||||
|
||||
node.step = std::make_unique<ExpressionStep>(node.children.at(0)->step->getOutputStream(),
|
||||
std::move(split_actions.first));
|
||||
node.step->setStepDescription(description);
|
||||
array_join_step->updateInputStream(node.step->getOutputStream(), {});
|
||||
|
||||
if (expression_step)
|
||||
parent = std::make_unique<ExpressionStep>(array_join_step->getOutputStream(), split_actions.second);
|
||||
else
|
||||
parent = std::make_unique<FilterStep>(array_join_step->getOutputStream(), split_actions.second,
|
||||
filter_step->getFilterColumnName(), filter_step->removesFilterColumn());
|
||||
|
||||
parent->setStepDescription(description + " [split]");
|
||||
}
|
||||
|
||||
/// Replace the chain `ExpressionStep -> ExpressionStep` with a single ExpressionStep
/// Replace the chain `FilterStep -> ExpressionStep` with a single FilterStep
|
||||
static bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Node * child_node)
|
||||
{
|
||||
auto & parent = parent_node->step;
|
||||
auto & child = child_node->step;
|
||||
|
||||
auto * parent_expr = typeid_cast<ExpressionStep *>(parent.get());
|
||||
auto * parent_filter = typeid_cast<FilterStep *>(parent.get());
|
||||
auto * child_expr = typeid_cast<ExpressionStep *>(child.get());
|
||||
|
||||
if (parent_expr && child_expr)
|
||||
{
|
||||
const auto & child_actions = child_expr->getExpression();
|
||||
const auto & parent_actions = parent_expr->getExpression();
|
||||
|
||||
/// We cannot combine actions with arrayJoin and stateful functions because we cannot always reorder them.
|
||||
/// Example: select rowNumberInBlock() from (select arrayJoin([1, 2]))
|
||||
/// Such a query will return two zeroes if we combine actions together.
|
||||
if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
|
||||
return false;
|
||||
|
||||
auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
|
||||
|
||||
auto expr = std::make_unique<ExpressionStep>(child_expr->getInputStreams().front(), merged);
|
||||
expr->setStepDescription("(" + parent_expr->getStepDescription() + " + " + child_expr->getStepDescription() + ")");
|
||||
|
||||
parent_node->step = std::move(expr);
|
||||
parent_node->children.swap(child_node->children);
|
||||
return true;
|
||||
}
|
||||
else if (parent_filter && child_expr)
|
||||
{
|
||||
const auto & child_actions = child_expr->getExpression();
|
||||
const auto & parent_actions = parent_filter->getExpression();
|
||||
|
||||
if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
|
||||
return false;
|
||||
|
||||
auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
|
||||
|
||||
auto filter = std::make_unique<FilterStep>(child_expr->getInputStreams().front(), merged,
|
||||
parent_filter->getFilterColumnName(), parent_filter->removesFilterColumn());
|
||||
filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_expr->getStepDescription() + ")");
|
||||
|
||||
parent_node->step = std::move(filter);
|
||||
parent_node->children.swap(child_node->children);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
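A toy demonstration of why the merge is refused when the child has arrayJoin and the parent has a stateful function, mirroring the rowNumberInBlock example in the comment (the "functions" below are hand-rolled stand-ins, not ClickHouse functions):

#include <iostream>
#include <vector>

// SELECT rowNumberInBlock() FROM (SELECT arrayJoin([1, 2]))
// Applying the stateful row counter AFTER the array join yields 0, 1.
// If the two expressions were merged into one pass over the source rows,
// the counter would advance once per source row and yield 0, 0.
int main()
{
    std::vector<int> source_rows = {42}; /// one source row producing the array [1, 2]

    /// Separate steps: arrayJoin first, then a stateful row counter over the joined rows.
    std::vector<int> joined;
    for (int row : source_rows)
    {
        (void)row;
        joined.push_back(1);
        joined.push_back(2);
    }
    std::cout << "separate steps:";
    for (size_t i = 0; i < joined.size(); ++i)
        std::cout << ' ' << i;                               /// 0 1
    std::cout << '\n';

    /// Merged into a single pass: the counter is evaluated per source row.
    std::cout << "merged pass:   ";
    size_t row_number = 0;
    for (int row : source_rows)
    {
        (void)row;
        std::cout << ' ' << row_number << ' ' << row_number; /// 0 0
        ++row_number;
    }
    std::cout << '\n';
    return 0;
}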
|
||||
|
||||
/// Split FilterStep into chain `ExpressionStep -> FilterStep`, where the FilterStep contains the minimal number of nodes.
|
||||
static bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
|
||||
{
|
||||
auto * filter_step = typeid_cast<FilterStep *>(node->step.get());
|
||||
if (!filter_step)
|
||||
return false;
|
||||
|
||||
const auto & expr = filter_step->getExpression();
|
||||
|
||||
/// Do not split if there are functions like runningDifference.
|
||||
if (expr->hasStatefulFunctions())
|
||||
return false;
|
||||
|
||||
auto split = expr->splitActionsForFilter(filter_step->getFilterColumnName());
|
||||
|
||||
if (split.second->empty())
|
||||
return false;
|
||||
|
||||
if (filter_step->removesFilterColumn())
|
||||
split.second->removeUnusedInput(filter_step->getFilterColumnName());
|
||||
|
||||
auto description = filter_step->getStepDescription();
|
||||
|
||||
auto & filter_node = nodes.emplace_back();
|
||||
node->children.swap(filter_node.children);
|
||||
node->children.push_back(&filter_node);
|
||||
|
||||
filter_node.step = std::make_unique<FilterStep>(
|
||||
filter_node.children.at(0)->step->getOutputStream(),
|
||||
std::move(split.first),
|
||||
filter_step->getFilterColumnName(),
|
||||
filter_step->removesFilterColumn());
|
||||
|
||||
node->step = std::make_unique<ExpressionStep>(filter_node.step->getOutputStream(), std::move(split.second));
|
||||
|
||||
filter_node.step->setStepDescription("(" + description + ")[split]");
|
||||
node->step->setStepDescription(description);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void QueryPlan::optimize()
|
||||
{
|
||||
/* Stack contains info for every node on the path from the tree root to the current node.
* Every optimization changes only the current node and its children.
* An optimization may change a QueryPlanStep, but not a QueryPlan::Node (it can only add new ones).
* So, QueryPlan::Node::children will always be valid.
|
||||
*/
|
||||
|
||||
struct Frame
|
||||
{
|
||||
Node * node;
|
||||
size_t next_child = 0;
|
||||
};
|
||||
|
||||
std::stack<Frame> stack;
|
||||
stack.push(Frame{.node = root});
|
||||
|
||||
while (!stack.empty())
|
||||
{
|
||||
auto & frame = stack.top();
|
||||
|
||||
if (frame.next_child == 0)
|
||||
{
|
||||
if (frame.node->children.size() == 1)
|
||||
{
|
||||
tryPushDownLimit(frame.node->step, frame.node->children.front());
|
||||
|
||||
while (tryMergeExpressions(frame.node, frame.node->children.front()));
|
||||
|
||||
if (frame.node->children.size() == 1)
|
||||
tryLiftUpArrayJoin(frame.node, frame.node->children.front(), nodes);
|
||||
|
||||
trySplitFilter(frame.node, nodes);
|
||||
}
|
||||
}
|
||||
|
||||
if (frame.next_child < frame.node->children.size())
|
||||
{
|
||||
stack.push(Frame{frame.node->children[frame.next_child]});
|
||||
++frame.next_child;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (frame.node->children.size() == 1)
|
||||
{
|
||||
while (tryMergeExpressions(frame.node, frame.node->children.front()));
|
||||
|
||||
trySplitFilter(frame.node, nodes);
|
||||
|
||||
tryLiftUpArrayJoin(frame.node, frame.node->children.front(), nodes);
|
||||
}
|
||||
|
||||
stack.pop();
|
||||
}
|
||||
}
|
||||
QueryPlanOptimizations::optimizeTree(*root, nodes);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -116,6 +116,11 @@ SRCS(
|
||||
QueryPlan/MergingFinal.cpp
|
||||
QueryPlan/MergingSortedStep.cpp
|
||||
QueryPlan/OffsetStep.cpp
|
||||
QueryPlan/Optimizations/liftUpArrayJoin.cpp
|
||||
QueryPlan/Optimizations/limitPushDown.cpp
|
||||
QueryPlan/Optimizations/mergeExpressions.cpp
|
||||
QueryPlan/Optimizations/optimizeTree.cpp
|
||||
QueryPlan/Optimizations/splitFilter.cpp
|
||||
QueryPlan/PartialSortingStep.cpp
|
||||
QueryPlan/QueryPlan.cpp
|
||||
QueryPlan/ReadFromPreparedSource.cpp
|
||||
|
@ -109,15 +109,16 @@ public:
|
||||
* because it allows to check the IP ranges of the trusted proxy.
|
||||
* Proxy-forwarded (original client) IP address is used for quota accounting if quota is keyed by forwarded IP.
|
||||
*/
|
||||
TCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_)
|
||||
TCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_,
|
||||
std::string server_display_name_)
|
||||
: Poco::Net::TCPServerConnection(socket_)
|
||||
, server(server_)
|
||||
, parse_proxy_protocol(parse_proxy_protocol_)
|
||||
, log(&Poco::Logger::get("TCPHandler"))
|
||||
, connection_context(server.context())
|
||||
, query_context(server.context())
|
||||
, server_display_name(std::move(server_display_name_))
|
||||
{
|
||||
server_display_name = server.config().getString("display_name", getFQDNOrHostName());
|
||||
}
|
||||
|
||||
void run() override;
|
||||
|
@ -17,6 +17,7 @@ private:
|
||||
IServer & server;
|
||||
bool parse_proxy_protocol = false;
|
||||
Poco::Logger * log;
|
||||
std::string server_display_name;
|
||||
|
||||
class DummyTCPHandler : public Poco::Net::TCPServerConnection
|
||||
{
|
||||
@ -34,6 +35,7 @@ public:
|
||||
: server(server_), parse_proxy_protocol(parse_proxy_protocol_)
|
||||
, log(&Poco::Logger::get(std::string("TCP") + (secure_ ? "S" : "") + "HandlerFactory"))
|
||||
{
|
||||
server_display_name = server.config().getString("display_name", getFQDNOrHostName());
|
||||
}
|
||||
|
||||
Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket) override
|
||||
@ -42,7 +44,7 @@ public:
|
||||
{
|
||||
LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString());
|
||||
|
||||
return new TCPHandler(server, socket, parse_proxy_protocol);
|
||||
return new TCPHandler(server, socket, parse_proxy_protocol, server_display_name);
|
||||
}
|
||||
catch (const Poco::Net::NetException &)
|
||||
{
|
||||
|
@ -196,7 +196,7 @@ StorageKafka::StorageKafka(
|
||||
auto task_count = thread_per_consumer ? num_consumers : 1;
|
||||
for (size_t i = 0; i < task_count; ++i)
|
||||
{
|
||||
auto task = global_context.getSchedulePool().createTask(log->name(), [this, i]{ threadFunc(i); });
|
||||
auto task = global_context.getMessageBrokerSchedulePool().createTask(log->name(), [this, i]{ threadFunc(i); });
|
||||
task->deactivate();
|
||||
tasks.emplace_back(std::make_shared<TaskContext>(std::move(task)));
|
||||
}
|
||||
|
@ -34,6 +34,7 @@ public:
|
||||
{
|
||||
new_blocks_metadata->hash = key_str;
|
||||
new_blocks_metadata->version = storage.getBlocksVersion() + 1;
|
||||
new_blocks_metadata->time = std::chrono::system_clock::now();
|
||||
|
||||
for (auto & block : *new_blocks)
|
||||
{
|
||||
@ -48,6 +49,15 @@ public:
|
||||
|
||||
storage.condition.notify_all();
|
||||
}
|
||||
else
|
||||
{
|
||||
// only update blocks time
|
||||
new_blocks_metadata->hash = storage.getBlocksHashKey();
|
||||
new_blocks_metadata->version = storage.getBlocksVersion();
|
||||
new_blocks_metadata->time = std::chrono::system_clock::now();
|
||||
|
||||
(*storage.blocks_metadata_ptr) = new_blocks_metadata;
|
||||
}
|
||||
|
||||
new_blocks.reset();
|
||||
new_blocks_metadata.reset();
|
||||
|
@ -20,6 +20,7 @@ limitations under the License. */
|
||||
#include <DataStreams/MaterializingBlockInputStream.h>
|
||||
#include <DataStreams/SquashingBlockInputStream.h>
|
||||
#include <DataStreams/copyData.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/SipHash.h>
|
||||
|
||||
@ -254,6 +255,8 @@ StorageLiveView::StorageLiveView(
|
||||
live_view_context = std::make_unique<Context>(global_context);
|
||||
live_view_context->makeQueryContext();
|
||||
|
||||
log = &Poco::Logger::get("StorageLiveView (" + table_id_.database_name + "." + table_id_.table_name + ")");
|
||||
|
||||
StorageInMemoryMetadata storage_metadata;
|
||||
storage_metadata.setColumns(columns_);
|
||||
setInMemoryMetadata(storage_metadata);
|
||||
@ -275,12 +278,21 @@ StorageLiveView::StorageLiveView(
|
||||
if (query.live_view_timeout)
|
||||
{
|
||||
is_temporary = true;
|
||||
temporary_live_view_timeout = std::chrono::seconds{*query.live_view_timeout};
|
||||
temporary_live_view_timeout = Seconds {*query.live_view_timeout};
|
||||
}
|
||||
|
||||
if (query.live_view_periodic_refresh)
|
||||
{
|
||||
is_periodically_refreshed = true;
|
||||
periodic_live_view_refresh = Seconds {*query.live_view_periodic_refresh};
|
||||
}
|
||||
|
||||
blocks_ptr = std::make_shared<BlocksPtr>();
|
||||
blocks_metadata_ptr = std::make_shared<BlocksMetadataPtr>();
|
||||
active_ptr = std::make_shared<bool>(true);
|
||||
|
||||
periodic_refresh_task = global_context.getSchedulePool().createTask("LiveViewPeriodicRefreshTask", [this]{ periodicRefreshTaskFunc(); });
|
||||
periodic_refresh_task->deactivate();
|
||||
}
|
||||
|
||||
Block StorageLiveView::getHeader() const
|
||||
@ -369,10 +381,21 @@ bool StorageLiveView::getNewBlocks()
|
||||
}
|
||||
new_blocks_metadata->hash = key.toHexString();
|
||||
new_blocks_metadata->version = getBlocksVersion() + 1;
|
||||
new_blocks_metadata->time = std::chrono::system_clock::now();
|
||||
|
||||
(*blocks_ptr) = new_blocks;
|
||||
(*blocks_metadata_ptr) = new_blocks_metadata;
|
||||
|
||||
updated = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
new_blocks_metadata->hash = getBlocksHashKey();
|
||||
new_blocks_metadata->version = getBlocksVersion();
|
||||
new_blocks_metadata->time = std::chrono::system_clock::now();
|
||||
|
||||
(*blocks_metadata_ptr) = new_blocks_metadata;
|
||||
}
|
||||
}
|
||||
return updated;
|
||||
}
|
||||
@ -392,11 +415,18 @@ void StorageLiveView::startup()
|
||||
{
|
||||
if (is_temporary)
|
||||
TemporaryLiveViewCleaner::instance().addView(std::static_pointer_cast<StorageLiveView>(shared_from_this()));
|
||||
|
||||
if (is_periodically_refreshed)
|
||||
periodic_refresh_task->activate();
|
||||
}
|
||||
|
||||
void StorageLiveView::shutdown()
|
||||
{
|
||||
shutdown_called = true;
|
||||
|
||||
if (is_periodically_refreshed)
|
||||
periodic_refresh_task->deactivate();
|
||||
|
||||
DatabaseCatalog::instance().removeDependency(select_table_id, getStorageID());
|
||||
}
|
||||
|
||||
@ -415,15 +445,55 @@ void StorageLiveView::drop()
|
||||
condition.notify_all();
|
||||
}
|
||||
|
||||
void StorageLiveView::refresh()
|
||||
void StorageLiveView::scheduleNextPeriodicRefresh()
|
||||
{
|
||||
Seconds current_time = std::chrono::duration_cast<Seconds> (std::chrono::system_clock::now().time_since_epoch());
|
||||
Seconds blocks_time = std::chrono::duration_cast<Seconds> (getBlocksTime().time_since_epoch());
|
||||
|
||||
if ((current_time - periodic_live_view_refresh) >= blocks_time)
|
||||
{
|
||||
refresh(false);
|
||||
blocks_time = std::chrono::duration_cast<Seconds> (getBlocksTime().time_since_epoch());
|
||||
}
|
||||
current_time = std::chrono::duration_cast<Seconds> (std::chrono::system_clock::now().time_since_epoch());
|
||||
|
||||
auto next_refresh_time = blocks_time + periodic_live_view_refresh;
|
||||
|
||||
if (current_time >= next_refresh_time)
|
||||
periodic_refresh_task->scheduleAfter(0);
|
||||
else
|
||||
{
|
||||
auto schedule_time = std::chrono::duration_cast<MilliSeconds> (next_refresh_time - current_time);
|
||||
periodic_refresh_task->scheduleAfter(static_cast<size_t>(schedule_time.count()));
|
||||
}
|
||||
}
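The scheduling arithmetic above reduces to: next refresh time = last blocks time + refresh period, and the task is scheduled after the remaining delay (or immediately if already overdue). A minimal sketch, assuming plain std::chrono values instead of the storage's members:

#include <chrono>
#include <iostream>

using Seconds = std::chrono::seconds;
using MilliSeconds = std::chrono::milliseconds;

// Given when the blocks were last refreshed and the refresh period,
// compute how long to wait before scheduling the next refresh.
static MilliSeconds delayUntilNextRefresh(Seconds current_time, Seconds blocks_time, Seconds period)
{
    auto next_refresh_time = blocks_time + period;
    if (current_time >= next_refresh_time)
        return MilliSeconds{0}; /// already overdue, schedule immediately
    return std::chrono::duration_cast<MilliSeconds>(next_refresh_time - current_time);
}

int main()
{
    Seconds now{1000};
    Seconds last_refresh{995};
    Seconds period{60};
    std::cout << delayUntilNextRefresh(now, last_refresh, period).count() << " ms\n"; /// 55000 ms
    return 0;
}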
|
||||
|
||||
void StorageLiveView::periodicRefreshTaskFunc()
|
||||
{
|
||||
LOG_TRACE(log, "periodic refresh task");
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
if (hasActiveUsers())
|
||||
scheduleNextPeriodicRefresh();
|
||||
}
|
||||
|
||||
void StorageLiveView::refresh(bool grab_lock)
|
||||
{
|
||||
// Lock is already acquired exclusively from InterpreterAlterQuery.cpp InterpreterAlterQuery::execute() method.
|
||||
// So, reacquiring lock is not needed and will result in an exception.
|
||||
|
||||
if (grab_lock)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
if (getNewBlocks())
|
||||
condition.notify_all();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (getNewBlocks())
|
||||
condition.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
Pipe StorageLiveView::read(
|
||||
@ -435,15 +505,21 @@ Pipe StorageLiveView::read(
|
||||
const size_t /*max_block_size*/,
|
||||
const unsigned /*num_streams*/)
|
||||
{
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
if (!(*blocks_ptr))
|
||||
refresh(false);
|
||||
|
||||
else if (is_periodically_refreshed)
|
||||
{
|
||||
if (getNewBlocks())
|
||||
condition.notify_all();
|
||||
Seconds current_time = std::chrono::duration_cast<Seconds> (std::chrono::system_clock::now().time_since_epoch());
|
||||
Seconds blocks_time = std::chrono::duration_cast<Seconds> (getBlocksTime().time_since_epoch());
|
||||
|
||||
if ((current_time - periodic_live_view_refresh) >= blocks_time)
|
||||
refresh(false);
|
||||
}
|
||||
|
||||
return Pipe(std::make_shared<BlocksSource>(blocks_ptr, getHeader()));
|
||||
}
|
||||
}
|
||||
|
||||
BlockInputStreams StorageLiveView::watch(
|
||||
@ -458,6 +534,7 @@ BlockInputStreams StorageLiveView::watch(
|
||||
|
||||
bool has_limit = false;
|
||||
UInt64 limit = 0;
|
||||
BlockInputStreamPtr reader;
|
||||
|
||||
if (query.limit_length)
|
||||
{
|
||||
@ -466,45 +543,28 @@ BlockInputStreams StorageLiveView::watch(
|
||||
}
|
||||
|
||||
if (query.is_watch_events)
|
||||
{
|
||||
auto reader = std::make_shared<LiveViewEventsBlockInputStream>(
|
||||
reader = std::make_shared<LiveViewEventsBlockInputStream>(
|
||||
std::static_pointer_cast<StorageLiveView>(shared_from_this()),
|
||||
blocks_ptr, blocks_metadata_ptr, active_ptr, has_limit, limit,
|
||||
context.getSettingsRef().live_view_heartbeat_interval.totalSeconds());
|
||||
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
if (!(*blocks_ptr))
|
||||
{
|
||||
if (getNewBlocks())
|
||||
condition.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
processed_stage = QueryProcessingStage::Complete;
|
||||
|
||||
return { reader };
|
||||
}
|
||||
else
|
||||
{
|
||||
auto reader = std::make_shared<LiveViewBlockInputStream>(
|
||||
reader = std::make_shared<LiveViewBlockInputStream>(
|
||||
std::static_pointer_cast<StorageLiveView>(shared_from_this()),
|
||||
blocks_ptr, blocks_metadata_ptr, active_ptr, has_limit, limit,
|
||||
context.getSettingsRef().live_view_heartbeat_interval.totalSeconds());
|
||||
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
if (!(*blocks_ptr))
|
||||
{
|
||||
if (getNewBlocks())
|
||||
condition.notify_all();
|
||||
}
|
||||
refresh(false);
|
||||
|
||||
if (is_periodically_refreshed)
|
||||
scheduleNextPeriodicRefresh();
|
||||
}
|
||||
|
||||
processed_stage = QueryProcessingStage::Complete;
|
||||
|
||||
return { reader };
|
||||
}
|
||||
}
|
||||
|
||||
NamesAndTypesList StorageLiveView::getVirtuals() const
|
||||
|
@ -13,6 +13,7 @@ limitations under the License. */
|
||||
|
||||
#include <ext/shared_ptr_helper.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Core/BackgroundSchedulePool.h>
|
||||
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
@ -21,10 +22,16 @@ limitations under the License. */
|
||||
namespace DB
|
||||
{
|
||||
|
||||
using Time = std::chrono::time_point<std::chrono::system_clock>;
|
||||
using Seconds = std::chrono::seconds;
|
||||
using MilliSeconds = std::chrono::milliseconds;
|
||||
|
||||
|
||||
struct BlocksMetadata
|
||||
{
|
||||
String hash;
|
||||
UInt64 version;
|
||||
Time time;
|
||||
};
|
||||
|
||||
struct MergeableBlocks
|
||||
@ -75,8 +82,10 @@ public:
|
||||
NamesAndTypesList getVirtuals() const override;
|
||||
|
||||
bool isTemporary() const { return is_temporary; }
|
||||
std::chrono::seconds getTimeout() const { return temporary_live_view_timeout; }
|
||||
bool isPeriodicallyRefreshed() const { return is_periodically_refreshed; }
|
||||
|
||||
Seconds getTimeout() const { return temporary_live_view_timeout; }
|
||||
Seconds getPeriodicRefresh() const { return periodic_live_view_refresh; }
|
||||
|
||||
/// Check if we have any readers
|
||||
/// must be called with mutex locked
|
||||
@ -109,6 +118,15 @@ public:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Get blocks time
|
||||
/// must be called with mutex locked
|
||||
Time getBlocksTime()
|
||||
{
|
||||
if (*blocks_metadata_ptr)
|
||||
return (*blocks_metadata_ptr)->time;
|
||||
return {};
|
||||
}
|
||||
|
||||
/// Reset blocks
|
||||
/// must be called with mutex locked
|
||||
void reset()
|
||||
@ -124,7 +142,7 @@ public:
|
||||
void startup() override;
|
||||
void shutdown() override;
|
||||
|
||||
void refresh();
|
||||
void refresh(const bool grab_lock = true);
|
||||
|
||||
Pipe read(
|
||||
const Names & column_names,
|
||||
@ -176,8 +194,13 @@ private:
|
||||
Context & global_context;
|
||||
std::unique_ptr<Context> live_view_context;
|
||||
|
||||
Poco::Logger * log;
|
||||
|
||||
bool is_temporary = false;
|
||||
std::chrono::seconds temporary_live_view_timeout;
|
||||
bool is_periodically_refreshed = false;
|
||||
|
||||
Seconds temporary_live_view_timeout;
|
||||
Seconds periodic_live_view_refresh;
|
||||
|
||||
/// Mutex to protect access to sample block and inner_blocks_query
|
||||
mutable std::mutex sample_block_lock;
|
||||
@ -199,6 +222,13 @@ private:
|
||||
|
||||
std::atomic<bool> shutdown_called = false;
|
||||
|
||||
/// Periodic refresh task used when [PERIODIC] REFRESH is specified in create statement
|
||||
BackgroundSchedulePool::TaskHolder periodic_refresh_task;
|
||||
void periodicRefreshTaskFunc();
|
||||
|
||||
/// Must be called with mutex locked
|
||||
void scheduleNextPeriodicRefresh();
|
||||
|
||||
StorageLiveView(
|
||||
const StorageID & table_id_,
|
||||
Context & local_context,
|
||||
|
@ -414,18 +414,19 @@ size_t MergeTreeRangeReader::ReadResult::numZerosInTail(const UInt8 * begin, con
|
||||
end -= 64;
|
||||
const auto * pos = end;
|
||||
UInt64 val =
|
||||
static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
|
||||
static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos)),
|
||||
zero16)))
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 16)),
|
||||
zero16))) << 16u)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 32)),
|
||||
zero16))) << 32u)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 48)),
|
||||
zero16))) << 48u);
|
||||
val = ~val;
|
||||
if (val == 0)
|
||||
count += 64;
|
||||
else
|
||||
|
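The hunk above replaces _mm_cmpgt_epi8 with _mm_cmpeq_epi8 so that bytes are tested for equality with zero; a signed greater-than test misclassifies bytes such as 0x80, which are negative as int8. A self-contained sketch of counting trailing zero bytes 16 at a time with SSE2 (simplified relative to the real implementation, which processes 64 bytes per iteration):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <emmintrin.h> // SSE2

// Count how many zero bytes sit at the tail of [begin, end), scanning 16 bytes at a time.
static size_t numZerosInTail(const uint8_t * begin, const uint8_t * end)
{
    size_t count = 0;
    const __m128i zero16 = _mm_setzero_si128();

    while (end - begin >= 16)
    {
        end -= 16;
        __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(end));
        /// One mask bit per byte: set if the byte equals zero.
        uint32_t mask = static_cast<uint32_t>(_mm_movemask_epi8(_mm_cmpeq_epi8(bytes, zero16)));
        if (mask == 0xFFFF)
        {
            count += 16; /// the whole chunk is zeros, keep going
        }
        else
        {
            /// Count the run of set bits at the top of the 16-bit mask, then stop.
            for (int i = 15; i >= 0 && (mask & (1u << i)); --i)
                ++count;
            return count;
        }
    }

    while (end > begin && *(end - 1) == 0)
    {
        ++count;
        --end;
    }
    return count;
}

int main()
{
    uint8_t data[32] = {};
    data[0] = 1;
    data[10] = 0x80; /// would break a signed "greater than zero" comparison
    std::cout << numZerosInTail(data, data + 32) << '\n'; /// 21
    return 0;
}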
@ -74,20 +74,9 @@ size_t ReplicatedMergeTreePartCheckThread::size() const
|
||||
}
|
||||
|
||||
|
||||
void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & part_name)
|
||||
ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreePartCheckThread::searchForMissingPartOnOtherReplicas(const String & part_name)
|
||||
{
|
||||
auto zookeeper = storage.getZooKeeper();
|
||||
String part_path = storage.replica_path + "/parts/" + part_name;
|
||||
|
||||
/// If the part is in ZooKeeper, remove it from there and add the task to download it to the queue.
|
||||
if (zookeeper->exists(part_path))
|
||||
{
|
||||
LOG_WARNING(log, "Part {} exists in ZooKeeper but not locally. Removing from ZooKeeper and queueing a fetch.", part_name);
|
||||
ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed);
|
||||
|
||||
storage.removePartAndEnqueueFetch(part_name);
|
||||
return;
|
||||
}
|
||||
|
||||
/// If the part is not in ZooKeeper, we'll check if it's at least somewhere.
|
||||
auto part_info = MergeTreePartInfo::fromPartName(part_name, storage.format_version);
|
||||
@ -115,7 +104,7 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
|
||||
* and don't delete the queue entry when in doubt.
|
||||
*/
|
||||
|
||||
LOG_WARNING(log, "Checking if anyone has a part covering {}.", part_name);
|
||||
LOG_WARNING(log, "Checking if anyone has a part {} or covering part.", part_name);
|
||||
|
||||
bool found_part_with_the_same_min_block = false;
|
||||
bool found_part_with_the_same_max_block = false;
|
||||
@ -123,15 +112,27 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
|
||||
Strings replicas = zookeeper->getChildren(storage.zookeeper_path + "/replicas");
|
||||
for (const String & replica : replicas)
|
||||
{
|
||||
Strings parts = zookeeper->getChildren(storage.zookeeper_path + "/replicas/" + replica + "/parts");
|
||||
String replica_path = storage.zookeeper_path + "/replicas/" + replica;
|
||||
|
||||
Strings parts = zookeeper->getChildren(replica_path + "/parts");
|
||||
for (const String & part_on_replica : parts)
|
||||
{
|
||||
auto part_on_replica_info = MergeTreePartInfo::fromPartName(part_on_replica, storage.format_version);
|
||||
|
||||
if (part_info == part_on_replica_info)
|
||||
{
|
||||
/// Found the missing part on our own replica. If we got here then something is wrong with this part, so skip it.
|
||||
if (replica_path == storage.replica_path)
|
||||
continue;
|
||||
|
||||
LOG_WARNING(log, "Found the missing part {} at {} on {}", part_name, part_on_replica, replica);
|
||||
return MissingPartSearchResult::FoundAndNeedFetch;
|
||||
}
|
||||
|
||||
if (part_on_replica_info.contains(part_info))
|
||||
{
|
||||
LOG_WARNING(log, "Found part {} on {} that covers the missing part {}", part_on_replica, replica, part_name);
|
||||
return;
|
||||
return MissingPartSearchResult::FoundAndDontNeedFetch;
|
||||
}
|
||||
|
||||
if (part_info.contains(part_on_replica_info))
|
||||
@ -144,7 +145,7 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
|
||||
if (found_part_with_the_same_min_block && found_part_with_the_same_max_block)
|
||||
{
|
||||
LOG_WARNING(log, "Found parts with the same min block and with the same max block as the missing part {}. Hoping that it will eventually appear as a result of a merge.", part_name);
|
||||
return;
|
||||
return MissingPartSearchResult::FoundAndDontNeedFetch;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -160,14 +161,39 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
|
||||
not_found_msg = "smaller parts with either the same min block or the same max block.";
|
||||
LOG_ERROR(log, "No replica has part covering {} and a merge is impossible: we didn't find {}", part_name, not_found_msg);
|
||||
|
||||
return MissingPartSearchResult::LostForever;
|
||||
}
|
||||
|
||||
void ReplicatedMergeTreePartCheckThread::searchForMissingPartAndFetchIfPossible(const String & part_name, bool exists_in_zookeeper)
|
||||
{
|
||||
auto zookeeper = storage.getZooKeeper();
|
||||
auto missing_part_search_result = searchForMissingPartOnOtherReplicas(part_name);
|
||||
|
||||
/// If the part is in ZooKeeper, remove it from there and add the task to download it to the queue.
|
||||
if (exists_in_zookeeper)
|
||||
{
|
||||
/// If the part was found on some other replica
|
||||
if (missing_part_search_result == MissingPartSearchResult::FoundAndNeedFetch)
|
||||
{
|
||||
LOG_WARNING(log, "Part {} exists in ZooKeeper but not locally and found on other replica. Removing from ZooKeeper and queueing a fetch.", part_name);
|
||||
storage.removePartAndEnqueueFetch(part_name);
|
||||
}
|
||||
else /// If we have a covering part on another replica, or the part is lost forever, we don't need to fetch anything
|
||||
{
|
||||
LOG_WARNING(log, "Part {} exists in ZooKeeper but not locally and not found on other replica. Removing it from ZooKeeper.", part_name);
|
||||
storage.removePartFromZooKeeper(part_name);
|
||||
}
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed);
|
||||
|
||||
if (missing_part_search_result == MissingPartSearchResult::LostForever)
|
||||
{
|
||||
/// Is it in the replication queue? If there is - delete, because the task can not be processed.
|
||||
if (!storage.queue.remove(zookeeper, part_name))
|
||||
{
|
||||
/// The part was not in our queue. Why did it happen?
|
||||
LOG_ERROR(log, "Missing part {} is not in our queue.", part_name);
|
||||
return;
|
||||
/// The part was not in our queue.
|
||||
LOG_WARNING(log, "Missing part {} is not in our queue, this can happen rarely.", part_name);
|
||||
}
|
||||
|
||||
/** This situation is possible if on all the replicas where the part was, it deteriorated.
|
||||
@ -175,13 +201,21 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
|
||||
*/
|
||||
LOG_ERROR(log, "Part {} is lost forever.", part_name);
|
||||
ProfileEvents::increment(ProfileEvents::ReplicatedDataLoss);
|
||||
}
|
||||
}
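A compact sketch of the decision table implemented by searchForMissingPartAndFetchIfPossible (the enum mirrors the one declared in the corresponding header; the action strings are illustrative, not the real calls):

#include <iostream>
#include <string>

enum class MissingPartSearchResult
{
    FoundAndNeedFetch,
    FoundAndDontNeedFetch,
    LostForever,
};

// Combine "does the part znode still exist" with the search result.
static std::string decideActions(bool exists_in_zookeeper, MissingPartSearchResult result)
{
    std::string actions;
    if (exists_in_zookeeper)
    {
        if (result == MissingPartSearchResult::FoundAndNeedFetch)
            actions += "remove part znode and enqueue fetch; ";
        else
            actions += "remove part znode; ";
    }
    if (result == MissingPartSearchResult::LostForever)
        actions += "drop related queue entry and count data loss; ";
    return actions.empty() ? "nothing to do" : actions;
}

int main()
{
    std::cout << decideActions(true, MissingPartSearchResult::FoundAndNeedFetch) << '\n';
    std::cout << decideActions(true, MissingPartSearchResult::LostForever) << '\n';
    std::cout << decideActions(false, MissingPartSearchResult::FoundAndDontNeedFetch) << '\n';
    return 0;
}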
|
||||
|
||||
|
||||
CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name)
|
||||
std::pair<bool, MergeTreeDataPartPtr> ReplicatedMergeTreePartCheckThread::findLocalPart(const String & part_name)
|
||||
{
|
||||
LOG_WARNING(log, "Checking part {}", part_name);
|
||||
ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks);
|
||||
auto zookeeper = storage.getZooKeeper();
|
||||
String part_path = storage.replica_path + "/parts/" + part_name;
|
||||
|
||||
/// It's important to check zookeeper first and only after that check local storage,
/// because our checks of local storage and zookeeper are not consistent.
/// If the part exists in zookeeper but doesn't exist in local storage, we definitely need
/// to fetch this part. But if we check local storage first and then check zookeeper,
/// some background process can successfully commit the part between these checks (both to local storage and zookeeper),
/// but the checker thread would remove the part from zookeeper and queue a fetch.
|
||||
bool exists_in_zookeeper = zookeeper->exists(part_path);
|
||||
|
||||
/// If the part is still in the PreCommitted -> Committed transition, it is not lost
|
||||
/// and there is no need to go searching for it on other replicas. To definitely find the needed part
|
||||
@ -190,17 +224,27 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_na
|
||||
if (!part)
|
||||
part = storage.getActiveContainingPart(part_name);
|
||||
|
||||
return std::make_pair(exists_in_zookeeper, part);
|
||||
}
|
||||
|
||||
CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name)
|
||||
{
|
||||
LOG_WARNING(log, "Checking part {}", part_name);
|
||||
ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks);
|
||||
|
||||
auto [exists_in_zookeeper, part] = findLocalPart(part_name);
|
||||
|
||||
/// We do not have this or a covering part.
|
||||
if (!part)
|
||||
{
|
||||
searchForMissingPart(part_name);
|
||||
searchForMissingPartAndFetchIfPossible(part_name, exists_in_zookeeper);
|
||||
return {part_name, false, "Part is missing, will search for it"};
|
||||
}
|
||||
|
||||
/// We have this part, and it's active. We will check whether we need this part and whether it has the right data.
|
||||
else if (part->name == part_name)
|
||||
if (part->name == part_name)
|
||||
{
|
||||
auto zookeeper = storage.getZooKeeper();
|
||||
|
||||
auto table_lock = storage.lockForShare(RWLockImpl::NO_QUERY, storage.getSettings()->lock_acquire_timeout_for_background_operations);
|
||||
|
||||
auto local_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums(
|
||||
@ -254,11 +298,11 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_na
|
||||
|
||||
tryLogCurrentException(log, __PRETTY_FUNCTION__);
|
||||
|
||||
String message = "Part " + part_name + " looks broken. Removing it and queueing a fetch.";
|
||||
String message = "Part " + part_name + " looks broken. Removing it and will try to fetch.";
|
||||
LOG_ERROR(log, message);
|
||||
ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed);
|
||||
|
||||
storage.removePartAndEnqueueFetch(part_name);
|
||||
/// Part is broken, let's try to find it and fetch.
|
||||
searchForMissingPartAndFetchIfPossible(part_name, exists_in_zookeeper);
|
||||
|
||||
/// Delete part locally.
|
||||
storage.forgetPartAndMoveToDetached(part, "broken");
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <common/logger_useful.h>
|
||||
#include <Core/BackgroundSchedulePool.h>
|
||||
#include <Storages/CheckResults.h>
|
||||
#include <Storages/MergeTree/IMergeTreeDataPart.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -73,7 +74,26 @@ public:
|
||||
private:
|
||||
void run();
|
||||
|
||||
void searchForMissingPart(const String & part_name);
|
||||
/// Search for a missing part and queue a fetch if possible. Otherwise
/// remove the part from zookeeper and from the queue.
|
||||
void searchForMissingPartAndFetchIfPossible(const String & part_name, bool exists_in_zookeeper);
|
||||
|
||||
std::pair<bool, MergeTreeDataPartPtr> findLocalPart(const String & part_name);
|
||||
|
||||
enum MissingPartSearchResult
|
||||
{
|
||||
/// We found this part on other replica, let's fetch it.
|
||||
FoundAndNeedFetch,
|
||||
/// We found a covering part or source parts with the same min and max block numbers;
/// no need to fetch because it will be handled during normal queue processing.
|
||||
FoundAndDontNeedFetch,
|
||||
/// A covering part was not found anywhere and the exact part_name was not found on other
|
||||
/// replicas.
|
||||
LostForever,
|
||||
};
|
||||
|
||||
/// Search for missing part on other replicas or covering part on all replicas (including our replica).
|
||||
MissingPartSearchResult searchForMissingPartOnOtherReplicas(const String & part_name);
|
||||
|
||||
StorageReplicatedMergeTree & storage;
|
||||
String log_name;
|
||||
|
@ -420,13 +420,26 @@ bool ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, const Stri
|
||||
{
|
||||
std::unique_lock lock(state_mutex);
|
||||
|
||||
virtual_parts.remove(part_name);
|
||||
bool removed = virtual_parts.remove(part_name);
|
||||
|
||||
for (Queue::iterator it = queue.begin(); it != queue.end();)
|
||||
{
|
||||
if ((*it)->new_part_name == part_name)
|
||||
{
|
||||
found = *it;
|
||||
if (removed)
|
||||
{
|
||||
/// Preserve invariant `virtual_parts` = `current_parts` + `queue`.
|
||||
/// We remove new_part from virtual parts and add all source parts
|
||||
/// which are present in current_parts.
|
||||
for (const auto & source_part : found->source_parts)
|
||||
{
|
||||
auto part_in_current_parts = current_parts.getContainingPart(source_part);
|
||||
if (part_in_current_parts == source_part)
|
||||
virtual_parts.add(source_part);
|
||||
}
|
||||
}
|
||||
|
||||
updateStateOnQueueEntryRemoval(
|
||||
found, /* is_successful = */ false,
|
||||
min_unprocessed_insert_time_changed, max_processed_insert_time_changed, lock);
|
||||
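A toy illustration of the invariant `virtual_parts` = `current_parts` + `queue` restored above: when the queue entry producing new_part is dropped, its source parts that are still present in current_parts must be re-added to virtual_parts (the part names below are made up for the example):

#include <iostream>
#include <set>
#include <string>
#include <vector>

int main()
{
    std::set<std::string> current_parts = {"all_1_1_0", "all_2_2_0"};
    std::set<std::string> virtual_parts = {"all_1_2_1"}; /// result of the merge scheduled in the queue

    std::string new_part = "all_1_2_1";
    std::vector<std::string> source_parts = {"all_1_1_0", "all_2_2_0"};

    /// Remove the queue entry: drop new_part from virtual_parts ...
    bool removed = virtual_parts.erase(new_part) > 0;

    /// ... and restore the source parts that current_parts still contains.
    if (removed)
        for (const auto & source_part : source_parts)
            if (current_parts.count(source_part))
                virtual_parts.insert(source_part);

    for (const auto & part : virtual_parts)
        std::cout << part << '\n'; /// all_1_1_0, all_2_2_0
    return 0;
}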
@ -1010,7 +1023,7 @@ bool ReplicatedMergeTreeQueue::isNotCoveredByFuturePartsImpl(const String & log_
|
||||
/// NOTE The above is redundant, but left for a more convenient message in the log.
|
||||
auto result_part = MergeTreePartInfo::fromPartName(new_part_name, format_version);
|
||||
|
||||
/// It can slow down when the size of `future_parts` is large. But it can not be large, since `BackgroundProcessingPool` is limited.
|
||||
/// It can slow down when the size of `future_parts` is large. But it can not be large, since background pool is limited.
|
||||
for (const auto & future_part_elem : future_parts)
|
||||
{
|
||||
auto future_part = MergeTreePartInfo::fromPartName(future_part_elem.first, format_version);
|
||||
|
@ -112,13 +112,13 @@ StorageRabbitMQ::StorageRabbitMQ(
|
||||
|
||||
/// One looping task for all consumers as they share the same connection == the same handler == the same event loop
|
||||
event_handler->updateLoopState(Loop::STOP);
|
||||
looping_task = global_context.getSchedulePool().createTask("RabbitMQLoopingTask", [this]{ loopingFunc(); });
|
||||
looping_task = global_context.getMessageBrokerSchedulePool().createTask("RabbitMQLoopingTask", [this]{ loopingFunc(); });
|
||||
looping_task->deactivate();
|
||||
|
||||
streaming_task = global_context.getSchedulePool().createTask("RabbitMQStreamingTask", [this]{ streamingToViewsFunc(); });
|
||||
streaming_task = global_context.getMessageBrokerSchedulePool().createTask("RabbitMQStreamingTask", [this]{ streamingToViewsFunc(); });
|
||||
streaming_task->deactivate();
|
||||
|
||||
connection_task = global_context.getSchedulePool().createTask("RabbitMQConnectionTask", [this]{ connectionFunc(); });
|
||||
connection_task = global_context.getMessageBrokerSchedulePool().createTask("RabbitMQConnectionTask", [this]{ connectionFunc(); });
|
||||
connection_task->deactivate();
|
||||
|
||||
if (queue_base.empty())
|
||||
|
@ -3013,6 +3013,21 @@ void StorageReplicatedMergeTree::removePartFromZooKeeper(const String & part_nam
|
||||
ops.emplace_back(zkutil::makeRemoveRequest(part_path, -1));
|
||||
}
|
||||
|
||||
void StorageReplicatedMergeTree::removePartFromZooKeeper(const String & part_name)
|
||||
{
|
||||
auto zookeeper = getZooKeeper();
|
||||
String part_path = replica_path + "/parts/" + part_name;
|
||||
Coordination::Stat stat;
|
||||
|
||||
/// Part doesn't exist, nothing to remove
|
||||
if (!zookeeper->exists(part_path, &stat))
|
||||
return;
|
||||
|
||||
Coordination::Requests ops;
|
||||
|
||||
removePartFromZooKeeper(part_name, ops, stat.numChildren > 0);
|
||||
zookeeper->multi(ops);
|
||||
}
|
||||
|
||||
void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_name)
|
||||
{
|
||||
|
@ -381,6 +381,9 @@ private:
|
||||
/// Set has_children to true for "old-style" parts (those with /columns and /checksums child znodes).
|
||||
void removePartFromZooKeeper(const String & part_name, Coordination::Requests & ops, bool has_children);
|
||||
|
||||
/// Just removes the part from ZooKeeper using the overload above.
|
||||
void removePartFromZooKeeper(const String & part_name);
|
||||
|
||||
/// Quickly removes big set of parts from ZooKeeper (using async multi queries)
|
||||
void removePartsFromZooKeeper(zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names,
|
||||
NameSet * parts_should_be_retried = nullptr);
|
||||
|
@ -329,7 +329,7 @@ Pipe StorageS3::read(
|
||||
context,
|
||||
metadata_snapshot->getColumns(),
|
||||
max_block_size,
|
||||
chooseCompressionMethod(uri.endpoint, compression_method),
|
||||
chooseCompressionMethod(uri.key, compression_method),
|
||||
client,
|
||||
uri.bucket,
|
||||
key));
|
||||
@ -347,7 +347,7 @@ BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const StorageMet
|
||||
format_name,
|
||||
metadata_snapshot->getSampleBlock(),
|
||||
global_context,
|
||||
chooseCompressionMethod(uri.endpoint, compression_method),
|
||||
chooseCompressionMethod(uri.key, compression_method),
|
||||
client,
|
||||
uri.bucket,
|
||||
uri.key,
|
||||
|
@ -23,6 +23,7 @@ const char * auto_contributors[] {
|
||||
"Alexander Burmak",
|
||||
"Alexander Ermolaev",
|
||||
"Alexander GQ Gerasiov",
|
||||
"Alexander Gololobov",
|
||||
"Alexander Kazakov",
|
||||
"Alexander Kozhikhov",
|
||||
"Alexander Krasheninnikov",
|
||||
@ -43,6 +44,7 @@ const char * auto_contributors[] {
|
||||
"Alexandr Krasheninnikov",
|
||||
"Alexandr Orlov",
|
||||
"Alexandra Latysheva",
|
||||
"Alexandre Snarskii",
|
||||
"Alexei Averchenko",
|
||||
"Alexey",
|
||||
"Alexey Arno",
|
||||
@ -143,6 +145,7 @@ const char * auto_contributors[] {
|
||||
"CurtizJ",
|
||||
"Daniel Bershatsky",
|
||||
"Daniel Dao",
|
||||
"Daniel Qin",
|
||||
"Danila Kutenin",
|
||||
"Dao Minh Thuc",
|
||||
"Daria Mozhaeva",
|
||||
@ -309,7 +312,9 @@ const char * auto_contributors[] {
|
||||
"Marek Vavrusa",
|
||||
"Marek Vavruša",
|
||||
"Marek Vavruša",
|
||||
"Mariano Benítez Mulet",
|
||||
"Mark Andreev",
|
||||
"Mark Frost",
|
||||
"Mark Papadakis",
|
||||
"Maroun Maroun",
|
||||
"Marsel Arduanov",
|
||||
@ -422,6 +427,7 @@ const char * auto_contributors[] {
|
||||
"Rafael David Tinoco",
|
||||
"Ramazan Polat",
|
||||
"Ravengg",
|
||||
"RegulusZ",
|
||||
"Reilee",
|
||||
"Reto Kromer",
|
||||
"Ri",
|
||||
@ -482,9 +488,11 @@ const char * auto_contributors[] {
|
||||
"Tangaev",
|
||||
"Tema Novikov",
|
||||
"The-Alchemist",
|
||||
"TiunovNN",
|
||||
"Tobias Adamson",
|
||||
"Tom Bombadil",
|
||||
"Tsarkova Anastasia",
|
||||
"TszkitLo40",
|
||||
"Ubuntu",
|
||||
"Ubus",
|
||||
"UnamedRus",
|
||||
@ -556,6 +564,7 @@ const char * auto_contributors[] {
|
||||
"Yury Stankevich",
|
||||
"Zhichang Yu",
|
||||
"Zhipeng",
|
||||
"Zoran Pandovski",
|
||||
"a.palagashvili",
|
||||
"abdrakhmanov",
|
||||
"abyss7",
|
||||
@ -571,6 +580,7 @@ const char * auto_contributors[] {
|
||||
"alex.lvxin",
|
||||
"alexander kozhikhov",
|
||||
"alexey-milovidov",
|
||||
"alfredlu",
|
||||
"amoschen",
|
||||
"amudong",
|
||||
"ana-uvarova",
|
||||
@ -588,14 +598,17 @@ const char * auto_contributors[] {
|
||||
"avsharapov",
|
||||
"awesomeleo",
|
||||
"benamazing",
|
||||
"benbiti",
|
||||
"bgranvea",
|
||||
"bharatnc",
|
||||
"blazerer",
|
||||
"bluebirddm",
|
||||
"bo zeng",
|
||||
"bobrovskij artemij",
|
||||
"booknouse",
|
||||
"bseng",
|
||||
"cekc",
|
||||
"centos7",
|
||||
"champtar",
|
||||
"chang.chen",
|
||||
"chengy8934",
|
||||
@ -606,6 +619,7 @@ const char * auto_contributors[] {
|
||||
"comunodi",
|
||||
"coraxster",
|
||||
"damozhaeva",
|
||||
"dankondr",
|
||||
"daoready",
|
||||
"dasmfm",
|
||||
"davydovska",
|
||||
@ -627,6 +641,7 @@ const char * auto_contributors[] {
|
||||
"elBroom",
|
||||
"elenaspb2019",
|
||||
"emakarov",
|
||||
"emhlbmc",
|
||||
"emironyuk",
|
||||
"evtan",
|
||||
"exprmntr",
|
||||
@ -673,6 +688,7 @@ const char * auto_contributors[] {
|
||||
"javi santana",
|
||||
"jetgm",
|
||||
"jianmei zhang",
|
||||
"jyz0309",
|
||||
"kmeaw",
|
||||
"koshachy",
|
||||
"kreuzerkrieg",
|
||||
@ -779,7 +795,9 @@ const char * auto_contributors[] {
|
||||
"taiyang-li",
|
||||
"tao jiang",
|
||||
"tavplubix",
|
||||
"templarzq",
|
||||
"tiger.yan",
|
||||
"tison",
|
||||
"topvisor",
|
||||
"tyrionhuang",
|
||||
"ubuntu",
|
||||
@ -800,6 +818,7 @@ const char * auto_contributors[] {
|
||||
"weeds085490",
|
||||
"xPoSx",
|
||||
"yangshuai",
|
||||
"ygrek",
|
||||
"yhgcn",
|
||||
"ylchou",
|
||||
"yonesko",
|
||||
|
@ -135,6 +135,9 @@ def run_single_test(args, ext, server_logs_level, client_options, case_file, std
|
||||
|
||||
os.environ["CLICKHOUSE_DATABASE"] = database
|
||||
|
||||
# This is for .sh tests
|
||||
os.environ.setdefault("CLICKHOUSE_LOG_COMMENT", case_file)
|
||||
|
||||
params = {
|
||||
'client': args.client + ' --database=' + database,
|
||||
'logs_level': server_logs_level,
|
||||
|
@ -113,9 +113,9 @@ class ClickHouseCluster:
|
||||
self.zookeeper_config_path = p.join(self.base_dir, zookeeper_config_path) if zookeeper_config_path else p.join(
|
||||
HELPERS_DIR, 'zookeeper_config.xml')
|
||||
|
||||
self.project_name = pwd.getpwuid(os.getuid()).pw_name + p.basename(self.base_dir) + self.name
|
||||
project_name = pwd.getpwuid(os.getuid()).pw_name + p.basename(self.base_dir) + self.name
|
||||
# docker-compose removes everything non-alphanumeric from project names so we do it too.
|
||||
self.project_name = re.sub(r'[^a-z0-9]', '', self.project_name.lower())
|
||||
self.project_name = re.sub(r'[^a-z0-9]', '', project_name.lower())
|
||||
self.instances_dir = p.join(self.base_dir, '_instances' + ('' if not self.name else '_' + self.name))
|
||||
self.docker_logs_path = p.join(self.instances_dir, 'docker.log')
|
||||
|
||||
@ -126,8 +126,8 @@ class ClickHouseCluster:
|
||||
self.base_cmd = ['docker-compose']
|
||||
if custom_dockerd_host:
|
||||
self.base_cmd += ['--host', custom_dockerd_host]
|
||||
self.base_cmd += ['--project-name', self.project_name]
|
||||
|
||||
self.base_cmd += ['--project-directory', self.base_dir, '--project-name', self.project_name]
|
||||
self.base_zookeeper_cmd = None
|
||||
self.base_mysql_cmd = []
|
||||
self.base_kafka_cmd = []
|
||||
@ -260,25 +260,23 @@ class ClickHouseCluster:
|
||||
self.with_zookeeper = True
|
||||
self.zookeeper_use_tmpfs = zookeeper_use_tmpfs
|
||||
self.base_cmd.extend(['--file', zookeeper_docker_compose_path])
|
||||
self.base_zookeeper_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
|
||||
self.project_name, '--file', zookeeper_docker_compose_path]
|
||||
self.base_zookeeper_cmd = ['docker-compose', '--project-name', self.project_name,
|
||||
'--file', zookeeper_docker_compose_path]
|
||||
cmds.append(self.base_zookeeper_cmd)
|
||||
|
||||
if with_mysql and not self.with_mysql:
|
||||
self.with_mysql = True
|
||||
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')])
|
||||
self.base_mysql_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
|
||||
self.project_name, '--file',
|
||||
p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')]
|
||||
self.base_mysql_cmd = ['docker-compose', '--project-name', self.project_name,
|
||||
'--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')]
|
||||
|
||||
cmds.append(self.base_mysql_cmd)
|
||||
|
||||
if with_postgres and not self.with_postgres:
|
||||
self.with_postgres = True
|
||||
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')])
|
||||
self.base_postgres_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
|
||||
self.project_name, '--file',
|
||||
p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')]
|
||||
self.base_postgres_cmd = ['docker-compose', '--project-name', self.project_name,
|
||||
'--file', p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')]
|
||||
cmds.append(self.base_postgres_cmd)
|
||||
|
||||
if with_odbc_drivers and not self.with_odbc_drivers:
|
||||
@ -286,64 +284,57 @@ class ClickHouseCluster:
|
||||
if not self.with_mysql:
|
||||
self.with_mysql = True
|
||||
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')])
|
||||
self.base_mysql_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
|
||||
self.project_name, '--file',
|
||||
p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')]
|
||||
self.base_mysql_cmd = ['docker-compose', '--project-name', self.project_name,
|
||||
'--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')]
|
||||
cmds.append(self.base_mysql_cmd)
|
||||
|
||||
if not self.with_postgres:
|
||||
self.with_postgres = True
|
||||
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')])
|
||||
self.base_postgres_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
|
||||
self.project_name, '--file',
|
||||
p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')]
|
||||
self.base_postgres_cmd = ['docker-compose', '--project-name', self.project_name,
|
||||
'--file', p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')]
|
||||
cmds.append(self.base_postgres_cmd)
|
||||
|
||||
if with_kafka and not self.with_kafka:
|
||||
self.with_kafka = True
|
||||
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_kafka.yml')])
|
||||
self.base_kafka_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
|
||||
self.project_name, '--file',
|
||||
p.join(docker_compose_yml_dir, 'docker_compose_kafka.yml')]
|
||||
self.base_kafka_cmd = ['docker-compose', '--project-name', self.project_name,
|
||||
'--file', p.join(docker_compose_yml_dir, 'docker_compose_kafka.yml')]
|
||||
cmds.append(self.base_kafka_cmd)
|
||||
|
||||
if with_kerberized_kafka and not self.with_kerberized_kafka:
|
||||
self.with_kerberized_kafka = True
|
||||
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_kerberized_kafka.yml')])
|
||||
self.base_kerberized_kafka_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
|
||||
self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_kerberized_kafka.yml')]
|
||||
self.base_kerberized_kafka_cmd = ['docker-compose','--project-name', self.project_name,
|
||||
'--file', p.join(docker_compose_yml_dir, 'docker_compose_kerberized_kafka.yml')]
|
||||
cmds.append(self.base_kerberized_kafka_cmd)
|
||||
|
||||
if with_rabbitmq and not self.with_rabbitmq:
|
||||
self.with_rabbitmq = True
|
||||
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_rabbitmq.yml')])
|
||||
self.base_rabbitmq_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
|
||||
self.project_name, '--file',
|
||||
p.join(docker_compose_yml_dir, 'docker_compose_rabbitmq.yml')]
|
||||
self.base_rabbitmq_cmd = ['docker-compose', '--project-name', self.project_name,
|
||||
'--file', p.join(docker_compose_yml_dir, 'docker_compose_rabbitmq.yml')]
|
||||
cmds.append(self.base_rabbitmq_cmd)
|
||||
|
||||
if with_hdfs and not self.with_hdfs:
|
||||
self.with_hdfs = True
|
||||
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_hdfs.yml')])
|
||||
self.base_hdfs_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
|
||||
self.project_name, '--file',
|
||||
p.join(docker_compose_yml_dir, 'docker_compose_hdfs.yml')]
|
||||
self.base_hdfs_cmd = ['docker-compose', '--project-name', self.project_name,
|
||||
'--file', p.join(docker_compose_yml_dir, 'docker_compose_hdfs.yml')]
|
||||
cmds.append(self.base_hdfs_cmd)
|
||||
|
||||
if with_kerberized_hdfs and not self.with_kerberized_hdfs:
|
||||
self.with_kerberized_hdfs = True
|
||||
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_kerberized_hdfs.yml')])
|
||||
self.base_kerberized_hdfs_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
|
||||
self.project_name, '--file',
|
||||
p.join(docker_compose_yml_dir, 'docker_compose_kerberized_hdfs.yml')]
|
||||
self.base_kerberized_hdfs_cmd = ['docker-compose', '--project-name', self.project_name,
|
||||
'--file', p.join(docker_compose_yml_dir, 'docker_compose_kerberized_hdfs.yml')]
|
||||
cmds.append(self.base_kerberized_hdfs_cmd)
|
||||
|
||||
if with_mongo and not self.with_mongo:
|
||||
self.with_mongo = True
|
||||
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_mongo.yml')])
|
||||
self.base_mongo_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
|
||||
self.project_name, '--file',
|
||||
p.join(docker_compose_yml_dir, 'docker_compose_mongo.yml')]
|
||||
self.base_mongo_cmd = ['docker-compose', '--project-name', self.project_name,
|
||||
'--file', p.join(docker_compose_yml_dir, 'docker_compose_mongo.yml')]
|
||||
cmds.append(self.base_mongo_cmd)
|
||||
|
||||
if self.with_net_trics:
|
||||
@ -353,26 +344,25 @@ class ClickHouseCluster:
        if with_redis and not self.with_redis:
            self.with_redis = True
            self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_redis.yml')])
            self.base_redis_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
                                   self.project_name, '--file',
                                   p.join(docker_compose_yml_dir, 'docker_compose_redis.yml')]
            self.base_redis_cmd = ['docker-compose', '--project-name', self.project_name,
                                   '--file', p.join(docker_compose_yml_dir, 'docker_compose_redis.yml')]

        if with_minio and not self.with_minio:
            self.with_minio = True
            self.minio_certs_dir = minio_certs_dir
            self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_minio.yml')])
            self.base_minio_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
                                   self.project_name, '--file',
                                   p.join(docker_compose_yml_dir, 'docker_compose_minio.yml')]
            self.base_minio_cmd = ['docker-compose', '--project-name', self.project_name,
                                   '--file', p.join(docker_compose_yml_dir, 'docker_compose_minio.yml')]
            cmds.append(self.base_minio_cmd)

        if with_cassandra and not self.with_cassandra:
            self.with_cassandra = True
            self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_cassandra.yml')])
            self.base_cassandra_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
                                       self.project_name, '--file',
                                       p.join(docker_compose_yml_dir, 'docker_compose_cassandra.yml')]
            self.base_cassandra_cmd = ['docker-compose', '--project-name', self.project_name,
                                       '--file', p.join(docker_compose_yml_dir, 'docker_compose_cassandra.yml')]

        print("Cluster name:{} project_name:{}. Added instance name:{} tag:{} base_cmd:{} docker_compose_yml_dir:{}".format(
            self.name, self.project_name, name, tag, self.base_cmd, docker_compose_yml_dir))
        return instance

    def get_instance_docker_id(self, instance_name):
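The repeated change above drops the explicit '--project-directory' flag for each optional service and keeps only the project name and the compose file. A minimal sketch of a helper that builds a command of this shape (the helper name make_compose_cmd and the sample values are illustrative, not part of this commit):

import os.path as p

def make_compose_cmd(project_name, docker_compose_yml_dir, yml_name):
    # Shape of the shortened command used for every optional service above:
    # docker-compose --project-name <name> --file <dir>/<yml>
    return ['docker-compose', '--project-name', project_name,
            '--file', p.join(docker_compose_yml_dir, yml_name)]

# Example with made-up values:
# make_compose_cmd('test_cluster', '/compose', 'docker_compose_mongo.yml')
# -> ['docker-compose', '--project-name', 'test_cluster',
#     '--file', '/compose/docker_compose_mongo.yml']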
@ -407,7 +397,10 @@ class ClickHouseCluster:
        return node

    def get_instance_ip(self, instance_name):
        print("get_instance_ip instance_name={}".format(instance_name))
        docker_id = self.get_instance_docker_id(instance_name)
        # for cont in self.docker_client.containers.list():
        #     print("CONTAINERS LIST: ID={} NAME={} STATUS={}".format(cont.id, cont.name, cont.status))
        handle = self.docker_client.containers.get(docker_id)
        return list(handle.attrs['NetworkSettings']['Networks'].values())[0]['IPAddress']
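get_instance_ip looks the container up by name and returns the address of the first Docker network it is attached to; a typical use inside a test would be (illustrative, 'node1' must match a name passed to cluster.add_instance):

ip = cluster.get_instance_ip('node1')
print("node1 is reachable at", ip)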
@ -936,7 +929,7 @@ class ClickHouseInstance:
        self.with_cassandra = with_cassandra

        self.path = p.join(self.cluster.instances_dir, name)
        self.docker_compose_path = p.join(self.path, 'docker_compose.yml')
        self.docker_compose_path = p.join(self.path, 'docker-compose.yml')
        self.env_variables = env_variables or {}
        if with_odbc_drivers:
            self.odbc_ini_path = self.path + "/odbc.ini:/etc/odbc.ini"
@ -1163,14 +1156,14 @@ class ClickHouseInstance:
            if status == 'exited':
                raise Exception(
                    "Instance `{}' failed to start. Container status: {}, logs: {}".format(self.name, status,
                                                                                            handle.logs()))
                                                                                            handle.logs().decode('utf-8')))

            current_time = time.time()
            time_left = deadline - current_time
            if deadline is not None and current_time >= deadline:
                raise Exception("Timed out while waiting for instance `{}' with ip address {} to start. "
                                "Container status: {}, logs: {}".format(self.name, self.ip_address, status,
                                                                        handle.logs()))
                                                                        handle.logs().decode('utf-8')))

            # Repeatedly poll the instance address until there is something that listens there.
            # Usually it means that ClickHouse is ready to accept queries.
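The two .decode('utf-8') additions exist because the Docker Python SDK returns container logs as bytes; decoding keeps the exception text readable instead of embedding a b'...' blob. A small standalone sketch (the container id is a placeholder):

import docker

client = docker.from_env()
handle = client.containers.get('some_container_id')  # placeholder id
raw = handle.logs()            # bytes, e.g. b'Starting ClickHouse...\n'
text = raw.decode('utf-8')     # str, safe to embed in an error message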
@ -0,0 +1 @@
#!/usr/bin/env python3
61
tests/integration/test_broken_part_during_merge/test.py
Normal file
@ -0,0 +1,61 @@
import pytest

from helpers.cluster import ClickHouseCluster
from multiprocessing.dummy import Pool
from helpers.network import PartitionManager
import time

cluster = ClickHouseCluster(__file__)

node1 = cluster.add_instance('node1', with_zookeeper=True)


@pytest.fixture(scope="module")
def started_cluster():
    try:
        cluster.start()

        node1.query('''
        CREATE TABLE replicated_mt(date Date, id UInt32, value Int32)
        ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt', '{replica}') ORDER BY id;
        '''.format(replica=node1.name))

        yield cluster

    finally:
        cluster.shutdown()

def corrupt_data_part_on_disk(node, table, part_name):
    part_path = node.query(
        "SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format(table, part_name)).strip()
    node.exec_in_container(['bash', '-c',
                            'cd {p} && ls *.bin | head -n 1 | xargs -I{{}} sh -c \'echo "1" >> $1\' -- {{}}'.format(
                                p=part_path)], privileged=True)


def test_merge_and_part_corruption(started_cluster):
    node1.query("SYSTEM STOP REPLICATION QUEUES replicated_mt")
    for i in range(4):
        node1.query("INSERT INTO replicated_mt SELECT toDate('2019-10-01'), number, number * number FROM numbers ({f}, 100000)".format(f=i*100000))

    assert node1.query("SELECT COUNT() FROM system.parts WHERE table='replicated_mt' AND active=1") == "4\n"

    # Need to corrupt a "border part" (left or right). If we corrupt something in the middle,
    # ClickHouse will not consider the merge broken, because there are parts with the same min and max
    # block numbers.
    corrupt_data_part_on_disk(node1, 'replicated_mt', 'all_3_3_0')

    with Pool(1) as p:
        def optimize_with_delay(x):
            node1.query("OPTIMIZE TABLE replicated_mt FINAL", timeout=30)

        # corrupt the part after the merge is already assigned, but not started
        res_opt = p.apply_async(optimize_with_delay, (1,))
        node1.query("CHECK TABLE replicated_mt", settings={"check_query_single_value_result": 0})
        # start merge
        node1.query("SYSTEM START REPLICATION QUEUES replicated_mt")
        res_opt.get()

    # will hang if the bug being checked is not fixed
    node1.query("ALTER TABLE replicated_mt UPDATE value = 7 WHERE 1", settings={"mutations_sync": 2}, timeout=30)
    assert node1.query("SELECT sum(value) FROM replicated_mt") == "2100000\n"
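The new test relies on multiprocessing.dummy.Pool, a thread pool despite the module name, to run the blocking OPTIMIZE in the background while the main thread issues CHECK TABLE and re-enables the replication queues. A minimal standalone sketch of that pattern:

from multiprocessing.dummy import Pool  # thread-based Pool
import time

def slow_task(x):
    time.sleep(1)        # stands in for the blocking OPTIMIZE query
    return x * 2

with Pool(1) as p:
    async_result = p.apply_async(slow_task, (21,))
    # other work can happen here while slow_task runs in a worker thread
    assert async_result.get() == 42   # blocks until slow_task finishes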
@ -13,18 +13,6 @@ def started_cluster():
    try:
        cluster.start()

        for node in [node1, node2]:
            node.query('''
            CREATE TABLE replicated_mt(date Date, id UInt32, value Int32)
            ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id;
            '''.format(replica=node.name))

        node1.query('''
        CREATE TABLE non_replicated_mt(date Date, id UInt32, value Int32)
        ENGINE = MergeTree() PARTITION BY toYYYYMM(date) ORDER BY id
        SETTINGS min_bytes_for_wide_part=0;
        ''')

        yield cluster

    finally:
@ -54,6 +42,14 @@ def remove_part_from_disk(node, table, part_name):


def test_check_normal_table_corruption(started_cluster):
    node1.query("DROP TABLE IF EXISTS non_replicated_mt")

    node1.query('''
    CREATE TABLE non_replicated_mt(date Date, id UInt32, value Int32)
    ENGINE = MergeTree() PARTITION BY toYYYYMM(date) ORDER BY id
    SETTINGS min_bytes_for_wide_part=0;
    ''')

    node1.query("INSERT INTO non_replicated_mt VALUES (toDate('2019-02-01'), 1, 10), (toDate('2019-02-01'), 2, 12)")
    assert node1.query("CHECK TABLE non_replicated_mt PARTITION 201902",
                       settings={"check_query_single_value_result": 0}) == "201902_1_1_0\t1\t\n"
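As the expected string shows, with check_query_single_value_result set to 0 CHECK TABLE returns one tab-separated row per part: part name, a 0/1 pass flag, and a message. A hedged sketch of a test helper that parses this output (the helper itself is illustrative, not part of this commit):

def assert_all_parts_ok(node, table, partition):
    # illustrative helper built on the output format shown above
    result = node.query("CHECK TABLE {} PARTITION {}".format(table, partition),
                        settings={"check_query_single_value_result": 0})
    for line in result.splitlines():
        part_name, is_ok, message = line.split('\t', 2)
        assert is_ok == '1', "part {} failed check: {}".format(part_name, message)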
@ -94,8 +90,14 @@ def test_check_normal_table_corruption(started_cluster):


def test_check_replicated_table_simple(started_cluster):
    node1.query("TRUNCATE TABLE replicated_mt")
    node2.query("SYSTEM SYNC REPLICA replicated_mt")
    for node in [node1, node2]:
        node.query("DROP TABLE IF EXISTS replicated_mt")

        node.query('''
        CREATE TABLE replicated_mt(date Date, id UInt32, value Int32)
        ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id;
        '''.format(replica=node.name))

    node1.query("INSERT INTO replicated_mt VALUES (toDate('2019-02-01'), 1, 10), (toDate('2019-02-01'), 2, 12)")
    node2.query("SYSTEM SYNC REPLICA replicated_mt")
@ -119,34 +121,40 @@ def test_check_replicated_table_simple(started_cluster):


def test_check_replicated_table_corruption(started_cluster):
    node1.query("TRUNCATE TABLE replicated_mt")
    node2.query("SYSTEM SYNC REPLICA replicated_mt")
    node1.query("INSERT INTO replicated_mt VALUES (toDate('2019-02-01'), 1, 10), (toDate('2019-02-01'), 2, 12)")
    node1.query("INSERT INTO replicated_mt VALUES (toDate('2019-01-02'), 3, 10), (toDate('2019-01-02'), 4, 12)")
    node2.query("SYSTEM SYNC REPLICA replicated_mt")
    for node in [node1, node2]:
        node.query("DROP TABLE IF EXISTS replicated_mt_1")

    assert node1.query("SELECT count() from replicated_mt") == "4\n"
    assert node2.query("SELECT count() from replicated_mt") == "4\n"
        node.query('''
        CREATE TABLE replicated_mt_1(date Date, id UInt32, value Int32)
        ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt_1', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id;
        '''.format(replica=node.name))

    node1.query("INSERT INTO replicated_mt_1 VALUES (toDate('2019-02-01'), 1, 10), (toDate('2019-02-01'), 2, 12)")
    node1.query("INSERT INTO replicated_mt_1 VALUES (toDate('2019-01-02'), 3, 10), (toDate('2019-01-02'), 4, 12)")
    node2.query("SYSTEM SYNC REPLICA replicated_mt_1")

    assert node1.query("SELECT count() from replicated_mt_1") == "4\n"
    assert node2.query("SELECT count() from replicated_mt_1") == "4\n"

    part_name = node1.query(
        "SELECT name from system.parts where table = 'replicated_mt' and partition_id = '201901' and active = 1").strip()
        "SELECT name from system.parts where table = 'replicated_mt_1' and partition_id = '201901' and active = 1").strip()

    corrupt_data_part_on_disk(node1, "replicated_mt", part_name)
    assert node1.query("CHECK TABLE replicated_mt PARTITION 201901", settings={
        "check_query_single_value_result": 0}) == "{p}\t0\tPart {p} looks broken. Removing it and queueing a fetch.\n".format(
    corrupt_data_part_on_disk(node1, "replicated_mt_1", part_name)
    assert node1.query("CHECK TABLE replicated_mt_1 PARTITION 201901", settings={
        "check_query_single_value_result": 0}) == "{p}\t0\tPart {p} looks broken. Removing it and will try to fetch.\n".format(
        p=part_name)

    node1.query("SYSTEM SYNC REPLICA replicated_mt")
    assert node1.query("CHECK TABLE replicated_mt PARTITION 201901",
    node1.query("SYSTEM SYNC REPLICA replicated_mt_1")
    assert node1.query("CHECK TABLE replicated_mt_1 PARTITION 201901",
                       settings={"check_query_single_value_result": 0}) == "{}\t1\t\n".format(part_name)
    assert node1.query("SELECT count() from replicated_mt") == "4\n"
    assert node1.query("SELECT count() from replicated_mt_1") == "4\n"

    remove_part_from_disk(node2, "replicated_mt", part_name)
    assert node2.query("CHECK TABLE replicated_mt PARTITION 201901", settings={
        "check_query_single_value_result": 0}) == "{p}\t0\tPart {p} looks broken. Removing it and queueing a fetch.\n".format(
    remove_part_from_disk(node2, "replicated_mt_1", part_name)
    assert node2.query("CHECK TABLE replicated_mt_1 PARTITION 201901", settings={
        "check_query_single_value_result": 0}) == "{p}\t0\tPart {p} looks broken. Removing it and will try to fetch.\n".format(
        p=part_name)

    node1.query("SYSTEM SYNC REPLICA replicated_mt")
    assert node1.query("CHECK TABLE replicated_mt PARTITION 201901",
    node1.query("SYSTEM SYNC REPLICA replicated_mt_1")
    assert node1.query("CHECK TABLE replicated_mt_1 PARTITION 201901",
                       settings={"check_query_single_value_result": 0}) == "{}\t1\t\n".format(part_name)
    assert node1.query("SELECT count() from replicated_mt") == "4\n"
    assert node1.query("SELECT count() from replicated_mt_1") == "4\n"
@ -69,3 +69,6 @@ def test_limited_fetches(started_cluster):

    assert max([len(parts) for parts in fetches_result]) == 3, "Strange, but we don't utilize max concurrent threads for fetches"
    assert(max(background_fetches_metric)) == 3, "Just checking metric consistent with table"

    node1.query("DROP TABLE IF EXISTS t SYNC")
    node2.query("DROP TABLE IF EXISTS t SYNC")
@ -732,3 +732,14 @@ def clickhouse_killed_while_insert(clickhouse_node, mysql_node, service_name):

    mysql_node.query("DROP DATABASE kill_clickhouse_while_insert")
    clickhouse_node.query("DROP DATABASE kill_clickhouse_while_insert")


def utf8mb4_test(clickhouse_node, mysql_node, service_name):
    mysql_node.query("DROP DATABASE IF EXISTS utf8mb4_test")
    clickhouse_node.query("DROP DATABASE IF EXISTS utf8mb4_test")
    mysql_node.query("CREATE DATABASE utf8mb4_test")
    mysql_node.query("CREATE TABLE utf8mb4_test.test (id INT(11) NOT NULL PRIMARY KEY, name VARCHAR(255)) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4")
    mysql_node.query("INSERT INTO utf8mb4_test.test VALUES(1, '🦄'),(2, '\u2601')")
    clickhouse_node.query("CREATE DATABASE utf8mb4_test ENGINE = MaterializeMySQL('{}:3306', 'utf8mb4_test', 'root', 'clickhouse')".format(service_name))
    check_query(clickhouse_node, "SELECT id, name FROM utf8mb4_test.test ORDER BY id", "1\t\U0001F984\n2\t\u2601\n")
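The test data is chosen so that one value (the unicorn emoji) needs four bytes in UTF-8, which only MySQL's utf8mb4 character set can store, while the cloud sign fits in three. A tiny standalone check of those byte lengths:

emoji = '\U0001F984'   # 🦄
cloud = '\u2601'       # ☁
assert len(emoji.encode('utf-8')) == 4   # needs utf8mb4 on the MySQL side
assert len(cloud.encode('utf-8')) == 3   # fits in the legacy 3-byte utf8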
@ -228,3 +228,8 @@ def test_clickhouse_killed_while_insert_5_7(started_cluster, started_mysql_5_7,
@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_atomic])
def test_clickhouse_killed_while_insert_8_0(started_cluster, started_mysql_8_0, clickhouse_node):
    materialize_with_ddl.clickhouse_killed_while_insert(clickhouse_node, started_mysql_8_0, "mysql8_0")

@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_ordinary])
def test_utf8mb4(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node):
    materialize_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_5_7, "mysql1")
    materialize_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_8_0, "mysql8_0")
@ -105,6 +105,8 @@ def test_kafka_json_as_string(kafka_cluster):
            kafka_flush_interval_ms=1000;
        ''')

    time.sleep(3)

    result = instance.query('SELECT * FROM test.kafka;')
    expected = '''\
{"t": 123, "e": {"x": "woof"} }
@ -443,10 +443,14 @@ def test_infinite_redirect(cluster):
    assert exception_raised


def test_storage_s3_get_gzip(cluster):
@pytest.mark.parametrize("extension,method", [
    ("bin", "gzip"),
    ("gz", "auto")
])
def test_storage_s3_get_gzip(cluster, extension, method):
    bucket = cluster.minio_bucket
    instance = cluster.instances["dummy"]
    filename = "test_get_gzip.bin"
    filename = f"test_get_gzip.{extension}"
    name = "test_get_gzip"
    data = [
        "Sophia Intrieri,55",
@ -473,13 +477,15 @@ def test_storage_s3_get_gzip(cluster):
    put_s3_file_content(cluster, bucket, filename, buf.getvalue())

    try:
        run_query(instance, "CREATE TABLE {} (name String, id UInt32) ENGINE = S3('http://{}:{}/{}/{}', 'CSV', 'gzip')".format(
            name, cluster.minio_host, cluster.minio_port, bucket, filename))
        run_query(instance, f"""CREATE TABLE {name} (name String, id UInt32) ENGINE = S3(
            'http://{cluster.minio_host}:{cluster.minio_port}/{bucket}/{filename}',
            'CSV',
            '{method}')""")

        run_query(instance, "SELECT sum(id) FROM {}".format(name)).splitlines() == ["565"]

    finally:
        run_query(instance, "DROP TABLE {}".format(name))
        run_query(instance, f"DROP TABLE {name}")


def test_storage_s3_put_uncompressed(cluster):
@ -515,13 +521,17 @@ def test_storage_s3_put_uncompressed(cluster):
        uncompressed_content = get_s3_file_content(cluster, bucket, filename)
        assert sum([ int(i.split(',')[1]) for i in uncompressed_content.splitlines() ]) == 753
    finally:
        run_query(instance, "DROP TABLE {}".format(name))
        run_query(instance, f"DROP TABLE {name}")


def test_storage_s3_put_gzip(cluster):
@pytest.mark.parametrize("extension,method", [
    ("bin", "gzip"),
    ("gz", "auto")
])
def test_storage_s3_put_gzip(cluster, extension, method):
    bucket = cluster.minio_bucket
    instance = cluster.instances["dummy"]
    filename = "test_put_gzip.bin"
    filename = f"test_put_gzip.{extension}"
    name = "test_put_gzip"
    data = [
        "'Joseph Tomlinson',5",
@ -541,8 +551,10 @@ def test_storage_s3_put_gzip(cluster):
        "'Yolanda Joseph',89"
    ]
    try:
        run_query(instance, "CREATE TABLE {} (name String, id UInt32) ENGINE = S3('http://{}:{}/{}/{}', 'CSV', 'gzip')".format(
            name, cluster.minio_host, cluster.minio_port, bucket, filename))
        run_query(instance, f"""CREATE TABLE {name} (name String, id UInt32) ENGINE = S3(
            'http://{cluster.minio_host}:{cluster.minio_port}/{bucket}/{filename}',
            'CSV',
            '{method}')""")

        run_query(instance, "INSERT INTO {} VALUES ({})".format(name, "),(".join(data)))

@ -553,4 +565,4 @@ def test_storage_s3_put_gzip(cluster):
        uncompressed_content = f.read().decode()
        assert sum([ int(i.split(',')[1]) for i in uncompressed_content.splitlines() ]) == 708
    finally:
        run_query(instance, "DROP TABLE {}".format(name))
        run_query(instance, f"DROP TABLE {name}")
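Both S3 gzip tests are now parametrized the same way: one run names 'gzip' explicitly against a .bin object, the other passes 'auto' so the compression is inferred from the .gz extension. A minimal standalone sketch of the same pytest.mark.parametrize mechanics (names and values are illustrative):

import pytest

@pytest.mark.parametrize("extension,method", [
    ("bin", "gzip"),   # compression named explicitly
    ("gz", "auto"),    # compression inferred from the file extension
])
def test_compression_modes(extension, method):
    filename = f"example.{extension}"
    assert method in ("gzip", "auto")
    assert filename.endswith(extension)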
@ -91,3 +91,6 @@ def test_system_replicated_fetches(started_cluster):
    for elem in fetches_result:
        assert elem['elapsed'] >= prev_elapsed, "Elapsed time decreasing prev {}, next {}? It's a bug".format(prev_elapsed, elem['elapsed'])
        prev_elapsed = elem['elapsed']

    node1.query("DROP TABLE IF EXISTS t SYNC")
    node2.query("DROP TABLE IF EXISTS t SYNC")
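Here and in the fetches test above, cleanup uses DROP TABLE ... SYNC, which makes the drop wait until the table data is actually removed rather than being deferred, so later tests start from a clean state. An illustrative teardown pattern (node1/node2 are the instances defined in these test files):

for node in (node1, node2):
    node.query("DROP TABLE IF EXISTS t SYNC")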
@ -1,5 +1,7 @@
#!/usr/bin/env bash

CLICKHOUSE_LOG_COMMENT=''

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
Some files were not shown because too many files have changed in this diff.