Merge branch 'master' into in_memory_raft

alesapin 2021-02-01 14:30:11 +03:00
commit 0aca40d8cb
159 changed files with 2749 additions and 1148 deletions

.gitmodules

@ -184,7 +184,7 @@
url = https://github.com/ClickHouse-Extras/krb5
[submodule "contrib/cyrus-sasl"]
path = contrib/cyrus-sasl
url = https://github.com/cyrusimap/cyrus-sasl
url = https://github.com/ClickHouse-Extras/cyrus-sasl
branch = cyrus-sasl-2.1
[submodule "contrib/croaring"]
path = contrib/croaring


@ -31,7 +31,7 @@ static void *volatile vdso_func = (void *)getcpu_init;
int sched_getcpu(void)
{
int r;
unsigned cpu;
unsigned cpu = 0;
#ifdef VDSO_GETCPU_SYM
getcpu_f f = (getcpu_f)vdso_func;


@ -116,8 +116,8 @@ void Connection::connect(const char* db,
if (!mysql_real_connect(driver.get(), server, user, password, db, port, ifNotEmpty(socket), driver->client_flag))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
/// Sets UTF-8 as default encoding.
if (mysql_set_character_set(driver.get(), "UTF8"))
/// Sets UTF-8 as default encoding. See https://mariadb.com/kb/en/mysql_set_character_set/
if (mysql_set_character_set(driver.get(), "utf8mb4"))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
is_connected = true;


@ -1,9 +1,9 @@
# This strings autochanged from release_lib.sh:
SET(VERSION_REVISION 54447)
SET(VERSION_REVISION 54448)
SET(VERSION_MAJOR 21)
SET(VERSION_MINOR 2)
SET(VERSION_MINOR 3)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 53d0c9fa7255aa1dc48991d19f4246ff71cc2fd7)
SET(VERSION_DESCRIBE v21.2.1.1-prestable)
SET(VERSION_STRING 21.2.1.1)
SET(VERSION_GITHASH ef72ba7349f230321750c13ee63b49a11a7c0adc)
SET(VERSION_DESCRIBE v21.3.1.1-prestable)
SET(VERSION_STRING 21.3.1.1)
# end of autochange

contrib/hyperscan

@ -1 +1 @@
Subproject commit 3907fd00ee8b2538739768fa9533f8635a276531
Subproject commit e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa

contrib/poco

@ -1 +1 @@
Subproject commit 2c32e17c7dfee1f8bf24227b697cdef5fddf0823
Subproject commit e11f3c971570cf6a31006cd21cadf41a259c360a

debian/changelog

@ -1,5 +1,5 @@
clickhouse (21.2.1.1) unstable; urgency=low
clickhouse (21.3.1.1) unstable; urgency=low
* Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 11 Jan 2021 11:12:08 +0300
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 01 Feb 2021 12:50:53 +0300


@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.2.1.*
ARG version=21.3.1.*
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \


@ -1,7 +1,7 @@
FROM ubuntu:20.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.2.1.*
ARG version=21.3.1.*
ARG gosu_ver=1.10
# user/group precreated explicitly with fixed uid/gid on purpose.


@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.2.1.*
ARG version=21.3.1.*
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \


@ -253,8 +253,12 @@ function run_tests
00701_rollup
00834_cancel_http_readonly_queries_on_client_close
00911_tautological_compare
# Hyperscan
00926_multimatch
00929_multi_match_edit_distance
01681_hyperscan_debug_assertion
01031_mutations_interpreter_and_context
01053_ssd_dictionary # this test mistakenly requires access to /var/lib/clickhouse -- can't run this locally, disabled
01083_expressions_in_engine_arguments


@ -1,12 +1,16 @@
# docker build -t yandex/clickhouse-style-test .
FROM ubuntu:20.04
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip python3-pytest && pip3 install codespell
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip pylint && pip3 install codespell
# For |& syntax
SHELL ["bash", "-c"]
CMD cd /ClickHouse/utils/check-style && \
./check-style -n | tee /test_output/style_output.txt && \
./check-typos | tee /test_output/typos_output.txt && \
./check-whitespaces -n | tee /test_output/whitespaces_output.txt && \
./check-duplicate-includes.sh | tee /test_output/duplicate_output.txt && \
./shellcheck-run.sh | tee /test_output/shellcheck_output.txt
./check-style -n |& tee /test_output/style_output.txt && \
./check-typos |& tee /test_output/typos_output.txt && \
./check-whitespaces -n |& tee /test_output/whitespaces_output.txt && \
./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt && \
./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt && \
true


@ -1944,6 +1944,21 @@ Possible values:
Default value: 16.
## background_message_broker_schedule_pool_size {#background_message_broker_schedule_pool_size}
Sets the number of threads performing background tasks for message streaming. This setting is applied at ClickHouse server start and can't be changed in a user session.
Possible values:
- Any positive integer.
Default value: 16.
**See Also**
- [Kafka](../../engines/table-engines/integrations/kafka.md#kafka) engine
- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine) engine
## validate_polygons {#validate_polygons}
Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo/index.md#pointinpolygon) function, if the polygon is self-intersecting or self-tangent.


@ -241,7 +241,7 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN)
**Parameters**
- `window` — Length of the sliding window in seconds.
- `window` — Length of the sliding window. The unit of `window` depends on the timestamp itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`.
- `mode` - An optional argument.
- `'strict'` - When `'strict'` is set, `windowFunnel()` applies the conditions only to unique values.
- `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, its value can't exceed the Int64 maximum, which is 2^63 - 1).


@ -11,7 +11,7 @@ Key length depends on encryption mode. It is 16, 24, and 32 bytes long for `-128
Initialization vector length is always 16 bytes (bytes in excess of 16 are ignored).
Note that these functions work slowly.
Note that these functions work slowly in versions before ClickHouse 21.1.
## encrypt {#encrypt}
@ -41,7 +41,7 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad])
**Returned value**
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).
**Examples**
@ -52,57 +52,38 @@ Query:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
`comment` String,
`secret` String
)
ENGINE = Memory
```
Insert this data:
Insert some data (please avoid storing the keys or IVs in the database, as this undermines the whole concept of encryption); storing 'hints' is also unsafe and is done here only for illustrative purposes:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
INSERT INTO encryption_test VALUES('aes-256-cfb128 no IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212')),\
('aes-256-cfb128 no IV, different key', encrypt('aes-256-cfb128', 'Secret', 'keykeykeykeykeykeykeykeykeykeyke')),\
('aes-256-cfb128 with IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')),\
('aes-256-cbc no IV', encrypt('aes-256-cbc', 'Secret', '12345678910121314151617181920212'));
```
Example without `iv`:
Query:
``` sql
SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test;
SELECT comment, hex(secret) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐
│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │
│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │
│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │
└─────────────┴──────────────────────────────────────────────────────────────────┘
```
Example with `iv`:
Query:
``` sql
SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐
│ aes-256-ctr │ │
│ aes-256-ctr │ 7FB039F7 │
│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │
└─────────────┴───────────────────────────────────────────────┘
┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
│ aes-256-cfb128 no IV │ B4972BDC4459 │
│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │
│ aes-256-cfb128 with IV │ 5E6CB398F653 │
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
└─────────────────────────────────────┴──────────────────────────────────┘
```
Example with `-gcm`:
@ -110,40 +91,26 @@ Example with `-gcm`:
Query:
``` sql
SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
INSERT INTO encryption_test VALUES('aes-256-gcm', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')), \
('aes-256-gcm with AAD', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv', 'aad'));
SELECT comment, hex(secret) FROM encryption_test WHERE comment LIKE '%gcm%';
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐
│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │
│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │
│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │
└─────────────┴────────────────────────────────────────────────────────────────────────┘
```
Example with `-gcm` mode and with `aad`:
Query:
``` sql
SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test;
```
Result:
``` text
┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐
│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB │
│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447 │
│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │
└─────────────┴────────────────────────────────────────────────────────────────────────┘
┌─comment──────────────┬─hex(secret)──────────────────────────────────┐
│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
└──────────────────────┴──────────────────────────────────────────────┘
```
## aes_encrypt_mysql {#aes_encrypt_mysql}
Compatible with mysql encryption and can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
Compatible with MySQL encryption; the resulting ciphertext can be decrypted with the [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
Will produce the same ciphertext as `encrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_encrypt_mysql` will stick to what MySQL's `aes_encrypt` does: 'fold' the `key` and ignore excess bits of the `IV`.
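A minimal sketch of that folding behavior, assuming MySQL's XOR-fold scheme (`foldKey` is an illustrative helper, not part of ClickHouse):

``` cpp
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

/// Assumed MySQL-style key folding: bytes of an over-long user key are
/// XOR-folded back into the fixed-size effective key; excess IV bytes are
/// simply ignored by the caller.
std::vector<uint8_t> foldKey(const std::string & user_key, size_t key_size)
{
    std::vector<uint8_t> rkey(key_size, 0);
    for (size_t i = 0; i < user_key.size(); ++i)
        rkey[i % key_size] ^= static_cast<uint8_t>(user_key[i]);
    return rkey;
}
```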
Supported encryption modes:
@ -156,7 +123,7 @@ Supported encryption modes:
**Syntax**
```sql
``` sql
aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
```
@ -164,78 +131,98 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string).
- `key` — Encryption key. If the key is longer than required by the mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Optional; only the first 16 bytes are taken into account. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).
**Examples**
Create this table:
Given equal input, `encrypt` and `aes_encrypt_mysql` produce the same ciphertext:
Query:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
SELECT encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') = aes_encrypt_mysql('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') AS ciphertexts_equal;
```
Insert this data:
Result:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
```
┌─ciphertexts_equal─┐
│ 1 │
└───────────────────┘
```
Example without `iv`:
But `encrypt` fails when `key` or `iv` is longer than expected:
Query:
``` sql
SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test;
SELECT encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123');
```
Result:
``` text
┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐
│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │
│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │
│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │
└─────────────┴──────────────────────────────────────────────────────────────────┘
Received exception from server (version 21.1.2):
Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123').
```
Example with `iv`:
While `aes_encrypt_mysql` produces MySQL-compatible output:
Query:
``` sql
SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test;
SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123')) AS ciphertext;
```
Result:
```text
┌─ciphertext───┐
│ 24E9E4966469 │
└──────────────┘
```
Notice how supplying an even longer `IV` produces the same result:
Query:
``` sql
SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456')) AS ciphertext
```
Result:
``` text
┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐
│ aes-256-cfb128 │ │
│ aes-256-cfb128 │ 7FB039F7 │
│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F │
└────────────────┴────────────────────────────────────────────────────────────┘
┌─ciphertext───┐
│ 24E9E4966469 │
└──────────────┘
```
This is binary-equal to what MySQL produces on the same inputs:
``` sql
mysql> SET block_encryption_mode='aes-256-cfb128';
Query OK, 0 rows affected (0.00 sec)
mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
+------------------------+
| ciphertext |
+------------------------+
| 0x24E9E4966469 |
+------------------------+
1 row in set (0.00 sec)
```
## decrypt {#decrypt}
This function decrypts data using these modes:
This function decrypts ciphertext into plaintext using these modes:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
@ -247,7 +234,7 @@ This function decrypts data using these modes:
**Syntax**
```sql
``` sql
decrypt('mode', 'ciphertext', 'key' [, iv, aad])
```
@ -265,51 +252,56 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad])
**Examples**
Create this table:
Re-using the table from [encrypt](./encryption-functions.md#encrypt).
Query:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
```
Insert this data:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
```
Query:
``` sql
SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test;
SELECT comment, hex(secret) FROM encryption_test;
```
Result:
```text
┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐
│ aes-128-ecb │ │
│ aes-128-ecb │ text │
│ aes-128-ecb │ What Is ClickHouse? │
└─────────────┴─────────────────────────────────────────────────────────────────────┘
``` text
┌─comment──────────────┬─hex(secret)──────────────────────────────────┐
│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
└──────────────────────┴──────────────────────────────────────────────┘
┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
│ aes-256-cfb128 no IV │ B4972BDC4459 │
│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │
│ aes-256-cfb128 with IV │ 5E6CB398F653 │
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
└─────────────────────────────────────┴──────────────────────────────────┘
```
Now let's try to decrypt all that data.
Query:
``` sql
SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920212') as plaintext FROM encryption_test
```
Result:
``` text
┌─comment─────────────────────────────┬─plaintext─┐
│ aes-256-cfb128 no IV │ Secret │
│ aes-256-cfb128 no IV, different key │ �4�
� │
│ aes-256-cfb128 with IV │ ���6�~ │
│ aes-256-cbc no IV │ �2*4�h3c�4w��@ │
└─────────────────────────────────────┴───────────┘
```
Notice how only a portion of the data was properly decrypted; the rest is gibberish, since the `mode`, `key`, or `iv` differed at encryption time.
## aes_decrypt_mysql {#aes_decrypt_mysql}
Compatible with MySQL encryption; decrypts data encrypted with the [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function.
Will produce the same plaintext as `decrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_decrypt_mysql` will stick to what MySQL's `aes_decrypt` does: 'fold' the `key` and ignore excess bits of the `IV`.
Supported decryption modes:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
@ -321,7 +313,7 @@ Supported decryption modes:
**Syntax**
```sql
``` sql
aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
```
@ -338,44 +330,30 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
**Examples**
Create this table:
Query:
Let's decrypt the data we previously encrypted with MySQL:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
```
mysql> SET block_encryption_mode='aes-256-cfb128';
Query OK, 0 rows affected (0.00 sec)
Insert this data:
Query:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
+------------------------+
| ciphertext |
+------------------------+
| 0x24E9E4966469 |
+------------------------+
1 row in set (0.00 sec)
```
Query:
``` sql
SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test;
SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext
```
Result:
``` text
┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐
│ aes-128-cbc │ │
│ aes-128-cbc │ text │
│ aes-128-cbc │ What Is ClickHouse? │
└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘
┌─plaintext─┐
│ Secret │
└───────────┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/encryption_functions/) <!--hide-->


@ -115,9 +115,20 @@ LIMIT 10
## IPv6StringToNum(s) {#ipv6stringtonums}
The reverse function of IPv6NumToString. If the IPv6 address has an invalid format, it returns a string of null bytes.
If the IP address is a valid IPv4 address then the IPv6 equivalent of the IPv4 address is returned.
HEX can be uppercase or lowercase.
``` sql
SELECT cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0);
```
``` text
┌─cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0)─┐
│ ::ffff:127.0.0.1 │
└─────────────────────────────────────────────┘
```
## IPv4ToIPv6(x) {#ipv4toipv6x}
Takes a `UInt32` number. Interprets it as an IPv4 address in [big endian](https://en.wikipedia.org/wiki/Endianness). Returns a `FixedString(16)` value containing the IPv6 address in binary format. Examples:
@ -214,6 +225,7 @@ SELECT
## toIPv6(string) {#toipv6string}
An alias to `IPv6StringToNum()` that takes the string form of an IPv6 address and returns a value of [IPv6](../../sql-reference/data-types/domains/ipv6.md) type, which is binary-equal to the value returned by `IPv6StringToNum()`.
If the IP address is a valid IPv4 address then the IPv6 equivalent of the IPv4 address is returned.
``` sql
WITH
@ -243,6 +255,15 @@ SELECT
└───────────────────────────────────┴──────────────────────────────────┘
```
``` sql
SELECT toIPv6('127.0.0.1')
```
``` text
┌─toIPv6('127.0.0.1')─┐
│ ::ffff:127.0.0.1 │
└─────────────────────┘
```
## isIPv4String


@ -62,12 +62,12 @@ public:
bool randomize_, size_t max_iterations_, double max_time_,
const String & json_path_, size_t confidence_,
const String & query_id_, const String & query_to_execute_, bool continue_on_errors_,
bool print_stacktrace_, const Settings & settings_)
bool reconnect_, bool print_stacktrace_, const Settings & settings_)
:
concurrency(concurrency_), delay(delay_), queue(concurrency), randomize(randomize_),
cumulative(cumulative_), max_iterations(max_iterations_), max_time(max_time_),
json_path(json_path_), confidence(confidence_), query_id(query_id_),
query_to_execute(query_to_execute_), continue_on_errors(continue_on_errors_),
query_to_execute(query_to_execute_), continue_on_errors(continue_on_errors_), reconnect(reconnect_),
print_stacktrace(print_stacktrace_), settings(settings_),
shared_context(Context::createShared()), global_context(Context::createGlobal(shared_context.get())),
pool(concurrency)
@ -155,6 +155,7 @@ private:
String query_id;
String query_to_execute;
bool continue_on_errors;
bool reconnect;
bool print_stacktrace;
const Settings & settings;
SharedContextHolder shared_context;
@ -404,9 +405,14 @@ private:
void execute(EntryPtrs & connection_entries, Query & query, size_t connection_index)
{
Stopwatch watch;
Connection & connection = **connection_entries[connection_index];
if (reconnect)
connection.disconnect();
RemoteBlockInputStream stream(
*(*connection_entries[connection_index]),
query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage);
connection, query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage);
if (!query_id.empty())
stream.setQueryId(query_id);
@ -589,6 +595,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
("confidence", value<size_t>()->default_value(5), "set the level of confidence for T-test [0=80%, 1=90%, 2=95%, 3=98%, 4=99%, 5=99.5%(default)")
("query_id", value<std::string>()->default_value(""), "")
("continue_on_errors", "continue testing even if a query fails")
("reconnect", "establish new connection for every query")
;
Settings settings;
@ -638,7 +645,8 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
options["confidence"].as<size_t>(),
options["query_id"].as<std::string>(),
options["query"].as<std::string>(),
options.count("continue_on_errors") > 0,
options.count("continue_on_errors"),
options.count("reconnect"),
print_stacktrace,
settings);
return benchmark.run();
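A sketch of what the new flag does per query, with a stub `Connection` standing in for ClickHouse's class: dropping the connection before every query folds the connection-setup cost into each measurement.

``` cpp
/// Stub type for illustration only; the real code uses Connection and
/// RemoteBlockInputStream from the ClickHouse client library.
struct Connection
{
    bool connected = false;
    void disconnect() { connected = false; }
    void ensureConnected() { connected = true; } /// reconnects lazily on next use
};

void executeOnce(Connection & connection, bool reconnect)
{
    if (reconnect)
        connection.disconnect(); /// forces a fresh session for this query
    connection.ensureConnected();
    /// ... send the query, read blocks, record timings ...
}
```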


@ -513,7 +513,7 @@ private:
}
protected:
void extractColumns(const IColumn ** columns, const IColumn ** aggr_columns) const
ssize_t extractColumns(const IColumn ** columns, const IColumn ** aggr_columns, ssize_t if_argument_pos) const
{
if (tuple_argument)
{
@ -526,6 +526,13 @@ protected:
for (size_t i = 0; i < args_count; ++i)
columns[i] = aggr_columns[i];
}
if (if_argument_pos >= 0)
{
columns[args_count] = aggr_columns[if_argument_pos];
return args_count;
}
else
return -1;
}
bool tuple_argument;
@ -551,8 +558,8 @@ public:
Arena * arena,
ssize_t if_argument_pos = -1) const override
{
const IColumn * ex_columns[args_count];
extractColumns(ex_columns, columns);
const IColumn * ex_columns[args_count + (if_argument_pos >= 0)];
if_argument_pos = extractColumns(ex_columns, columns, if_argument_pos);
Base::addBatch(batch_size, places, place_offset, ex_columns, arena, if_argument_pos);
}
@ -560,8 +567,8 @@ public:
void addBatchSinglePlace(
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1) const override
{
const IColumn * ex_columns[args_count];
extractColumns(ex_columns, columns);
const IColumn * ex_columns[args_count + (if_argument_pos >= 0)];
if_argument_pos = extractColumns(ex_columns, columns, if_argument_pos);
Base::addBatchSinglePlace(batch_size, place, ex_columns, arena, if_argument_pos);
}
@ -574,8 +581,8 @@ public:
Arena * arena,
ssize_t if_argument_pos = -1) const override
{
const IColumn * ex_columns[args_count];
extractColumns(ex_columns, columns);
const IColumn * ex_columns[args_count + (if_argument_pos >= 0)];
if_argument_pos = extractColumns(ex_columns, columns, if_argument_pos);
Base::addBatchSinglePlaceNotNull(batch_size, place, ex_columns, null_map, arena, if_argument_pos);
}
@ -584,8 +591,8 @@ public:
size_t batch_begin, size_t batch_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1)
const override
{
const IColumn * ex_columns[args_count];
extractColumns(ex_columns, columns);
const IColumn * ex_columns[args_count + (if_argument_pos >= 0)];
if_argument_pos = extractColumns(ex_columns, columns, if_argument_pos);
Base::addBatchSinglePlaceFromInterval(batch_begin, batch_end, place, ex_columns, arena, if_argument_pos);
}
@ -595,7 +602,7 @@ public:
const override
{
const IColumn * ex_columns[args_count];
extractColumns(ex_columns, columns);
extractColumns(ex_columns, columns, -1);
Base::addBatchArray(batch_size, places, place_offset, ex_columns, offsets, arena);
}
@ -610,7 +617,7 @@ public:
Arena * arena) const override
{
const IColumn * ex_columns[args_count];
extractColumns(ex_columns, columns);
extractColumns(ex_columns, columns, -1);
Base::addBatchLookupTable8(batch_size, map, place_offset, init, key, ex_columns, arena);
}
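For readers skimming the diff, a standalone sketch of the contract the new `extractColumns` establishes (`Column` is a simplified stand-in for `IColumn`):

``` cpp
#include <cstddef>
#include <vector>

using Column = std::vector<int>; /// stand-in for IColumn

/// The -If combinator's condition column is appended right after the
/// unpacked argument columns, and its new index (== args_count) is
/// returned; -1 means "no condition column".
std::ptrdiff_t extractColumnsSketch(const Column ** out, const Column * const * in,
                                    size_t args_count, std::ptrdiff_t if_argument_pos)
{
    for (size_t i = 0; i < args_count; ++i)
        out[i] = in[i];

    if (if_argument_pos >= 0)
    {
        out[args_count] = in[if_argument_pos]; /// condition column goes last
        return static_cast<std::ptrdiff_t>(args_count);
    }
    return -1;
}
```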


@ -191,6 +191,7 @@ add_object_library(clickhouse_processors_sources Processors/Sources)
add_object_library(clickhouse_processors_merges Processors/Merges)
add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Algorithms)
add_object_library(clickhouse_processors_queryplan Processors/QueryPlan)
add_object_library(clickhouse_processors_queryplan_optimizations Processors/QueryPlan/Optimizations)
add_object_library(clickhouse_coordination Coordination)
set (DBMS_COMMON_LIBRARIES)


@ -7,8 +7,10 @@
#include <atomic>
#include <Poco/Net/StreamSocket.h>
#include <Common/Exception.h>
#include <Common/Stopwatch.h>
#include <Common/ShellCommand.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/copyData.h>
/** In a loop it connects to the server and immediately breaks the connection.
@ -18,22 +20,26 @@
int main(int argc, char ** argv)
try
{
using namespace DB;
size_t num_iterations = 1;
size_t num_threads = 1;
std::string host = "localhost";
uint16_t port = 9000;
if (argc >= 2)
num_iterations = DB::parse<size_t>(argv[1]);
num_iterations = parse<size_t>(argv[1]);
if (argc >= 3)
num_threads = DB::parse<size_t>(argv[2]);
num_threads = parse<size_t>(argv[2]);
if (argc >= 4)
host = argv[3];
if (argc >= 5)
port = DB::parse<uint16_t>(argv[4]);
port = parse<uint16_t>(argv[4]);
WriteBufferFromFileDescriptor out(STDERR_FILENO);
std::atomic_bool cancel{false};
std::vector<std::thread> threads(num_threads);
@ -45,44 +51,32 @@ try
{
std::cerr << ".";
Poco::Net::SocketAddress address(host, port);
int fd = socket(PF_INET, SOCK_STREAM, IPPROTO_IP);
if (fd < 0)
DB::throwFromErrno("Cannot create socket", 0);
linger linger_value;
linger_value.l_onoff = 1;
linger_value.l_linger = 0;
if (0 != setsockopt(fd, SOL_SOCKET, SO_LINGER, &linger_value, sizeof(linger_value)))
DB::throwFromErrno("Cannot set linger", 0);
try
{
Stopwatch watch;
Poco::Net::SocketAddress address(host, port);
Poco::Net::StreamSocket socket;
//socket.setLinger(1, 0);
int res = connect(fd, address.addr(), address.length());
if (res != 0 && errno != EINPROGRESS && errno != EWOULDBLOCK)
socket.connectNB(address);
if (!socket.poll(Poco::Timespan(1000000),
Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_WRITE | Poco::Net::Socket::SELECT_ERROR))
{
close(fd);
DB::throwFromErrno("Cannot connect", 0);
}
/// Allow to debug the server.
/* auto command = ShellCommand::execute("kill -STOP $(pidof clickhouse-server)");
copyData(command->err, out);
copyData(command->out, out);
command->wait();*/
close(fd);
if (watch.elapsedSeconds() > 0.1)
{
std::cerr << watch.elapsedSeconds() << "\n";
cancel = true;
break;
std::cerr << "Timeout\n";
/* cancel = true;
break;*/
}
}
catch (const Poco::Exception & e)
{
std::cerr << e.displayText() << "\n";
cancel = true;
break;
}
}
});


@ -289,7 +289,8 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
while (filt_pos < filt_end_sse)
{
int mask = _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
mask = ~mask;
if (0 == mask)
{


@ -356,7 +356,8 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
while (filt_pos < filt_end_sse)
{
int mask = _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
mask = ~mask;
if (0 == mask)
{


@ -17,13 +17,17 @@ namespace DB
static UInt64 toBits64(const Int8 * bytes64)
{
static const __m128i zero16 = _mm_setzero_si128();
return static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64)), zero16)))
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 16)), zero16)))
<< 16)
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 32)), zero16)))
<< 32)
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 48)), zero16)))
<< 48);
UInt64 res =
static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64)), zero16)))
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 16)), zero16))) << 16)
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 32)), zero16))) << 32)
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 48)), zero16))) << 48);
return ~res;
}
#endif
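The same pattern is applied in `ColumnFixedString::filter` and `ColumnVector::filter` above. A standalone sketch of the reasoning, assuming SSE2 (the function name is illustrative):

``` cpp
#include <cstdint>
#include <emmintrin.h> /// SSE2

/// A filter byte is "true" iff it is non-zero. The old signed comparison
/// _mm_cmpgt_epi8(x, zero) misclassified bytes >= 0x80 as false, because
/// they are negative as int8_t. Comparing for equality with zero and
/// negating the mask treats every non-zero byte as true.
uint16_t nonZeroMask16(const uint8_t * filt)
{
    const __m128i zero16 = _mm_setzero_si128();
    const __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(filt));
    const auto eq_zero = static_cast<uint16_t>(_mm_movemask_epi8(_mm_cmpeq_epi8(bytes, zero16)));
    return static_cast<uint16_t>(~eq_zero); /// bit i set <=> filt[i] != 0
}
```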
@ -49,7 +53,7 @@ size_t countBytesInFilter(const UInt8 * filt, size_t sz)
#endif
for (; pos < end; ++pos)
count += *pos > 0;
count += *pos != 0;
return count;
}
@ -82,7 +86,7 @@ size_t countBytesInFilterWithNull(const IColumn::Filter & filt, const UInt8 * nu
#endif
for (; pos < end; ++pos)
count += (*pos & ~*pos2) > 0;
count += (*pos & ~*pos2) != 0;
return count;
}
@ -232,9 +236,10 @@ namespace
while (filt_pos < filt_end_aligned)
{
const auto mask = _mm_movemask_epi8(_mm_cmpgt_epi8(
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)),
zero_vec));
mask = ~mask;
if (mask == 0)
{


@ -119,6 +119,13 @@ void tryLogCurrentException(const char * log_name, const std::string & start_of_
void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_message)
{
/// Under high memory pressure, any new allocation will definitely lead
/// to a MEMORY_LIMIT_EXCEEDED exception.
///
/// In that case the exception itself will not be logged, so block the
/// MemoryTracker until the exception has been logged.
MemoryTracker::LockExceptionInThread lock_memory_tracker;
try
{
if (start_of_message.empty())
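A simplified sketch of the idea, assuming `MemoryTracker::LockExceptionInThread` acts as a thread-local RAII guard (this stand-in is illustrative, not the real class):

``` cpp
/// While the guard is alive, the (hypothetical) allocator checks this flag
/// and refrains from throwing MEMORY_LIMIT_EXCEEDED, so formatting and
/// writing the log message cannot itself throw on allocation.
thread_local bool memory_exception_blocked = false;

struct LockExceptionInThreadSketch
{
    LockExceptionInThreadSketch() { memory_exception_blocked = true; }
    ~LockExceptionInThreadSketch() { memory_exception_blocked = false; }
};

void tryLogCurrentExceptionSketch()
{
    LockExceptionInThreadSketch lock; /// blocks the memory limit for this scope
    /// ... format and write the log message ...
}
```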


@ -53,7 +53,7 @@ public:
if constexpr (std::is_arithmetic_v<U>)
{
ReadBufferFromString in(l);
T parsed;
U parsed;
readText(parsed, in);
return operator()(parsed, r);
}
@ -113,7 +113,7 @@ public:
if constexpr (std::is_arithmetic_v<U>)
{
ReadBufferFromString in(l);
T parsed;
U parsed;
readText(parsed, in);
return operator()(parsed, r);
}
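A standalone sketch of the bug class this fixes, using a simplified stream-based parser in place of `ReadBufferFromString`/`readText`:

``` cpp
#include <sstream>
#include <string>
#include <type_traits>

/// When a string field is compared with an arithmetic value of type U, the
/// string must be parsed as U. The code previously declared `T parsed;`,
/// silently comparing through the wrong type.
template <typename U>
bool equalsParsed(const std::string & l, U r)
{
    static_assert(std::is_arithmetic_v<U>);
    std::istringstream in(l);
    U parsed{}; /// was mistakenly T before this commit
    in >> parsed;
    return !in.fail() && parsed == r;
}
```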


@ -98,14 +98,31 @@ public:
}
else
{
const auto first_u32 = UTF8::convert(needle);
const auto first_l_u32 = Poco::Unicode::toLower(first_u32);
const auto first_u_u32 = Poco::Unicode::toUpper(first_u32);
auto first_u32 = UTF8::convertUTF8ToCodePoint(needle, needle_size);
/// Invalid UTF-8
if (!first_u32)
{
/// Process it verbatim as a sequence of bytes.
size_t src_len = UTF8::seqLength(*needle);
memcpy(l_seq, needle, src_len);
memcpy(u_seq, needle, src_len);
}
else
{
uint32_t first_l_u32 = Poco::Unicode::toLower(*first_u32);
uint32_t first_u_u32 = Poco::Unicode::toUpper(*first_u32);
/// lower and uppercase variants of the first octet of the first character in `needle`
size_t length_l = UTF8::convertCodePointToUTF8(first_l_u32, l_seq, sizeof(l_seq));
size_t length_r = UTF8::convertCodePointToUTF8(first_u_u32, u_seq, sizeof(u_seq));
if (length_l != length_r)
throw Exception{"UTF8 sequences with different lowercase and uppercase lengths are not supported", ErrorCodes::UNSUPPORTED_PARAMETER};
}
/// lower and uppercase variants of the first octet of the first character in `needle`
UTF8::convert(first_l_u32, l_seq, sizeof(l_seq));
l = l_seq[0];
UTF8::convert(first_u_u32, u_seq, sizeof(u_seq));
u = u_seq[0];
}
@ -128,18 +145,21 @@ public:
continue;
}
const auto src_len = UTF8::seqLength(*needle_pos);
const auto c_u32 = UTF8::convert(needle_pos);
size_t src_len = std::min<size_t>(needle_end - needle_pos, UTF8::seqLength(*needle_pos));
auto c_u32 = UTF8::convertUTF8ToCodePoint(needle_pos, src_len);
const auto c_l_u32 = Poco::Unicode::toLower(c_u32);
const auto c_u_u32 = Poco::Unicode::toUpper(c_u32);
if (c_u32)
{
int c_l_u32 = Poco::Unicode::toLower(*c_u32);
int c_u_u32 = Poco::Unicode::toUpper(*c_u32);
const auto dst_l_len = static_cast<uint8_t>(UTF8::convert(c_l_u32, l_seq, sizeof(l_seq)));
const auto dst_u_len = static_cast<uint8_t>(UTF8::convert(c_u_u32, u_seq, sizeof(u_seq)));
uint8_t dst_l_len = static_cast<uint8_t>(UTF8::convertCodePointToUTF8(c_l_u32, l_seq, sizeof(l_seq)));
uint8_t dst_u_len = static_cast<uint8_t>(UTF8::convertCodePointToUTF8(c_u_u32, u_seq, sizeof(u_seq)));
/// @note Unicode standard states it is a rare but possible occasion
if (!(dst_l_len == dst_u_len && dst_u_len == src_len))
throw Exception{"UTF8 sequences with different lowercase and uppercase lengths are not supported", ErrorCodes::UNSUPPORTED_PARAMETER};
/// @note Unicode standard states it is a rare but possible occasion
if (!(dst_l_len == dst_u_len && dst_u_len == src_len))
throw Exception{"UTF8 sequences with different lowercase and uppercase lengths are not supported", ErrorCodes::UNSUPPORTED_PARAMETER};
}
cache_actual_len += src_len;
if (cache_actual_len < n)
@ -164,7 +184,7 @@ public:
}
template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
ALWAYS_INLINE bool compare(const CharT * /*haystack*/, const CharT * /*haystack_end*/, const CharT * pos) const
ALWAYS_INLINE bool compare(const CharT * /*haystack*/, const CharT * haystack_end, const CharT * pos) const
{
#ifdef __SSE4_1__
@ -183,11 +203,20 @@ public:
pos += cache_valid_len;
auto needle_pos = needle + cache_valid_len;
while (needle_pos < needle_end &&
Poco::Unicode::toLower(UTF8::convert(pos)) ==
Poco::Unicode::toLower(UTF8::convert(needle_pos)))
while (needle_pos < needle_end)
{
/// @note assuming sequences for lowercase and uppercase have exact same length
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(pos, haystack_end - pos);
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
/// Invalid UTF-8, should not compare equals
if (!haystack_code_point || !needle_code_point)
break;
/// Not equals case insensitive.
if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
break;
/// @note assuming sequences for lowercase and uppercase have exact same length (that is not always true)
const auto len = UTF8::seqLength(*pos);
pos += len;
needle_pos += len;
@ -209,10 +238,19 @@ public:
pos += first_needle_symbol_is_ascii;
auto needle_pos = needle + first_needle_symbol_is_ascii;
while (needle_pos < needle_end &&
Poco::Unicode::toLower(UTF8::convert(pos)) ==
Poco::Unicode::toLower(UTF8::convert(needle_pos)))
while (needle_pos < needle_end)
{
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(pos, haystack_end - pos);
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
/// Invalid UTF-8, should not compare equals
if (!haystack_code_point || !needle_code_point)
break;
/// Not equals case insensitive.
if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
break;
const auto len = UTF8::seqLength(*pos);
pos += len;
needle_pos += len;
@ -270,11 +308,20 @@ public:
auto haystack_pos = haystack + cache_valid_len;
auto needle_pos = needle + cache_valid_len;
while (haystack_pos < haystack_end && needle_pos < needle_end &&
Poco::Unicode::toLower(UTF8::convert(haystack_pos)) ==
Poco::Unicode::toLower(UTF8::convert(needle_pos)))
while (haystack_pos < haystack_end && needle_pos < needle_end)
{
/// @note assuming sequences for lowercase and uppercase have exact same length
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
/// Invalid UTF-8, should not compare equals
if (!haystack_code_point || !needle_code_point)
break;
/// Not equals case insensitive.
if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
break;
/// @note assuming sequences for lowercase and uppercase have exact same length (that is not always true)
const auto len = UTF8::seqLength(*haystack_pos);
haystack_pos += len;
needle_pos += len;
@ -302,10 +349,19 @@ public:
auto haystack_pos = haystack + first_needle_symbol_is_ascii;
auto needle_pos = needle + first_needle_symbol_is_ascii;
while (haystack_pos < haystack_end && needle_pos < needle_end &&
Poco::Unicode::toLower(UTF8::convert(haystack_pos)) ==
Poco::Unicode::toLower(UTF8::convert(needle_pos)))
while (haystack_pos < haystack_end && needle_pos < needle_end)
{
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
/// Invalid UTF-8, should not compare equals
if (!haystack_code_point || !needle_code_point)
break;
/// Not equals case insensitive.
if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
break;
const auto len = UTF8::seqLength(*haystack_pos);
haystack_pos += len;
needle_pos += len;
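A standalone sketch of the fixed comparison loop, assuming a hand-rolled decoder for 1- and 2-byte sequences and `std::towlower` in place of `Poco::Unicode::toLower` (both are stand-ins):

``` cpp
#include <cstddef>
#include <cstdint>
#include <cwctype>
#include <optional>

/// Stand-in for UTF8::convertUTF8ToCodePoint; 3-/4-byte forms omitted.
static std::optional<uint32_t> decode(const uint8_t * p, std::ptrdiff_t len)
{
    if (len >= 1 && p[0] < 0x80)
        return uint32_t{p[0]};
    if (len >= 2 && (p[0] & 0xE0) == 0xC0 && (p[1] & 0xC0) == 0x80)
        return (uint32_t{p[0]} & 0x1F) << 6 | (p[1] & 0x3F);
    return {}; /// invalid or truncated sequence
}

/// Shape of the fixed loop: bail out on invalid UTF-8 instead of feeding a
/// bogus code point to toLower; assume (as the original does) that lower-
/// and uppercase forms have equal sequence lengths.
bool ciEquals(const uint8_t * hay, const uint8_t * hay_end,
              const uint8_t * needle, const uint8_t * needle_end)
{
    while (needle < needle_end)
    {
        auto h = decode(hay, hay_end - hay);
        auto n = decode(needle, needle_end - needle);
        if (!h || !n)
            return false; /// invalid UTF-8 never compares equal
        if (std::towlower(static_cast<wint_t>(*h)) != std::towlower(static_cast<wint_t>(*n)))
            return false;
        const std::size_t len = (*n < 0x80) ? 1 : 2; /// sequence length
        hay += len;
        needle += len;
    }
    return true;
}
```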


@ -1,5 +1,6 @@
#pragma once
#include <optional>
#include <common/types.h>
#include <Common/BitHelpers.h>
#include <Poco/UTF8Encoding.h>
@ -73,26 +74,27 @@ inline size_t countCodePoints(const UInt8 * data, size_t size)
return res;
}
template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
int convert(const CharT * bytes)
size_t convertCodePointToUTF8(uint32_t code_point, CharT * out_bytes, size_t out_length)
{
static const Poco::UTF8Encoding utf8;
return utf8.convert(reinterpret_cast<const uint8_t *>(bytes));
int res = utf8.convert(code_point, reinterpret_cast<uint8_t *>(out_bytes), out_length);
assert(res >= 0);
return res;
}
template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
int convert(int ch, CharT * bytes, int length)
std::optional<uint32_t> convertUTF8ToCodePoint(const CharT * in_bytes, size_t in_length)
{
static const Poco::UTF8Encoding utf8;
return utf8.convert(ch, reinterpret_cast<uint8_t *>(bytes), length);
int res = utf8.queryConvert(reinterpret_cast<const uint8_t *>(in_bytes), in_length);
if (res >= 0)
return res;
return {};
}
template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
int queryConvert(const CharT * bytes, int length)
{
static const Poco::UTF8Encoding utf8;
return utf8.queryConvert(reinterpret_cast<const uint8_t *>(bytes), length);
}
/// returns UTF-8 wcswidth. Invalid sequence is treated as zero width character.
/// `prefix` is used to compute the `\t` width which extends the string before
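From the caller's side, the renamed encoder has the following contract; a sketch assuming a hand-rolled implementation limited to code points below U+0800 (the real helper delegates the full range to `Poco::UTF8Encoding`):

``` cpp
#include <cstddef>
#include <cstdint>

/// Returns the number of bytes written, or 0 if the buffer is too small
/// (or the code point needs a longer encoding than this sketch supports).
std::size_t convertCodePointToUTF8Sketch(uint32_t cp, uint8_t * out, std::size_t out_len)
{
    if (cp < 0x80)
    {
        if (out_len < 1)
            return 0;
        out[0] = static_cast<uint8_t>(cp);
        return 1;
    }
    if (cp < 0x800)
    {
        if (out_len < 2)
            return 0;
        out[0] = static_cast<uint8_t>(0xC0 | (cp >> 6));
        out[1] = static_cast<uint8_t>(0x80 | (cp & 0x3F));
        return 2;
    }
    return 0;
}
```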

View File

@ -60,7 +60,7 @@ namespace VolnitskyTraits
static inline Ngram toNGram(const UInt8 * const pos) { return unalignedLoad<Ngram>(pos); }
template <typename Callback>
static inline void putNGramASCIICaseInsensitive(const UInt8 * const pos, const int offset, const Callback & putNGramBase)
static inline void putNGramASCIICaseInsensitive(const UInt8 * pos, int offset, Callback && putNGramBase)
{
struct Chars
{
@ -109,199 +109,234 @@ namespace VolnitskyTraits
putNGramBase(n, offset);
}
template <bool CaseSensitive, bool ASCII, typename Callback>
static inline void putNGram(const UInt8 * const pos, const int offset, [[maybe_unused]] const UInt8 * const begin, const Callback & putNGramBase)
template <typename Callback>
static inline void putNGramUTF8CaseInsensitive(
const UInt8 * pos, int offset, const UInt8 * begin, size_t size, Callback && putNGramBase)
{
if constexpr (CaseSensitive)
const UInt8 * end = begin + size;
struct Chars
{
putNGramBase(toNGram(pos), offset);
UInt8 c0;
UInt8 c1;
};
union
{
VolnitskyTraits::Ngram n;
Chars chars;
};
n = toNGram(pos);
if (isascii(chars.c0) && isascii(chars.c1))
{
putNGramASCIICaseInsensitive(pos, offset, putNGramBase);
}
else
{
if constexpr (ASCII)
/** n-gram (in the case of n = 2)
* can be entirely located within one code point,
* or intersect with two code points.
*
* In the first case, you need to consider up to two alternatives - this code point in upper and lower case,
* and in the second case - up to four alternatives - fragments of two code points in all combinations of cases.
*
* It does not take into account the dependence of the case transformation on the locale (for example - Turkish `Ii`),
* nor composition / decomposition and other features.
*
* It also does not work if characters with lower and upper cases are represented by a different number of bytes or code points.
*/
using Seq = UInt8[6];
if (UTF8::isContinuationOctet(chars.c1))
{
putNGramASCIICaseInsensitive(pos, offset, putNGramBase);
}
else
{
struct Chars
/// ngram is inside a sequence
auto seq_pos = pos;
UTF8::syncBackward(seq_pos, begin);
auto u32 = UTF8::convertUTF8ToCodePoint(seq_pos, end - seq_pos);
/// Invalid UTF-8
if (!u32)
{
UInt8 c0;
UInt8 c1;
};
union
{
VolnitskyTraits::Ngram n;
Chars chars;
};
n = toNGram(pos);
if (isascii(chars.c0) && isascii(chars.c1))
putNGramASCIICaseInsensitive(pos, offset, putNGramBase);
putNGramBase(n, offset);
}
else
{
/** n-gram (in the case of n = 2)
* can be entirely located within one code point,
* or intersect with two code points.
*
* In the first case, you need to consider up to two alternatives - this code point in upper and lower case,
* and in the second case - up to four alternatives - fragments of two code points in all combinations of cases.
*
* It does not take into account the dependence of the case transformation on the locale (for example - Turkish `Ii`),
* nor composition / decomposition and other features.
*
* It also does not work if characters with lower and upper cases are represented by a different number of bytes or code points.
*/
int l_u32 = Poco::Unicode::toLower(*u32);
int u_u32 = Poco::Unicode::toUpper(*u32);
using Seq = UInt8[6];
if (UTF8::isContinuationOctet(chars.c1))
/// symbol is case-independent
if (l_u32 == u_u32)
{
/// ngram is inside a sequence
auto seq_pos = pos;
UTF8::syncBackward(seq_pos, begin);
const auto u32 = UTF8::convert(seq_pos);
const auto l_u32 = Poco::Unicode::toLower(u32);
const auto u_u32 = Poco::Unicode::toUpper(u32);
/// symbol is case-independent
if (l_u32 == u_u32)
putNGramBase(n, offset);
else
{
/// where is the given ngram in respect to the start of UTF-8 sequence?
const auto seq_ngram_offset = pos - seq_pos;
Seq seq;
/// put ngram for lowercase
UTF8::convert(l_u32, seq, sizeof(seq));
chars.c0 = seq[seq_ngram_offset];
chars.c1 = seq[seq_ngram_offset + 1];
putNGramBase(n, offset);
/// put ngram for uppercase
UTF8::convert(u_u32, seq, sizeof(seq));
chars.c0 = seq[seq_ngram_offset]; //-V519
chars.c1 = seq[seq_ngram_offset + 1]; //-V519
putNGramBase(n, offset);
}
putNGramBase(n, offset);
}
else
{
/// ngram is on the boundary of two sequences
/// first sequence may start before u_pos if it is not ASCII
auto first_seq_pos = pos;
UTF8::syncBackward(first_seq_pos, begin);
/// where is the given ngram in respect to the start of first UTF-8 sequence?
const auto seq_ngram_offset = pos - first_seq_pos;
/// where is the given ngram in respect to the start of UTF-8 sequence?
size_t seq_ngram_offset = pos - seq_pos;
const auto first_u32 = UTF8::convert(first_seq_pos);
const auto first_l_u32 = Poco::Unicode::toLower(first_u32);
const auto first_u_u32 = Poco::Unicode::toUpper(first_u32);
Seq seq;
/// second sequence always start immediately after u_pos
auto second_seq_pos = pos + 1;
/// put ngram for lowercase
size_t length_l [[maybe_unused]] = UTF8::convertCodePointToUTF8(l_u32, seq, sizeof(seq));
assert(length_l >= 2);
chars.c0 = seq[seq_ngram_offset];
chars.c1 = seq[seq_ngram_offset + 1];
putNGramBase(n, offset);
const auto second_u32 = UTF8::convert(second_seq_pos); /// TODO This assumes valid UTF-8 or zero byte after needle.
const auto second_l_u32 = Poco::Unicode::toLower(second_u32);
const auto second_u_u32 = Poco::Unicode::toUpper(second_u32);
/// put ngram for uppercase
size_t length_r [[maybe_unused]] = UTF8::convertCodePointToUTF8(u_u32, seq, sizeof(seq));
assert(length_r >= 2);
chars.c0 = seq[seq_ngram_offset]; //-V519
chars.c1 = seq[seq_ngram_offset + 1]; //-V519
putNGramBase(n, offset);
}
}
}
else
{
/// ngram is on the boundary of two sequences
/// first sequence may start before u_pos if it is not ASCII
auto first_seq_pos = pos;
UTF8::syncBackward(first_seq_pos, begin);
/// where is the given ngram in respect to the start of first UTF-8 sequence?
size_t seq_ngram_offset = pos - first_seq_pos;
/// both symbols are case-independent
if (first_l_u32 == first_u_u32 && second_l_u32 == second_u_u32)
{
putNGramBase(n, offset);
}
else if (first_l_u32 == first_u_u32)
{
/// first symbol is case-independent
Seq seq;
auto first_u32 = UTF8::convertUTF8ToCodePoint(first_seq_pos, end - first_seq_pos);
int first_l_u32 = 0;
int first_u_u32 = 0;
/// put ngram for lowercase
UTF8::convert(second_l_u32, seq, sizeof(seq));
chars.c1 = seq[0];
putNGramBase(n, offset);
if (first_u32)
{
first_l_u32 = Poco::Unicode::toLower(*first_u32);
first_u_u32 = Poco::Unicode::toUpper(*first_u32);
}
/// put ngram from uppercase, if it is different
UTF8::convert(second_u_u32, seq, sizeof(seq));
if (chars.c1 != seq[0])
{
chars.c1 = seq[0];
putNGramBase(n, offset);
}
}
else if (second_l_u32 == second_u_u32)
{
/// second symbol is case-independent
Seq seq;
/// second sequence always start immediately after u_pos
auto second_seq_pos = pos + 1;
/// put ngram for lowercase
UTF8::convert(first_l_u32, seq, sizeof(seq));
chars.c0 = seq[seq_ngram_offset];
putNGramBase(n, offset);
auto second_u32 = UTF8::convertUTF8ToCodePoint(second_seq_pos, end - second_seq_pos);
int second_l_u32 = 0;
int second_u_u32 = 0;
/// put ngram for uppercase, if it is different
UTF8::convert(first_u_u32, seq, sizeof(seq));
if (chars.c0 != seq[seq_ngram_offset])
{
chars.c0 = seq[seq_ngram_offset];
putNGramBase(n, offset);
}
}
else
{
Seq first_l_seq;
Seq first_u_seq;
Seq second_l_seq;
Seq second_u_seq;
if (second_u32)
{
second_l_u32 = Poco::Unicode::toLower(*second_u32);
second_u_u32 = Poco::Unicode::toUpper(*second_u32);
}
UTF8::convert(first_l_u32, first_l_seq, sizeof(first_l_seq));
UTF8::convert(first_u_u32, first_u_seq, sizeof(first_u_seq));
UTF8::convert(second_l_u32, second_l_seq, sizeof(second_l_seq));
UTF8::convert(second_u_u32, second_u_seq, sizeof(second_u_seq));
/// both symbols are case-independent
if (first_l_u32 == first_u_u32 && second_l_u32 == second_u_u32)
{
putNGramBase(n, offset);
}
else if (first_l_u32 == first_u_u32)
{
/// first symbol is case-independent
Seq seq;
auto c0l = first_l_seq[seq_ngram_offset];
auto c0u = first_u_seq[seq_ngram_offset];
auto c1l = second_l_seq[0];
auto c1u = second_u_seq[0];
/// put ngram for lowercase
size_t size_l [[maybe_unused]] = UTF8::convertCodePointToUTF8(second_l_u32, seq, sizeof(seq));
assert(size_l >= 1);
chars.c1 = seq[0];
putNGramBase(n, offset);
/// ngram for ll
chars.c0 = c0l;
chars.c1 = c1l;
putNGramBase(n, offset);
/// put ngram from uppercase, if it is different
size_t size_u [[maybe_unused]] = UTF8::convertCodePointToUTF8(second_u_u32, seq, sizeof(seq));
assert(size_u >= 1);
if (chars.c1 != seq[0])
{
chars.c1 = seq[0];
putNGramBase(n, offset);
}
}
else if (second_l_u32 == second_u_u32)
{
/// second symbol is case-independent
Seq seq;
if (c0l != c0u)
{
/// ngram for Ul
chars.c0 = c0u;
chars.c1 = c1l;
putNGramBase(n, offset);
}
/// put ngram for lowercase
size_t size_l [[maybe_unused]] = UTF8::convertCodePointToUTF8(first_l_u32, seq, sizeof(seq));
assert(size_l > seq_ngram_offset);
chars.c0 = seq[seq_ngram_offset];
putNGramBase(n, offset);
if (c1l != c1u)
{
/// ngram for lU
chars.c0 = c0l;
chars.c1 = c1u;
putNGramBase(n, offset);
}
/// put ngram for uppercase, if it is different
size_t size_u [[maybe_unused]] = UTF8::convertCodePointToUTF8(first_u_u32, seq, sizeof(seq));
assert(size_u > seq_ngram_offset);
if (chars.c0 != seq[seq_ngram_offset])
{
chars.c0 = seq[seq_ngram_offset];
putNGramBase(n, offset);
}
}
else
{
Seq first_l_seq;
Seq first_u_seq;
Seq second_l_seq;
Seq second_u_seq;
if (c0l != c0u && c1l != c1u)
{
/// ngram for UU
chars.c0 = c0u;
chars.c1 = c1u;
putNGramBase(n, offset);
}
}
size_t size_first_l [[maybe_unused]] = UTF8::convertCodePointToUTF8(first_l_u32, first_l_seq, sizeof(first_l_seq));
size_t size_first_u [[maybe_unused]] = UTF8::convertCodePointToUTF8(first_u_u32, first_u_seq, sizeof(first_u_seq));
size_t size_second_l [[maybe_unused]] = UTF8::convertCodePointToUTF8(second_l_u32, second_l_seq, sizeof(second_l_seq));
size_t size_second_u [[maybe_unused]] = UTF8::convertCodePointToUTF8(second_u_u32, second_u_seq, sizeof(second_u_seq));
assert(size_first_l > seq_ngram_offset);
assert(size_first_u > seq_ngram_offset);
assert(size_second_l > 0);
assert(size_second_u > 0);
auto c0l = first_l_seq[seq_ngram_offset];
auto c0u = first_u_seq[seq_ngram_offset];
auto c1l = second_l_seq[0];
auto c1u = second_u_seq[0];
/// ngram for ll
chars.c0 = c0l;
chars.c1 = c1l;
putNGramBase(n, offset);
if (c0l != c0u)
{
/// ngram for Ul
chars.c0 = c0u;
chars.c1 = c1l;
putNGramBase(n, offset);
}
if (c1l != c1u)
{
/// ngram for lU
chars.c0 = c0l;
chars.c1 = c1u;
putNGramBase(n, offset);
}
if (c0l != c0u && c1l != c1u)
{
/// ngram for UU
chars.c0 = c0u;
chars.c1 = c1u;
putNGramBase(n, offset);
}
}
}
}
}
template <bool CaseSensitive, bool ASCII, typename Callback>
static inline void putNGram(const UInt8 * pos, int offset, [[maybe_unused]] const UInt8 * begin, size_t size, Callback && putNGramBase)
{
if constexpr (CaseSensitive)
putNGramBase(toNGram(pos), offset);
else if constexpr (ASCII)
putNGramASCIICaseInsensitive(pos, offset, std::forward<Callback>(putNGramBase));
else
putNGramUTF8CaseInsensitive(pos, offset, begin, size, std::forward<Callback>(putNGramBase));
}
}
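A standalone distillation of the ASCII branch (`putNGramASCIICaseInsensitive`): a case-insensitive 2-gram has up to four case variants of its two bytes, and each distinct variant must be registered. Here they are printed instead of inserted into the hash table:

``` cpp
#include <cctype>
#include <cstdio>

int main()
{
    const unsigned char c0 = 'A';
    const unsigned char c1 = 'b';

    const auto c0l = static_cast<unsigned char>(std::tolower(c0));
    const auto c0u = static_cast<unsigned char>(std::toupper(c0));
    const auto c1l = static_cast<unsigned char>(std::tolower(c1));
    const auto c1u = static_cast<unsigned char>(std::toupper(c1));

    const auto put = [](unsigned char a, unsigned char b) { std::printf("%c%c\n", a, b); };

    put(c0l, c1l);                    /// ll: "ab"
    if (c0u != c0l)
        put(c0u, c1l);                /// Ul: "Ab"
    if (c1u != c1l)
        put(c0l, c1u);                /// lU: "aB"
    if (c0u != c0l && c1u != c1l)
        put(c0u, c1u);                /// UU: "AB"
}
```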
@ -310,17 +345,17 @@ template <bool CaseSensitive, bool ASCII, typename FallbackSearcher>
class VolnitskyBase
{
protected:
const UInt8 * const needle;
const size_t needle_size;
const UInt8 * const needle_end = needle + needle_size;
const UInt8 * needle;
size_t needle_size;
const UInt8 * needle_end = needle + needle_size;
/// For how long we move, if the n-gram from haystack is not found in the hash table.
const size_t step = needle_size - sizeof(VolnitskyTraits::Ngram) + 1;
size_t step = needle_size - sizeof(VolnitskyTraits::Ngram) + 1;
/** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1)
* storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
std::unique_ptr<VolnitskyTraits::Offset[]> hash; /// Hash table.
const bool fallback; /// Do we need to use the fallback algorithm.
bool fallback; /// Do we need to use the fallback algorithm.
FallbackSearcher fallback_searcher;
@ -346,7 +381,7 @@ public:
/// ssize_t is used here because unsigned can't be used with condition like `i >= 0`, unsigned always >= 0
/// And also adding from the end guarantees that we will find first occurrence because we will lookup bigger offsets first.
for (auto i = static_cast<ssize_t>(needle_size - sizeof(VolnitskyTraits::Ngram)); i >= 0; --i)
VolnitskyTraits::putNGram<CaseSensitive, ASCII>(this->needle + i, i + 1, this->needle, callback);
VolnitskyTraits::putNGram<CaseSensitive, ASCII>(needle + i, i + 1, needle, needle_size, callback);
}
@ -493,6 +528,7 @@ public:
reinterpret_cast<const UInt8 *>(cur_needle_data) + i,
i + 1,
reinterpret_cast<const UInt8 *>(cur_needle_data),
cur_needle_size,
callback);
}
}


@ -120,9 +120,10 @@ inline int memcmpSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_siz
for (size_t offset = min_size; offset < max_size; offset += 16)
{
uint16_t mask = _mm_movemask_epi8(_mm_cmpgt_epi8(
uint16_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(longest + offset)),
zero16));
mask = ~mask;
if (mask)
{


@ -36,6 +36,7 @@
#define DEFAULT_MERGE_BLOCK_SIZE 8192
#define DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC 5
#define DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC 60
#define SHOW_CHARS_ON_SYNTAX_ERROR ptrdiff_t(160)
#define DEFAULT_LIVE_VIEW_HEARTBEAT_INTERVAL_SEC 15
#define DBMS_DEFAULT_DISTRIBUTED_CONNECTIONS_POOL_SIZE 1024


@ -391,6 +391,7 @@ class IColumn;
M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \
M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \
M(Seconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \
M(Seconds, periodic_live_view_refresh, DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC, "Interval after which periodically refreshed live view is forced to refresh.", 0) \
M(Bool, transform_null_in, false, "If enabled, NULL values will be matched with 'IN' operator as if they are considered equal.", 0) \
M(Bool, allow_nondeterministic_mutations, false, "Allow non-deterministic functions in ALTER UPDATE/ALTER DELETE statements", 0) \
M(Seconds, lock_acquire_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "How long locking request should wait before failing", 0) \

View File

@ -504,7 +504,7 @@ private:
using namespace traits_;
using namespace impl_;
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
class FunctionBinaryArithmetic : public IFunction
{
static constexpr const bool is_plus = IsOperation<Op>::plus;
@ -542,16 +542,54 @@ class FunctionBinaryArithmetic : public IFunction
>(type, std::forward<F>(f));
}
template <typename F>
static bool castTypeNoFloats(const IDataType * type, F && f)
{
return castTypeToEither<
DataTypeUInt8,
DataTypeUInt16,
DataTypeUInt32,
DataTypeUInt64,
DataTypeUInt256,
DataTypeInt8,
DataTypeInt16,
DataTypeInt32,
DataTypeInt64,
DataTypeInt128,
DataTypeInt256,
DataTypeDate,
DataTypeDateTime,
DataTypeDecimal<Decimal32>,
DataTypeDecimal<Decimal64>,
DataTypeDecimal<Decimal128>,
DataTypeDecimal<Decimal256>,
DataTypeFixedString
>(type, std::forward<F>(f));
}
template <typename F>
static bool castBothTypes(const IDataType * left, const IDataType * right, F && f)
{
return castType(left, [&](const auto & left_)
{
    return castType(right, [&](const auto & right_)
    {
        return f(left_, right_);
    });
});
if constexpr (valid_on_float_arguments)
{
    return castType(left, [&](const auto & left_)
    {
        return castType(right, [&](const auto & right_)
        {
            return f(left_, right_);
        });
    });
}
else
{
return castTypeNoFloats(left, [&](const auto & left_)
{
return castTypeNoFloats(right, [&](const auto & right_)
{
return f(left_, right_);
});
});
}
}
static FunctionOverloadResolverPtr
@ -1319,11 +1357,11 @@ public:
};
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
class FunctionBinaryArithmeticWithConstants : public FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
class FunctionBinaryArithmeticWithConstants : public FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>
{
public:
using Base = FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>;
using Base = FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>;
using Monotonicity = typename Base::Monotonicity;
static FunctionPtr create(
@ -1488,7 +1526,7 @@ private:
DataTypePtr return_type;
};
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true, bool valid_on_float_arguments = true>
class BinaryArithmeticOverloadResolver : public IFunctionOverloadResolverImpl
{
public:
@ -1512,14 +1550,14 @@ public:
|| (arguments[1].column && isColumnConst(*arguments[1].column))))
{
return std::make_unique<DefaultFunction>(
FunctionBinaryArithmeticWithConstants<Op, Name, valid_on_default_arguments>::create(
FunctionBinaryArithmeticWithConstants<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::create(
arguments[0], arguments[1], return_type, context),
ext::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }),
return_type);
}
return std::make_unique<DefaultFunction>(
FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>::create(context),
FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::create(context),
ext::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }),
return_type);
}
@ -1530,7 +1568,7 @@ public:
throw Exception(
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + ", should be 2",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
return FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>::getReturnTypeImplStatic(arguments, context);
return FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments, valid_on_float_arguments>::getReturnTypeImplStatic(arguments, context);
}
private:

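castTypeNoFloats simply narrows the list of types the dispatcher may match, so with valid_on_float_arguments = false a float argument finds no match and the cast reports failure, which surfaces as an illegal-type error. A hedged miniature of that list-based dispatch (stand-in types, not the real castTypeToEither):

    #include <iostream>

    struct IDataTypeSketch { virtual ~IDataTypeSketch() = default; };
    struct TypeInt : IDataTypeSketch {};
    struct TypeFloat : IDataTypeSketch {};

    // Fold over the type list; short-circuits on the first successful cast.
    template <typename... Ts, typename F>
    bool castTypeToEitherSketch(const IDataTypeSketch * type, F && f)
    {
        return ((dynamic_cast<const Ts *>(type) != nullptr && f(*dynamic_cast<const Ts *>(type))) || ...);
    }

    int main()
    {
        TypeInt t;
        // With floats excluded from the list, a TypeFloat argument would simply return false.
        bool ok = castTypeToEitherSketch<TypeInt>(&t, [](const auto &) { return true; });
        std::cout << ok << '\n';   // 1
    }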
View File

@ -68,12 +68,12 @@ struct AddSecondsImpl : public AddOnDateTime64DefaultImpl<AddSecondsImpl>
static constexpr auto name = "addSeconds";
static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &)
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &)
{
return t + delta;
}
static inline UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
{
return time_zone.fromDayNum(DayNum(d)) + delta;
}
@ -92,7 +92,7 @@ struct AddMinutesImpl : public AddOnDateTime64DefaultImpl<AddMinutesImpl>
return t + delta * 60;
}
static inline UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
{
return time_zone.fromDayNum(DayNum(d)) + delta * 60;
}
@ -111,7 +111,7 @@ struct AddHoursImpl : public AddOnDateTime64DefaultImpl<AddHoursImpl>
return t + delta * 3600;
}
static inline UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
{
return time_zone.fromDayNum(DayNum(d)) + delta * 3600;
}
@ -125,18 +125,12 @@ struct AddDaysImpl : public AddOnDateTime64DefaultImpl<AddDaysImpl>
static constexpr auto name = "addDays";
// static inline UInt32 execute(UInt64 t, Int64 delta, const DateLUTImpl & time_zone)
// {
// // TODO (nemkov): LUT does not support out-of range date values for now.
// return time_zone.addDays(t, delta);
// }
static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone)
{
return time_zone.addDays(t, delta);
}
static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &)
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &)
{
return d + delta;
}
@ -155,7 +149,7 @@ struct AddWeeksImpl : public AddOnDateTime64DefaultImpl<AddWeeksImpl>
return time_zone.addWeeks(t, delta);
}
static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &)
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &)
{
return d + delta * 7;
}

View File

@ -263,6 +263,12 @@ public:
static constexpr auto name = "IPv6StringToNum";
static FunctionPtr create(const Context &) { return std::make_shared<FunctionIPv6StringToNum>(); }
static inline bool tryParseIPv4(const char * pos)
{
UInt32 result = 0;
return DB::parseIPv4(pos, reinterpret_cast<unsigned char *>(&result));
}
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }
@ -270,8 +276,8 @@ public:
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isString(arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
throw Exception(
"Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeFixedString>(IPV6_BINARY_LENGTH);
}
@ -292,13 +298,27 @@ public:
const ColumnString::Chars & vec_src = col_in->getChars();
const ColumnString::Offsets & offsets_src = col_in->getOffsets();
size_t src_offset = 0;
char src_ipv4_buf[sizeof("::ffff:") + IPV4_MAX_TEXT_LENGTH + 1] = "::ffff:";
for (size_t out_offset = 0, i = 0;
out_offset < vec_res.size();
out_offset += IPV6_BINARY_LENGTH, ++i)
for (size_t out_offset = 0, i = 0; out_offset < vec_res.size(); out_offset += IPV6_BINARY_LENGTH, ++i)
{
/// In case of failure, the function fills vec_res with zero bytes.
parseIPv6(reinterpret_cast<const char *>(&vec_src[src_offset]), reinterpret_cast<unsigned char *>(&vec_res[out_offset]));
/// For both cases below: In case of failure, the function parseIPv6 fills vec_res with zero bytes.
/// If the source IP address is parsable as an IPv4 address, then transform it into a valid IPv6 address.
/// Keeping it simple by just prefixing `::ffff:` to the IPv4 address to represent it as a valid IPv6 address.
if (tryParseIPv4(reinterpret_cast<const char *>(&vec_src[src_offset])))
{
std::memcpy(
src_ipv4_buf + std::strlen("::ffff:"),
reinterpret_cast<const char *>(&vec_src[src_offset]),
std::min<UInt64>(offsets_src[i] - src_offset, IPV4_MAX_TEXT_LENGTH + 1));
parseIPv6(src_ipv4_buf, reinterpret_cast<unsigned char *>(&vec_res[out_offset]));
}
else
{
parseIPv6(
reinterpret_cast<const char *>(&vec_src[src_offset]), reinterpret_cast<unsigned char *>(&vec_res[out_offset]));
}
src_offset = offsets_src[i];
}

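The new branch leans on the IPv4-mapped IPv6 form from RFC 4291: prefixing an IPv4 literal with `::ffff:` yields text the existing parseIPv6 already understands, so no separate IPv4 output path is needed. A minimal illustration of that string surgery:

    #include <cstring>
    #include <cstdio>

    int main()
    {
        char buf[sizeof("::ffff:") + 15 + 1] = "::ffff:";   // 15 = max IPv4 text length
        const char * ipv4 = "192.168.0.1";
        std::memcpy(buf + std::strlen("::ffff:"), ipv4, std::strlen(ipv4) + 1);
        std::puts(buf);   // prints ::ffff:192.168.0.1
    }

Running it prints `::ffff:192.168.0.1`, which parseIPv6 maps to the 16-byte address ending in the four IPv4 octets.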
View File

@ -21,7 +21,7 @@ namespace ErrorCodes
}
template <class T>
inline constexpr bool is_gcd_lcm_implemeted = !(is_big_int_v<T> || std::is_floating_point_v<T>);
inline constexpr bool is_gcd_lcm_implemeted = !is_big_int_v<T>;
template <typename A, typename B, typename Impl, typename Name>
struct GCDLCMImpl
@ -33,7 +33,7 @@ struct GCDLCMImpl
static inline std::enable_if_t<!is_gcd_lcm_implemeted<Result>, Result>
apply(A, B)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not implemented for big integers and floats", Name::name);
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not implemented for big integers", Name::name);
}
template <typename Result = ResultType>

View File

@ -465,7 +465,7 @@ std::vector<size_t> buildKMPPrefixFunction(const SliceType & pattern, const Equa
for (size_t i = 1; i < pattern.size; ++i)
{
result[i] = 0;
for (auto length = i; length > 0;)
for (size_t length = i; length > 0;)
{
length = result[length - 1];
if (isEqualFunc(pattern, i, length))
@ -695,7 +695,7 @@ void resizeDynamicSize(ArraySource && array_source, ValueSource && value_source,
if (size >= 0)
{
auto length = static_cast<size_t>(size);
size_t length = static_cast<size_t>(size);
if (length > MAX_ARRAY_SIZE)
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}, maximum: {}",
length, MAX_ARRAY_SIZE);
@ -711,7 +711,7 @@ void resizeDynamicSize(ArraySource && array_source, ValueSource && value_source,
}
else
{
auto length = static_cast<size_t>(-size);
size_t length = -static_cast<size_t>(size);
if (length > MAX_ARRAY_SIZE)
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}, maximum: {}",
length, MAX_ARRAY_SIZE);
@ -744,7 +744,7 @@ void resizeConstantSize(ArraySource && array_source, ValueSource && value_source
if (size >= 0)
{
auto length = static_cast<size_t>(size);
size_t length = static_cast<size_t>(size);
if (length > MAX_ARRAY_SIZE)
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}, maximum: {}",
length, MAX_ARRAY_SIZE);
@ -760,7 +760,7 @@ void resizeConstantSize(ArraySource && array_source, ValueSource && value_source
}
else
{
auto length = static_cast<size_t>(-size);
size_t length = -static_cast<size_t>(size);
if (length > MAX_ARRAY_SIZE)
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}, maximum: {}",
length, MAX_ARRAY_SIZE);

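All four hunks make the same fix: negate after converting to the unsigned type instead of before. This matters only at the extreme, as the sketch below shows, but that extreme is exactly what fuzzers feed in:

    #include <cstdint>
    #include <cstddef>

    // For size == INT64_MIN the expression -size overflows the signed type
    // (undefined behaviour); negating the unsigned value is well defined
    // modulo 2^64 and yields the intended magnitude.
    size_t lengthFromNegativeSize(int64_t size)
    {
        // static_cast<size_t>(-size)  would be UB for INT64_MIN.
        return -static_cast<size_t>(size);
    }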
View File

@ -135,15 +135,16 @@ struct LowerUpperUTF8Impl
{
static const Poco::UTF8Encoding utf8;
int src_sequence_length = UTF8::seqLength(*src);
size_t src_sequence_length = UTF8::seqLength(*src);
int src_code_point = UTF8::queryConvert(src, src_end - src);
if (src_code_point > 0)
auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src);
if (src_code_point)
{
int dst_code_point = to_case(src_code_point);
int dst_code_point = to_case(*src_code_point);
if (dst_code_point > 0)
{
int dst_sequence_length = UTF8::convert(dst_code_point, dst, src_end - src);
size_t dst_sequence_length = UTF8::convertCodePointToUTF8(dst_code_point, dst, src_end - src);
assert(dst_sequence_length <= 4);
/// We don't support cases when lowercase and uppercase characters occupy a different number of bytes in UTF-8.
/// As an example, this happens for ß and ẞ.
@ -156,7 +157,9 @@ struct LowerUpperUTF8Impl
}
}
*dst++ = *src++;
*dst = *src;
++dst;
++src;
}
}

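The migration from sentinel integers (`src_code_point > 0`) to an optional return makes the failure path explicit at every call site. A sketch of the assumed contract (signature hypothetical, ASCII-only for brevity, not the real UTF8::convertUTF8ToCodePoint):

    #include <optional>
    #include <cstdint>
    #include <cstddef>

    // Failure is an empty optional rather than a sentinel integer
    // that callers might forget to range-check.
    std::optional<uint32_t> decodeAsciiCodePoint(const uint8_t * src, size_t len)
    {
        if (len == 0 || src[0] >= 0x80)   // multi-byte sequences elided in this sketch
            return std::nullopt;
        return static_cast<uint32_t>(src[0]);
    }

Call sites then read naturally: `if (auto cp = decodeAsciiCodePoint(src, n)) use(*cp);`.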
View File

@ -168,7 +168,6 @@ namespace MultiRegexps
hs_database_t * db = nullptr;
hs_compile_error_t * compile_error;
std::unique_ptr<unsigned int[]> ids;
/// We mark the patterns to provide the callback results.

View File

@ -37,7 +37,7 @@ struct BitAndImpl
};
struct NameBitAnd { static constexpr auto name = "bitAnd"; };
using FunctionBitAnd = BinaryArithmeticOverloadResolver<BitAndImpl, NameBitAnd, true>;
using FunctionBitAnd = BinaryArithmeticOverloadResolver<BitAndImpl, NameBitAnd, true, false>;
}

View File

@ -36,7 +36,7 @@ struct BitOrImpl
};
struct NameBitOr { static constexpr auto name = "bitOr"; };
using FunctionBitOr = BinaryArithmeticOverloadResolver<BitOrImpl, NameBitOr, true>;
using FunctionBitOr = BinaryArithmeticOverloadResolver<BitOrImpl, NameBitOr, true, false>;
}

View File

@ -43,7 +43,7 @@ struct BitRotateLeftImpl
};
struct NameBitRotateLeft { static constexpr auto name = "bitRotateLeft"; };
using FunctionBitRotateLeft = BinaryArithmeticOverloadResolver<BitRotateLeftImpl, NameBitRotateLeft>;
using FunctionBitRotateLeft = BinaryArithmeticOverloadResolver<BitRotateLeftImpl, NameBitRotateLeft, true, false>;
}

View File

@ -42,7 +42,7 @@ struct BitRotateRightImpl
};
struct NameBitRotateRight { static constexpr auto name = "bitRotateRight"; };
using FunctionBitRotateRight = BinaryArithmeticOverloadResolver<BitRotateRightImpl, NameBitRotateRight>;
using FunctionBitRotateRight = BinaryArithmeticOverloadResolver<BitRotateRightImpl, NameBitRotateRight, true, false>;
}

View File

@ -42,7 +42,7 @@ struct BitShiftLeftImpl
};
struct NameBitShiftLeft { static constexpr auto name = "bitShiftLeft"; };
using FunctionBitShiftLeft = BinaryArithmeticOverloadResolver<BitShiftLeftImpl, NameBitShiftLeft>;
using FunctionBitShiftLeft = BinaryArithmeticOverloadResolver<BitShiftLeftImpl, NameBitShiftLeft, true, false>;
}

View File

@ -42,7 +42,7 @@ struct BitShiftRightImpl
};
struct NameBitShiftRight { static constexpr auto name = "bitShiftRight"; };
using FunctionBitShiftRight = BinaryArithmeticOverloadResolver<BitShiftRightImpl, NameBitShiftRight>;
using FunctionBitShiftRight = BinaryArithmeticOverloadResolver<BitShiftRightImpl, NameBitShiftRight, true, false>;
}

View File

@ -34,7 +34,7 @@ struct BitTestImpl
};
struct NameBitTest { static constexpr auto name = "bitTest"; };
using FunctionBitTest = BinaryArithmeticOverloadResolver<BitTestImpl, NameBitTest>;
using FunctionBitTest = BinaryArithmeticOverloadResolver<BitTestImpl, NameBitTest, true, false>;
}

View File

@ -36,7 +36,7 @@ struct BitXorImpl
};
struct NameBitXor { static constexpr auto name = "bitXor"; };
using FunctionBitXor = BinaryArithmeticOverloadResolver<BitXorImpl, NameBitXor, true>;
using FunctionBitXor = BinaryArithmeticOverloadResolver<BitXorImpl, NameBitXor, true, false>;
}

View File

@ -3,6 +3,7 @@
#include <Functions/FunctionHelpers.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Common/NaNUtils.h>
#include <DataTypes/DataTypeString.h>
#include <IO/WriteBufferFromVector.h>
#include <IO/WriteHelpers.h>
@ -134,24 +135,32 @@ public:
/// Virtual call is Ok (negligible compared to the rest of the calculations).
Float64 value = arguments[0].column->getFloat64(i);
bool is_negative = value < 0;
if (is_negative)
if (!isFinite(value))
{
writeChar('-', buf_to);
value = -value;
/// Cannot decide what unit it is (years, months); just write inf or nan.
writeFloatText(value, buf_to);
}
/// To output separators between parts: ", " and " and ".
bool has_output = false;
switch (max_unit) /// A kind of Duff Device.
else
{
case Years: processUnit(365 * 24 * 3600, " year", 5, value, buf_to, has_output); [[fallthrough]];
case Months: processUnit(30.5 * 24 * 3600, " month", 6, value, buf_to, has_output); [[fallthrough]];
case Days: processUnit(24 * 3600, " day", 4, value, buf_to, has_output); [[fallthrough]];
case Hours: processUnit(3600, " hour", 5, value, buf_to, has_output); [[fallthrough]];
case Minutes: processUnit(60, " minute", 7, value, buf_to, has_output); [[fallthrough]];
case Seconds: processUnit(1, " second", 7, value, buf_to, has_output);
bool is_negative = value < 0;
if (is_negative)
{
writeChar('-', buf_to);
value = -value;
}
/// To output separators between parts: ", " and " and ".
bool has_output = false;
switch (max_unit) /// A kind of Duff Device.
{
case Years: processUnit(365 * 24 * 3600, " year", 5, value, buf_to, has_output); [[fallthrough]];
case Months: processUnit(30.5 * 24 * 3600, " month", 6, value, buf_to, has_output); [[fallthrough]];
case Days: processUnit(24 * 3600, " day", 4, value, buf_to, has_output); [[fallthrough]];
case Hours: processUnit(3600, " hour", 5, value, buf_to, has_output); [[fallthrough]];
case Minutes: processUnit(60, " minute", 7, value, buf_to, has_output); [[fallthrough]];
case Seconds: processUnit(1, " second", 7, value, buf_to, has_output);
}
}
writeChar(0, buf_to);

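The switch the hunk wraps is a deliberate fallthrough cascade: starting from max_unit, every smaller unit gets a chance to print, which is why only the inf/nan case needs to bypass it. A stripped-down sketch of that control flow:

    #include <cstdio>

    enum Unit { Seconds = 0, Minutes, Hours, Days, Months, Years };

    // Starting at max_unit, every smaller unit is visited too.
    void printUnits(Unit max_unit)
    {
        switch (max_unit)
        {
            case Years:   std::puts("year");   [[fallthrough]];
            case Months:  std::puts("month");  [[fallthrough]];
            case Days:    std::puts("day");    [[fallthrough]];
            case Hours:   std::puts("hour");   [[fallthrough]];
            case Minutes: std::puts("minute"); [[fallthrough]];
            case Seconds: std::puts("second");
        }
    }

printUnits(Days) emits day, hour, minute and second, never year or month.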
View File

@ -23,7 +23,7 @@ struct GCDImpl : public GCDLCMImpl<A, B, GCDImpl<A, B>, NameGCD>
}
};
using FunctionGCD = BinaryArithmeticOverloadResolver<GCDImpl, NameGCD, false>;
using FunctionGCD = BinaryArithmeticOverloadResolver<GCDImpl, NameGCD, false, false>;
}

View File

@ -95,7 +95,7 @@ void geodistInit()
sphere_metric_meters_lut[i] = static_cast<float>(sqr((EARTH_DIAMETER * PI / 360) * cos(latitude)));
sphere_metric_lut[i] = cosf(latitude);
sphere_metric_lut[i] = sqrf(cosf(latitude));
}
}
@ -182,7 +182,7 @@ float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg)
/// (Remember how a plane flies from Moscow to New York)
/// But if longitude is close but latitude is different enough, there is no difference between meridian and great circle line.
float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, KTABLE] indexes
float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, METRIC_LUT_SIZE] indexes
size_t latitude_midpoint_index = floatToIndex(latitude_midpoint) & (METRIC_LUT_SIZE - 1);
/// This is linear interpolation between two table items at index "latitude_midpoint_index" and "latitude_midpoint_index + 1".

View File

@ -54,7 +54,7 @@ struct LCMImpl : public GCDLCMImpl<A, B, LCMImpl<A, B>, NameLCM>
}
};
using FunctionLCM = BinaryArithmeticOverloadResolver<LCMImpl, NameLCM, false>;
using FunctionLCM = BinaryArithmeticOverloadResolver<LCMImpl, NameLCM, false, false>;
}

View File

@ -119,8 +119,13 @@ public:
UInt32 code_point2 = generate_code_point(rand >> 32);
/// We have padding in column buffers that we can overwrite.
pos += UTF8::convert(code_point1, pos, sizeof(int));
last_writen_bytes = UTF8::convert(code_point2, pos, sizeof(int));
size_t length1 = UTF8::convertCodePointToUTF8(code_point1, pos, sizeof(int));
assert(length1 <= 4);
pos += length1;
size_t length2 = UTF8::convertCodePointToUTF8(code_point2, pos, sizeof(int));
assert(length2 <= 4);
last_writen_bytes = length2;
pos += last_writen_bytes;
}
offset = pos - data_to.data() + 1;

View File

@ -683,7 +683,7 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV &
/** CSV format can contain insignificant spaces and tabs.
* Usually skipping them is the task of the calling code.
* But in this case, it will be difficult to do this, so remove the trailing whitespace by yourself.
* But in this case, it will be difficult to do this, so remove the trailing whitespace by ourselves.
*/
size_t size = s.size();
while (size > 0

View File

@ -69,6 +69,10 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion()
boost::algorithm::to_lower(matched_region);
region = matched_region;
}
else
{
region = Aws::Region::AWS_GLOBAL;
}
}
}

View File

@ -609,10 +609,10 @@ bool ActionsDAG::hasStatefulFunctions() const
return false;
}
bool ActionsDAG::empty() const
bool ActionsDAG::trivial() const
{
for (const auto & node : nodes)
if (node.type != ActionType::INPUT)
if (node.type == ActionType::FUNCTION || node.type == ActionType::ARRAY_JOIN)
return false;
return true;

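The rename is semantic as well as cosmetic: empty() returned false for any non-INPUT node, so a DAG made only of constant COLUMN or ALIAS nodes counted as non-empty, whereas trivial() rejects only FUNCTION and ARRAY_JOIN nodes and therefore lets the plan optimizations treat such DAGs as freely movable no-ops.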
View File

@ -223,7 +223,7 @@ public:
bool hasArrayJoin() const;
bool hasStatefulFunctions() const;
bool empty() const; /// If actions only contain inputs.
bool trivial() const; /// If the actions contain no functions or array joins.
const ActionsSettings & getSettings() const { return settings; }

View File

@ -341,6 +341,7 @@ struct ContextShared
mutable std::optional<BackgroundSchedulePool> buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables.
mutable std::optional<BackgroundSchedulePool> schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables)
mutable std::optional<BackgroundSchedulePool> distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends)
mutable std::optional<BackgroundSchedulePool> message_broker_schedule_pool; /// A thread pool that can run different jobs in background (used for message brokers, like RabbitMQ and Kafka)
MultiVersion<Macros> macros; /// Substitutions extracted from config.
std::unique_ptr<DDLWorker> ddl_worker; /// Process ddl commands from zk.
/// Rules for selecting the compression settings, depending on the size of the part.
@ -439,6 +440,7 @@ struct ContextShared
buffer_flush_schedule_pool.reset();
schedule_pool.reset();
distributed_schedule_pool.reset();
message_broker_schedule_pool.reset();
ddl_worker.reset();
/// Stop trace collector if any
@ -1524,6 +1526,17 @@ BackgroundSchedulePool & Context::getDistributedSchedulePool() const
return *shared->distributed_schedule_pool;
}
BackgroundSchedulePool & Context::getMessageBrokerSchedulePool() const
{
auto lock = getLock();
if (!shared->message_broker_schedule_pool)
shared->message_broker_schedule_pool.emplace(
settings.background_message_broker_schedule_pool_size,
CurrentMetrics::BackgroundDistributedSchedulePoolTask,
"BgMsgBrkSchPool");
return *shared->message_broker_schedule_pool;
}
bool Context::hasDistributedDDL() const
{
return getConfigRef().has("distributed_ddl");

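getMessageBrokerSchedulePool above follows the context's usual lazy-initialisation pattern: the pool is built under the context lock on first request and shared afterwards. A minimal sketch of the pattern, with BackgroundSchedulePool replaced by a stand-in:

    #include <mutex>
    #include <optional>

    struct SharedState
    {
        std::mutex mutex;
        std::optional<int> pool;   // stand-in for BackgroundSchedulePool
    };

    // Created on first use under the lock, reused by every later caller.
    int & getPool(SharedState & shared, int configured_size)
    {
        std::lock_guard lock(shared.mutex);
        if (!shared.pool)
            shared.pool.emplace(configured_size);
        return *shared.pool;
    }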
View File

@ -613,6 +613,7 @@ public:
BackgroundSchedulePool & getBufferFlushSchedulePool() const;
BackgroundSchedulePool & getSchedulePool() const;
BackgroundSchedulePool & getMessageBrokerSchedulePool() const;
BackgroundSchedulePool & getDistributedSchedulePool() const;
/// Has distributed_ddl configuration or not.

View File

@ -26,11 +26,18 @@ void PredicateRewriteVisitorData::visit(ASTSelectWithUnionQuery & union_select_q
{
auto & internal_select_list = union_select_query.list_of_selects->children;
if (!internal_select_list.empty())
visitFirstInternalSelect(*internal_select_list[0]->as<ASTSelectQuery>(), internal_select_list[0]);
for (size_t index = 1; index < internal_select_list.size(); ++index)
visitOtherInternalSelect(*internal_select_list[index]->as<ASTSelectQuery>(), internal_select_list[index]);
for (size_t index = 0; index < internal_select_list.size(); ++index)
{
if (auto * child_union = internal_select_list[index]->as<ASTSelectWithUnionQuery>())
visit(*child_union, internal_select_list[index]);
else
{
if (index == 0)
visitFirstInternalSelect(*internal_select_list[0]->as<ASTSelectQuery>(), internal_select_list[0]);
else
visitOtherInternalSelect(*internal_select_list[index]->as<ASTSelectQuery>(), internal_select_list[index]);
}
}
}
void PredicateRewriteVisitorData::visitFirstInternalSelect(ASTSelectQuery & select_query, ASTPtr &)

View File

@ -269,6 +269,18 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat
if (live_view_timeout)
settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH TIMEOUT " << (settings.hilite ? hilite_none : "")
<< *live_view_timeout;
if (live_view_periodic_refresh)
{
if (live_view_timeout)
settings.ostr << (settings.hilite ? hilite_keyword : "") << " AND" << (settings.hilite ? hilite_none : "");
else
settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH" << (settings.hilite ? hilite_none : "");
settings.ostr << (settings.hilite ? hilite_keyword : "") << " PERIODIC REFRESH " << (settings.hilite ? hilite_none : "")
<< *live_view_periodic_refresh;
}
formatOnCluster(settings);
}
else

View File

@ -77,6 +77,8 @@ public:
ASTDictionary * dictionary = nullptr; /// dictionary definition (layout, primary key, etc.)
std::optional<UInt64> live_view_timeout; /// For CREATE LIVE VIEW ... WITH TIMEOUT ...
std::optional<UInt64> live_view_periodic_refresh; /// For CREATE LIVE VIEW ... WITH [PERIODIC] REFRESH ...
bool attach_short_syntax{false};
std::optional<String> attach_from_path = std::nullopt;

View File

@ -307,10 +307,11 @@ bool ASTSelectQuery::final() const
bool ASTSelectQuery::withFill() const
{
if (!orderBy())
const ASTPtr order_by = orderBy();
if (!order_by)
return false;
for (const auto & order_expression_element : orderBy()->children)
for (const auto & order_expression_element : order_by->children)
if (order_expression_element->as<ASTOrderByElement &>().with_fill)
return true;

View File

@ -569,10 +569,14 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
ASTPtr as_table;
ASTPtr select;
ASTPtr live_view_timeout;
ASTPtr live_view_periodic_refresh;
String cluster_str;
bool attach = false;
bool if_not_exists = false;
bool with_and = false;
bool with_timeout = false;
bool with_periodic_refresh = false;
if (!s_create.ignore(pos, expected))
{
@ -594,10 +598,35 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
if (!table_name_p.parse(pos, table, expected))
return false;
if (ParserKeyword{"WITH TIMEOUT"}.ignore(pos, expected))
if (ParserKeyword{"WITH"}.ignore(pos, expected))
{
if (!ParserNumber{}.parse(pos, live_view_timeout, expected))
live_view_timeout = std::make_shared<ASTLiteral>(static_cast<UInt64>(DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC));
if (ParserKeyword{"TIMEOUT"}.ignore(pos, expected))
{
if (!ParserNumber{}.parse(pos, live_view_timeout, expected))
{
live_view_timeout = std::make_shared<ASTLiteral>(static_cast<UInt64>(DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC));
}
/// Optional - AND
if (ParserKeyword{"AND"}.ignore(pos, expected))
with_and = true;
with_timeout = true;
}
if (ParserKeyword{"REFRESH"}.ignore(pos, expected) || ParserKeyword{"PERIODIC REFRESH"}.ignore(pos, expected))
{
if (!ParserNumber{}.parse(pos, live_view_periodic_refresh, expected))
live_view_periodic_refresh = std::make_shared<ASTLiteral>(static_cast<UInt64>(DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC));
with_periodic_refresh = true;
}
else if (with_and)
return false;
if (!with_timeout && !with_periodic_refresh)
return false;
}
if (ParserKeyword{"ON"}.ignore(pos, expected))
@ -656,6 +685,9 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
if (live_view_timeout)
query->live_view_timeout.emplace(live_view_timeout->as<ASTLiteral &>().value.safeGet<UInt64>());
if (live_view_periodic_refresh)
query->live_view_periodic_refresh.emplace(live_view_periodic_refresh->as<ASTLiteral &>().value.safeGet<UInt64>());
return true;
}

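With this grammar the parser accepts, for example, CREATE LIVE VIEW lv WITH TIMEOUT 5 AND PERIODIC REFRESH 60 AS SELECT ..., as well as WITH TIMEOUT or WITH REFRESH alone; omitted numbers fall back to DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC and DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC, while a bare WITH, or a trailing AND without a REFRESH clause, is rejected.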
View File

@ -4,13 +4,14 @@
namespace DB
{
UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin, Token last)
UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin)
{
/// We have just two kinds of parentheses: () and [].
UnmatchedParentheses stack;
for (TokenIterator it = begin;
it.isValid() && it->begin <= last.begin; ++it)
/// We have to iterate through all tokens until the end to avoid a false-positive "Unmatched parentheses" error
/// when the parser failed in the middle of the query.
for (TokenIterator it = begin; it.isValid(); ++it)
{
if (it->type == TokenType::OpeningRoundBracket || it->type == TokenType::OpeningSquareBracket)
{

View File

@ -80,6 +80,6 @@ public:
/// Returns positions of unmatched parentheses.
using UnmatchedParentheses = std::vector<Token>;
UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin, Token last);
UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin);
}

View File

@ -79,7 +79,7 @@ void writeQueryWithHighlightedErrorPositions(
{
const char * current_position_to_hilite = positions_to_hilite[position_to_hilite_idx].begin;
assert(current_position_to_hilite < end);
assert(current_position_to_hilite <= end);
assert(current_position_to_hilite >= begin);
out.write(pos, current_position_to_hilite - pos);
@ -290,7 +290,7 @@ ASTPtr tryParseQuery(
}
/// Unmatched parentheses
UnmatchedParentheses unmatched_parens = checkUnmatchedParentheses(TokenIterator(tokens), last_token);
UnmatchedParentheses unmatched_parens = checkUnmatchedParentheses(TokenIterator(tokens));
if (!unmatched_parens.empty())
{
out_error_message = getUnmatchedParenthesesErrorMessage(query_begin,

View File

@ -0,0 +1,56 @@
#pragma once
#include <Processors/QueryPlan/QueryPlan.h>
#include <array>
namespace DB
{
namespace QueryPlanOptimizations
{
/// This is the main function which optimizes the whole QueryPlan tree.
void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes);
/// An Optimization is a function applied to a QueryPlan::Node.
/// It can read and update the subtree of the specified node.
/// It returns the number of updated layers of the subtree if some change happened.
/// It must guarantee that the structure of the tree stays correct.
///
/// New nodes should be added to the QueryPlan::Nodes list.
/// Old nodes need not be removed from the list.
struct Optimization
{
using Function = size_t (*)(QueryPlan::Node *, QueryPlan::Nodes &);
const Function apply = nullptr;
const char * name;
};
/// Move ARRAY JOIN up if possible.
size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes);
/// Move LimitStep down if possible.
size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
/// Split FilterStep into the chain `ExpressionStep -> FilterStep`, where FilterStep contains the minimal number of nodes.
size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes);
/// Replace the chain `ExpressionStep -> ExpressionStep` with a single ExpressionStep.
/// Replace the chain `FilterStep -> ExpressionStep` with a single FilterStep.
size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
inline const auto & getOptimizations()
{
static const std::array<Optimization, 4> optimizations =
{{
{tryLiftUpArrayJoin, "liftUpArrayJoin"},
{tryPushDownLimit, "pushDownLimit"},
{trySplitFilter, "splitFilter"},
{tryMergeExpressions, "mergeExpressions"},
}};
return optimizations;
}
}
}

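Extending the pass list means adding one function with this signature and one table entry. A self-contained miniature of the table-of-passes idea (toy Node type, not the real QueryPlan):

    #include <array>
    #include <cstddef>
    #include <iostream>

    struct Node { int value = 0; };

    // Passes are plain function pointers returning how many tree layers
    // they updated (0 = no change).
    struct Optimization
    {
        using Function = size_t (*)(Node *);
        Function apply = nullptr;
        const char * name;
    };

    size_t tryIncrement(Node * node) { node->value += 1; return 1; }
    size_t tryNothing(Node *) { return 0; }

    int main()
    {
        static const std::array<Optimization, 2> optimizations{{
            {tryIncrement, "increment"},
            {tryNothing, "nothing"},
        }};

        Node node;
        for (const auto & optimization : optimizations)
            if (optimization.apply)
                std::cout << optimization.name << " -> " << optimization.apply(&node) << '\n';
    }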
View File

@ -0,0 +1,85 @@
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <Processors/QueryPlan/FilterStep.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/ArrayJoinStep.h>
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/ArrayJoinAction.h>
namespace DB::QueryPlanOptimizations
{
size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
{
if (parent_node->children.size() != 1)
return 0;
QueryPlan::Node * child_node = parent_node->children.front();
auto & parent = parent_node->step;
auto & child = child_node->step;
auto * expression_step = typeid_cast<ExpressionStep *>(parent.get());
auto * filter_step = typeid_cast<FilterStep *>(parent.get());
auto * array_join_step = typeid_cast<ArrayJoinStep *>(child.get());
if (!(expression_step || filter_step) || !array_join_step)
return 0;
const auto & array_join = array_join_step->arrayJoin();
const auto & expression = expression_step ? expression_step->getExpression()
: filter_step->getExpression();
auto split_actions = expression->splitActionsBeforeArrayJoin(array_join->columns);
/// No actions can be moved before ARRAY JOIN.
if (split_actions.first->trivial())
return 0;
auto description = parent->getStepDescription();
/// All actions were moved before ARRAY JOIN. Swap Expression and ArrayJoin.
if (split_actions.second->trivial())
{
auto expected_header = parent->getOutputStream().header;
/// Expression/Filter -> ArrayJoin
std::swap(parent, child);
/// ArrayJoin -> Expression/Filter
if (expression_step)
child = std::make_unique<ExpressionStep>(child_node->children.at(0)->step->getOutputStream(),
std::move(split_actions.first));
else
child = std::make_unique<FilterStep>(child_node->children.at(0)->step->getOutputStream(),
std::move(split_actions.first),
filter_step->getFilterColumnName(),
filter_step->removesFilterColumn());
child->setStepDescription(std::move(description));
array_join_step->updateInputStream(child->getOutputStream(), expected_header);
return 2;
}
/// Add new expression step before ARRAY JOIN.
/// Expression/Filter -> ArrayJoin -> Something
auto & node = nodes.emplace_back();
node.children.swap(child_node->children);
child_node->children.emplace_back(&node);
/// Expression/Filter -> ArrayJoin -> node -> Something
node.step = std::make_unique<ExpressionStep>(node.children.at(0)->step->getOutputStream(),
std::move(split_actions.first));
node.step->setStepDescription(description);
array_join_step->updateInputStream(node.step->getOutputStream(), {});
if (expression_step)
parent = std::make_unique<ExpressionStep>(array_join_step->getOutputStream(), split_actions.second);
else
parent = std::make_unique<FilterStep>(array_join_step->getOutputStream(), split_actions.second,
filter_step->getFilterColumnName(), filter_step->removesFilterColumn());
parent->setStepDescription(description + " [split]");
return 3;
}
}

View File

@ -0,0 +1,114 @@
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <Processors/QueryPlan/ITransformingStep.h>
#include <Processors/QueryPlan/LimitStep.h>
#include <Processors/QueryPlan/TotalsHavingStep.h>
#include <Processors/QueryPlan/MergingSortedStep.h>
#include <Processors/QueryPlan/FinishSortingStep.h>
#include <Processors/QueryPlan/MergeSortingStep.h>
#include <Processors/QueryPlan/PartialSortingStep.h>
#include <Common/typeid_cast.h>
namespace DB::QueryPlanOptimizations
{
/// If the plan looks like Limit -> Sorting, update the limit for Sorting.
static bool tryUpdateLimitForSortingSteps(QueryPlan::Node * node, size_t limit)
{
if (limit == 0)
return false;
QueryPlanStepPtr & step = node->step;
QueryPlan::Node * child = nullptr;
bool updated = false;
if (auto * merging_sorted = typeid_cast<MergingSortedStep *>(step.get()))
{
/// TODO: remove LimitStep here.
merging_sorted->updateLimit(limit);
updated = true;
child = node->children.front();
}
else if (auto * finish_sorting = typeid_cast<FinishSortingStep *>(step.get()))
{
/// TODO: remove LimitStep here.
finish_sorting->updateLimit(limit);
updated = true;
}
else if (auto * merge_sorting = typeid_cast<MergeSortingStep *>(step.get()))
{
merge_sorting->updateLimit(limit);
updated = true;
child = node->children.front();
}
else if (auto * partial_sorting = typeid_cast<PartialSortingStep *>(step.get()))
{
partial_sorting->updateLimit(limit);
updated = true;
}
/// We often have the chain PartialSorting -> MergeSorting -> MergingSorted.
/// Try to update the limit for them as well, if possible.
if (child)
tryUpdateLimitForSortingSteps(child, limit);
return updated;
}
size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
{
if (parent_node->children.size() != 1)
return 0;
QueryPlan::Node * child_node = parent_node->children.front();
auto & parent = parent_node->step;
auto & child = child_node->step;
auto * limit = typeid_cast<LimitStep *>(parent.get());
if (!limit)
return 0;
/// Skip LIMIT WITH TIES for now.
if (limit->withTies())
return 0;
const auto * transforming = dynamic_cast<const ITransformingStep *>(child.get());
/// Skip everything which is not a transform.
if (!transforming)
return 0;
/// Special cases for sorting steps.
if (tryUpdateLimitForSortingSteps(child_node, limit->getLimitForSorting()))
return 0;
/// Special case for TotalsHaving. Totals may be incorrect if we push down limit.
if (typeid_cast<const TotalsHavingStep *>(child.get()))
return 0;
/// Now we should decide if pushing down the limit is possible for this step.
const auto & transform_traits = transforming->getTransformTraits();
const auto & data_stream_traits = transforming->getDataStreamTraits();
/// Cannot push down if the child changes the number of rows.
if (!transform_traits.preserves_number_of_rows)
return 0;
/// Cannot push down if the data was sorted exactly by the child stream.
if (!child->getOutputStream().sort_description.empty() && !data_stream_traits.preserves_sorting)
return 0;
/// Now we push down the limit only if it doesn't change any stream properties.
/// TODO: some of them may be changed and, probably, not important for following streams. We may add such info.
if (!limit->getOutputStream().hasEqualPropertiesWith(transforming->getOutputStream()))
return 0;
/// The input stream for Limit has changed.
limit->updateInputStream(transforming->getInputStreams().front());
parent.swap(child);
return 2;
}
}

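As a concrete example, a Limit -> Expression pair is swapped here into Expression -> Limit whenever the expression preserves the row count, the sorting and the remaining stream properties, while a Limit -> TotalsHaving or Limit -> Aggregating chain is left untouched because those steps change the number of rows.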
View File

@ -0,0 +1,65 @@
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <Processors/QueryPlan/FilterStep.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Interpreters/ActionsDAG.h>
namespace DB::QueryPlanOptimizations
{
size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
{
if (parent_node->children.size() != 1)
return 0;
QueryPlan::Node * child_node = parent_node->children.front();
auto & parent = parent_node->step;
auto & child = child_node->step;
auto * parent_expr = typeid_cast<ExpressionStep *>(parent.get());
auto * parent_filter = typeid_cast<FilterStep *>(parent.get());
auto * child_expr = typeid_cast<ExpressionStep *>(child.get());
if (parent_expr && child_expr)
{
const auto & child_actions = child_expr->getExpression();
const auto & parent_actions = parent_expr->getExpression();
/// We cannot combine actions with arrayJoin and a stateful function because we cannot always reorder them.
/// Example: select rowNumberInBlock() from (select arrayJoin([1, 2]))
/// Such a query will return two zeroes if we combine actions together.
if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
return 0;
auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
auto expr = std::make_unique<ExpressionStep>(child_expr->getInputStreams().front(), merged);
expr->setStepDescription("(" + parent_expr->getStepDescription() + " + " + child_expr->getStepDescription() + ")");
parent_node->step = std::move(expr);
parent_node->children.swap(child_node->children);
return 1;
}
else if (parent_filter && child_expr)
{
const auto & child_actions = child_expr->getExpression();
const auto & parent_actions = parent_filter->getExpression();
if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
return 0;
auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
auto filter = std::make_unique<FilterStep>(child_expr->getInputStreams().front(), merged,
parent_filter->getFilterColumnName(), parent_filter->removesFilterColumn());
filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_expr->getStepDescription() + ")");
parent_node->step = std::move(filter);
parent_node->children.swap(child_node->children);
return 1;
}
return 0;
}
}

View File

@ -0,0 +1,75 @@
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <stack>
namespace DB::QueryPlanOptimizations
{
void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
{
const auto & optimizations = getOptimizations();
struct Frame
{
QueryPlan::Node * node;
/// If not zero, traverse only depth_limit layers of the tree (if no other optimizations happen).
/// Otherwise, traverse all children.
size_t depth_limit = 0;
/// Next child to process.
size_t next_child = 0;
};
std::stack<Frame> stack;
stack.push(Frame{.node = &root});
while (!stack.empty())
{
auto & frame = stack.top();
/// If depth_limit == 0, traverse without a limit (first entry).
/// If depth_limit > 1, traverse children with (depth_limit - 1).
if (frame.depth_limit != 1)
{
/// Traverse all children first.
if (frame.next_child < frame.node->children.size())
{
stack.push(Frame
{
.node = frame.node->children[frame.next_child],
.depth_limit = frame.depth_limit ? (frame.depth_limit - 1) : 0,
});
++frame.next_child;
continue;
}
}
size_t max_update_depth = 0;
/// Apply all optimizations.
for (const auto & optimization : optimizations)
{
/// Just in case, skip optimization if it is not initialized.
if (!optimization.apply)
continue;
/// Try to apply optimization.
auto update_depth = optimization.apply(frame.node, nodes);
max_update_depth = std::max<size_t>(max_update_depth, update_depth);
}
/// Traverse `max_update_depth` layers of tree again.
if (max_update_depth)
{
frame.depth_limit = max_update_depth;
frame.next_child = 0;
continue;
}
/// Nothing was applied.
stack.pop();
}
}
}

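To make the re-traversal concrete: if a pass reports 3 updated layers for a node, that frame's depth_limit becomes 3 and next_child resets, so the walker revisits up to three layers beneath the node (pushing children with depth_limit - 1) before the frame is finally popped; a pass returning 0 changes nothing, and the node is popped as soon as its children are done.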
View File

@ -0,0 +1,50 @@
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <Processors/QueryPlan/FilterStep.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Interpreters/ActionsDAG.h>
namespace DB::QueryPlanOptimizations
{
/// Split FilterStep into the chain `ExpressionStep -> FilterStep`, where FilterStep contains the minimal number of nodes.
size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
{
auto * filter_step = typeid_cast<FilterStep *>(node->step.get());
if (!filter_step)
return 0;
const auto & expr = filter_step->getExpression();
/// Do not split if there are functions like runningDifference.
if (expr->hasStatefulFunctions())
return 0;
auto split = expr->splitActionsForFilter(filter_step->getFilterColumnName());
if (split.second->trivial())
return 0;
if (filter_step->removesFilterColumn())
split.second->removeUnusedInput(filter_step->getFilterColumnName());
auto description = filter_step->getStepDescription();
auto & filter_node = nodes.emplace_back();
node->children.swap(filter_node.children);
node->children.push_back(&filter_node);
filter_node.step = std::make_unique<FilterStep>(
filter_node.children.at(0)->step->getOutputStream(),
std::move(split.first),
filter_step->getFilterColumnName(),
filter_step->removesFilterColumn());
node->step = std::make_unique<ExpressionStep>(filter_node.step->getOutputStream(), std::move(split.second));
filter_node.step->setStepDescription("(" + description + ")[split]");
node->step->setStepDescription(description);
return 2;
}
}

View File

@ -6,15 +6,7 @@
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/ArrayJoinAction.h>
#include <stack>
#include <Processors/QueryPlan/LimitStep.h>
#include "MergingSortedStep.h"
#include "FinishSortingStep.h"
#include "MergeSortingStep.h"
#include "PartialSortingStep.h"
#include "TotalsHavingStep.h"
#include "ExpressionStep.h"
#include "ArrayJoinStep.h"
#include "FilterStep.h"
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
namespace DB
{
@ -341,318 +333,9 @@ void QueryPlan::explainPipeline(WriteBuffer & buffer, const ExplainPipelineOptio
}
}
/// If plan looks like Limit -> Sorting, update limit for Sorting
bool tryUpdateLimitForSortingSteps(QueryPlan::Node * node, size_t limit)
{
if (limit == 0)
return false;
QueryPlanStepPtr & step = node->step;
QueryPlan::Node * child = nullptr;
bool updated = false;
if (auto * merging_sorted = typeid_cast<MergingSortedStep *>(step.get()))
{
/// TODO: remove LimitStep here.
merging_sorted->updateLimit(limit);
updated = true;
child = node->children.front();
}
else if (auto * finish_sorting = typeid_cast<FinishSortingStep *>(step.get()))
{
/// TODO: remove LimitStep here.
finish_sorting->updateLimit(limit);
updated = true;
}
else if (auto * merge_sorting = typeid_cast<MergeSortingStep *>(step.get()))
{
merge_sorting->updateLimit(limit);
updated = true;
child = node->children.front();
}
else if (auto * partial_sorting = typeid_cast<PartialSortingStep *>(step.get()))
{
partial_sorting->updateLimit(limit);
updated = true;
}
/// We often have chain PartialSorting -> MergeSorting -> MergingSorted
/// Try update limit for them also if possible.
if (child)
tryUpdateLimitForSortingSteps(child, limit);
return updated;
}
/// Move LimitStep down if possible.
static void tryPushDownLimit(QueryPlanStepPtr & parent, QueryPlan::Node * child_node)
{
auto & child = child_node->step;
auto * limit = typeid_cast<LimitStep *>(parent.get());
if (!limit)
return;
/// Skip LIMIT WITH TIES by now.
if (limit->withTies())
return;
const auto * transforming = dynamic_cast<const ITransformingStep *>(child.get());
/// Skip everything which is not transform.
if (!transforming)
return;
/// Special cases for sorting steps.
if (tryUpdateLimitForSortingSteps(child_node, limit->getLimitForSorting()))
return;
/// Special case for TotalsHaving. Totals may be incorrect if we push down limit.
if (typeid_cast<const TotalsHavingStep *>(child.get()))
return;
/// Now we should decide if pushing down limit possible for this step.
const auto & transform_traits = transforming->getTransformTraits();
const auto & data_stream_traits = transforming->getDataStreamTraits();
/// Cannot push down if child changes the number of rows.
if (!transform_traits.preserves_number_of_rows)
return;
/// Cannot push down if data was sorted exactly by child stream.
if (!child->getOutputStream().sort_description.empty() && !data_stream_traits.preserves_sorting)
return;
/// Now we push down limit only if it doesn't change any stream properties.
/// TODO: some of them may be changed and, probably, not important for following streams. We may add such info.
if (!limit->getOutputStream().hasEqualPropertiesWith(transforming->getOutputStream()))
return;
/// Input stream for Limit have changed.
limit->updateInputStream(transforming->getInputStreams().front());
parent.swap(child);
}
/// Move ARRAY JOIN up if possible.
static void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * child_node, QueryPlan::Nodes & nodes)
{
auto & parent = parent_node->step;
auto & child = child_node->step;
auto * expression_step = typeid_cast<ExpressionStep *>(parent.get());
auto * filter_step = typeid_cast<FilterStep *>(parent.get());
auto * array_join_step = typeid_cast<ArrayJoinStep *>(child.get());
if (!(expression_step || filter_step) || !array_join_step)
return;
const auto & array_join = array_join_step->arrayJoin();
const auto & expression = expression_step ? expression_step->getExpression()
: filter_step->getExpression();
auto split_actions = expression->splitActionsBeforeArrayJoin(array_join->columns);
/// No actions can be moved before ARRAY JOIN.
if (split_actions.first->empty())
return;
auto description = parent->getStepDescription();
/// All actions was moved before ARRAY JOIN. Swap Expression and ArrayJoin.
if (split_actions.second->empty())
{
auto expected_header = parent->getOutputStream().header;
/// Expression/Filter -> ArrayJoin
std::swap(parent, child);
/// ArrayJoin -> Expression/Filter
if (expression_step)
child = std::make_unique<ExpressionStep>(child_node->children.at(0)->step->getOutputStream(),
std::move(split_actions.first));
else
child = std::make_unique<FilterStep>(child_node->children.at(0)->step->getOutputStream(),
std::move(split_actions.first),
filter_step->getFilterColumnName(),
filter_step->removesFilterColumn());
child->setStepDescription(std::move(description));
array_join_step->updateInputStream(child->getOutputStream(), expected_header);
return;
}
/// Add new expression step before ARRAY JOIN.
/// Expression/Filter -> ArrayJoin -> Something
auto & node = nodes.emplace_back();
node.children.swap(child_node->children);
child_node->children.emplace_back(&node);
/// Expression/Filter -> ArrayJoin -> node -> Something
node.step = std::make_unique<ExpressionStep>(node.children.at(0)->step->getOutputStream(),
std::move(split_actions.first));
node.step->setStepDescription(description);
array_join_step->updateInputStream(node.step->getOutputStream(), {});
if (expression_step)
parent = std::make_unique<ExpressionStep>(array_join_step->getOutputStream(), split_actions.second);
else
parent = std::make_unique<FilterStep>(array_join_step->getOutputStream(), split_actions.second,
filter_step->getFilterColumnName(), filter_step->removesFilterColumn());
parent->setStepDescription(description + " [split]");
}
/// Replace chain `ExpressionStep -> ExpressionStep` to single ExpressionStep
/// Replace chain `FilterStep -> ExpressionStep` to single FilterStep
static bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Node * child_node)
{
auto & parent = parent_node->step;
auto & child = child_node->step;
auto * parent_expr = typeid_cast<ExpressionStep *>(parent.get());
auto * parent_filter = typeid_cast<FilterStep *>(parent.get());
auto * child_expr = typeid_cast<ExpressionStep *>(child.get());
if (parent_expr && child_expr)
{
const auto & child_actions = child_expr->getExpression();
const auto & parent_actions = parent_expr->getExpression();
/// We cannot combine actions with arrayJoin and stateful function because we not always can reorder them.
/// Example: select rowNumberInBlock() from (select arrayJoin([1, 2]))
/// Such a query will return two zeroes if we combine actions together.
if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
return false;
auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
auto expr = std::make_unique<ExpressionStep>(child_expr->getInputStreams().front(), merged);
expr->setStepDescription("(" + parent_expr->getStepDescription() + " + " + child_expr->getStepDescription() + ")");
parent_node->step = std::move(expr);
parent_node->children.swap(child_node->children);
return true;
}
else if (parent_filter && child_expr)
{
const auto & child_actions = child_expr->getExpression();
const auto & parent_actions = parent_filter->getExpression();
if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions())
return false;
auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions));
auto filter = std::make_unique<FilterStep>(child_expr->getInputStreams().front(), merged,
parent_filter->getFilterColumnName(), parent_filter->removesFilterColumn());
filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_expr->getStepDescription() + ")");
parent_node->step = std::move(filter);
parent_node->children.swap(child_node->children);
return true;
}
return false;
}
/// Split FilterStep into chain `ExpressionStep -> FilterStep`, where FilterStep contains minimal number of nodes.
static bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
{
auto * filter_step = typeid_cast<FilterStep *>(node->step.get());
if (!filter_step)
return false;
const auto & expr = filter_step->getExpression();
/// Do not split if there are function like runningDifference.
if (expr->hasStatefulFunctions())
return false;
auto split = expr->splitActionsForFilter(filter_step->getFilterColumnName());
if (split.second->empty())
return false;
if (filter_step->removesFilterColumn())
split.second->removeUnusedInput(filter_step->getFilterColumnName());
auto description = filter_step->getStepDescription();
auto & filter_node = nodes.emplace_back();
node->children.swap(filter_node.children);
node->children.push_back(&filter_node);
filter_node.step = std::make_unique<FilterStep>(
filter_node.children.at(0)->step->getOutputStream(),
std::move(split.first),
filter_step->getFilterColumnName(),
filter_step->removesFilterColumn());
node->step = std::make_unique<ExpressionStep>(filter_node.step->getOutputStream(), std::move(split.second));
filter_node.step->setStepDescription("(" + description + ")[split]");
node->step->setStepDescription(description);
return true;
}
void QueryPlan::optimize()
{
/* Stack contains info for every nodes in the path from tree root to the current node.
* Every optimization changes only current node and it's children.
* Optimization may change QueryPlanStep, but not QueryPlan::Node (only add a new one).
* So, QueryPlan::Node::children will be always valid.
*/
struct Frame
{
Node * node;
size_t next_child = 0;
};
std::stack<Frame> stack;
stack.push(Frame{.node = root});
while (!stack.empty())
{
auto & frame = stack.top();
if (frame.next_child == 0)
{
if (frame.node->children.size() == 1)
{
tryPushDownLimit(frame.node->step, frame.node->children.front());
while (tryMergeExpressions(frame.node, frame.node->children.front()));
if (frame.node->children.size() == 1)
tryLiftUpArrayJoin(frame.node, frame.node->children.front(), nodes);
trySplitFilter(frame.node, nodes);
}
}
if (frame.next_child < frame.node->children.size())
{
stack.push(Frame{frame.node->children[frame.next_child]});
++frame.next_child;
}
else
{
if (frame.node->children.size() == 1)
{
while (tryMergeExpressions(frame.node, frame.node->children.front()));
trySplitFilter(frame.node, nodes);
tryLiftUpArrayJoin(frame.node, frame.node->children.front(), nodes);
}
stack.pop();
}
}
QueryPlanOptimizations::optimizeTree(*root, nodes);
}
}

View File

@ -116,6 +116,11 @@ SRCS(
QueryPlan/MergingFinal.cpp
QueryPlan/MergingSortedStep.cpp
QueryPlan/OffsetStep.cpp
QueryPlan/Optimizations/liftUpArrayJoin.cpp
QueryPlan/Optimizations/limitPushDown.cpp
QueryPlan/Optimizations/mergeExpressions.cpp
QueryPlan/Optimizations/optimizeTree.cpp
QueryPlan/Optimizations/splitFilter.cpp
QueryPlan/PartialSortingStep.cpp
QueryPlan/QueryPlan.cpp
QueryPlan/ReadFromPreparedSource.cpp

View File

@ -109,15 +109,16 @@ public:
* because it allows checking the IP ranges of the trusted proxy.
* Proxy-forwarded (original client) IP address is used for quota accounting if quota is keyed by forwarded IP.
*/
TCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_)
TCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_,
std::string server_display_name_)
: Poco::Net::TCPServerConnection(socket_)
, server(server_)
, parse_proxy_protocol(parse_proxy_protocol_)
, log(&Poco::Logger::get("TCPHandler"))
, connection_context(server.context())
, query_context(server.context())
, server_display_name(std::move(server_display_name_))
{
server_display_name = server.config().getString("display_name", getFQDNOrHostName());
}
void run() override;

View File

@ -17,6 +17,7 @@ private:
IServer & server;
bool parse_proxy_protocol = false;
Poco::Logger * log;
std::string server_display_name;
class DummyTCPHandler : public Poco::Net::TCPServerConnection
{
@ -34,6 +35,7 @@ public:
: server(server_), parse_proxy_protocol(parse_proxy_protocol_)
, log(&Poco::Logger::get(std::string("TCP") + (secure_ ? "S" : "") + "HandlerFactory"))
{
server_display_name = server.config().getString("display_name", getFQDNOrHostName());
}
Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket) override
@ -42,7 +44,7 @@ public:
{
LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString());
return new TCPHandler(server, socket, parse_proxy_protocol);
return new TCPHandler(server, socket, parse_proxy_protocol, server_display_name);
}
catch (const Poco::Net::NetException &)
{

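Hoisting the display_name lookup into the factory means the config read and the getFQDNOrHostName() fallback happen once at server start instead of on every accepted connection; each TCPHandler now just receives the precomputed string.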
View File

@ -196,7 +196,7 @@ StorageKafka::StorageKafka(
auto task_count = thread_per_consumer ? num_consumers : 1;
for (size_t i = 0; i < task_count; ++i)
{
auto task = global_context.getSchedulePool().createTask(log->name(), [this, i]{ threadFunc(i); });
auto task = global_context.getMessageBrokerSchedulePool().createTask(log->name(), [this, i]{ threadFunc(i); });
task->deactivate();
tasks.emplace_back(std::make_shared<TaskContext>(std::move(task)));
}

View File

@ -34,6 +34,7 @@ public:
{
new_blocks_metadata->hash = key_str;
new_blocks_metadata->version = storage.getBlocksVersion() + 1;
new_blocks_metadata->time = std::chrono::system_clock::now();
for (auto & block : *new_blocks)
{
@ -48,6 +49,15 @@ public:
storage.condition.notify_all();
}
else
{
// Only update the blocks time.
new_blocks_metadata->hash = storage.getBlocksHashKey();
new_blocks_metadata->version = storage.getBlocksVersion();
new_blocks_metadata->time = std::chrono::system_clock::now();
(*storage.blocks_metadata_ptr) = new_blocks_metadata;
}
new_blocks.reset();
new_blocks_metadata.reset();

View File

@ -20,6 +20,7 @@ limitations under the License. */
#include <DataStreams/MaterializingBlockInputStream.h>
#include <DataStreams/SquashingBlockInputStream.h>
#include <DataStreams/copyData.h>
#include <common/logger_useful.h>
#include <Common/typeid_cast.h>
#include <Common/SipHash.h>
@ -254,6 +255,8 @@ StorageLiveView::StorageLiveView(
live_view_context = std::make_unique<Context>(global_context);
live_view_context->makeQueryContext();
log = &Poco::Logger::get("StorageLiveView (" + table_id_.database_name + "." + table_id_.table_name + ")");
StorageInMemoryMetadata storage_metadata;
storage_metadata.setColumns(columns_);
setInMemoryMetadata(storage_metadata);
@ -275,12 +278,21 @@ StorageLiveView::StorageLiveView(
if (query.live_view_timeout)
{
is_temporary = true;
temporary_live_view_timeout = std::chrono::seconds{*query.live_view_timeout};
temporary_live_view_timeout = Seconds {*query.live_view_timeout};
}
if (query.live_view_periodic_refresh)
{
is_periodically_refreshed = true;
periodic_live_view_refresh = Seconds {*query.live_view_periodic_refresh};
}
blocks_ptr = std::make_shared<BlocksPtr>();
blocks_metadata_ptr = std::make_shared<BlocksMetadataPtr>();
active_ptr = std::make_shared<bool>(true);
periodic_refresh_task = global_context.getSchedulePool().createTask("LiveViewPeriodicRefreshTask", [this]{ periodicRefreshTaskFunc(); });
periodic_refresh_task->deactivate();
}
Block StorageLiveView::getHeader() const
@ -369,10 +381,21 @@ bool StorageLiveView::getNewBlocks()
}
new_blocks_metadata->hash = key.toHexString();
new_blocks_metadata->version = getBlocksVersion() + 1;
new_blocks_metadata->time = std::chrono::system_clock::now();
(*blocks_ptr) = new_blocks;
(*blocks_metadata_ptr) = new_blocks_metadata;
updated = true;
}
else
{
new_blocks_metadata->hash = getBlocksHashKey();
new_blocks_metadata->version = getBlocksVersion();
new_blocks_metadata->time = std::chrono::system_clock::now();
(*blocks_metadata_ptr) = new_blocks_metadata;
}
}
return updated;
}
@ -392,11 +415,18 @@ void StorageLiveView::startup()
{
if (is_temporary)
TemporaryLiveViewCleaner::instance().addView(std::static_pointer_cast<StorageLiveView>(shared_from_this()));
if (is_periodically_refreshed)
periodic_refresh_task->activate();
}
void StorageLiveView::shutdown()
{
shutdown_called = true;
if (is_periodically_refreshed)
periodic_refresh_task->deactivate();
DatabaseCatalog::instance().removeDependency(select_table_id, getStorageID());
}
@ -415,15 +445,55 @@ void StorageLiveView::drop()
condition.notify_all();
}
void StorageLiveView::refresh()
void StorageLiveView::scheduleNextPeriodicRefresh()
{
Seconds current_time = std::chrono::duration_cast<Seconds> (std::chrono::system_clock::now().time_since_epoch());
Seconds blocks_time = std::chrono::duration_cast<Seconds> (getBlocksTime().time_since_epoch());
if ((current_time - periodic_live_view_refresh) >= blocks_time)
{
refresh(false);
blocks_time = std::chrono::duration_cast<Seconds> (getBlocksTime().time_since_epoch());
}
current_time = std::chrono::duration_cast<Seconds> (std::chrono::system_clock::now().time_since_epoch());
auto next_refresh_time = blocks_time + periodic_live_view_refresh;
if (current_time >= next_refresh_time)
periodic_refresh_task->scheduleAfter(0);
else
{
auto schedule_time = std::chrono::duration_cast<MilliSeconds> (next_refresh_time - current_time);
periodic_refresh_task->scheduleAfter(static_cast<size_t>(schedule_time.count()));
}
}
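
To make the scheduling arithmetic above concrete, here is a minimal standalone sketch with made-up values (blocks last refreshed at t = 100 s, a 60 s refresh period, current time t = 130 s); it mirrors the duration_cast logic that feeds scheduleAfter:

#include <chrono>
#include <iostream>

int main()
{
    using Seconds = std::chrono::seconds;
    using MilliSeconds = std::chrono::milliseconds;

    /// Hypothetical values: blocks were last refreshed at t = 100 s,
    /// the refresh period is 60 s, and "now" is t = 130 s.
    Seconds blocks_time{100};
    Seconds periodic_live_view_refresh{60};
    Seconds current_time{130};

    auto next_refresh_time = blocks_time + periodic_live_view_refresh; /// 160 s

    if (current_time >= next_refresh_time)
        std::cout << "schedule immediately (delay 0 ms)\n";
    else
    {
        auto delay = std::chrono::duration_cast<MilliSeconds>(next_refresh_time - current_time);
        std::cout << "schedule after " << delay.count() << " ms\n"; /// prints 30000
    }
    return 0;
}
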
void StorageLiveView::periodicRefreshTaskFunc()
{
LOG_TRACE(log, "periodic refresh task");
std::lock_guard lock(mutex);
if (hasActiveUsers())
scheduleNextPeriodicRefresh();
}
void StorageLiveView::refresh(bool grab_lock)
{
// The lock is already acquired exclusively from the InterpreterAlterQuery::execute() method in InterpreterAlterQuery.cpp.
// So reacquiring the lock is not needed and would result in an exception.
if (grab_lock)
{
std::lock_guard lock(mutex);
if (getNewBlocks())
condition.notify_all();
}
else
{
if (getNewBlocks())
condition.notify_all();
}
}
Pipe StorageLiveView::read(
@ -435,15 +505,21 @@ Pipe StorageLiveView::read(
const size_t /*max_block_size*/,
const unsigned /*num_streams*/)
{
std::lock_guard lock(mutex);
if (!(*blocks_ptr))
refresh(false);
else if (is_periodically_refreshed)
{
std::lock_guard lock(mutex);
if (!(*blocks_ptr))
{
if (getNewBlocks())
condition.notify_all();
}
return Pipe(std::make_shared<BlocksSource>(blocks_ptr, getHeader()));
Seconds current_time = std::chrono::duration_cast<Seconds> (std::chrono::system_clock::now().time_since_epoch());
Seconds blocks_time = std::chrono::duration_cast<Seconds> (getBlocksTime().time_since_epoch());
if ((current_time - periodic_live_view_refresh) >= blocks_time)
refresh(false);
}
return Pipe(std::make_shared<BlocksSource>(blocks_ptr, getHeader()));
}
BlockInputStreams StorageLiveView::watch(
@ -458,6 +534,7 @@ BlockInputStreams StorageLiveView::watch(
bool has_limit = false;
UInt64 limit = 0;
BlockInputStreamPtr reader;
if (query.limit_length)
{
@ -466,45 +543,28 @@ BlockInputStreams StorageLiveView::watch(
}
if (query.is_watch_events)
{
auto reader = std::make_shared<LiveViewEventsBlockInputStream>(
reader = std::make_shared<LiveViewEventsBlockInputStream>(
std::static_pointer_cast<StorageLiveView>(shared_from_this()),
blocks_ptr, blocks_metadata_ptr, active_ptr, has_limit, limit,
context.getSettingsRef().live_view_heartbeat_interval.totalSeconds());
{
std::lock_guard lock(mutex);
if (!(*blocks_ptr))
{
if (getNewBlocks())
condition.notify_all();
}
}
processed_stage = QueryProcessingStage::Complete;
return { reader };
}
else
{
auto reader = std::make_shared<LiveViewBlockInputStream>(
reader = std::make_shared<LiveViewBlockInputStream>(
std::static_pointer_cast<StorageLiveView>(shared_from_this()),
blocks_ptr, blocks_metadata_ptr, active_ptr, has_limit, limit,
context.getSettingsRef().live_view_heartbeat_interval.totalSeconds());
{
std::lock_guard lock(mutex);
if (!(*blocks_ptr))
{
if (getNewBlocks())
condition.notify_all();
}
}
{
std::lock_guard lock(mutex);
processed_stage = QueryProcessingStage::Complete;
if (!(*blocks_ptr))
refresh(false);
return { reader };
if (is_periodically_refreshed)
scheduleNextPeriodicRefresh();
}
processed_stage = QueryProcessingStage::Complete;
return { reader };
}
NamesAndTypesList StorageLiveView::getVirtuals() const

View File

@ -13,6 +13,7 @@ limitations under the License. */
#include <ext/shared_ptr_helper.h>
#include <Storages/IStorage.h>
#include <Core/BackgroundSchedulePool.h>
#include <mutex>
#include <condition_variable>
@ -21,10 +22,16 @@ limitations under the License. */
namespace DB
{
using Time = std::chrono::time_point<std::chrono::system_clock>;
using Seconds = std::chrono::seconds;
using MilliSeconds = std::chrono::milliseconds;
struct BlocksMetadata
{
String hash;
UInt64 version;
Time time;
};
struct MergeableBlocks
@ -75,8 +82,10 @@ public:
NamesAndTypesList getVirtuals() const override;
bool isTemporary() const { return is_temporary; }
std::chrono::seconds getTimeout() const { return temporary_live_view_timeout; }
bool isPeriodicallyRefreshed() const { return is_periodically_refreshed; }
Seconds getTimeout() const { return temporary_live_view_timeout; }
Seconds getPeriodicRefresh() const { return periodic_live_view_refresh; }
/// Check if we have any readers
/// must be called with mutex locked
@ -109,6 +118,15 @@ public:
return 0;
}
/// Get blocks time
/// must be called with mutex locked
Time getBlocksTime()
{
if (*blocks_metadata_ptr)
return (*blocks_metadata_ptr)->time;
return {};
}
/// Reset blocks
/// must be called with mutex locked
void reset()
@ -124,7 +142,7 @@ public:
void startup() override;
void shutdown() override;
void refresh();
void refresh(const bool grab_lock = true);
Pipe read(
const Names & column_names,
@ -176,8 +194,13 @@ private:
Context & global_context;
std::unique_ptr<Context> live_view_context;
Poco::Logger * log;
bool is_temporary = false;
std::chrono::seconds temporary_live_view_timeout;
bool is_periodically_refreshed = false;
Seconds temporary_live_view_timeout;
Seconds periodic_live_view_refresh;
/// Mutex to protect access to sample block and inner_blocks_query
mutable std::mutex sample_block_lock;
@ -199,6 +222,13 @@ private:
std::atomic<bool> shutdown_called = false;
/// Periodic refresh task used when [PERIODIC] REFRESH is specified in the CREATE statement
BackgroundSchedulePool::TaskHolder periodic_refresh_task;
void periodicRefreshTaskFunc();
/// Must be called with mutex locked
void scheduleNextPeriodicRefresh();
StorageLiveView(
const StorageID & table_id_,
Context & local_context,

View File

@ -414,18 +414,19 @@ size_t MergeTreeRangeReader::ReadResult::numZerosInTail(const UInt8 * begin, con
end -= 64;
const auto * pos = end;
UInt64 val =
static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos)),
zero16)))
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 16)),
zero16))) << 16u)
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 32)),
zero16))) << 32u)
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 48)),
zero16))) << 48u);
val = ~val;
if (val == 0)
count += 64;
else

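The change above replaces _mm_cmpgt_epi8 with _mm_cmpeq_epi8 and then inverts the mask. _mm_cmpgt_epi8 compares signed bytes, so filter values of 0x80 and above read as negative and were wrongly counted as zeros; comparing for equality with zero and negating yields a correct nonzero-byte mask regardless of sign. A scalar sketch of the intended semantics (numZerosInTailScalar is a hypothetical helper, not the real function):

#include <cstddef>
#include <cstdint>

/// Counts how many bytes at the tail of [begin, end) are exactly zero.
/// A byte stops the count iff it is nonzero -- including values >= 0x80,
/// which a signed "greater than zero" comparison would misclassify.
size_t numZerosInTailScalar(const uint8_t * begin, const uint8_t * end)
{
    size_t count = 0;
    while (end > begin && *(end - 1) == 0)
    {
        ++count;
        --end;
    }
    return count;
}

int main()
{
    const uint8_t filter[] = {1, 0x80, 0, 0, 0};
    /// The 0x80 byte must break the tail: only the last three zeros count.
    return numZerosInTailScalar(filter, filter + sizeof(filter)) == 3 ? 0 : 1;
}
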
View File

@ -74,20 +74,9 @@ size_t ReplicatedMergeTreePartCheckThread::size() const
}
void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & part_name)
ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreePartCheckThread::searchForMissingPartOnOtherReplicas(const String & part_name)
{
auto zookeeper = storage.getZooKeeper();
String part_path = storage.replica_path + "/parts/" + part_name;
/// If the part is in ZooKeeper, remove it from there and add the task to download it to the queue.
if (zookeeper->exists(part_path))
{
LOG_WARNING(log, "Part {} exists in ZooKeeper but not locally. Removing from ZooKeeper and queueing a fetch.", part_name);
ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed);
storage.removePartAndEnqueueFetch(part_name);
return;
}
/// If the part is not in ZooKeeper, we'll check if it's at least somewhere.
auto part_info = MergeTreePartInfo::fromPartName(part_name, storage.format_version);
@ -115,7 +104,7 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
* and don't delete the queue entry when in doubt.
*/
LOG_WARNING(log, "Checking if anyone has a part covering {}.", part_name);
LOG_WARNING(log, "Checking if anyone has part {} or a covering part.", part_name);
bool found_part_with_the_same_min_block = false;
bool found_part_with_the_same_max_block = false;
@ -123,15 +112,27 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
Strings replicas = zookeeper->getChildren(storage.zookeeper_path + "/replicas");
for (const String & replica : replicas)
{
Strings parts = zookeeper->getChildren(storage.zookeeper_path + "/replicas/" + replica + "/parts");
String replica_path = storage.zookeeper_path + "/replicas/" + replica;
Strings parts = zookeeper->getChildren(replica_path + "/parts");
for (const String & part_on_replica : parts)
{
auto part_on_replica_info = MergeTreePartInfo::fromPartName(part_on_replica, storage.format_version);
if (part_info == part_on_replica_info)
{
/// Found the missing part on our own replica. If we are here, something is wrong with this part, so skip it.
if (replica_path == storage.replica_path)
continue;
LOG_WARNING(log, "Found the missing part {} at {} on {}", part_name, part_on_replica, replica);
return MissingPartSearchResult::FoundAndNeedFetch;
}
if (part_on_replica_info.contains(part_info))
{
LOG_WARNING(log, "Found part {} on {} that covers the missing part {}", part_on_replica, replica, part_name);
return;
return MissingPartSearchResult::FoundAndDontNeedFetch;
}
if (part_info.contains(part_on_replica_info))
@ -144,7 +145,7 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
if (found_part_with_the_same_min_block && found_part_with_the_same_max_block)
{
LOG_WARNING(log, "Found parts with the same min block and with the same max block as the missing part {}. Hoping that it will eventually appear as a result of a merge.", part_name);
return;
return MissingPartSearchResult::FoundAndDontNeedFetch;
}
}
}
@ -160,28 +161,61 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
not_found_msg = "smaller parts with either the same min block or the same max block.";
LOG_ERROR(log, "No replica has part covering {} and a merge is impossible: we didn't find {}", part_name, not_found_msg);
ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed);
/// Is it in the replication queue? If there is - delete, because the task can not be processed.
if (!storage.queue.remove(zookeeper, part_name))
{
/// The part was not in our queue. Why did it happen?
LOG_ERROR(log, "Missing part {} is not in our queue.", part_name);
return;
}
/** This situation is possible if the part deteriorated on all the replicas where it existed.
* For example, a replica that had just written it lost power before the data was flushed from cache to disk.
*/
LOG_ERROR(log, "Part {} is lost forever.", part_name);
ProfileEvents::increment(ProfileEvents::ReplicatedDataLoss);
return MissingPartSearchResult::LostForever;
}
CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name)
void ReplicatedMergeTreePartCheckThread::searchForMissingPartAndFetchIfPossible(const String & part_name, bool exists_in_zookeeper)
{
LOG_WARNING(log, "Checking part {}", part_name);
ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks);
auto zookeeper = storage.getZooKeeper();
auto missing_part_search_result = searchForMissingPartOnOtherReplicas(part_name);
/// If the part is in ZooKeeper, remove it from there and add the task to download it to the queue.
if (exists_in_zookeeper)
{
/// If the part was found on some other replica
if (missing_part_search_result == MissingPartSearchResult::FoundAndNeedFetch)
{
LOG_WARNING(log, "Part {} exists in ZooKeeper but not locally and was found on another replica. Removing from ZooKeeper and queueing a fetch.", part_name);
storage.removePartAndEnqueueFetch(part_name);
}
else /// If we have a covering part on another replica, or the part is lost forever, we don't need to fetch anything
{
LOG_WARNING(log, "Part {} exists in ZooKeeper but not locally and was not found on any other replica. Removing it from ZooKeeper.", part_name);
storage.removePartFromZooKeeper(part_name);
}
}
ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed);
if (missing_part_search_result == MissingPartSearchResult::LostForever)
{
/// Is it in the replication queue? If there is - delete, because the task can not be processed.
if (!storage.queue.remove(zookeeper, part_name))
{
/// The part was not in our queue.
LOG_WARNING(log, "Missing part {} is not in our queue, this can happen rarely.", part_name);
}
/** This situation is possible if the part deteriorated on all the replicas where it existed.
* For example, a replica that had just written it lost power before the data was flushed from cache to disk.
*/
LOG_ERROR(log, "Part {} is lost forever.", part_name);
ProfileEvents::increment(ProfileEvents::ReplicatedDataLoss);
}
}
std::pair<bool, MergeTreeDataPartPtr> ReplicatedMergeTreePartCheckThread::findLocalPart(const String & part_name)
{
auto zookeeper = storage.getZooKeeper();
String part_path = storage.replica_path + "/parts/" + part_name;
/// It's important to check ZooKeeper first and only after that check local storage,
/// because our checks of local storage and ZooKeeper are not consistent.
/// If the part exists in ZooKeeper but doesn't exist in local storage, we definitely need
/// to fetch this part. But if we check local storage first and then check ZooKeeper,
/// some background process can successfully commit the part between these checks (both to local storage and ZooKeeper),
/// and the checker thread would wrongly remove the part from ZooKeeper and queue a fetch.
bool exists_in_zookeeper = zookeeper->exists(part_path);
/// If the part is still in the PreCommitted -> Committed transition, it is not lost
/// and there is no need to go searching for it on other replicas. To definitely find the needed part
@ -190,17 +224,27 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_na
if (!part)
part = storage.getActiveContainingPart(part_name);
return std::make_pair(exists_in_zookeeper, part);
}
CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name)
{
LOG_WARNING(log, "Checking part {}", part_name);
ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks);
auto [exists_in_zookeeper, part] = findLocalPart(part_name);
/// We do not have this or a covering part.
if (!part)
{
searchForMissingPart(part_name);
searchForMissingPartAndFetchIfPossible(part_name, exists_in_zookeeper);
return {part_name, false, "Part is missing, will search for it"};
}
/// We have this part, and it's active. We will check whether we need this part and whether it has the right data.
else if (part->name == part_name)
if (part->name == part_name)
{
auto zookeeper = storage.getZooKeeper();
auto table_lock = storage.lockForShare(RWLockImpl::NO_QUERY, storage.getSettings()->lock_acquire_timeout_for_background_operations);
auto local_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums(
@ -254,11 +298,11 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_na
tryLogCurrentException(log, __PRETTY_FUNCTION__);
String message = "Part " + part_name + " looks broken. Removing it and queueing a fetch.";
String message = "Part " + part_name + " looks broken. Removing it and will try to fetch.";
LOG_ERROR(log, message);
ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed);
storage.removePartAndEnqueueFetch(part_name);
/// Part is broken, let's try to find it and fetch.
searchForMissingPartAndFetchIfPossible(part_name, exists_in_zookeeper);
/// Delete part locally.
storage.forgetPartAndMoveToDetached(part, "broken");

View File

@ -12,6 +12,7 @@
#include <common/logger_useful.h>
#include <Core/BackgroundSchedulePool.h>
#include <Storages/CheckResults.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h>
namespace DB
{
@ -73,7 +74,26 @@ public:
private:
void run();
void searchForMissingPart(const String & part_name);
/// Search for the missing part and queue a fetch if possible. Otherwise
/// remove the part from ZooKeeper and from the queue.
void searchForMissingPartAndFetchIfPossible(const String & part_name, bool exists_in_zookeeper);
std::pair<bool, MergeTreeDataPartPtr> findLocalPart(const String & part_name);
enum MissingPartSearchResult
{
/// We found this part on another replica, let's fetch it.
FoundAndNeedFetch,
/// We found a covering part or a source part with the same min and max block numbers;
/// no fetch is needed because it will be handled during normal queue processing.
FoundAndDontNeedFetch,
/// A covering part was not found anywhere, and the exact part_name was not found on any other
/// replica.
LostForever,
};
/// Search for the missing part on other replicas, or for a covering part on all replicas (including our own).
MissingPartSearchResult searchForMissingPartOnOtherReplicas(const String & part_name);
StorageReplicatedMergeTree & storage;
String log_name;

View File

@ -420,13 +420,26 @@ bool ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, const Stri
{
std::unique_lock lock(state_mutex);
virtual_parts.remove(part_name);
bool removed = virtual_parts.remove(part_name);
for (Queue::iterator it = queue.begin(); it != queue.end();)
{
if ((*it)->new_part_name == part_name)
{
found = *it;
if (removed)
{
/// Preserve the invariant `virtual_parts` = `current_parts` + `queue`.
/// We remove new_part from virtual parts and add back all of its source parts
/// which are present in current_parts.
for (const auto & source_part : found->source_parts)
{
auto part_in_current_parts = current_parts.getContainingPart(source_part);
if (part_in_current_parts == source_part)
virtual_parts.add(source_part);
}
}
updateStateOnQueueEntryRemoval(
found, /* is_successful = */ false,
min_unprocessed_insert_time_changed, max_processed_insert_time_changed, lock);
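
The invariant mentioned in the comment above -- virtual_parts = current_parts + queue -- can be illustrated with a toy model. This hypothetical sketch uses plain string sets in place of the real part-set structures and simplifies part coverage to exact-name matching:

#include <cassert>
#include <set>
#include <string>

int main()
{
    /// Two committed source parts and one queued merge entry producing all_1_2_1.
    std::set<std::string> current_parts = {"all_1_1_0", "all_2_2_0"};
    std::set<std::string> virtual_parts = {"all_1_2_1"};
    const std::set<std::string> source_parts = {"all_1_1_0", "all_2_2_0"};

    /// Removing the queued merge result must not lose coverage of its sources:
    virtual_parts.erase("all_1_2_1");
    for (const auto & part : source_parts)
        if (current_parts.count(part))
            virtual_parts.insert(part); /// re-add sources still present locally

    /// Invariant restored: virtual_parts again covers exactly what
    /// current_parts plus the (now empty) queue covers.
    assert(virtual_parts == current_parts);
    return 0;
}
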
@ -1010,7 +1023,7 @@ bool ReplicatedMergeTreeQueue::isNotCoveredByFuturePartsImpl(const String & log_
/// NOTE The above is redundant, but left for a more convenient message in the log.
auto result_part = MergeTreePartInfo::fromPartName(new_part_name, format_version);
/// It can slow down when the size of `future_parts` is large. But it can not be large, since `BackgroundProcessingPool` is limited.
/// It can slow down when the size of `future_parts` is large. But it can not be large, since background pool is limited.
for (const auto & future_part_elem : future_parts)
{
auto future_part = MergeTreePartInfo::fromPartName(future_part_elem.first, format_version);

View File

@ -112,13 +112,13 @@ StorageRabbitMQ::StorageRabbitMQ(
/// One looping task for all consumers as they share the same connection == the same handler == the same event loop
event_handler->updateLoopState(Loop::STOP);
looping_task = global_context.getSchedulePool().createTask("RabbitMQLoopingTask", [this]{ loopingFunc(); });
looping_task = global_context.getMessageBrokerSchedulePool().createTask("RabbitMQLoopingTask", [this]{ loopingFunc(); });
looping_task->deactivate();
streaming_task = global_context.getSchedulePool().createTask("RabbitMQStreamingTask", [this]{ streamingToViewsFunc(); });
streaming_task = global_context.getMessageBrokerSchedulePool().createTask("RabbitMQStreamingTask", [this]{ streamingToViewsFunc(); });
streaming_task->deactivate();
connection_task = global_context.getSchedulePool().createTask("RabbitMQConnectionTask", [this]{ connectionFunc(); });
connection_task = global_context.getMessageBrokerSchedulePool().createTask("RabbitMQConnectionTask", [this]{ connectionFunc(); });
connection_task->deactivate();
if (queue_base.empty())

View File

@ -3013,6 +3013,21 @@ void StorageReplicatedMergeTree::removePartFromZooKeeper(const String & part_nam
ops.emplace_back(zkutil::makeRemoveRequest(part_path, -1));
}
void StorageReplicatedMergeTree::removePartFromZooKeeper(const String & part_name)
{
auto zookeeper = getZooKeeper();
String part_path = replica_path + "/parts/" + part_name;
Coordination::Stat stat;
/// Part doesn't exist, nothing to remove
if (!zookeeper->exists(part_path, &stat))
return;
Coordination::Requests ops;
removePartFromZooKeeper(part_name, ops, stat.numChildren > 0);
zookeeper->multi(ops);
}
void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_name)
{

View File

@ -381,6 +381,9 @@ private:
/// Set has_children to true for "old-style" parts (those with /columns and /checksums child znodes).
void removePartFromZooKeeper(const String & part_name, Coordination::Requests & ops, bool has_children);
/// Just removes the part from ZooKeeper using the previous method
void removePartFromZooKeeper(const String & part_name);
/// Quickly removes big set of parts from ZooKeeper (using async multi queries)
void removePartsFromZooKeeper(zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names,
NameSet * parts_should_be_retried = nullptr);

View File

@ -329,7 +329,7 @@ Pipe StorageS3::read(
context,
metadata_snapshot->getColumns(),
max_block_size,
chooseCompressionMethod(uri.endpoint, compression_method),
chooseCompressionMethod(uri.key, compression_method),
client,
uri.bucket,
key));
@ -347,7 +347,7 @@ BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const StorageMet
format_name,
metadata_snapshot->getSampleBlock(),
global_context,
chooseCompressionMethod(uri.endpoint, compression_method),
chooseCompressionMethod(uri.key, compression_method),
client,
uri.bucket,
uri.key,

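The fix above matters because with the 'auto' compression method the codec is inferred from the file extension, and only the object key (e.g. a path ending in .gz) carries one; the endpoint (scheme, host, port) never does. A minimal sketch of extension-based detection under that assumption (guessCompressionFromPath is a hypothetical helper, not the real chooseCompressionMethod):

#include <iostream>
#include <string>

/// Returns "gzip" if the path ends in ".gz", otherwise "none".
std::string guessCompressionFromPath(const std::string & path)
{
    const std::string suffix = ".gz";
    if (path.size() >= suffix.size()
        && path.compare(path.size() - suffix.size(), suffix.size(), suffix) == 0)
        return "gzip";
    return "none";
}

int main()
{
    /// The endpoint carries no extension, so detection must use the key.
    std::cout << guessCompressionFromPath("http://minio:9000") << '\n'; /// none
    std::cout << guessCompressionFromPath("data/file.csv.gz") << '\n';  /// gzip
    return 0;
}
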
View File

@ -23,6 +23,7 @@ const char * auto_contributors[] {
"Alexander Burmak",
"Alexander Ermolaev",
"Alexander GQ Gerasiov",
"Alexander Gololobov",
"Alexander Kazakov",
"Alexander Kozhikhov",
"Alexander Krasheninnikov",
@ -43,6 +44,7 @@ const char * auto_contributors[] {
"Alexandr Krasheninnikov",
"Alexandr Orlov",
"Alexandra Latysheva",
"Alexandre Snarskii",
"Alexei Averchenko",
"Alexey",
"Alexey Arno",
@ -143,6 +145,7 @@ const char * auto_contributors[] {
"CurtizJ",
"Daniel Bershatsky",
"Daniel Dao",
"Daniel Qin",
"Danila Kutenin",
"Dao Minh Thuc",
"Daria Mozhaeva",
@ -309,7 +312,9 @@ const char * auto_contributors[] {
"Marek Vavrusa",
"Marek Vavruša",
"Marek Vavruša",
"Mariano Benítez Mulet",
"Mark Andreev",
"Mark Frost",
"Mark Papadakis",
"Maroun Maroun",
"Marsel Arduanov",
@ -422,6 +427,7 @@ const char * auto_contributors[] {
"Rafael David Tinoco",
"Ramazan Polat",
"Ravengg",
"RegulusZ",
"Reilee",
"Reto Kromer",
"Ri",
@ -482,9 +488,11 @@ const char * auto_contributors[] {
"Tangaev",
"Tema Novikov",
"The-Alchemist",
"TiunovNN",
"Tobias Adamson",
"Tom Bombadil",
"Tsarkova Anastasia",
"TszkitLo40",
"Ubuntu",
"Ubus",
"UnamedRus",
@ -556,6 +564,7 @@ const char * auto_contributors[] {
"Yury Stankevich",
"Zhichang Yu",
"Zhipeng",
"Zoran Pandovski",
"a.palagashvili",
"abdrakhmanov",
"abyss7",
@ -571,6 +580,7 @@ const char * auto_contributors[] {
"alex.lvxin",
"alexander kozhikhov",
"alexey-milovidov",
"alfredlu",
"amoschen",
"amudong",
"ana-uvarova",
@ -588,14 +598,17 @@ const char * auto_contributors[] {
"avsharapov",
"awesomeleo",
"benamazing",
"benbiti",
"bgranvea",
"bharatnc",
"blazerer",
"bluebirddm",
"bo zeng",
"bobrovskij artemij",
"booknouse",
"bseng",
"cekc",
"centos7",
"champtar",
"chang.chen",
"chengy8934",
@ -606,6 +619,7 @@ const char * auto_contributors[] {
"comunodi",
"coraxster",
"damozhaeva",
"dankondr",
"daoready",
"dasmfm",
"davydovska",
@ -627,6 +641,7 @@ const char * auto_contributors[] {
"elBroom",
"elenaspb2019",
"emakarov",
"emhlbmc",
"emironyuk",
"evtan",
"exprmntr",
@ -673,6 +688,7 @@ const char * auto_contributors[] {
"javi santana",
"jetgm",
"jianmei zhang",
"jyz0309",
"kmeaw",
"koshachy",
"kreuzerkrieg",
@ -779,7 +795,9 @@ const char * auto_contributors[] {
"taiyang-li",
"tao jiang",
"tavplubix",
"templarzq",
"tiger.yan",
"tison",
"topvisor",
"tyrionhuang",
"ubuntu",
@ -800,6 +818,7 @@ const char * auto_contributors[] {
"weeds085490",
"xPoSx",
"yangshuai",
"ygrek",
"yhgcn",
"ylchou",
"yonesko",

View File

@ -135,6 +135,9 @@ def run_single_test(args, ext, server_logs_level, client_options, case_file, std
os.environ["CLICKHOUSE_DATABASE"] = database
# This is for .sh tests
os.environ.setdefault("CLICKHOUSE_LOG_COMMENT", case_file)
params = {
'client': args.client + ' --database=' + database,
'logs_level': server_logs_level,

View File

@ -113,9 +113,9 @@ class ClickHouseCluster:
self.zookeeper_config_path = p.join(self.base_dir, zookeeper_config_path) if zookeeper_config_path else p.join(
HELPERS_DIR, 'zookeeper_config.xml')
self.project_name = pwd.getpwuid(os.getuid()).pw_name + p.basename(self.base_dir) + self.name
project_name = pwd.getpwuid(os.getuid()).pw_name + p.basename(self.base_dir) + self.name
# docker-compose removes everything non-alphanumeric from project names so we do it too.
self.project_name = re.sub(r'[^a-z0-9]', '', self.project_name.lower())
self.project_name = re.sub(r'[^a-z0-9]', '', project_name.lower())
self.instances_dir = p.join(self.base_dir, '_instances' + ('' if not self.name else '_' + self.name))
self.docker_logs_path = p.join(self.instances_dir, 'docker.log')
@ -126,8 +126,8 @@ class ClickHouseCluster:
self.base_cmd = ['docker-compose']
if custom_dockerd_host:
self.base_cmd += ['--host', custom_dockerd_host]
self.base_cmd += ['--project-name', self.project_name]
self.base_cmd += ['--project-directory', self.base_dir, '--project-name', self.project_name]
self.base_zookeeper_cmd = None
self.base_mysql_cmd = []
self.base_kafka_cmd = []
@ -260,25 +260,23 @@ class ClickHouseCluster:
self.with_zookeeper = True
self.zookeeper_use_tmpfs = zookeeper_use_tmpfs
self.base_cmd.extend(['--file', zookeeper_docker_compose_path])
self.base_zookeeper_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
self.project_name, '--file', zookeeper_docker_compose_path]
self.base_zookeeper_cmd = ['docker-compose', '--project-name', self.project_name,
'--file', zookeeper_docker_compose_path]
cmds.append(self.base_zookeeper_cmd)
if with_mysql and not self.with_mysql:
self.with_mysql = True
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')])
self.base_mysql_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
self.project_name, '--file',
p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')]
self.base_mysql_cmd = ['docker-compose', '--project-name', self.project_name,
'--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')]
cmds.append(self.base_mysql_cmd)
if with_postgres and not self.with_postgres:
self.with_postgres = True
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')])
self.base_postgres_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
self.project_name, '--file',
p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')]
self.base_postgres_cmd = ['docker-compose', '--project-name', self.project_name,
'--file', p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')]
cmds.append(self.base_postgres_cmd)
if with_odbc_drivers and not self.with_odbc_drivers:
@ -286,64 +284,57 @@ class ClickHouseCluster:
if not self.with_mysql:
self.with_mysql = True
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')])
self.base_mysql_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
self.project_name, '--file',
p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')]
self.base_mysql_cmd = ['docker-compose', '--project-name', self.project_name,
'--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')]
cmds.append(self.base_mysql_cmd)
if not self.with_postgres:
self.with_postgres = True
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')])
self.base_postgres_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
self.project_name, '--file',
p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')]
self.base_postgres_cmd = ['docker-compose', '--project-name', self.project_name,
'--file', p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')]
cmds.append(self.base_postgres_cmd)
if with_kafka and not self.with_kafka:
self.with_kafka = True
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_kafka.yml')])
self.base_kafka_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
self.project_name, '--file',
p.join(docker_compose_yml_dir, 'docker_compose_kafka.yml')]
self.base_kafka_cmd = ['docker-compose', '--project-name', self.project_name,
'--file', p.join(docker_compose_yml_dir, 'docker_compose_kafka.yml')]
cmds.append(self.base_kafka_cmd)
if with_kerberized_kafka and not self.with_kerberized_kafka:
self.with_kerberized_kafka = True
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_kerberized_kafka.yml')])
self.base_kerberized_kafka_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_kerberized_kafka.yml')]
self.base_kerberized_kafka_cmd = ['docker-compose','--project-name', self.project_name,
'--file', p.join(docker_compose_yml_dir, 'docker_compose_kerberized_kafka.yml')]
cmds.append(self.base_kerberized_kafka_cmd)
if with_rabbitmq and not self.with_rabbitmq:
self.with_rabbitmq = True
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_rabbitmq.yml')])
self.base_rabbitmq_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
self.project_name, '--file',
p.join(docker_compose_yml_dir, 'docker_compose_rabbitmq.yml')]
self.base_rabbitmq_cmd = ['docker-compose', '--project-name', self.project_name,
'--file', p.join(docker_compose_yml_dir, 'docker_compose_rabbitmq.yml')]
cmds.append(self.base_rabbitmq_cmd)
if with_hdfs and not self.with_hdfs:
self.with_hdfs = True
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_hdfs.yml')])
self.base_hdfs_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
self.project_name, '--file',
p.join(docker_compose_yml_dir, 'docker_compose_hdfs.yml')]
self.base_hdfs_cmd = ['docker-compose', '--project-name', self.project_name,
'--file', p.join(docker_compose_yml_dir, 'docker_compose_hdfs.yml')]
cmds.append(self.base_hdfs_cmd)
if with_kerberized_hdfs and not self.with_kerberized_hdfs:
self.with_kerberized_hdfs = True
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_kerberized_hdfs.yml')])
self.base_kerberized_hdfs_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
self.project_name, '--file',
p.join(docker_compose_yml_dir, 'docker_compose_kerberized_hdfs.yml')]
self.base_kerberized_hdfs_cmd = ['docker-compose', '--project-name', self.project_name,
'--file', p.join(docker_compose_yml_dir, 'docker_compose_kerberized_hdfs.yml')]
cmds.append(self.base_kerberized_hdfs_cmd)
if with_mongo and not self.with_mongo:
self.with_mongo = True
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_mongo.yml')])
self.base_mongo_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
self.project_name, '--file',
p.join(docker_compose_yml_dir, 'docker_compose_mongo.yml')]
self.base_mongo_cmd = ['docker-compose', '--project-name', self.project_name,
'--file', p.join(docker_compose_yml_dir, 'docker_compose_mongo.yml')]
cmds.append(self.base_mongo_cmd)
if self.with_net_trics:
@ -353,26 +344,25 @@ class ClickHouseCluster:
if with_redis and not self.with_redis:
self.with_redis = True
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_redis.yml')])
self.base_redis_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
self.project_name, '--file',
p.join(docker_compose_yml_dir, 'docker_compose_redis.yml')]
self.base_redis_cmd = ['docker-compose', '--project-name', self.project_name,
'--file', p.join(docker_compose_yml_dir, 'docker_compose_redis.yml')]
if with_minio and not self.with_minio:
self.with_minio = True
self.minio_certs_dir = minio_certs_dir
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_minio.yml')])
self.base_minio_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
self.project_name, '--file',
p.join(docker_compose_yml_dir, 'docker_compose_minio.yml')]
self.base_minio_cmd = ['docker-compose', '--project-name', self.project_name,
'--file', p.join(docker_compose_yml_dir, 'docker_compose_minio.yml')]
cmds.append(self.base_minio_cmd)
if with_cassandra and not self.with_cassandra:
self.with_cassandra = True
self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_cassandra.yml')])
self.base_cassandra_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
self.project_name, '--file',
p.join(docker_compose_yml_dir, 'docker_compose_cassandra.yml')]
self.base_cassandra_cmd = ['docker-compose', '--project-name', self.project_name,
'--file', p.join(docker_compose_yml_dir, 'docker_compose_cassandra.yml')]
print("Cluster name:{} project_name:{}. Added instance name:{} tag:{} base_cmd:{} docker_compose_yml_dir:{}".format(
self.name, self.project_name, name, tag, self.base_cmd, docker_compose_yml_dir))
return instance
def get_instance_docker_id(self, instance_name):
@ -407,7 +397,10 @@ class ClickHouseCluster:
return node
def get_instance_ip(self, instance_name):
print("get_instance_ip instance_name={}".format(instance_name))
docker_id = self.get_instance_docker_id(instance_name)
# for cont in self.docker_client.containers.list():
# print("CONTAINERS LIST: ID={} NAME={} STATUS={}".format(cont.id, cont.name, cont.status))
handle = self.docker_client.containers.get(docker_id)
return list(handle.attrs['NetworkSettings']['Networks'].values())[0]['IPAddress']
@ -936,7 +929,7 @@ class ClickHouseInstance:
self.with_cassandra = with_cassandra
self.path = p.join(self.cluster.instances_dir, name)
self.docker_compose_path = p.join(self.path, 'docker_compose.yml')
self.docker_compose_path = p.join(self.path, 'docker-compose.yml')
self.env_variables = env_variables or {}
if with_odbc_drivers:
self.odbc_ini_path = self.path + "/odbc.ini:/etc/odbc.ini"
@ -1163,14 +1156,14 @@ class ClickHouseInstance:
if status == 'exited':
raise Exception(
"Instance `{}' failed to start. Container status: {}, logs: {}".format(self.name, status,
handle.logs()))
handle.logs().decode('utf-8')))
current_time = time.time()
time_left = deadline - current_time
if deadline is not None and current_time >= deadline:
raise Exception("Timed out while waiting for instance `{}' with ip address {} to start. "
"Container status: {}, logs: {}".format(self.name, self.ip_address, status,
handle.logs()))
handle.logs().decode('utf-8')))
# Repeatedly poll the instance address until there is something that listens there.
# Usually it means that ClickHouse is ready to accept queries.

View File

@ -0,0 +1 @@
#!/usr/bin/env python3

View File

@ -0,0 +1,61 @@
import pytest
from helpers.cluster import ClickHouseCluster
from multiprocessing.dummy import Pool
from helpers.network import PartitionManager
import time
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', with_zookeeper=True)
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
node1.query('''
CREATE TABLE replicated_mt(date Date, id UInt32, value Int32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt', '{replica}') ORDER BY id;
'''.format(replica=node1.name))
yield cluster
finally:
cluster.shutdown()
def corrupt_data_part_on_disk(node, table, part_name):
part_path = node.query(
"SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format(table, part_name)).strip()
node.exec_in_container(['bash', '-c',
'cd {p} && ls *.bin | head -n 1 | xargs -I{{}} sh -c \'echo "1" >> $1\' -- {{}}'.format(
p=part_path)], privileged=True)
def test_merge_and_part_corruption(started_cluster):
node1.query("SYSTEM STOP REPLICATION QUEUES replicated_mt")
for i in range(4):
node1.query("INSERT INTO replicated_mt SELECT toDate('2019-10-01'), number, number * number FROM numbers ({f}, 100000)".format(f=i*100000))
assert node1.query("SELECT COUNT() FROM system.parts WHERE table='replicated_mt' AND active=1") == "4\n"
# Need to corrupt a "border" part (left or right). If we corrupt something in the middle,
# ClickHouse will not consider the merge broken, because we have parts with the same min and max
# block numbers.
corrupt_data_part_on_disk(node1, 'replicated_mt', 'all_3_3_0')
with Pool(1) as p:
def optimize_with_delay(x):
node1.query("OPTIMIZE TABLE replicated_mt FINAL", timeout=30)
# corrupt the part after the merge is already assigned, but not yet started
res_opt = p.apply_async(optimize_with_delay, (1,))
node1.query("CHECK TABLE replicated_mt", settings={"check_query_single_value_result": 0})
# start merge
node1.query("SYSTEM START REPLICATION QUEUES replicated_mt")
res_opt.get()
# will hang if the bug under test is not fixed
node1.query("ALTER TABLE replicated_mt UPDATE value = 7 WHERE 1", settings={"mutations_sync": 2}, timeout=30)
assert node1.query("SELECT sum(value) FROM replicated_mt") == "2100000\n"

View File

@ -13,18 +13,6 @@ def started_cluster():
try:
cluster.start()
for node in [node1, node2]:
node.query('''
CREATE TABLE replicated_mt(date Date, id UInt32, value Int32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id;
'''.format(replica=node.name))
node1.query('''
CREATE TABLE non_replicated_mt(date Date, id UInt32, value Int32)
ENGINE = MergeTree() PARTITION BY toYYYYMM(date) ORDER BY id
SETTINGS min_bytes_for_wide_part=0;
''')
yield cluster
finally:
@ -54,6 +42,14 @@ def remove_part_from_disk(node, table, part_name):
def test_check_normal_table_corruption(started_cluster):
node1.query("DROP TABLE IF EXISTS non_replicated_mt")
node1.query('''
CREATE TABLE non_replicated_mt(date Date, id UInt32, value Int32)
ENGINE = MergeTree() PARTITION BY toYYYYMM(date) ORDER BY id
SETTINGS min_bytes_for_wide_part=0;
''')
node1.query("INSERT INTO non_replicated_mt VALUES (toDate('2019-02-01'), 1, 10), (toDate('2019-02-01'), 2, 12)")
assert node1.query("CHECK TABLE non_replicated_mt PARTITION 201902",
settings={"check_query_single_value_result": 0}) == "201902_1_1_0\t1\t\n"
@ -94,8 +90,14 @@ def test_check_normal_table_corruption(started_cluster):
def test_check_replicated_table_simple(started_cluster):
node1.query("TRUNCATE TABLE replicated_mt")
node2.query("SYSTEM SYNC REPLICA replicated_mt")
for node in [node1, node2]:
node.query("DROP TABLE IF EXISTS replicated_mt")
node.query('''
CREATE TABLE replicated_mt(date Date, id UInt32, value Int32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id;
'''.format(replica=node.name))
node1.query("INSERT INTO replicated_mt VALUES (toDate('2019-02-01'), 1, 10), (toDate('2019-02-01'), 2, 12)")
node2.query("SYSTEM SYNC REPLICA replicated_mt")
@ -119,34 +121,40 @@ def test_check_replicated_table_simple(started_cluster):
def test_check_replicated_table_corruption(started_cluster):
node1.query("TRUNCATE TABLE replicated_mt")
node2.query("SYSTEM SYNC REPLICA replicated_mt")
node1.query("INSERT INTO replicated_mt VALUES (toDate('2019-02-01'), 1, 10), (toDate('2019-02-01'), 2, 12)")
node1.query("INSERT INTO replicated_mt VALUES (toDate('2019-01-02'), 3, 10), (toDate('2019-01-02'), 4, 12)")
node2.query("SYSTEM SYNC REPLICA replicated_mt")
for node in [node1, node2]:
node.query("DROP TABLE IF EXISTS replicated_mt_1")
assert node1.query("SELECT count() from replicated_mt") == "4\n"
assert node2.query("SELECT count() from replicated_mt") == "4\n"
node.query('''
CREATE TABLE replicated_mt_1(date Date, id UInt32, value Int32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt_1', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id;
'''.format(replica=node.name))
node1.query("INSERT INTO replicated_mt_1 VALUES (toDate('2019-02-01'), 1, 10), (toDate('2019-02-01'), 2, 12)")
node1.query("INSERT INTO replicated_mt_1 VALUES (toDate('2019-01-02'), 3, 10), (toDate('2019-01-02'), 4, 12)")
node2.query("SYSTEM SYNC REPLICA replicated_mt_1")
assert node1.query("SELECT count() from replicated_mt_1") == "4\n"
assert node2.query("SELECT count() from replicated_mt_1") == "4\n"
part_name = node1.query(
"SELECT name from system.parts where table = 'replicated_mt' and partition_id = '201901' and active = 1").strip()
"SELECT name from system.parts where table = 'replicated_mt_1' and partition_id = '201901' and active = 1").strip()
corrupt_data_part_on_disk(node1, "replicated_mt", part_name)
assert node1.query("CHECK TABLE replicated_mt PARTITION 201901", settings={
"check_query_single_value_result": 0}) == "{p}\t0\tPart {p} looks broken. Removing it and queueing a fetch.\n".format(
corrupt_data_part_on_disk(node1, "replicated_mt_1", part_name)
assert node1.query("CHECK TABLE replicated_mt_1 PARTITION 201901", settings={
"check_query_single_value_result": 0}) == "{p}\t0\tPart {p} looks broken. Removing it and will try to fetch.\n".format(
p=part_name)
node1.query("SYSTEM SYNC REPLICA replicated_mt")
assert node1.query("CHECK TABLE replicated_mt PARTITION 201901",
node1.query("SYSTEM SYNC REPLICA replicated_mt_1")
assert node1.query("CHECK TABLE replicated_mt_1 PARTITION 201901",
settings={"check_query_single_value_result": 0}) == "{}\t1\t\n".format(part_name)
assert node1.query("SELECT count() from replicated_mt") == "4\n"
assert node1.query("SELECT count() from replicated_mt_1") == "4\n"
remove_part_from_disk(node2, "replicated_mt", part_name)
assert node2.query("CHECK TABLE replicated_mt PARTITION 201901", settings={
"check_query_single_value_result": 0}) == "{p}\t0\tPart {p} looks broken. Removing it and queueing a fetch.\n".format(
remove_part_from_disk(node2, "replicated_mt_1", part_name)
assert node2.query("CHECK TABLE replicated_mt_1 PARTITION 201901", settings={
"check_query_single_value_result": 0}) == "{p}\t0\tPart {p} looks broken. Removing it and will try to fetch.\n".format(
p=part_name)
node1.query("SYSTEM SYNC REPLICA replicated_mt")
assert node1.query("CHECK TABLE replicated_mt PARTITION 201901",
node1.query("SYSTEM SYNC REPLICA replicated_mt_1")
assert node1.query("CHECK TABLE replicated_mt_1 PARTITION 201901",
settings={"check_query_single_value_result": 0}) == "{}\t1\t\n".format(part_name)
assert node1.query("SELECT count() from replicated_mt") == "4\n"
assert node1.query("SELECT count() from replicated_mt_1") == "4\n"

View File

@ -69,3 +69,6 @@ def test_limited_fetches(started_cluster):
assert max([len(parts) for parts in fetches_result]) == 3, "Strange, but we don't utilize max concurrent threads for fetches"
assert max(background_fetches_metric) == 3, "Just checking the metric is consistent with the table"
node1.query("DROP TABLE IF EXISTS t SYNC")
node2.query("DROP TABLE IF EXISTS t SYNC")

View File

@ -79,9 +79,9 @@ def dml_with_materialize_mysql_database(clickhouse_node, mysql_node, service_nam
check_query(clickhouse_node, """
SELECT key, unsigned_tiny_int, tiny_int, unsigned_small_int,
small_int, unsigned_medium_int, medium_int, unsigned_int, _int, unsigned_integer, _integer,
small_int, unsigned_medium_int, medium_int, unsigned_int, _int, unsigned_integer, _integer,
unsigned_bigint, _bigint, unsigned_float, _float, unsigned_double, _double, _varchar, _char, binary_col,
_date, _datetime, /* exclude it, because ON UPDATE CURRENT_TIMESTAMP _timestamp, */
_date, _datetime, /* exclude it, because ON UPDATE CURRENT_TIMESTAMP _timestamp, */
_bool FROM test_database.test_table_1 ORDER BY key FORMAT TSV
""",
"1\t2\t-1\t2\t-2\t3\t-3\t4\t-4\t5\t-5\t6\t-6\t3.2\t-3.2\t3.4\t-3.4\tvarchar\tchar\tbinary\\0\\0\t2020-01-01\t"
@ -720,7 +720,7 @@ def clickhouse_killed_while_insert(clickhouse_node, mysql_node, service_name):
t = threading.Thread(target=insert, args=(1000,))
t.start()
# TODO: add clickhouse_node.restart_clickhouse(20, kill=False) test
clickhouse_node.restart_clickhouse(20, kill=True)
t.join()
@ -732,3 +732,14 @@ def clickhouse_killed_while_insert(clickhouse_node, mysql_node, service_name):
mysql_node.query("DROP DATABASE kill_clickhouse_while_insert")
clickhouse_node.query("DROP DATABASE kill_clickhouse_while_insert")
def utf8mb4_test(clickhouse_node, mysql_node, service_name):
mysql_node.query("DROP DATABASE IF EXISTS utf8mb4_test")
clickhouse_node.query("DROP DATABASE IF EXISTS utf8mb4_test")
mysql_node.query("CREATE DATABASE utf8mb4_test")
mysql_node.query("CREATE TABLE utf8mb4_test.test (id INT(11) NOT NULL PRIMARY KEY, name VARCHAR(255)) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4")
mysql_node.query("INSERT INTO utf8mb4_test.test VALUES(1, '🦄'),(2, '\u2601')")
clickhouse_node.query("CREATE DATABASE utf8mb4_test ENGINE = MaterializeMySQL('{}:3306', 'utf8mb4_test', 'root', 'clickhouse')".format(service_name))
check_query(clickhouse_node, "SELECT id, name FROM utf8mb4_test.test ORDER BY id", "1\t\U0001F984\n2\t\u2601\n")

View File

@ -228,3 +228,8 @@ def test_clickhouse_killed_while_insert_5_7(started_cluster, started_mysql_5_7,
@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_atomic])
def test_clickhouse_killed_while_insert_8_0(started_cluster, started_mysql_8_0, clickhouse_node):
materialize_with_ddl.clickhouse_killed_while_insert(clickhouse_node, started_mysql_8_0, "mysql8_0")
@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_atomic])
def test_utf8mb4(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node):
materialize_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_5_7, "mysql1")
materialize_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_8_0, "mysql8_0")

View File

@ -105,6 +105,8 @@ def test_kafka_json_as_string(kafka_cluster):
kafka_flush_interval_ms=1000;
''')
time.sleep(3)
result = instance.query('SELECT * FROM test.kafka;')
expected = '''\
{"t": 123, "e": {"x": "woof"} }

View File

@ -443,10 +443,14 @@ def test_infinite_redirect(cluster):
assert exception_raised
def test_storage_s3_get_gzip(cluster):
@pytest.mark.parametrize("extension,method", [
("bin", "gzip"),
("gz", "auto")
])
def test_storage_s3_get_gzip(cluster, extension, method):
bucket = cluster.minio_bucket
instance = cluster.instances["dummy"]
filename = "test_get_gzip.bin"
filename = f"test_get_gzip.{extension}"
name = "test_get_gzip"
data = [
"Sophia Intrieri,55",
@ -473,13 +477,15 @@ def test_storage_s3_get_gzip(cluster):
put_s3_file_content(cluster, bucket, filename, buf.getvalue())
try:
run_query(instance, "CREATE TABLE {} (name String, id UInt32) ENGINE = S3('http://{}:{}/{}/{}', 'CSV', 'gzip')".format(
name, cluster.minio_host, cluster.minio_port, bucket, filename))
run_query(instance, f"""CREATE TABLE {name} (name String, id UInt32) ENGINE = S3(
'http://{cluster.minio_host}:{cluster.minio_port}/{bucket}/{filename}',
'CSV',
'{method}')""")
assert run_query(instance, "SELECT sum(id) FROM {}".format(name)).splitlines() == ["565"]
finally:
run_query(instance, "DROP TABLE {}".format(name))
run_query(instance, f"DROP TABLE {name}")
def test_storage_s3_put_uncompressed(cluster):
@ -515,13 +521,17 @@ def test_storage_s3_put_uncompressed(cluster):
uncompressed_content = get_s3_file_content(cluster, bucket, filename)
assert sum([ int(i.split(',')[1]) for i in uncompressed_content.splitlines() ]) == 753
finally:
run_query(instance, "DROP TABLE {}".format(name))
run_query(instance, f"DROP TABLE {name}")
def test_storage_s3_put_gzip(cluster):
@pytest.mark.parametrize("extension,method", [
("bin", "gzip"),
("gz", "auto")
])
def test_storage_s3_put_gzip(cluster, extension, method):
bucket = cluster.minio_bucket
instance = cluster.instances["dummy"]
filename = "test_put_gzip.bin"
filename = f"test_put_gzip.{extension}"
name = "test_put_gzip"
data = [
"'Joseph Tomlinson',5",
@ -541,8 +551,10 @@ def test_storage_s3_put_gzip(cluster):
"'Yolanda Joseph',89"
]
try:
run_query(instance, "CREATE TABLE {} (name String, id UInt32) ENGINE = S3('http://{}:{}/{}/{}', 'CSV', 'gzip')".format(
name, cluster.minio_host, cluster.minio_port, bucket, filename))
run_query(instance, f"""CREATE TABLE {name} (name String, id UInt32) ENGINE = S3(
'http://{cluster.minio_host}:{cluster.minio_port}/{bucket}/{filename}',
'CSV',
'{method}')""")
run_query(instance, "INSERT INTO {} VALUES ({})".format(name, "),(".join(data)))
@ -553,4 +565,4 @@ def test_storage_s3_put_gzip(cluster):
uncompressed_content = f.read().decode()
assert sum([ int(i.split(',')[1]) for i in uncompressed_content.splitlines() ]) == 708
finally:
run_query(instance, "DROP TABLE {}".format(name))
run_query(instance, f"DROP TABLE {name}")

View File

@ -91,3 +91,6 @@ def test_system_replicated_fetches(started_cluster):
for elem in fetches_result:
assert elem['elapsed'] >= prev_elapsed, "Elapsed time decreasing prev {}, next {}? It's a bug".format(prev_elapsed, elem['elapsed'])
prev_elapsed = elem['elapsed']
node1.query("DROP TABLE IF EXISTS t SYNC")
node2.query("DROP TABLE IF EXISTS t SYNC")

View File

@ -1,5 +1,7 @@
#!/usr/bin/env bash
CLICKHOUSE_LOG_COMMENT=''
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

Some files were not shown because too many files have changed in this diff.