Merge branch 'master' into grace_hash_join

BigRedEye committed 2022-06-18 22:31:23 +03:00 (committed via GitHub)
commit 2693eaf9e7
52 changed files with 1078 additions and 496 deletions


@ -13,7 +13,7 @@ on: # yamllint disable-line rule:truthy
jobs:
CherryPick:
runs-on: [self-hosted, style-checker]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Set envs
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings


@ -260,4 +260,35 @@ TRAP(mq_timedreceive)
TRAP(wordexp)
TRAP(wordfree)
/// C11 threading primitives are not supported by ThreadSanitizer.
/// Also we should avoid using them for compatibility with old libc.
TRAP(thrd_create)
TRAP(thrd_equal)
TRAP(thrd_current)
TRAP(thrd_sleep)
TRAP(thrd_yield)
TRAP(thrd_exit)
TRAP(thrd_detach)
TRAP(thrd_join)
TRAP(mtx_init)
TRAP(mtx_lock)
TRAP(mtx_timedlock)
TRAP(mtx_trylock)
TRAP(mtx_unlock)
TRAP(mtx_destroy)
TRAP(call_once)
TRAP(cnd_init)
TRAP(cnd_signal)
TRAP(cnd_broadcast)
TRAP(cnd_wait)
TRAP(cnd_timedwait)
TRAP(cnd_destroy)
TRAP(tss_create)
TRAP(tss_get)
TRAP(tss_set)
TRAP(tss_delete)
#endif

contrib/curl vendored

@ -1 +1 @@
Subproject commit 801bd5138ce31aa0d906fa4e2eabfc599d74e793
Subproject commit 462196e6b4a47f924293a0e26b8e9c23d37ac26f


@ -84,7 +84,6 @@ set (SRCS
"${LIBRARY_DIR}/lib/gopher.c"
"${LIBRARY_DIR}/lib/idn_win32.c"
"${LIBRARY_DIR}/lib/http_proxy.c"
"${LIBRARY_DIR}/lib/non-ascii.c"
"${LIBRARY_DIR}/lib/asyn-thread.c"
"${LIBRARY_DIR}/lib/curl_gssapi.c"
"${LIBRARY_DIR}/lib/http_ntlm.c"
@ -93,10 +92,8 @@ set (SRCS
"${LIBRARY_DIR}/lib/curl_sasl.c"
"${LIBRARY_DIR}/lib/rand.c"
"${LIBRARY_DIR}/lib/curl_multibyte.c"
"${LIBRARY_DIR}/lib/hostcheck.c"
"${LIBRARY_DIR}/lib/conncache.c"
"${LIBRARY_DIR}/lib/dotdot.c"
"${LIBRARY_DIR}/lib/x509asn1.c"
"${LIBRARY_DIR}/lib/http2.c"
"${LIBRARY_DIR}/lib/smb.c"
"${LIBRARY_DIR}/lib/curl_endian.c"
@ -120,6 +117,9 @@ set (SRCS
"${LIBRARY_DIR}/lib/http_aws_sigv4.c"
"${LIBRARY_DIR}/lib/mqtt.c"
"${LIBRARY_DIR}/lib/rename.c"
"${LIBRARY_DIR}/lib/h2h3.c"
"${LIBRARY_DIR}/lib/headers.c"
"${LIBRARY_DIR}/lib/timediff.c"
"${LIBRARY_DIR}/lib/vauth/vauth.c"
"${LIBRARY_DIR}/lib/vauth/cleartext.c"
"${LIBRARY_DIR}/lib/vauth/cram.c"
@ -142,11 +142,13 @@ set (SRCS
"${LIBRARY_DIR}/lib/vtls/sectransp.c"
"${LIBRARY_DIR}/lib/vtls/gskit.c"
"${LIBRARY_DIR}/lib/vtls/mbedtls.c"
"${LIBRARY_DIR}/lib/vtls/mesalink.c"
"${LIBRARY_DIR}/lib/vtls/bearssl.c"
"${LIBRARY_DIR}/lib/vtls/keylog.c"
"${LIBRARY_DIR}/lib/vtls/x509asn1.c"
"${LIBRARY_DIR}/lib/vtls/hostcheck.c"
"${LIBRARY_DIR}/lib/vquic/ngtcp2.c"
"${LIBRARY_DIR}/lib/vquic/quiche.c"
"${LIBRARY_DIR}/lib/vquic/msh3.c"
"${LIBRARY_DIR}/lib/vssh/libssh2.c"
"${LIBRARY_DIR}/lib/vssh/libssh.c"
)

contrib/librdkafka vendored

@ -1 +1 @@
Subproject commit b8554f1682062c85ba519eb54ef2f90e02b812cb
Subproject commit 81b413cc1c2a33ad4e96df856b89184efbd6221c


@ -7,26 +7,27 @@ set -x
# Thread Fuzzer allows checking more permutations of possible thread scheduling
# and finding more potential issues.
is_tsan_build=$(clickhouse local -q "select value like '% -fsanitize=thread %' from system.build_options where name='CXX_FLAGS'")
if [ "$is_tsan_build" -eq "0" ]; then
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
export THREAD_FUZZER_SLEEP_TIME_US=100000
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
export THREAD_FUZZER_SLEEP_TIME_US=100000
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
fi
function install_packages()
{


@ -27,7 +27,7 @@ Compressed data for `INSERT` and `ALTER` queries is replicated (for more informa
- The `DROP TABLE` query deletes the replica located on the server where the query is run.
- The `RENAME` query renames the table on one of the replicas. In other words, replicated tables can have different names on different replicas.
ClickHouse uses [Apache ZooKeeper](https://zookeeper.apache.org) for storing replicas meta information. Use ZooKeeper version 3.4.5 or newer.
ClickHouse uses [ClickHouse Keeper](../../../guides/sre/keeper/clickhouse-keeper.md) for storing replicas meta information. It is possible to use ZooKeeper version 3.4.5 or newer, but ClickHouse Keeper is recommended.
To use replication, set parameters in the [zookeeper](../../../operations/server-configuration-parameters/settings.md#server-settings_zookeeper) server configuration section.
@ -35,7 +35,7 @@ To use replication, set parameters in the [zookeeper](../../../operations/server
Don't neglect the security setting. ClickHouse supports the `digest` [ACL scheme](https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) of the ZooKeeper security subsystem.
:::
Example of setting the addresses of the ZooKeeper cluster:
Example of setting the addresses of the ClickHouse Keeper cluster:
``` xml
<zookeeper>
@ -54,8 +54,8 @@ Example of setting the addresses of the ZooKeeper cluster:
</zookeeper>
```
ClickHouse also supports to store replicas meta information in the auxiliary ZooKeeper cluster by providing ZooKeeper cluster name and path as engine arguments.
In other word, it supports to store the metadata of differnt tables in different ZooKeeper clusters.
ClickHouse also supports storing replicas meta information in an auxiliary ZooKeeper cluster. Do this by providing the ZooKeeper cluster name and path as engine arguments.
In other words, it supports storing the metadata of different tables in different ZooKeeper clusters.
Example of setting the addresses of the auxiliary ZooKeeper cluster:
@ -122,8 +122,8 @@ The `Replicated` prefix is added to the table engine name. For example:`Replicat
**Replicated\*MergeTree parameters**
- `zoo_path` — The path to the table in ZooKeeper.
- `replica_name` — The replica name in ZooKeeper.
- `zoo_path` — The path to the table in ClickHouse Keeper.
- `replica_name` — The replica name in ClickHouse Keeper.
- `other_parameters` — Parameters of an engine which is used for creating the replicated version, for example, version in `ReplacingMergeTree`.
Example:
@ -168,18 +168,18 @@ Example:
</macros>
```
The path to the table in ZooKeeper should be unique for each replicated table. Tables on different shards should have different paths.
The path to the table in ClickHouse Keeper should be unique for each replicated table. Tables on different shards should have different paths.
In this case, the path consists of the following parts:
`/clickhouse/tables/` is the common prefix. We recommend using exactly this one.
`{layer}-{shard}` is the shard identifier. In this example it consists of two parts, since the example cluster uses bi-level sharding. For most tasks, you can leave just the {shard} substitution, which will be expanded to the shard identifier.
`table_name` is the name of the node for the table in ZooKeeper. It is a good idea to make it the same as the table name. It is defined explicitly, because in contrast to the table name, it does not change after a RENAME query.
`table_name` is the name of the node for the table in ClickHouse Keeper. It is a good idea to make it the same as the table name. It is defined explicitly, because in contrast to the table name, it does not change after a RENAME query.
*HINT*: you could add a database name in front of `table_name` as well. E.g. `db_name.table_name`
The two built-in substitutions `{database}` and `{table}` can be used; they expand into the database name and the table name respectively (unless these macros are defined in the `macros` section). So the zookeeper path can be specified as `'/clickhouse/tables/{layer}-{shard}/{database}/{table}'`.
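For illustration, a minimal sketch of a replicated table definition that relies on these substitutions (the column set here is hypothetical):

``` sql
-- {layer}/{shard}/{replica} come from the macros section of the server config;
-- {database} and {table} are the built-in substitutions described above.
CREATE TABLE table_name
(
    EventDate DateTime,
    CounterID UInt32
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/{database}/{table}', '{replica}')
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate);
```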
Be careful with table renames when using these built-in substitutions. The path in Zookeeper cannot be changed, and when the table is renamed, the macros will expand into a different path, the table will refer to a path that does not exist in Zookeeper, and will go into read-only mode.
Be careful with table renames when using these built-in substitutions. The path in ClickHouse Keeper cannot be changed, and when the table is renamed, the macros will expand into a different path, the table will refer to a path that does not exist in ClickHouse Keeper, and will go into read-only mode.
The replica name identifies different replicas of the same table. You can use the server name for this, as in the example. The name only needs to be unique within each shard.
@ -220,21 +220,21 @@ To delete a replica, run `DROP TABLE`. However, only one replica is deleted
## Recovery After Failures {#recovery-after-failures}
If ZooKeeper is unavailable when a server starts, replicated tables switch to read-only mode. The system periodically attempts to connect to ZooKeeper.
If ClickHouse Keeper is unavailable when a server starts, replicated tables switch to read-only mode. The system periodically attempts to connect to ClickHouse Keeper.
If ZooKeeper is unavailable during an `INSERT`, or an error occurs when interacting with ZooKeeper, an exception is thrown.
If ClickHouse Keeper is unavailable during an `INSERT`, or an error occurs when interacting with ClickHouse Keeper, an exception is thrown.
After connecting to ZooKeeper, the system checks whether the set of data in the local file system matches the expected set of data (ZooKeeper stores this information). If there are minor inconsistencies, the system resolves them by syncing data with the replicas.
After connecting to ClickHouse Keeper, the system checks whether the set of data in the local file system matches the expected set of data (ClickHouse Keeper stores this information). If there are minor inconsistencies, the system resolves them by syncing data with the replicas.
If the system detects broken data parts (with the wrong size of files) or unrecognized parts (parts written to the file system but not recorded in ZooKeeper), it moves them to the `detached` subdirectory (they are not deleted). Any missing parts are copied from the replicas.
If the system detects broken data parts (with the wrong size of files) or unrecognized parts (parts written to the file system but not recorded in ClickHouse Keeper), it moves them to the `detached` subdirectory (they are not deleted). Any missing parts are copied from the replicas.
Note that ClickHouse does not perform any destructive actions such as automatically deleting a large amount of data.
When the server starts (or establishes a new session with ZooKeeper), it only checks the quantity and sizes of all files. If the file sizes match but bytes have been changed somewhere in the middle, this is not detected immediately, but only when attempting to read the data for a `SELECT` query. The query throws an exception about a non-matching checksum or size of a compressed block. In this case, data parts are added to the verification queue and copied from the replicas if necessary.
When the server starts (or establishes a new session with ClickHouse Keeper), it only checks the quantity and sizes of all files. If the file sizes match but bytes have been changed somewhere in the middle, this is not detected immediately, but only when attempting to read the data for a `SELECT` query. The query throws an exception about a non-matching checksum or size of a compressed block. In this case, data parts are added to the verification queue and copied from the replicas if necessary.
If the local set of data differs too much from the expected one, a safety mechanism is triggered. The server enters this in the log and refuses to launch. The reason for this is that this case may indicate a configuration error, such as if a replica on a shard was accidentally configured like a replica on a different shard. However, the thresholds for this mechanism are set fairly low, and this situation might occur during normal failure recovery. In this case, data is restored semi-automatically - by “pushing a button”.
To start recovery, create the node `/path_to_table/replica_name/flags/force_restore_data` in ZooKeeper with any content, or run the command to restore all replicated tables:
To start recovery, create the node `/path_to_table/replica_name/flags/force_restore_data` in ClickHouse Keeper with any content, or run the command to restore all replicated tables:
``` bash
sudo -u clickhouse touch /var/lib/clickhouse/flags/force_restore_data
@ -249,11 +249,11 @@ If all data and metadata disappeared from one of the servers, follow these steps
1. Install ClickHouse on the server. Define substitutions correctly in the config file that contains the shard identifier and replicas, if you use them.
2. If you had unreplicated tables that must be manually duplicated on the servers, copy their data from a replica (in the directory `/var/lib/clickhouse/data/db_name/table_name/`).
3. Copy table definitions located in `/var/lib/clickhouse/metadata/` from a replica. If a shard or replica identifier is defined explicitly in the table definitions, correct it so that it corresponds to this replica. (Alternatively, start the server and make all the `ATTACH TABLE` queries that should have been in the .sql files in `/var/lib/clickhouse/metadata/`.)
4. To start recovery, create the ZooKeeper node `/path_to_table/replica_name/flags/force_restore_data` with any content, or run the command to restore all replicated tables: `sudo -u clickhouse touch /var/lib/clickhouse/flags/force_restore_data`
4. To start recovery, create the ClickHouse Keeper node `/path_to_table/replica_name/flags/force_restore_data` with any content, or run the command to restore all replicated tables: `sudo -u clickhouse touch /var/lib/clickhouse/flags/force_restore_data`
Then start the server (restart, if it is already running). Data will be downloaded from replicas.
An alternative recovery option is to delete information about the lost replica from ZooKeeper (`/path_to_table/replica_name`), then create the replica again as described in “[Creating replicated tables](#creating-replicated-tables)”.
An alternative recovery option is to delete information about the lost replica from ClickHouse Keeper (`/path_to_table/replica_name`), then create the replica again as described in “[Creating replicated tables](#creating-replicated-tables)”.
There is no restriction on network bandwidth during recovery. Keep this in mind if you are restoring many replicas at once.
@ -276,13 +276,13 @@ Create a MergeTree table with a different name. Move all the data from the direc
If you want to get rid of a `ReplicatedMergeTree` table without launching the server:
- Delete the corresponding `.sql` file in the metadata directory (`/var/lib/clickhouse/metadata/`).
- Delete the corresponding path in ZooKeeper (`/path_to_table/replica_name`).
- Delete the corresponding path in ClickHouse Keeper (`/path_to_table/replica_name`).
After this, you can launch the server, create a `MergeTree` table, move the data to its directory, and then restart the server.
## Recovery When Metadata in the Zookeeper Cluster Is Lost or Damaged {#recovery-when-metadata-in-the-zookeeper-cluster-is-lost-or-damaged}
## Recovery When Metadata in the ClickHouse Keeper Cluster Is Lost or Damaged {#recovery-when-metadata-in-the-zookeeper-cluster-is-lost-or-damaged}
If the data in ZooKeeper was lost or damaged, you can save data by moving it to an unreplicated table as described above.
If the data in ClickHouse Keeper was lost or damaged, you can save data by moving it to an unreplicated table as described above.
**See Also**


@ -19,11 +19,10 @@ This function encrypts data using these modes:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
- aes-128-cfb128
- aes-128-ofb, aes-192-ofb, aes-256-ofb
- aes-128-gcm, aes-192-gcm, aes-256-gcm
- aes-128-ctr, aes-192-ctr, aes-256-ctr
**Syntax**
@ -63,9 +62,9 @@ Insert some data (please avoid storing the keys/ivs in the database as this unde
Query:
``` sql
INSERT INTO encryption_test VALUES('aes-256-cfb128 no IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212')),\
('aes-256-cfb128 no IV, different key', encrypt('aes-256-cfb128', 'Secret', 'keykeykeykeykeykeykeykeykeykeyke')),\
('aes-256-cfb128 with IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')),\
INSERT INTO encryption_test VALUES('aes-256-ofb no IV', encrypt('aes-256-ofb', 'Secret', '12345678910121314151617181920212')),\
('aes-256-ofb no IV, different key', encrypt('aes-256-ofb', 'Secret', 'keykeykeykeykeykeykeykeykeykeyke')),\
('aes-256-ofb with IV', encrypt('aes-256-ofb', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')),\
('aes-256-cbc no IV', encrypt('aes-256-cbc', 'Secret', '12345678910121314151617181920212'));
```
@ -78,12 +77,12 @@ SELECT comment, hex(secret) FROM encryption_test;
Result:
``` text
┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
│ aes-256-cfb128 no IV │ B4972BDC4459 │
│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │
│ aes-256-cfb128 with IV │ 5E6CB398F653 │
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
└─────────────────────────────────────┴──────────────────────────────────┘
┌─comment──────────────────────────┬─hex(secret)──────────────────────┐
│ aes-256-ofb no IV │ B4972BDC4459 │
│ aes-256-ofb no IV, different key │ 2FF57C092DC9 │
│ aes-256-ofb with IV │ 5E6CB398F653 │
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
└──────────────────────────────────┴──────────────────────────────────┘
```
Example with `-gcm`:
@ -116,9 +115,7 @@ Supported encryption modes:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
- aes-128-cfb128
- aes-128-ofb, aes-192-ofb, aes-256-ofb
**Syntax**
@ -145,7 +142,7 @@ Given equal input `encrypt` and `aes_encrypt_mysql` produce the same ciphertext:
Query:
``` sql
SELECT encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') = aes_encrypt_mysql('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') AS ciphertexts_equal;
SELECT encrypt('aes-256-ofb', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') = aes_encrypt_mysql('aes-256-ofb', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') AS ciphertexts_equal;
```
Result:
@ -161,14 +158,14 @@ But `encrypt` fails when `key` or `iv` is longer than expected:
Query:
``` sql
SELECT encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123');
SELECT encrypt('aes-256-ofb', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123');
```
Result:
``` text
Received exception from server (version 21.1.2):
Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123').
Received exception from server (version 22.6.1):
Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-ofb', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123').
```
While `aes_encrypt_mysql` produces MySQL-compatible output:
@ -176,7 +173,7 @@ While `aes_encrypt_mysql` produces MySQL-compatitalbe output:
Query:
``` sql
SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123')) AS ciphertext;
SELECT hex(aes_encrypt_mysql('aes-256-ofb', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123')) AS ciphertext;
```
Result:
@ -192,7 +189,7 @@ Notice how supplying even longer `IV` produces the same result
Query:
``` sql
SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456')) AS ciphertext
SELECT hex(aes_encrypt_mysql('aes-256-ofb', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456')) AS ciphertext
```
Result:
@ -206,7 +203,7 @@ Result:
This is binary-equal to what MySQL produces for the same inputs:
``` sql
mysql> SET block_encryption_mode='aes-256-cfb128';
mysql> SET block_encryption_mode='aes-256-ofb';
Query OK, 0 rows affected (0.00 sec)
mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
@ -224,11 +221,10 @@ This function decrypts ciphertext into a plaintext using these modes:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
- aes-128-cfb128
- aes-128-ofb, aes-192-ofb, aes-256-ofb
- aes-128-gcm, aes-192-gcm, aes-256-gcm
- aes-128-ctr, aes-192-ctr, aes-256-ctr
**Syntax**
@ -265,12 +261,12 @@ Result:
│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
└──────────────────────┴──────────────────────────────────────────────┘
┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
│ aes-256-cfb128 no IV │ B4972BDC4459 │
│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │
│ aes-256-cfb128 with IV │ 5E6CB398F653 │
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
└─────────────────────────────────────┴──────────────────────────────────┘
┌─comment──────────────────────────┬─hex(secret)──────────────────────┐
│ aes-256-ofb no IV │ B4972BDC4459 │
│ aes-256-ofb no IV, different key │ 2FF57C092DC9 │
│ aes-256-ofb with IV │ 5E6CB398F653 │
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
└──────────────────────────────────┴──────────────────────────────────┘
```
Now let's try to decrypt all that data.
@ -284,13 +280,19 @@ SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920
Result:
``` text
┌─comment─────────────────────────────┬─plaintext─┐
│ aes-256-cfb128 no IV │ Secret │
│ aes-256-cfb128 no IV, different key │ �4�
�
│ aes-256-cfb128 with IV │ ���6�~ │
│aes-256-cbc no IV │ �2*4�h3c�4w��@
└─────────────────────────────────────┴───────────┘
┌─comment──────────────┬─plaintext──┐
│ aes-256-gcm │ OQ�E
�t�7T�\���\�
│ aes-256-gcm with AAD │ OQ�E
�\��si����;�o��
└──────────────────────┴────────────┘
┌─comment──────────────────────────┬─plaintext─┐
│ aes-256-ofb no IV │ Secret │
│ aes-256-ofb no IV, different key │ �4�
�
│ aes-256-ofb with IV │ ���6�~ │
│aes-256-cbc no IV │ �2*4�h3c�4w��@
└──────────────────────────────────┴───────────┘
```
Notice how only a portion of the data was properly decrypted, and the rest is gibberish since either `mode`, `key`, or `iv` were different upon encryption.
@ -305,9 +307,7 @@ Supported decryption modes:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
- aes-128-cfb128
- aes-128-ofb, aes-192-ofb, aes-256-ofb
**Syntax**
@ -332,7 +332,7 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
Let's decrypt data we've previously encrypted with MySQL:
``` sql
mysql> SET block_encryption_mode='aes-256-cfb128';
mysql> SET block_encryption_mode='aes-256-ofb';
Query OK, 0 rows affected (0.00 sec)
mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
@ -347,7 +347,7 @@ mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviv
Query:
``` sql
SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext
SELECT aes_decrypt_mysql('aes-256-ofb', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext
```
Result:


@ -273,16 +273,16 @@ Converts ASCII Latin symbols in a string to uppercase.
## lowerUTF8
Converts a string to lowercase, assuming the string contains a set of bytes that make up a UTF-8 encoded text.
It does not detect the language. So for Turkish the result might not be exactly correct.
It does not detect the language. E.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
If the string contains a set of bytes that is not UTF-8, then the behavior is undefined.
If the string contains a sequence of bytes that are not valid UTF-8, then the behavior is undefined.
## upperUTF8
Converts a string to uppercase, assuming the string contains a set of bytes that make up a UTF-8 encoded text.
It does not detect the language. So for Turkish the result might not be exactly correct.
It does not detect the language. E.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
If the string contains a set of bytes that is not UTF-8, then the behavior is undefined.
If the string contains a sequence of bytes that are not valid UTF-8, then the behavior is undefined.
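A minimal illustration of the difference from the ASCII-only variants (the outputs in the comments are what the UTF-8-aware function is expected to produce):

``` sql
SELECT lower('ÄBC') AS ascii_only, lowerUTF8('ÄBC') AS utf8_aware;
-- ascii_only: 'Äbc' (non-ASCII 'Ä' is left unchanged)
-- utf8_aware: 'äbc' (the UTF-8 code point is lowercased)
```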
## isValidUTF8


@ -7,7 +7,7 @@ sidebar_label: For Searching in Strings
The search is case-sensitive by default in all these functions. There are separate variants for case insensitive search.
:::note
Functions for [replacing](../../sql-reference/functions/string-replace-functions.md) and [other manipulations with strings](../../sql-reference/functions/string-functions.md) are described separately.
:::
@ -31,7 +31,7 @@ position(needle IN haystack)
Alias: `locate(haystack, needle[, start_pos])`.
:::note
Syntax of `position(needle IN haystack)` provides SQL-compatibility; the function works the same way as `position(haystack, needle)`.
:::
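For instance, a minimal sketch (positions are 1-based and counted in bytes):

``` sql
SELECT
    position('Hello, world!', 'world') AS pos,        -- 8
    position('world' IN 'Hello, world!') AS pos_sql;  -- 8, same result via SQL-style syntax
```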
@ -344,24 +344,27 @@ Returns 1, if at least one string needle<sub>i</sub> matches the string `haystac
For a case-insensitive search or/and in UTF-8 format use functions `multiSearchAnyCaseInsensitive, multiSearchAnyUTF8, multiSearchAnyCaseInsensitiveUTF8`.
:::note
In all `multiSearch*` functions, the number of needles should be less than 2<sup>8</sup> due to implementation details.
:::
## match(haystack, pattern)
Checks whether the string matches the `pattern` regular expression. A `re2` regular expression. The [syntax](https://github.com/google/re2/wiki/Syntax) of the `re2` regular expressions is more limited than the syntax of the Perl regular expressions.
Checks whether the string matches the regular expression `pattern` in `re2` syntax. `Re2` has a more limited [syntax](https://github.com/google/re2/wiki/Syntax) than Perl regular expressions.
Returns 0 if it does not match, or 1 if it matches.
The regular expression works with the string as if it is a set of bytes. The regular expression can't contain null bytes.
Matching is based on UTF-8, e.g. `.` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes. The regular expression must not contain null bytes.
If the haystack or pattern contain a sequence of bytes that are not valid UTF-8, then the behavior is undefined.
No automatic Unicode normalization is performed; if you need it, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
For patterns to search for substrings in a string, it is better to use LIKE or position, since they work much faster.
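A short sketch of that contrast (expected results noted in comments):

``` sql
SELECT
    match('Hello, world!', '^Hello.*!$') AS re_match,        -- 1: full re2 pattern
    position('Hello, world!', 'world') > 0 AS substring_hit; -- 1: cheaper for plain substrings
```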
## multiMatchAny(haystack, \[pattern<sub>1</sub>, pattern<sub>2</sub>, …, pattern<sub>n</sub>\])
The same as `match`, but returns 0 if none of the regular expressions are matched and 1 if any of the patterns matches. It uses [hyperscan](https://github.com/intel/hyperscan) library. For patterns to search substrings in a string, it is better to use `multiSearchAny` since it works much faster.
:::note
The length of any `haystack` string must be less than 2<sup>32</sup> bytes, otherwise an exception is thrown. This restriction exists because of the hyperscan API.
:::
@ -385,11 +388,11 @@ The same as `multiFuzzyMatchAny`, but returns any index that matches the haystac
The same as `multiFuzzyMatchAny`, but returns the array of all indices in any order that match the haystack within a constant edit distance.
:::note
`multiFuzzyMatch*` functions do not support UTF-8 regular expressions, and such expressions are treated as bytes because of hyperscan restriction.
:::
:::note
To turn off all functions that use hyperscan, use setting `SET allow_hyperscan = 0;`.
:::
@ -405,7 +408,7 @@ Extracts all the fragments of a string using a regular expression. If haystac
Matches all groups of the `haystack` string using the `pattern` regular expression. Returns an array of arrays, where the first array includes all fragments matching the first group, the second array - matching the second group, etc.
:::note
`extractAllGroupsHorizontal` function is slower than [extractAllGroupsVertical](#extractallgroups-vertical).
:::
@ -498,6 +501,10 @@ The regular expression can contain the metasymbols `%` and `_`.
Use the backslash (`\`) for escaping metasymbols. See the note on escaping in the description of the match function.
Matching is based on UTF-8, e.g. `_` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes.
If the haystack or pattern contain a sequence of bytes that are not valid UTF-8, then the behavior is undefined.
No automatic Unicode normalization is performed; if you need it, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
For patterns of the form `%needle%`, the code takes an optimized path and works as fast as the `position` function.
For other patterns, the code is the same as for the `match` function.
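For example, a minimal sketch of the two paths (both expressions return 1):

``` sql
SELECT
    'Hello, world!' LIKE '%world%' AS fast_path,  -- plain substring, position-like speed
    'Hello, world!' LIKE 'Hello%!' AS slow_path;  -- handled like the match function
```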
@ -509,6 +516,8 @@ The same thing as like, but negative.
Case insensitive variant of [like](https://clickhouse.com/docs/en/sql-reference/functions/string-search-functions/#function-like) function. You can use `ILIKE` operator instead of the `ilike` function.
The function ignores the language, e.g. for Turkish (i/İ), the result might be incorrect.
**Syntax**
``` sql
@ -577,7 +586,7 @@ Same as `ngramDistance` but calculates the non-symmetric difference between `nee
For case-insensitive search or/and in UTF-8 format use functions `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8`.
:::note
For the UTF-8 case we use 3-gram distance. These are not perfectly fair n-gram distances. We use 2-byte hashes to hash n-grams and then calculate the (non-)symmetric difference between these hash tables; collisions may occur. With the UTF-8 case-insensitive format we do not use a fair `tolower` function; we zero the 5th bit (starting from zero) of each codepoint byte and the first bit of the zeroth byte if there is more than one byte. This works for Latin and mostly for all Cyrillic letters.
:::


@ -48,9 +48,9 @@ You can see that `GROUP BY` for `y = NULL` summed up `x`, as if `NULL` is this v
If you pass several keys to `GROUP BY`, the result will give you all the combinations of the selection, as if `NULL` were a specific value.
## WITH ROLLUP Modifier
## ROLLUP Modifier
`WITH ROLLUP` modifier is used to calculate subtotals for the key expressions, based on their order in the `GROUP BY` list. The subtotals rows are added after the result table.
`ROLLUP` modifier is used to calculate subtotals for the key expressions, based on their order in the `GROUP BY` list. The subtotals rows are added after the result table.
The subtotals are calculated in the reverse order: at first subtotals are calculated for the last key expression in the list, then for the previous one, and so on up to the first key expression.
@ -78,7 +78,7 @@ Consider the table t:
Query:
```sql
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
SELECT year, month, day, count(*) FROM t GROUP BY ROLLUP(year, month, day);
```
As the `GROUP BY` section has three key expressions, the result contains four tables with subtotals "rolled up" from right to left:
@ -109,10 +109,14 @@ As `GROUP BY` section has three key expressions, the result contains four tables
│ 0 │ 0 │ 0 │ 6 │
└──────┴───────┴─────┴─────────┘
```
The same query can also be written using the `WITH` keyword.
```sql
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
```
## WITH CUBE Modifier
## CUBE Modifier
`WITH CUBE` modifier is used to calculate subtotals for every combination of the key expressions in the `GROUP BY` list. The subtotals rows are added after the result table.
`CUBE` modifier is used to calculate subtotals for every combination of the key expressions in the `GROUP BY` list. The subtotals rows are added after the result table.
In the subtotal rows the values of all "grouped" key expressions are set to `0` or an empty string.
@ -138,7 +142,7 @@ Consider the table t:
Query:
```sql
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE;
SELECT year, month, day, count(*) FROM t GROUP BY CUBE(year, month, day);
```
As the `GROUP BY` section has three key expressions, the result contains eight tables with subtotals for all key expression combinations:
@ -196,6 +200,10 @@ Columns, excluded from `GROUP BY`, are filled with zeros.
│ 0 │ 0 │ 0 │ 6 │
└──────┴───────┴─────┴─────────┘
```
The same query can also be written using the `WITH` keyword.
```sql
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE;
```
## WITH TOTALS Modifier
@ -260,6 +268,39 @@ GROUP BY domain
For every different key value encountered, `GROUP BY` calculates a set of aggregate function values.
## GROUPING SETS modifier
This is the most general modifier.
This modifier allows you to manually specify several aggregation key sets (grouping sets).
Aggregation is performed separately for each grouping set, and after that all the results are combined.
If a column is not present in a grouping set, it is filled with a default value.
In other words, the modifiers described above can all be represented via `GROUPING SETS`.
Although queries with `ROLLUP`, `CUBE`, and `GROUPING SETS` modifiers can express the same aggregation, they may perform differently.
While `GROUPING SETS` tries to execute everything in parallel, `ROLLUP` and `CUBE` execute the final merging of the aggregates in a single thread.
When source columns contain default values, it might be hard to distinguish whether a row belongs to the aggregation that uses those columns as keys or not.
To solve this problem, use the `GROUPING` function.
**Example**
The following two queries are equivalent.
```sql
-- Query 1
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
-- Query 2
SELECT year, month, day, count(*) FROM t GROUP BY
GROUPING SETS
(
(year, month, day),
(year, month),
(year),
()
);
```
## Implementation Details
Aggregation is one of the most important features of a column-oriented DBMS, and thus its implementation is one of the most heavily optimized parts of ClickHouse. By default, aggregation is done in memory using a hash-table. It has 40+ specializations that are chosen automatically depending on “grouping key” data types.


@ -55,3 +55,372 @@ https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html
https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html
https://dev.mysql.com/doc/refman/8.0/en/window-functions-frames.html
## Syntax
```text
aggregate_function (column_name)
OVER ([PARTITION BY grouping_column] [ORDER BY sorting_column]
[ROWS or RANGE expression_to_bounds_of_frame])
```
- `PARTITION BY` - defines how to break the result set into groups.
- `ORDER BY` - defines how to order rows inside the group during calculation of aggregate_function.
- `ROWS or RANGE` - defines the bounds of a frame; aggregate_function is calculated within a frame.
```text
PARTITION
┌─────────────────┐ <-- UNBOUNDED PRECEDING (BEGINNING of the PARTITION)
│ │
│ │
│=================│ <-- N PRECEDING <
│ N ROWS │ │ F
│ Before CURRENT │ │ R
│~~~~~~~~~~~~~~~~~│ <-- CURRENT ROW A
│ M ROWS │ │ M
│ After CURRENT │ │ E
│=================│ <-- M FOLLOWING <
│ │
│ │
└─────────────────┘ <--- UNBOUNDED FOLLOWING (END of the PARTITION)
```
## Examples
```sql
CREATE TABLE wf_partition
(
    `part_key` UInt64,
    `value` UInt64,
    `order` UInt64
)
ENGINE = Memory;
INSERT INTO wf_partition FORMAT Values
(1,1,1), (1,2,2), (1,3,3), (2,0,0), (3,0,0);
SELECT
part_key,
value,
order,
groupArray(value) OVER (PARTITION BY part_key) AS frame_values
FROM wf_partition
ORDER BY
part_key ASC,
value ASC;
┌─part_key─┬─value─┬─order─┬─frame_values─┐
│ 1 │ 1 │ 1 │ [1,2,3] │ <
│ 1 │ 2 │ 2 │ [1,2,3] │ │ 1st group
│ 1 │ 3 │ 3 │ [1,2,3] │ <
│ 2 │ 0 │ 0 │ [0] │ <- 2nd group
│ 3 │ 0 │ 0 │ [0] │ <- 3rd group
└──────────┴───────┴───────┴──────────────┘
```
```sql
CREATE TABLE wf_frame
(
`part_key` UInt64,
`value` UInt64,
`order` UInt64
)
ENGINE = Memory;
INSERT INTO wf_frame FORMAT Values
(1,1,1), (1,2,2), (1,3,3), (1,4,4), (1,5,5);
-- frame is bounded by the bounds of a partition (BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
SELECT
part_key,
value,
order,
groupArray(value) OVER (PARTITION BY part_key ORDER BY order ASC
Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS frame_values
FROM wf_frame
ORDER BY
part_key ASC,
value ASC;
┌─part_key─┬─value─┬─order─┬─frame_values─┐
│ 1 │ 1 │ 1 │ [1,2,3,4,5] │
│ 1 │ 2 │ 2 │ [1,2,3,4,5] │
│ 1 │ 3 │ 3 │ [1,2,3,4,5] │
│ 1 │ 4 │ 4 │ [1,2,3,4,5] │
│ 1 │ 5 │ 5 │ [1,2,3,4,5] │
└──────────┴───────┴───────┴──────────────┘
-- short form - no bound expression, no order by
SELECT
part_key,
value,
order,
groupArray(value) OVER (PARTITION BY part_key) AS frame_values
FROM wf_frame
ORDER BY
part_key ASC,
value ASC;
┌─part_key─┬─value─┬─order─┬─frame_values─┐
│ 1 │ 1 │ 1 │ [1,2,3,4,5] │
│ 1 │ 2 │ 2 │ [1,2,3,4,5] │
│ 1 │ 3 │ 3 │ [1,2,3,4,5] │
│ 1 │ 4 │ 4 │ [1,2,3,4,5] │
│ 1 │ 5 │ 5 │ [1,2,3,4,5] │
└──────────┴───────┴───────┴──────────────┘
-- frame is bounded by the beginning of a partition and the current row
SELECT
part_key,
value,
order,
groupArray(value) OVER (PARTITION BY part_key ORDER BY order ASC
Rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS frame_values
FROM wf_frame
ORDER BY
part_key ASC,
value ASC;
┌─part_key─┬─value─┬─order─┬─frame_values─┐
│ 1 │ 1 │ 1 │ [1] │
│ 1 │ 2 │ 2 │ [1,2] │
│ 1 │ 3 │ 3 │ [1,2,3] │
│ 1 │ 4 │ 4 │ [1,2,3,4] │
│ 1 │ 5 │ 5 │ [1,2,3,4,5] │
└──────────┴───────┴───────┴──────────────┘
-- short form (frame is bounded by the beginning of a partition and the current row)
SELECT
part_key,
value,
order,
groupArray(value) OVER (PARTITION BY part_key ORDER BY order ASC) AS frame_values
FROM wf_frame
ORDER BY
part_key ASC,
value ASC;
┌─part_key─┬─value─┬─order─┬─frame_values─┐
│ 1 │ 1 │ 1 │ [1] │
│ 1 │ 2 │ 2 │ [1,2] │
│ 1 │ 3 │ 3 │ [1,2,3] │
│ 1 │ 4 │ 4 │ [1,2,3,4] │
│ 1 │ 5 │ 5 │ [1,2,3,4,5] │
└──────────┴───────┴───────┴──────────────┘
-- frame is bounded by the beginning of a partition and the current row, but order is backward
SELECT
part_key,
value,
order,
groupArray(value) OVER (PARTITION BY part_key ORDER BY order DESC) AS frame_values
FROM wf_frame
ORDER BY
part_key ASC,
value ASC;
┌─part_key─┬─value─┬─order─┬─frame_values─┐
│ 1 │ 1 │ 1 │ [5,4,3,2,1] │
│ 1 │ 2 │ 2 │ [5,4,3,2] │
│ 1 │ 3 │ 3 │ [5,4,3] │
│ 1 │ 4 │ 4 │ [5,4] │
│ 1 │ 5 │ 5 │ [5] │
└──────────┴───────┴───────┴──────────────┘
-- sliding frame - 1 PRECEDING ROW AND CURRENT ROW
SELECT
part_key,
value,
order,
groupArray(value) OVER (PARTITION BY part_key ORDER BY order ASC
Rows BETWEEN 1 PRECEDING AND CURRENT ROW) AS frame_values
FROM wf_frame
ORDER BY
part_key ASC,
value ASC;
┌─part_key─┬─value─┬─order─┬─frame_values─┐
│ 1 │ 1 │ 1 │ [1] │
│ 1 │ 2 │ 2 │ [1,2] │
│ 1 │ 3 │ 3 │ [2,3] │
│ 1 │ 4 │ 4 │ [3,4] │
│ 1 │ 5 │ 5 │ [4,5] │
└──────────┴───────┴───────┴──────────────┘
-- sliding frame - Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING
SELECT
part_key,
value,
order,
groupArray(value) OVER (PARTITION BY part_key ORDER BY order ASC
Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) AS frame_values
FROM wf_frame
ORDER BY
part_key ASC,
value ASC;
┌─part_key─┬─value─┬─order─┬─frame_values─┐
│ 1 │ 1 │ 1 │ [1,2,3,4,5] │
│ 1 │ 2 │ 2 │ [1,2,3,4,5] │
│ 1 │ 3 │ 3 │ [2,3,4,5] │
│ 1 │ 4 │ 4 │ [3,4,5] │
│ 1 │ 5 │ 5 │ [4,5] │
└──────────┴───────┴───────┴──────────────┘
```
## Real-world examples
### Maximum/total salary per department.
```sql
CREATE TABLE employees
(
`department` String,
`employee_name` String,
`salary` Float
)
ENGINE = Memory;
INSERT INTO employees FORMAT Values
('Finance', 'John', 200),
('Finance', 'Joan', 210),
('Finance', 'Jean', 505),
('IT', 'Tim', 200),
('IT', 'Anna', 300),
('IT', 'Elen', 500);
SELECT
department,
employee_name AS emp,
salary,
max_salary_per_dep,
total_salary_per_dep,
round((salary / total_salary_per_dep) * 100, 2) AS `share_per_dep(%)`
FROM
(
SELECT
department,
employee_name,
salary,
max(salary) OVER wndw AS max_salary_per_dep,
sum(salary) OVER wndw AS total_salary_per_dep
FROM employees
WINDOW wndw AS (PARTITION BY department
rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
ORDER BY
department ASC,
employee_name ASC
);
┌─department─┬─emp──┬─salary─┬─max_salary_per_dep─┬─total_salary_per_dep─┬─share_per_dep(%)─┐
│ Finance │ Jean │ 505 │ 505 │ 915 │ 55.19 │
│ Finance │ Joan │ 210 │ 505 │ 915 │ 22.95 │
│ Finance │ John │ 200 │ 505 │ 915 │ 21.86 │
│ IT │ Anna │ 300 │ 500 │ 1000 │ 30 │
│ IT │ Elen │ 500 │ 500 │ 1000 │ 50 │
│ IT │ Tim │ 200 │ 500 │ 1000 │ 20 │
└────────────┴──────┴────────┴────────────────────┴──────────────────────┴──────────────────┘
```
### Cumulative sum.
```sql
CREATE TABLE warehouse
(
    `item` String,
    `ts` DateTime,
    `value` Float
)
ENGINE = Memory;
INSERT INTO warehouse VALUES
('sku38', '2020-01-01', 9),
('sku38', '2020-02-01', 1),
('sku38', '2020-03-01', -4),
('sku1', '2020-01-01', 1),
('sku1', '2020-02-01', 1),
('sku1', '2020-03-01', 1);
SELECT
item,
ts,
value,
sum(value) OVER (PARTITION BY item ORDER BY ts ASC) AS stock_balance
FROM warehouse
ORDER BY
item ASC,
ts ASC;
┌─item──┬──────────────────ts─┬─value─┬─stock_balance─┐
│ sku1 │ 2020-01-01 00:00:00 │ 1 │ 1 │
│ sku1 │ 2020-02-01 00:00:00 │ 1 │ 2 │
│ sku1 │ 2020-03-01 00:00:00 │ 1 │ 3 │
│ sku38 │ 2020-01-01 00:00:00 │ 9 │ 9 │
│ sku38 │ 2020-02-01 00:00:00 │ 1 │ 10 │
│ sku38 │ 2020-03-01 00:00:00 │ -4 │ 6 │
└───────┴─────────────────────┴───────┴───────────────┘
```
### Moving / Sliding Average (per 3 rows)
```sql
CREATE TABLE sensors
(
`metric` String,
`ts` DateTime,
`value` Float
)
ENGINE = Memory;
INSERT INTO sensors VALUES ('cpu_temp', '2020-01-01 00:00:00', 87),
('cpu_temp', '2020-01-01 00:00:01', 77),
('cpu_temp', '2020-01-01 00:00:02', 93),
('cpu_temp', '2020-01-01 00:00:03', 87),
('cpu_temp', '2020-01-01 00:00:04', 87),
('cpu_temp', '2020-01-01 00:00:05', 87),
('cpu_temp', '2020-01-01 00:00:06', 87),
('cpu_temp', '2020-01-01 00:00:07', 87);
SELECT
metric,
ts,
value,
avg(value) OVER
(PARTITION BY metric ORDER BY ts ASC Rows BETWEEN 2 PRECEDING AND CURRENT ROW)
AS moving_avg_temp
FROM sensors
ORDER BY
metric ASC,
ts ASC;
┌─metric───┬──────────────────ts─┬─value─┬───moving_avg_temp─┐
│ cpu_temp │ 2020-01-01 00:00:00 │ 87 │ 87 │
│ cpu_temp │ 2020-01-01 00:00:01 │ 77 │ 82 │
│ cpu_temp │ 2020-01-01 00:00:02 │ 93 │ 85.66666666666667 │
│ cpu_temp │ 2020-01-01 00:00:03 │ 87 │ 85.66666666666667 │
│ cpu_temp │ 2020-01-01 00:00:04 │ 87 │ 89 │
│ cpu_temp │ 2020-01-01 00:00:05 │ 87 │ 87 │
│ cpu_temp │ 2020-01-01 00:00:06 │ 87 │ 87 │
│ cpu_temp │ 2020-01-01 00:00:07 │ 87 │ 87 │
└──────────┴─────────────────────┴───────┴───────────────────┘
```
### Moving / Sliding Average (per 10 seconds)
```sql
SELECT
metric,
ts,
value,
avg(value) OVER (PARTITION BY metric ORDER BY ts
Range BETWEEN 10 PRECEDING AND CURRENT ROW) AS moving_avg_10_seconds_temp
FROM sensors
ORDER BY
metric ASC,
ts ASC;
┌─metric───┬──────────────────ts─┬─value─┬─moving_avg_10_seconds_temp─┐
│ cpu_temp │ 2020-01-01 00:00:00 │ 87 │ 87 │
│ cpu_temp │ 2020-01-01 00:01:10 │ 77 │ 77 │
│ cpu_temp │ 2020-01-01 00:02:20 │ 93 │ 93 │
│ cpu_temp │ 2020-01-01 00:03:30 │ 87 │ 87 │
│ cpu_temp │ 2020-01-01 00:04:40 │ 87 │ 87 │
│ cpu_temp │ 2020-01-01 00:05:50 │ 87 │ 87 │
│ cpu_temp │ 2020-01-01 00:06:00 │ 87 │ 87 │
│ cpu_temp │ 2020-01-01 00:07:10 │ 87 │ 87 │
└──────────┴─────────────────────┴───────┴────────────────────────────┘
```


@ -19,11 +19,10 @@ sidebar_label: "Encryption functions"
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
- aes-128-cfb128
- aes-128-ofb, aes-192-ofb, aes-256-ofb
- aes-128-gcm, aes-192-gcm, aes-256-gcm
- aes-128-ctr, aes-192-ctr, aes-256-ctr
**Syntax**
@ -63,9 +62,9 @@ ENGINE = Memory;
Query:
``` sql
INSERT INTO encryption_test VALUES('aes-256-cfb128 no IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212')),\
('aes-256-cfb128 no IV, different key', encrypt('aes-256-cfb128', 'Secret', 'keykeykeykeykeykeykeykeykeykeyke')),\
('aes-256-cfb128 with IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')),\
INSERT INTO encryption_test VALUES('aes-256-ofb no IV', encrypt('aes-256-ofb', 'Secret', '12345678910121314151617181920212')),\
('aes-256-ofb no IV, different key', encrypt('aes-256-ofb', 'Secret', 'keykeykeykeykeykeykeykeykeykeyke')),\
('aes-256-ofb with IV', encrypt('aes-256-ofb', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')),\
('aes-256-cbc no IV', encrypt('aes-256-cbc', 'Secret', '12345678910121314151617181920212'));
```
@ -78,12 +77,12 @@ SELECT comment, hex(secret) FROM encryption_test;
Result:
``` text
┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
│ aes-256-cfb128 no IV │ B4972BDC4459 │
│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │
│ aes-256-cfb128 with IV │ 5E6CB398F653 │
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
└─────────────────────────────────────┴──────────────────────────────────┘
┌─comment──────────────────────────┬─hex(secret)──────────────────────┐
│ aes-256-ofb no IV │ B4972BDC4459 │
│ aes-256-ofb no IV, different key │ 2FF57C092DC9 │
│ aes-256-ofb with IV │ 5E6CB398F653 │
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
└──────────────────────────────────┴──────────────────────────────────┘
```
Example with `-gcm` mode:
@ -116,9 +115,7 @@ SELECT comment, hex(secret) FROM encryption_test WHERE comment LIKE '%gcm%';
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
- aes-128-cfb128
- aes-128-ofb, aes-192-ofb, aes-256-ofb
**Syntax**
@ -145,7 +142,7 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
Query:
``` sql
SELECT encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') = aes_encrypt_mysql('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') AS ciphertexts_equal;
SELECT encrypt('aes-256-ofb', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') = aes_encrypt_mysql('aes-256-ofb', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') AS ciphertexts_equal;
```
Result:
@ -161,14 +158,14 @@ SELECT encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', '
Query:
``` sql
SELECT encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123');
SELECT encrypt('aes-256-ofb', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123');
```
Result:
``` text
Received exception from server (version 21.1.2):
Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123').
Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-ofb', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123').
```
However, in the same case `aes_encrypt_mysql` returns a result that MySQL can process:
@ -176,7 +173,7 @@ Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid ke
Query:
``` sql
SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123')) AS ciphertext;
SELECT hex(aes_encrypt_mysql('aes-256-ofb', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123')) AS ciphertext;
```
Result:
@ -192,7 +189,7 @@ SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '1234567891012131415161
Query:
``` sql
SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456')) AS ciphertext
SELECT hex(aes_encrypt_mysql('aes-256-ofb', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456')) AS ciphertext
```
Result:
@ -206,7 +203,7 @@ SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '1234567891012131415161
This matches the result returned by MySQL for the same input values:
``` sql
mysql> SET block_encryption_mode='aes-256-cfb128';
mysql> SET block_encryption_mode='aes-256-ofb';
Query OK, 0 rows affected (0.00 sec)
mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
@ -224,11 +221,10 @@ mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviv
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
- aes-128-cfb128
- aes-128-ofb, aes-192-ofb, aes-256-ofb
- aes-128-gcm, aes-192-gcm, aes-256-gcm
- aes-128-ctr, aes-192-ctr, aes-256-ctr
**Syntax**
@ -265,12 +261,12 @@ SELECT comment, hex(secret) FROM encryption_test;
│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
└──────────────────────┴──────────────────────────────────────────────┘
┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
│ aes-256-cfb128 no IV │ B4972BDC4459 │
│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │
│ aes-256-cfb128 with IV │ 5E6CB398F653 │
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
└─────────────────────────────────────┴──────────────────────────────────┘
┌─comment──────────────────────────┬─hex(secret)──────────────────────┐
│ aes-256-ofb no IV │ B4972BDC4459 │
│ aes-256-ofb no IV, different key │ 2FF57C092DC9 │
│ aes-256-ofb with IV │ 5E6CB398F653 │
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
└──────────────────────────────────┴──────────────────────────────────┘
```
Now let's try to decrypt this data:
@ -278,19 +274,25 @@ SELECT comment, hex(secret) FROM encryption_test;
Query:
``` sql
SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920212') as plaintext FROM encryption_test;
SELECT comment, decrypt('aes-256-ofb', secret, '12345678910121314151617181920212') as plaintext FROM encryption_test;
```
Result:
``` text
┌─comment─────────────────────────────┬─plaintext─┐
│ aes-256-cfb128 no IV │ Secret │
│ aes-256-cfb128 no IV, different key │ �4�
�
│ aes-256-cfb128 with IV │ ���6�~ │
│aes-256-cbc no IV │ �2*4�h3c�4w��@
└─────────────────────────────────────┴───────────┘
┌─comment──────────────┬─plaintext──┐
│ aes-256-gcm │ OQ�E
�t�7T�\���\�
│ aes-256-gcm with AAD │ OQ�E
�\��si����;�o��
└──────────────────────┴────────────┘
┌─comment──────────────────────────┬─plaintext─┐
│ aes-256-ofb no IV │ Secret │
│ aes-256-ofb no IV, different key │ �4�
�
│ aes-256-ofb with IV │ ���6�~ │
│aes-256-cbc no IV │ �2*4�h3c�4w��@
└──────────────────────────────────┴───────────┘
```
Notice that only part of the data was decrypted correctly. The rest is gibberish, since a different `mode`, `key`, or `iv` was used during encryption.
@ -305,9 +307,7 @@ SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
- aes-128-cfb128
- aes-128-ofb, aes-192-ofb, aes-256-ofb
**Syntax**
@ -333,7 +333,7 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
``` sql
mysql> SET block_encryption_mode='aes-256-cfb128';
mysql> SET block_encryption_mode='aes-256-ofb';
Query OK, 0 rows affected (0.00 sec)
mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
@ -348,7 +348,7 @@ mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviv
Query:
``` sql
SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext;
SELECT aes_decrypt_mysql('aes-256-ofb', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext;
```
Result:


@ -600,6 +600,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \
M(Bool, throw_if_no_data_to_insert, true, "Enables or disables empty INSERTs, enabled by default", 0) \
M(Bool, compatibility_ignore_auto_increment_in_create_table, false, "Ignore AUTO_INCREMENT keyword in column declaration if true, otherwise return error. It simplifies migration from MySQL", 0) \
M(Bool, multiple_joins_try_to_keep_original_names, false, "Do not add aliases to top level expression list on multiple joins rewrite", 0) \
M(Bool, allow_grace_hash_join, true, "Allow selecting grace hash join in 'auto' join algorithm", 0) \
M(UInt64, grace_hash_join_initial_buckets, 32, "Initial number of grace hash join buckets", 0) \
M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \
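For context, a hedged usage sketch of the settings added above (`t1`, `t2`, and `key` are placeholder names; whether `auto` actually selects grace hash join depends on planner heuristics outside this diff):

```sql
SET join_algorithm = 'auto';               -- assumption: grace hash join becomes a candidate here
SET allow_grace_hash_join = 1;
SET grace_hash_join_initial_buckets = 32;
SET grace_hash_join_max_buckets = 1024;
SELECT * FROM t1 JOIN t2 USING (key);
```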


@ -232,6 +232,8 @@ static void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & table
CrossToInnerJoinVisitor(cross_to_inner).visit(query);
JoinToSubqueryTransformVisitor::Data join_to_subs_data{tables, aliases};
join_to_subs_data.try_to_keep_original_names = settings.multiple_joins_try_to_keep_original_names;
JoinToSubqueryTransformVisitor(join_to_subs_data).visit(query);
}

View File

@@ -361,6 +361,7 @@ struct CheckAliasDependencyVisitorData
dependency = &ident;
}
};
using CheckAliasDependencyMatcher = OneTypeMatcher<CheckAliasDependencyVisitorData>;
using CheckAliasDependencyVisitor = InDepthNodeVisitor<CheckAliasDependencyMatcher, true>;
@@ -500,6 +501,7 @@ void restoreName(ASTIdentifier & ident, const String & original_name, NameSet &
{
if (!ident.tryGetAlias().empty())
return;
if (original_name.empty())
return;
@@ -509,7 +511,9 @@ void restoreName(ASTIdentifier & ident, const String & original_name, NameSet &
restored_names.emplace(original_name);
}
else
{
ident.setShortName(original_name);
}
}
/// Find clashes and normalize names
@@ -527,12 +531,12 @@ std::vector<TableNeededColumns> normalizeColumnNamesExtractNeeded(
{
size_t last_table_pos = tables.size() - 1;
NameSet restored_names;
std::vector<TableNeededColumns> needed_columns;
needed_columns.reserve(tables.size());
for (const auto & table : tables)
needed_columns.push_back(TableNeededColumns{table.table});
NameSet restored_names;
for (ASTIdentifier * ident : identifiers)
{
bool got_alias = aliases.contains(ident->name());
@@ -729,7 +733,10 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast
std::unordered_set<ASTIdentifier *> public_identifiers;
for (auto & top_level_child : select.select()->children)
if (auto * ident = top_level_child->as<ASTIdentifier>())
public_identifiers.insert(ident);
{
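            // When try_to_keep_original_names is set, alias only identifiers that were already rewritten to generated unique short names.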
if (!data.try_to_keep_original_names || startsWith(ident->name(), UniqueShortNames::pattern))
public_identifiers.insert(ident);
}
UniqueShortNames unique_names;
std::vector<TableNeededColumns> needed_columns =

View File

@@ -21,6 +21,7 @@ public:
const std::vector<TableWithColumnNamesAndTypes> & tables;
const Aliases & aliases;
bool done = false;
bool try_to_keep_original_names = false;
};
static bool needChildVisit(ASTPtr &, const ASTPtr &);

View File

@@ -103,7 +103,6 @@ bool ExecutionThreadContext::executeTask()
#endif
span.addAttribute("thread_number", thread_number);
span.addAttribute("processor.description", node->processor->getDescription());
return node->exception == nullptr;
}

View File

@@ -1,4 +1,5 @@
#include "PostgreSQLSource.h"
#include "Common/Exception.h"
#if USE_LIBPQXX
#include <Columns/ColumnNullable.h>
@@ -22,6 +23,10 @@
namespace DB
{
namespace ErrorCodes
{
extern const int TOO_MANY_COLUMNS;
}
template<typename T>
PostgreSQLSource<T>::PostgreSQLSource(
@@ -123,6 +128,11 @@ Chunk PostgreSQLSource<T>::generate()
if (!row)
break;
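        /// A row wider than the expected sample block means the query returned columns that are missing from the declared structure.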
if (row->size() > description.sample_block.columns())
throw Exception(ErrorCodes::TOO_MANY_COLUMNS,
"Row has too many columns: {}, expected structure: {}",
row->size(), description.sample_block.dumpStructure());
for (const auto idx : collections::range(0, row->size()))
{
const auto & sample = description.sample_block.getByPosition(idx);

View File

@@ -1,48 +1,72 @@
#!/usr/bin/env python3
import sys
import argparse
import logging
import os
import subprocess
from env_helper import GITHUB_WORKSPACE, TEMP_PATH
from get_robot_token import get_parameter_from_ssm
from get_robot_token import get_best_robot_token
from ssh import SSHKey
from cherry_pick_utils.backport import Backport
from cherry_pick_utils.cherrypick import CherryPick
def parse_args():
parser = argparse.ArgumentParser("Create cherry-pick and backport PRs")
parser.add_argument("--token", help="github token, if not set, used from smm")
parser.add_argument("--dry-run", action="store_true", help="do not create anything")
return parser.parse_args()
def main():
args = parse_args()
token = args.token or get_best_robot_token()
bp = Backport(
token,
os.environ.get("REPO_OWNER"),
os.environ.get("REPO_NAME"),
os.environ.get("REPO_TEAM"),
)
cherry_pick = CherryPick(
token,
os.environ.get("REPO_OWNER"),
os.environ.get("REPO_NAME"),
os.environ.get("REPO_TEAM"),
1,
"master",
)
    # Use the same _gh in both objects to keep a single, accurate API-cost tally
# pylint: disable=protected-access
for key in bp._gh.api_costs:
if key in cherry_pick._gh.api_costs:
bp._gh.api_costs[key] += cherry_pick._gh.api_costs[key]
for key in cherry_pick._gh.api_costs:
if key not in bp._gh.api_costs:
bp._gh.api_costs[key] = cherry_pick._gh.api_costs[key]
cherry_pick._gh = bp._gh
# pylint: enable=protected-access
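    # Reuse a single CherryPick instance for every (PR, branch) pair; update_pr_branch re-points it instead of constructing a new object.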
def cherrypick_run(pr_data, branch):
cherry_pick.update_pr_branch(pr_data, branch)
return cherry_pick.execute(GITHUB_WORKSPACE, args.dry_run)
try:
bp.execute(GITHUB_WORKSPACE, "origin", None, cherrypick_run)
except subprocess.CalledProcessError as e:
logging.error(e.output)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
repo_path = GITHUB_WORKSPACE
temp_path = TEMP_PATH
if not os.path.exists(temp_path):
os.makedirs(temp_path)
if not os.path.exists(TEMP_PATH):
os.makedirs(TEMP_PATH)
sys.path.append(os.path.join(repo_path, "utils/github"))
with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"):
token = get_parameter_from_ssm("github_robot_token_1")
bp = Backport(
token,
os.environ.get("REPO_OWNER"),
os.environ.get("REPO_NAME"),
os.environ.get("REPO_TEAM"),
)
def cherrypick_run(token, pr, branch):
return CherryPick(
token,
os.environ.get("REPO_OWNER"),
os.environ.get("REPO_NAME"),
os.environ.get("REPO_TEAM"),
pr,
branch,
).execute(repo_path, False)
try:
bp.execute(repo_path, "origin", None, cherrypick_run)
except subprocess.CalledProcessError as e:
logging.error(e.output)
if os.getenv("ROBOT_CLICKHOUSE_SSH_KEY", ""):
with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"):
main()
else:
main()

View File

@@ -1 +1,2 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

View File

@@ -1,24 +1,22 @@
# -*- coding: utf-8 -*-
try:
from clickhouse.utils.github.cherrypick import CherryPick
from clickhouse.utils.github.query import Query as RemoteRepo
from clickhouse.utils.github.local import Repository as LocalRepo
except:
from .cherrypick import CherryPick
from .query import Query as RemoteRepo
from .local import Repository as LocalRepo
import argparse
import logging
import os
import re
import sys
sys.path.append(os.path.dirname(__file__))
from cherrypick import CherryPick
from query import Query as RemoteRepo
from local import Repository as LocalRepo
class Backport:
def __init__(self, token, owner, name, team):
self._gh = RemoteRepo(
token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7
token, owner=owner, name=name, team=team, max_page_size=60, min_page_size=7
)
self._token = token
self.default_branch_name = self._gh.default_branch
@@ -49,14 +47,16 @@ class Backport:
logging.info("No release branches found!")
return
for branch in branches:
logging.info("Found release branch: %s", branch[0])
logging.info(
"Found release branches: %s", ", ".join([br[0] for br in branches])
)
if not until_commit:
until_commit = branches[0][1]
pull_requests = self.getPullRequests(until_commit)
backport_map = {}
pr_map = {pr["number"]: pr for pr in pull_requests}
RE_MUST_BACKPORT = re.compile(r"^v(\d+\.\d+)-must-backport$")
RE_NO_BACKPORT = re.compile(r"^v(\d+\.\d+)-no-backport$")
@@ -68,17 +68,17 @@
pr["mergeCommit"]["oid"]
):
logging.info(
"PR #{} is already inside {}. Dropping this branch for further PRs".format(
pr["number"], branches[-1][0]
)
"PR #%s is already inside %s. Dropping this branch for further PRs",
pr["number"],
branches[-1][0],
)
branches.pop()
logging.info("Processing PR #{}".format(pr["number"]))
logging.info("Processing PR #%s", pr["number"])
assert len(branches)
assert len(branches) != 0
branch_set = set([branch[0] for branch in branches])
branch_set = {branch[0] for branch in branches}
# First pass. Find all must-backports
for label in pr["labels"]["nodes"]:
@@ -120,16 +120,16 @@
)
for pr, branches in list(backport_map.items()):
logging.info("PR #%s needs to be backported to:", pr)
statuses = []
for branch in branches:
logging.info(
"\t%s, and the status is: %s",
branch,
run_cherrypick(self._token, pr, branch),
)
branch_status = run_cherrypick(pr_map[pr], branch)
statuses.append(f"{branch}, and the status is: {branch_status}")
logging.info(
"PR #%s needs to be backported to:\n\t%s", pr, "\n\t".join(statuses)
)
# print API costs
logging.info("\nGitHub API total costs per query:")
logging.info("\nGitHub API total costs for backporting per query:")
for name, value in list(self._gh.api_costs.items()):
logging.info("%s : %s", name, value)
@@ -178,8 +178,13 @@ if __name__ == "__main__":
else:
logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.INFO)
cherrypick_run = lambda token, pr, branch: CherryPick(
token, "ClickHouse", "ClickHouse", "core", pr, branch
).execute(args.repo, args.dry_run)
cherry_pick = CherryPick(
args.token, "ClickHouse", "ClickHouse", "core", 1, "master"
)
def cherrypick_run(pr_data, branch):
cherry_pick.update_pr_branch(pr_data, branch)
return cherry_pick.execute(args.repo, args.dry_run)
bp = Backport(args.token, "ClickHouse", "ClickHouse", "core")
bp.execute(args.repo, args.upstream, args.til, cherrypick_run)

View File

@@ -14,10 +14,6 @@ Second run checks PR from previous run to be merged or at least being mergeable.
Third run creates PR from backport branch (with merged previous PR) to release branch.
"""
try:
from clickhouse.utils.github.query import Query as RemoteRepo
except:
from .query import Query as RemoteRepo
import argparse
from enum import Enum
@@ -26,6 +22,10 @@ import os
import subprocess
import sys
sys.path.append(os.path.dirname(__file__))
from query import Query as RemoteRepo
class CherryPick:
class Status(Enum):
@@ -45,20 +45,21 @@ class CherryPick:
def __init__(self, token, owner, name, team, pr_number, target_branch):
self._gh = RemoteRepo(token, owner=owner, name=name, team=team)
self._pr = self._gh.get_pull_request(pr_number)
self.target_branch = target_branch
self.ssh_url = self._gh.ssh_url
# TODO: check if pull-request is merged.
self.update_pr_branch(self._pr, self.target_branch)
def update_pr_branch(self, pr_data, target_branch):
"""The method is here to avoid unnecessary creation of new objects"""
self._pr = pr_data
self.target_branch = target_branch
self.merge_commit_oid = self._pr["mergeCommit"]["oid"]
self.target_branch = target_branch
self.backport_branch = "backport/{branch}/{pr}".format(
branch=target_branch, pr=pr_number
)
self.cherrypick_branch = "cherrypick/{branch}/{oid}".format(
branch=target_branch, oid=self.merge_commit_oid
)
self.backport_branch = f"backport/{target_branch}/{pr_data['number']}"
self.cherrypick_branch = f"cherrypick/{target_branch}/{self.merge_commit_oid}"
def getCherryPickPullRequest(self):
return self._gh.find_pull_request(
@@ -118,17 +119,16 @@
)
# Create pull-request like a local cherry-pick
title = self._pr["title"].replace('"', r"\"")
pr = self._gh.create_pull_request(
source=self.cherrypick_branch,
target=self.backport_branch,
title="Cherry pick #{number} to {target}: {title}".format(
number=self._pr["number"],
target=self.target_branch,
title=self._pr["title"].replace('"', '\\"'),
),
description="Original pull-request #{}\n\n{}".format(
self._pr["number"], DESCRIPTION
title=(
f'Cherry pick #{self._pr["number"]} '
f"to {self.target_branch}: "
f"{title}"
),
description=f'Original pull-request #{self._pr["number"]}\n\n{DESCRIPTION}',
)
# FIXME: use `team` to leave a single eligible assignee.
@@ -165,11 +165,8 @@
"user.name=robot-clickhouse",
]
pr_title = "Backport #{number} to {target}: {title}".format(
number=self._pr["number"],
target=self.target_branch,
title=self._pr["title"].replace('"', '\\"'),
)
title = (self._pr["title"].replace('"', r"\""),)
pr_title = f"Backport #{self._pr['number']} to {self.target_branch}: {title}"
self._run(git_prefix + ["checkout", "-f", self.backport_branch])
self._run(git_prefix + ["pull", "--ff-only", "origin", self.backport_branch])
@@ -203,9 +200,8 @@
source=self.backport_branch,
target=self.target_branch,
title=pr_title,
description="Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}".format(
self._pr["number"], cherrypick_pr["number"], DESCRIPTION
),
description=f"Original pull-request #{self._pr['number']}\n"
f"Cherry-pick pull-request #{cherrypick_pr['number']}\n\n{DESCRIPTION}",
)
# FIXME: use `team` to leave a single eligible assignee.

View File

@@ -5,10 +5,11 @@ import logging
import os
import re
import git
class RepositoryBase:
def __init__(self, repo_path):
import git
self._repo = git.Repo(repo_path, search_parent_directories=(not repo_path))
@@ -23,22 +24,22 @@ class RepositoryBase:
self.comparator = functools.cmp_to_key(cmp)
def get_head_commit(self):
return self._repo.commit(self._default)
def iterate(self, begin, end):
rev_range = "{}...{}".format(begin, end)
rev_range = f"{begin}...{end}"
for commit in self._repo.iter_commits(rev_range, first_parent=True):
yield commit
class Repository(RepositoryBase):
def __init__(self, repo_path, remote_name, default_branch_name):
super(Repository, self).__init__(repo_path)
super().__init__(repo_path)
self._remote = self._repo.remotes[remote_name]
self._remote.fetch()
self._default = self._remote.refs[default_branch_name]
def get_head_commit(self):
return self._repo.commit(self._default)
def get_release_branches(self):
"""
Returns sorted list of tuples:
@@ -73,7 +74,7 @@ class Repository(RepositoryBase):
class BareRepository(RepositoryBase):
def __init__(self, repo_path, default_branch_name):
super(BareRepository, self).__init__(repo_path)
super().__init__(repo_path)
self._default = self._repo.branches[default_branch_name]
def get_release_branches(self):

View File

@@ -1,7 +1,13 @@
# -*- coding: utf-8 -*-
import requests
import json
import inspect
import logging
import time
from urllib3.util.retry import Retry # type: ignore
import requests # type: ignore
from requests.adapters import HTTPAdapter # type: ignore
class Query:
@@ -10,43 +16,43 @@ class Query:
"""
_PULL_REQUEST = """
author {{
... on User {{
id
login
}}
}}
baseRepository {{
nameWithOwner
}}
mergeCommit {{
oid
parents(first: {min_page_size}) {{
totalCount
nodes {{
oid
}}
}}
}}
mergedBy {{
... on User {{
id
login
}}
}}
baseRefName
closed
headRefName
author {{
... on User {{
id
mergeable
merged
number
title
url
login
}}
}}
baseRepository {{
nameWithOwner
}}
mergeCommit {{
oid
parents(first: {min_page_size}) {{
totalCount
nodes {{
oid
}}
}}
}}
mergedBy {{
... on User {{
id
login
}}
}}
baseRefName
closed
headRefName
id
mergeable
merged
number
title
url
"""
def __init__(self, token, owner, name, team, max_page_size=100, min_page_size=10):
@@ -56,6 +62,7 @@ class Query:
self._owner = owner
self._name = name
self._team = team
self._session = None
self._max_page_size = max_page_size
self._min_page_size = min_page_size
@@ -71,13 +78,13 @@
def get_repository(self):
_QUERY = """
repository(owner: "{owner}" name: "{name}") {{
defaultBranchRef {{
name
}}
id
sshUrl
}}
repository(owner: "{owner}" name: "{name}") {{
defaultBranchRef {{
name
}}
id
sshUrl
}}
"""
query = _QUERY.format(owner=self._owner, name=self._name)
@@ -91,20 +98,20 @@
"""
_QUERY = """
organization(login: "{organization}") {{
team(slug: "{team}") {{
members(first: {max_page_size} {next}) {{
pageInfo {{
hasNextPage
endCursor
}}
nodes {{
id
login
}}
}}
}}
organization(login: "{organization}") {{
team(slug: "{team}") {{
members(first: {max_page_size} {next}) {{
pageInfo {{
hasNextPage
endCursor
}}
nodes {{
id
login
}}
}}
}}
}}
"""
members = {}
@@ -126,20 +133,24 @@
organization=self._owner,
team=self._team,
max_page_size=self._max_page_size,
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
next=f'after: "{result["pageInfo"]["endCursor"]}"',
)
members += dict([(node["login"], node["id"]) for node in result["nodes"]])
            # Update members with the new nodes (dict-merge syntax compatible with py3.8-py3.10)
members = {
**members,
**{node["login"]: node["id"] for node in result["nodes"]},
}
return members
def get_pull_request(self, number):
_QUERY = """
repository(owner: "{owner}" name: "{name}") {{
pullRequest(number: {number}) {{
{pull_request_data}
}}
}}
repository(owner: "{owner}" name: "{name}") {{
pullRequest(number: {number}) {{
{pull_request_data}
}}
}}
"""
query = _QUERY.format(
@@ -153,14 +164,16 @@
def find_pull_request(self, base, head):
_QUERY = """
repository(owner: "{owner}" name: "{name}") {{
pullRequests(first: {min_page_size} baseRefName: "{base}" headRefName: "{head}") {{
nodes {{
{pull_request_data}
}}
totalCount
}}
}}
repository(owner: "{owner}" name: "{name}") {{
pullRequests(
first: {min_page_size} baseRefName: "{base}" headRefName: "{head}"
) {{
nodes {{
{pull_request_data}
}}
totalCount
}}
}}
"""
query = _QUERY.format(
@@ -182,13 +195,13 @@
Get all pull-requests filtered by label name
"""
_QUERY = """
repository(owner: "{owner}" name: "{name}") {{
pullRequests(first: {min_page_size} labels: "{label_name}" states: OPEN) {{
nodes {{
{pull_request_data}
}}
}}
}}
repository(owner: "{owner}" name: "{name}") {{
pullRequests(first: {min_page_size} labels: "{label_name}" states: OPEN) {{
nodes {{
{pull_request_data}
}}
}}
}}
"""
query = _QUERY.format(
@@ -206,35 +219,32 @@
"""
_QUERY = """
repository(owner: "{owner}" name: "{name}") {{
defaultBranchRef {{
target {{
... on Commit {{
history(first: {max_page_size} {next}) {{
pageInfo {{
hasNextPage
endCursor
}}
nodes {{
oid
associatedPullRequests(first: {min_page_size}) {{
totalCount
nodes {{
... on PullRequest {{
{pull_request_data}
repository(owner: "{owner}" name: "{name}") {{
defaultBranchRef {{
target {{
... on Commit {{
history(first: {max_page_size} {next}) {{
pageInfo {{
hasNextPage
endCursor
}}
nodes {{
oid
associatedPullRequests(first: {min_page_size}) {{
totalCount
nodes {{
... on PullRequest {{
{pull_request_data}
labels(first: {min_page_size}) {{
totalCount
pageInfo {{
hasNextPage
endCursor
}}
nodes {{
name
color
}}
}}
}}
labels(first: {min_page_size}) {{
totalCount
pageInfo {{
hasNextPage
endCursor
}}
nodes {{
name
color
}}
}}
}}
@@ -243,6 +253,9 @@
}}
}}
}}
}}
}}
}}
"""
pull_requests = []
@@ -267,7 +280,7 @@
max_page_size=self._max_page_size,
min_page_size=self._min_page_size,
pull_request_data=self._PULL_REQUEST,
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
next=f'after: "{result["pageInfo"]["endCursor"]}"',
)
for commit in result["nodes"]:
@@ -285,7 +298,7 @@
for pull_request in commit["associatedPullRequests"]["nodes"]:
if (
pull_request["baseRepository"]["nameWithOwner"]
== "{}/{}".format(self._owner, self._name)
== f"{self._owner}/{self._name}"
and pull_request["baseRefName"] == self.default_branch
and pull_request["mergeCommit"]["oid"] == commit["oid"]
):
@@ -297,19 +310,19 @@
self, source, target, title, description="", draft=False, can_modify=True
):
_QUERY = """
createPullRequest(input: {{
baseRefName: "{target}",
headRefName: "{source}",
repositoryId: "{id}",
title: "{title}",
body: "{body}",
draft: {draft},
maintainerCanModify: {modify}
}}) {{
pullRequest {{
{pull_request_data}
}}
}}
createPullRequest(input: {{
baseRefName: "{target}",
headRefName: "{source}",
repositoryId: "{id}",
title: "{title}",
body: "{body}",
draft: {draft},
maintainerCanModify: {modify}
}}) {{
pullRequest {{
{pull_request_data}
}}
}}
"""
query = _QUERY.format(
@@ -324,29 +337,29 @@
)
return self._run(query, is_mutation=True)["createPullRequest"]["pullRequest"]
def merge_pull_request(self, id):
def merge_pull_request(self, pr_id):
_QUERY = """
mergePullRequest(input: {{
pullRequestId: "{id}"
}}) {{
pullRequest {{
{pull_request_data}
}}
}}
mergePullRequest(input: {{
pullRequestId: "{pr_id}"
}}) {{
pullRequest {{
{pull_request_data}
}}
}}
"""
query = _QUERY.format(id=id, pull_request_data=self._PULL_REQUEST)
query = _QUERY.format(pr_id=pr_id, pull_request_data=self._PULL_REQUEST)
return self._run(query, is_mutation=True)["mergePullRequest"]["pullRequest"]
# FIXME: figure out how to add more assignees at once
def add_assignee(self, pr, assignee):
_QUERY = """
addAssigneesToAssignable(input: {{
assignableId: "{id1}",
assigneeIds: "{id2}"
}}) {{
clientMutationId
}}
addAssigneesToAssignable(input: {{
assignableId: "{id1}",
assigneeIds: "{id2}"
}}) {{
clientMutationId
}}
"""
query = _QUERY.format(id1=pr["id"], id2=assignee["id"])
@@ -362,28 +375,28 @@
"""
_GET_LABEL = """
repository(owner: "{owner}" name: "{name}") {{
labels(first: {max_page_size} {next} query: "{label_name}") {{
pageInfo {{
hasNextPage
endCursor
}}
nodes {{
id
name
color
}}
}}
}}
repository(owner: "{owner}" name: "{name}") {{
labels(first: {max_page_size} {next} query: "{label_name}") {{
pageInfo {{
hasNextPage
endCursor
}}
nodes {{
id
name
color
}}
}}
}}
"""
_SET_LABEL = """
addLabelsToLabelable(input: {{
labelableId: "{pr_id}",
labelIds: "{label_id}"
}}) {{
clientMutationId
}}
addLabelsToLabelable(input: {{
labelableId: "{pr_id}",
labelIds: "{label_id}"
}}) {{
clientMutationId
}}
"""
labels = []
@@ -404,10 +417,10 @@
name=self._name,
label_name=label_name,
max_page_size=self._max_page_size,
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
next=f'after: "{result["pageInfo"]["endCursor"]}"',
)
labels += [label for label in result["nodes"]]
labels += list(result["nodes"])
if not labels:
return
@@ -415,83 +428,105 @@
query = _SET_LABEL.format(pr_id=pull_request["id"], label_id=labels[0]["id"])
self._run(query, is_mutation=True)
def _run(self, query, is_mutation=False):
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
# sleep a little, because we querying github too often
print("Request, is mutation", is_mutation)
time.sleep(0.5)
def requests_retry_session(
retries=5,
backoff_factor=0.5,
@property
def session(self):
if self._session is not None:
return self._session
retries = 5
self._session = requests.Session()
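        # Retry transient transport failures: GitHub answers 403 on secondary rate limits and 5xx on hiccups.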
retry = Retry(
total=retries,
read=retries,
connect=retries,
backoff_factor=1,
status_forcelist=(403, 500, 502, 504),
session=None,
):
session = session or requests.Session()
retry = Retry(
total=retries,
read=retries,
connect=retries,
backoff_factor=backoff_factor,
status_forcelist=status_forcelist,
)
adapter = HTTPAdapter(max_retries=retry)
session.mount("http://", adapter)
session.mount("https://", adapter)
return session
)
adapter = HTTPAdapter(max_retries=retry)
self._session.mount("http://", adapter)
self._session.mount("https://", adapter)
return self._session
headers = {"Authorization": "bearer {}".format(self._token)}
def _run(self, query, is_mutation=False):
# Get caller and parameters from the stack to track the progress
frame = inspect.getouterframes(inspect.currentframe(), 2)[1]
caller = frame[3]
f_parameters = inspect.signature(getattr(self, caller)).parameters
parameters = ", ".join(str(frame[0].f_locals[p]) for p in f_parameters)
mutation = ""
if is_mutation:
query = """
mutation {{
{query}
}}
""".format(
query=query
)
else:
query = """
query {{
{query}
rateLimit {{
cost
remaining
}}
}}
""".format(
query=query
)
mutation = ", is mutation"
print(f"---GraphQL request for {caller}({parameters}){mutation}---")
while True:
request = requests_retry_session().post(
headers = {"Authorization": f"bearer {self._token}"}
if is_mutation:
query = f"""
mutation {{
{query}
}}
"""
else:
query = f"""
query {{
{query}
rateLimit {{
cost
remaining
}}
}}
"""
def request_with_retry(retry=0):
max_retries = 5
            # From time to time we face certain errors where it is worth
            # retrying instead of failing completely.
            # We sleep progressively longer between attempts.
progressive_sleep = 5 * sum(i + 1 for i in range(retry))
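            # e.g. 5s after the first retry, then 15s, 30s, 50s, ...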
if progressive_sleep:
logging.warning(
"Retry GraphQL request %s time, sleep %s seconds",
retry,
progressive_sleep,
)
time.sleep(progressive_sleep)
response = self.session.post(
"https://api.github.com/graphql", json={"query": query}, headers=headers
)
if request.status_code == 200:
result = request.json()
result = response.json()
if response.status_code == 200:
if "errors" in result:
raise Exception(
"Errors occurred: {}\nOriginal query: {}".format(
result["errors"], query
)
f"Errors occurred: {result['errors']}\nOriginal query: {query}"
)
if not is_mutation:
import inspect
caller = inspect.getouterframes(inspect.currentframe(), 2)[1][3]
if caller not in list(self.api_costs.keys()):
if caller not in self.api_costs:
self.api_costs[caller] = 0
self.api_costs[caller] += result["data"]["rateLimit"]["cost"]
return result["data"]
else:
import json
raise Exception(
"Query failed with code {code}:\n{json}".format(
code=request.status_code,
json=json.dumps(request.json(), indent=4),
)
elif (
response.status_code == 403
and "secondary rate limit" in result["message"]
):
if retry <= max_retries:
logging.warning("Secondary rate limit reached")
return request_with_retry(retry + 1)
elif response.status_code == 502 and "errors" in result:
                too_much_data = any(
                    "message" in err
                    and "This may be the result of a timeout" in err["message"]
                    for err in result["errors"]
                )
                if too_much_data:
                    logging.warning(
                        "Too much data is requested, decreasing page size %s by 10%%",
self._max_page_size,
)
self._max_page_size = int(self._max_page_size * 0.9)
return request_with_retry(retry)
data = json.dumps(result, indent=4)
raise Exception(f"Query failed with code {response.status_code}:\n{data}")
return request_with_retry()
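A hedged usage sketch of the class above; the token and numbers are placeholders, and the branch naming follows the `backport/{branch}/{pr}` scheme introduced in this commit:

``` python
# Placeholder values; Query, get_pull_request and find_pull_request are defined above.
gh = Query("<github-token>", owner="ClickHouse", name="ClickHouse", team="core")
pr = gh.get_pull_request(12345)
backport_pr = gh.find_pull_request(base="22.3", head=f"backport/22.3/{pr['number']}")
```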

View File

@@ -420,6 +420,10 @@ class SettingsRandomizer:
"max_block_size": lambda: random.randint(8000, 100000),
"max_threads": lambda: random.randint(1, 64),
"optimize_or_like_chain": lambda: random.randint(0, 1),
"optimize_read_in_order": lambda: random.randint(0, 1),
"read_in_order_two_level_merge_threshold": lambda: random.randint(0, 100),
"optimize_aggregation_in_order": lambda: random.randint(0, 1),
"aggregation_in_order_max_block_bytes": lambda: random.randint(0, 50000000),
}
@staticmethod

View File

@@ -1,4 +1,6 @@
#!/usr/bin/env bash
# Tags: no-tsan
# FIXME It became flaky after upgrading to llvm-14 due to obscure freezes in tsan
# Such a huge timeout mostly for debug build.
CLICKHOUSE_CURL_TIMEOUT=60

View File

@@ -24,7 +24,7 @@ $CLICKHOUSE_CLIENT -q "SELECT a FROM m ORDER BY a LIMIT 5"
$CLICKHOUSE_CLIENT -q "SELECT a, s FROM m ORDER BY a, s LIMIT 10"
# Not a single .sql test with max_rows_to_read because it doesn't work with Merge storage
rows_read=$($CLICKHOUSE_CLIENT -q "SELECT a FROM m ORDER BY a LIMIT 10 FORMAT JSON" --max_threads=1 --max_block_size=20 | grep "rows_read" | sed 's/[^0-9]*//g')
rows_read=$($CLICKHOUSE_CLIENT -q "SELECT a FROM m ORDER BY a LIMIT 10 FORMAT JSON" --max_threads=1 --max_block_size=20 --optimize_read_in_order=1 | grep "rows_read" | sed 's/[^0-9]*//g')
# Expected number of read rows with a bit margin
if [[ $rows_read -lt 500 ]]
@@ -36,7 +36,7 @@ fi
$CLICKHOUSE_CLIENT -q "SELECT '---StorageBuffer---'"
$CLICKHOUSE_CLIENT -q "CREATE TABLE buf (a UInt32, s String) engine = Buffer('$CLICKHOUSE_DATABASE', s2, 16, 10, 100, 10000, 1000000, 10000000, 100000000)"
$CLICKHOUSE_CLIENT -q "SELECT a, s FROM buf ORDER BY a, s LIMIT 10"
rows_read=$($CLICKHOUSE_CLIENT -q "SELECT a FROM buf ORDER BY a LIMIT 10 FORMAT JSON" --max_threads=1 --max_block_size=20 | grep "rows_read" | sed 's/[^0-9]*//g')
rows_read=$($CLICKHOUSE_CLIENT -q "SELECT a FROM buf ORDER BY a LIMIT 10 FORMAT JSON" --max_threads=1 --max_block_size=20 --optimize_read_in_order=1 | grep "rows_read" | sed 's/[^0-9]*//g')
# Expected number of read rows with a bit margin
if [[ $rows_read -lt 500 ]]
@@ -48,7 +48,7 @@ fi
$CLICKHOUSE_CLIENT -q "SELECT '---MaterializedView---'"
$CLICKHOUSE_CLIENT -q "CREATE MATERIALIZED VIEW mv (a UInt32, s String) engine = MergeTree ORDER BY s SETTINGS min_bytes_for_wide_part = 0 POPULATE AS SELECT a, s FROM s1 WHERE a % 7 = 0"
$CLICKHOUSE_CLIENT -q "SELECT a, s FROM mv ORDER BY s LIMIT 10"
rows_read=$($CLICKHOUSE_CLIENT -q "SELECT a, s FROM mv ORDER BY s LIMIT 10 FORMAT JSON" --max_threads=1 --max_block_size=20 | grep "rows_read" | sed 's/[^0-9]*//g')
rows_read=$($CLICKHOUSE_CLIENT -q "SELECT a, s FROM mv ORDER BY s LIMIT 10 FORMAT JSON" --max_threads=1 --max_block_size=20 --optimize_read_in_order=1 | grep "rows_read" | sed 's/[^0-9]*//g')
if [[ $rows_read -lt 500 ]]
then echo "OK"

View File

@@ -1,5 +1,6 @@
-- Tags: no-s3-storage
-- Tags: no-s3-storage, no-tsan
-- FIXME this test fails with S3 due to a bug in DiskCacheWrapper
-- FIXME It became flaky after upgrading to llvm-14 due to obscure freezes in tsan
drop table if exists txn_counters;
create table txn_counters (n Int64, creation_tid DEFAULT transactionID()) engine=MergeTree order by n;

View File

@@ -1,4 +1,6 @@
#!/usr/bin/env bash
# Tags: no-tsan
# FIXME It became flaky after upgrading to llvm-14 due to obscure freezes in tsan
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@@ -1,5 +1,6 @@
#!/usr/bin/env bash
# Tags: long
# Tags: long, no-tsan
# FIXME It became flaky after upgrading to llvm-14 due to obscure freezes in tsan
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@@ -3,11 +3,11 @@ hello 1 3 world
9
9 (0,1)
key tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 1) tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 2) tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 3) tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 4) tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 5)
4 10 20 10 20 30
3 70 20 10 20 30
2 11 20 10 20 30
5 10 20 10 20 30
1 20 20 10 20 30
2 11 20 10 20 30
3 70 20 10 20 30
4 10 20 10 20 30
5 10 20 10 20 30
6 10 20 10 20 30
7 18 20 10 20 30
8 30 20 10 20 30

View File

@@ -6,5 +6,5 @@ select argMax(untuple(x)), min(x) from (select (number, number + 1) as x from nu
drop table if exists kv;
create table kv (key int, v1 int, v2 int, v3 int, v4 int, v5 int) engine MergeTree order by key;
insert into kv values (1, 10, 20, 10, 20, 30), (2, 11, 20, 10, 20, 30), (1, 18, 20, 10, 20, 30), (1, 20, 20, 10, 20, 30), (3, 70, 20, 10, 20, 30), (4, 10, 20, 10, 20, 30), (1, 10, 20, 10, 20, 30), (5, 10, 20, 10, 20, 30), (1, 10, 20, 10, 20, 30), (8, 30, 20, 10, 20, 30), (1, 10, 20, 10, 20, 30), (6, 10, 20, 10, 20, 30), (1, 10, 20, 10, 20, 30), (7, 18, 20, 10, 20, 30), (1, 10, 20, 10, 20, 30), (7, 10, 20, 10, 20, 30), (1, 10, 20, 10, 20, 30), (8, 10, 20, 10, 20, 30), (1, 10, 20, 10, 20, 30);
select key, untuple(argMax((* except (key),), v1)) from kv group by key format TSVWithNames;
select key, untuple(argMax((* except (key),), v1)) from kv group by key order by key format TSVWithNames;
drop table if exists kv;

View File

@@ -6,7 +6,7 @@ INSERT INTO tab VALUES (1,1),(1,2),(1,3),(1,4),(1,5);
INSERT INTO tab VALUES (2,6),(2,7),(2,8),(2,9),(2,0);
SELECT * FROM tab ORDER BY x LIMIT 3;
SELECT * FROM tab ORDER BY x LIMIT 4;
SELECT * FROM tab ORDER BY x LIMIT 3 SETTINGS optimize_read_in_order=1;
SELECT * FROM tab ORDER BY x LIMIT 4 SETTINGS optimize_read_in_order=1;
DROP TABLE IF EXISTS tab;

View File

@@ -1,5 +1,5 @@
[0,2,3] id2
[1,2,3] id1
[0,2,3] id2
test [1,2,3,4]
2 fre
3 jhg

View File

@@ -16,7 +16,8 @@ WITH
SELECT arraySort(arrayIntersect(argMax(seqs, create_time), arr1)) AS common, id
FROM tags
WHERE id LIKE 'id%'
GROUP BY id;
GROUP BY id
ORDER BY id;
DROP TABLE tags;

View File

@@ -8,9 +8,9 @@
14 21
16 24
18 27
\N 0
\N -1
\N -2
\N -1
\N 0
\N 0
\N -1
\N -2

View File

@@ -3,13 +3,14 @@ DROP TABLE IF EXISTS nullable_key_without_final_mark;
DROP TABLE IF EXISTS nullable_minmax_index;
SET max_threads = 1;
SET optimize_read_in_order=0;
CREATE TABLE nullable_key (k Nullable(int), v int) ENGINE MergeTree ORDER BY k SETTINGS allow_nullable_key = 1, index_granularity = 1;
INSERT INTO nullable_key SELECT number * 2, number * 3 FROM numbers(10);
INSERT INTO nullable_key SELECT NULL, -number FROM numbers(3);
SELECT * FROM nullable_key ORDER BY k;
SELECT * FROM nullable_key ORDER BY k, v;
SET force_primary_key = 1;
SET max_rows_to_read = 3;

View File

@@ -12,7 +12,7 @@ set max_memory_usage='500M';
set max_threads=1;
set max_block_size=500;
select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null; -- { serverError 241; }
select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null settings optimize_aggregation_in_order=0; -- { serverError 241; }
select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null settings optimize_aggregation_in_order=1;
-- for WITH TOTALS previous groups should be kept.
select key, groupArray(repeat('a', 200)), count() from data_01513 group by key with totals format Null settings optimize_aggregation_in_order=1; -- { serverError 241; }

View File

@@ -8,6 +8,6 @@ ALTER TABLE enum_test MODIFY COLUMN e Enum8('IU' = 1, 'WS' = 2, 'PS' = 3);
INSERT INTO enum_test SELECT '2020-10-09 00:00:00', 'h1', 'PS' from numbers(1);
SELECT * FROM enum_test ORDER BY timestamp, e desc;
SELECT * FROM enum_test ORDER BY timestamp, e desc SETTINGS optimize_read_in_order=1;
DROP TABLE IF EXISTS enum_test;

View File

@@ -1,4 +1,5 @@
SET optimize_monotonous_functions_in_order_by = 1;
SET optimize_read_in_order = 1;
DROP TABLE IF EXISTS test_order_by;

View File

@@ -1,3 +1,6 @@
-- Tags: no-tsan
-- FIXME It became flaky after upgrading to llvm-14 due to obscure freezes in tsan
drop table if exists perf_lc_num;
CREATE TABLE perf_lc_num(  num UInt8,  arr Array(LowCardinality(Int64)) default [num]  ) ENGINE = TinyLog;

View File

@@ -1,5 +1,6 @@
#!/usr/bin/env bash
# Tags: long, no-fasttest
# Tags: long, no-fasttest, no-tsan
# FIXME It became flaky after upgrading to llvm-14 due to obscure freezes in tsan
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
CLICKHOUSE_CLIENT="$CLICKHOUSE_CLIENT --optimize_move_to_prewhere=1 --convert_query_to_cnf=0"
CLICKHOUSE_CLIENT="$CLICKHOUSE_CLIENT --optimize_move_to_prewhere=1 --convert_query_to_cnf=0 --optimize_read_in_order=1"
$CLICKHOUSE_CLIENT -q "drop table if exists test_index"
$CLICKHOUSE_CLIENT -q "drop table if exists idx"

View File

@@ -1,4 +1,5 @@
-- Tags: no-random-settings
-- Tags: no-random-settings, no-tsan
-- FIXME It became flaky after upgrading to llvm-14 due to obscure freezes in tsan
DROP TABLE IF EXISTS order_by_desc;

View File

@@ -1,4 +1,6 @@
SET max_threads=0;
SET optimize_read_in_order=1;
SET read_in_order_two_level_merge_threshold=100;
DROP TABLE IF EXISTS t_read_in_order;

View File

@@ -7,6 +7,7 @@ SETTINGS index_granularity = 4;
INSERT INTO t_max_rows_to_read SELECT number FROM numbers(100);
SET max_threads = 1;
SET optimize_read_in_order = 1;
SELECT a FROM t_max_rows_to_read WHERE a = 10 SETTINGS max_rows_to_read = 4;

View File

@@ -2,7 +2,7 @@
0 0 0
-- { echoOn }
insert into data_02233 select number%10, number%3, number from numbers(100);
explain pipeline select parent_key, child_key, count() from data_02233 group by parent_key, child_key with totals order by parent_key, child_key settings max_threads=1, optimize_aggregation_in_order=1;
explain pipeline select parent_key, child_key, count() from data_02233 group by parent_key, child_key with totals order by parent_key, child_key settings max_threads=1, optimize_aggregation_in_order=1, read_in_order_two_level_merge_threshold=1;
(Expression)
ExpressionTransform × 2
(Sorting)
@@ -20,7 +20,7 @@ ExpressionTransform × 2
ExpressionTransform
(ReadFromMergeTree)
MergeTreeInOrder 0 → 1
explain pipeline select parent_key, child_key, count() from data_02233 group by parent_key, child_key with totals order by parent_key, child_key settings max_threads=1;
explain pipeline select parent_key, child_key, count() from data_02233 group by parent_key, child_key with totals order by parent_key, child_key settings max_threads=1, optimize_aggregation_in_order=0, read_in_order_two_level_merge_threshold=1;
(Expression)
ExpressionTransform × 2
(Sorting)
@@ -103,7 +103,7 @@ select parent_key, child_key, count() from data_02233 group by parent_key, child
9 2 3
0 0 100
select parent_key, child_key, count() from data_02233 group by parent_key, child_key with totals order by parent_key, child_key settings max_threads=1;
select parent_key, child_key, count() from data_02233 group by parent_key, child_key with totals order by parent_key, child_key settings max_threads=1, optimize_aggregation_in_order=0;
0 0 4
0 1 3
0 2 3

View File

@@ -7,11 +7,11 @@ SELECT child_key, parent_key, child_key FROM data_02233 GROUP BY parent_key, chi
-- { echoOn }
insert into data_02233 select number%10, number%3, number from numbers(100);
explain pipeline select parent_key, child_key, count() from data_02233 group by parent_key, child_key with totals order by parent_key, child_key settings max_threads=1, optimize_aggregation_in_order=1;
explain pipeline select parent_key, child_key, count() from data_02233 group by parent_key, child_key with totals order by parent_key, child_key settings max_threads=1;
explain pipeline select parent_key, child_key, count() from data_02233 group by parent_key, child_key with totals order by parent_key, child_key settings max_threads=1, optimize_aggregation_in_order=1, read_in_order_two_level_merge_threshold=1;
explain pipeline select parent_key, child_key, count() from data_02233 group by parent_key, child_key with totals order by parent_key, child_key settings max_threads=1, optimize_aggregation_in_order=0, read_in_order_two_level_merge_threshold=1;
select parent_key, child_key, count() from data_02233 group by parent_key, child_key with totals order by parent_key, child_key settings max_threads=1, optimize_aggregation_in_order=1;
select parent_key, child_key, count() from data_02233 group by parent_key, child_key with totals order by parent_key, child_key settings max_threads=1, optimize_aggregation_in_order=1, max_block_size=1;
select parent_key, child_key, count() from data_02233 group by parent_key, child_key with totals order by parent_key, child_key settings max_threads=1;
select parent_key, child_key, count() from data_02233 group by parent_key, child_key with totals order by parent_key, child_key settings max_threads=1, optimize_aggregation_in_order=0;
-- fuzzer
SELECT child_key, parent_key, child_key FROM data_02233 GROUP BY parent_key, child_key, child_key ORDER BY child_key, parent_key ASC NULLS LAST SETTINGS max_threads = 1, optimize_aggregation_in_order = 1;

View File

@@ -0,0 +1,22 @@
-- https://github.com/ClickHouse/ClickHouse/issues/34697
SELECT table1_id FROM (
SELECT first.table1_id
FROM (SELECT number+1 as table1_id FROM numbers(1)) as first
JOIN (SELECT number+1 as table2_id FROM numbers(1)) as second ON first.table1_id = second.table2_id
JOIN (SELECT number+1 as table3_id FROM numbers(1)) as third ON first.table1_id = third.table3_id
); -- { serverError UNKNOWN_IDENTIFIER }
SELECT table1_id FROM (
SELECT first.table1_id
FROM (SELECT number+1 as table1_id FROM numbers(1)) as first
JOIN (SELECT number+1 as table2_id FROM numbers(1)) as second ON first.table1_id = second.table2_id
JOIN (SELECT number+1 as table3_id FROM numbers(1)) as third ON first.table1_id = third.table3_id
) SETTINGS multiple_joins_try_to_keep_original_names = 1;
SELECT aaa FROM (
SELECT first.table1_id as aaa
FROM (SELECT number+1 as table1_id FROM numbers(1)) as first
JOIN (SELECT number+1 as table2_id FROM numbers(1)) as second ON first.table1_id = second.table2_id
JOIN (SELECT number+1 as table3_id FROM numbers(1)) as third ON first.table1_id = third.table3_id
) SETTINGS multiple_joins_try_to_keep_original_names = 1;

View File

@@ -1,4 +1,6 @@
#!/usr/bin/env bash
# Tags: no-tsan
# FIXME It became flaky after upgrading to llvm-14 due to obscure freezes in tsan
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh