Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-09-20 08:40:50 +00:00)

Commit 8fce71838c: Merge branch 'master' into fix-01111_create_drop_replicated_db_stress
@@ -4,6 +4,8 @@ services:
  kafka_zookeeper:
    image: zookeeper:3.4.9
    hostname: kafka_zookeeper
    ports:
      - 2181:2181
    environment:
      ZOO_MY_ID: 1
      ZOO_PORT: 2181
@@ -15,15 +17,14 @@ services:
    image: confluentinc/cp-kafka:5.2.0
    hostname: kafka1
    ports:
-     - ${KAFKA_EXTERNAL_PORT:-8081}:${KAFKA_EXTERNAL_PORT:-8081}
+     - ${KAFKA_EXTERNAL_PORT}:${KAFKA_EXTERNAL_PORT}
    environment:
      KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:${KAFKA_EXTERNAL_PORT},OUTSIDE://kafka1:19092
      KAFKA_ADVERTISED_HOST_NAME: kafka1
      KAFKA_LISTENERS: INSIDE://0.0.0.0:${KAFKA_EXTERNAL_PORT},OUTSIDE://0.0.0.0:19092
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT
      KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE
      KAFKA_BROKER_ID: 1
-     KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181"
+     KAFKA_ZOOKEEPER_CONNECT: kafka_zookeeper:2181
      KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
    depends_on:
@@ -35,13 +36,38 @@ services:
    image: confluentinc/cp-schema-registry:5.2.0
    hostname: schema-registry
    ports:
-     - ${SCHEMA_REGISTRY_EXTERNAL_PORT:-12313}:${SCHEMA_REGISTRY_INTERNAL_PORT:-12313}
+     - ${SCHEMA_REGISTRY_EXTERNAL_PORT}:${SCHEMA_REGISTRY_EXTERNAL_PORT}
    environment:
      SCHEMA_REGISTRY_HOST_NAME: schema-registry
-     SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT
      SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
+     SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_EXTERNAL_PORT}
+     SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: noauth
    depends_on:
      - kafka_zookeeper
      - kafka1
    restart: always
    security_opt:
      - label:disable

+ schema-registry-auth:
+   image: confluentinc/cp-schema-registry:5.2.0
+   hostname: schema-registry-auth
+   ports:
+     - ${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}
+   environment:
+     SCHEMA_REGISTRY_HOST_NAME: schema-registry-auth
+     SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}
+     SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
+     SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC
+     SCHEMA_REGISTRY_AUTHENTICATION_ROLES: user
+     SCHEMA_REGISTRY_AUTHENTICATION_REALM: RealmFooBar
+     SCHEMA_REGISTRY_OPTS: "-Djava.security.auth.login.config=/etc/schema-registry/secrets/schema_registry_jaas.conf"
+     SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: auth
+   volumes:
+     - ${SCHEMA_REGISTRY_DIR:-}/secrets:/etc/schema-registry/secrets
+   depends_on:
+     - kafka_zookeeper
+     - kafka1
+   restart: always
+   security_opt:
+     - label:disable
@@ -76,6 +76,7 @@ The supported formats are:
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
+| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | ✔ |
| [Native](#native) | ✔ | ✔ |
| [Null](#null) | ✗ | ✔ |
| [XML](#xml) | ✗ | ✔ |
@@ -1515,6 +1516,23 @@ If setting [input_format_with_types_use_header](/docs/en/operations/settings/set
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::

+## RowBinaryWithDefaults {#rowbinarywithdefaults}
+
+Similar to [RowBinary](#rowbinary), but with an extra byte before each column that indicates whether the default value should be used.
+
+Examples:
+
+```sql
+:) select * from format('RowBinaryWithDefaults', 'x UInt32 default 42, y UInt32', x'010001000000')
+
+┌──x─┬─y─┐
+│ 42 │ 1 │
+└────┴───┘
+```
+
+For column `x`, there is only one byte `01`, which indicates that the default value should be used; no other data follows this byte.
+For column `y`, the data starts with the byte `00`, which indicates that the column has an actual value, read from the subsequent data `01000000`.
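To make the flag-byte layout described above concrete, here is a minimal decoder sketch for one `UInt32` column. It is illustrative only: the helper name and the little-endian assumption are mine, not ClickHouse's actual reader.

```cpp
#include <cstdint>
#include <cstring>
#include <optional>

/// Hypothetical reader for one RowBinaryWithDefaults UInt32 value.
/// A leading 0x01 byte means "use the column default"; a 0x00 byte
/// means the actual little-endian value follows.
std::optional<uint32_t> readUInt32WithDefault(const uint8_t *& pos)
{
    if (*pos++ == 0x01)
        return std::nullopt;                 // caller substitutes the column default
    uint32_t value;
    std::memcpy(&value, pos, sizeof(value)); // assumes little-endian input
    pos += sizeof(value);
    return value;
}
```

Walking the documented payload `x'010001000000'` with this reader yields no explicit value for `x` (so its default `42` applies), followed by `1` for `y`.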
## RowBinary format settings {#row-binary-format-settings}

- [format_binary_max_string_size](/docs/en/operations/settings/settings-formats.md/#format_binary_max_string_size) - The maximum allowed size for String in RowBinary format. Default value - `1GiB`.
@@ -30,7 +30,7 @@ description: In order to effectively mitigate possible human errors, you should
```

:::note ALL
-`ALL` is only applicable to the `RESTORE` command prior to version 23.4 of Clickhouse.
+Prior to version 23.4 of ClickHouse, `ALL` was only applicable to the `RESTORE` command.
:::

## Background
@@ -1325,6 +1325,17 @@ Default value: 0.

Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format.

+Format:
+``` text
+http://[user:password@]machine[:port]
+```
+
+Examples:
+``` text
+http://registry.example.com:8081
+http://admin:secret@registry.example.com:8081
+```
+
Default value: `Empty`.

### output_format_avro_codec {#output_format_avro_codec}
@@ -722,7 +722,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d

## age

-Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 second.
+Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 microsecond.
E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit.

For an alternative to `age`, see function `date\_diff`.
@@ -738,6 +738,8 @@ age('unit', startdate, enddate, [timezone])
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
    Possible values:

+   - `microsecond` (possible abbreviations: `us`, `u`)
+   - `millisecond` (possible abbreviations: `ms`)
    - `second` (possible abbreviations: `ss`, `s`)
    - `minute` (possible abbreviations: `mi`, `n`)
    - `hour` (possible abbreviations: `hh`, `h`)
@@ -813,6 +815,8 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
    Possible values:

+   - `microsecond` (possible abbreviations: `us`, `u`)
+   - `millisecond` (possible abbreviations: `ms`)
    - `second` (possible abbreviations: `ss`, `s`)
    - `minute` (possible abbreviations: `mi`, `n`)
    - `hour` (possible abbreviations: `hh`, `h`)
@@ -51,7 +51,7 @@ Calculates the MD5 from a string and returns the resulting set of bytes as Fixed
If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead.
If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))).

-## sipHash64 (#hash_functions-siphash64)
+## sipHash64 {#hash_functions-siphash64}

Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.
@@ -63,9 +63,9 @@ This is a cryptographic hash function. It works at least three times faster than

The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm:

1. The first and the second hash value are concatenated to an array which is hashed.
2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
3. This calculation is repeated for all remaining hash values of the original input.
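A schematic of that combination rule in code. This is a sketch of the described algorithm only; `hashBytes` is a placeholder for the scalar hash (ClickHouse uses SipHash-2-4 internally), not a real API.

```cpp
#include <cstdint>
#include <functional>
#include <string_view>
#include <vector>

// Placeholder scalar hash; stands in for SipHash-2-4.
static uint64_t hashBytes(const void * data, size_t size)
{
    return std::hash<std::string_view>{}(
        std::string_view(static_cast<const char *>(data), size));
}

/// The combination rule from the docs: hash the concatenation of the first
/// two hash values, then fold each remaining hash in the same way.
uint64_t combineHashes(const std::vector<uint64_t> & hashes)
{
    uint64_t acc = hashes.at(0);
    for (size_t i = 1; i < hashes.size(); ++i)
    {
        uint64_t pair[2] = {acc, hashes[i]};  // concatenated to an array...
        acc = hashBytes(pair, sizeof(pair));  // ...which is hashed
    }
    return acc;
}
```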
**Arguments**
@@ -1267,3 +1267,36 @@ Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).

If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
+
+## firstLine
+
+Returns the first line from a multi-line string.
+
+**Syntax**
+
+```sql
+firstLine(val)
+```
+
+**Arguments**
+
+- `val` - Input value. [String](../data-types/string.md)
+
+**Returned value**
+
+- The first line of the input value, or the whole value if there are no line separators. [String](../data-types/string.md)
+
+**Example**
+
+```sql
+select firstLine('foo\nbar\nbaz');
+```
+
+Result:
+
+```result
+┌─firstLine('foo\nbar\nbaz')─┐
+│ foo                        │
+└────────────────────────────┘
+```
@@ -97,7 +97,7 @@ This is an experimental feature that may change in backwards-incompatible ways i
:::

```sql
-CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
+CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
```

Live views store the result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. The query result, as well as the partial result needed to combine with new data, is stored in memory, providing increased performance for repeated queries. Live views can provide push notifications when the query result changes, using the [WATCH](../../../sql-reference/statements/watch.md) query.
@@ -134,7 +134,7 @@ Multiple path components can have globs. For being processed file must exist and

- `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character.
-- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
+- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`, including `/`.
- `{N..M}` — Substitutes any number in range from N to M including both borders.
- `**` - Fetches all files inside the folder recursively.
@@ -625,7 +625,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d

## age

-Calculates the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated with a precision of 1 second.
+Calculates the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated with a precision of 1 microsecond.
For example, the difference between `2021-12-29` and `2022-01-01` is 3 days for the `day` unit, 0 months for the `month` unit, and 0 years for the `year` unit.

**Syntax**
@@ -639,6 +639,8 @@ age('unit', startdate, enddate, [timezone])
- `unit` — the unit of time in which the function's return value is expressed. [String](../../sql-reference/data-types/string.md).
    Possible values:

+   - `microsecond` (possible abbreviations: `us`, `u`)
+   - `millisecond` (possible abbreviations: `ms`)
    - `second` (possible abbreviations: `ss`, `s`)
    - `minute` (possible abbreviations: `mi`, `n`)
    - `hour` (possible abbreviations: `hh`, `h`)
@@ -712,6 +714,8 @@ date_diff('unit', startdate, enddate, [timezone])
- `unit` — the unit of time in which the function's return value is expressed. [String](../../sql-reference/data-types/string.md).
    Possible values:

+   - `microsecond` (possible abbreviations: `us`, `u`)
+   - `millisecond` (possible abbreviations: `ms`)
    - `second` (possible abbreviations: `ss`, `s`)
    - `minute` (possible abbreviations: `mi`, `n`)
    - `hour` (possible abbreviations: `hh`, `h`)
@@ -1124,3 +1124,39 @@ Do Nothing for 2 Minutes 2:00
Does not take the language into account, so for Turkish the result might not be entirely correct.
If the length of the UTF-8 byte sequence differs between the upper and lower case of a code point, the result may be incorrect for that code point.
If the string contains a byte sequence that is not valid UTF-8, the behavior is undefined.
+
+## firstLine
+
+Returns the first line of a multi-line text.
+
+**Syntax**
+
+```sql
+firstLine(val)
+```
+
+**Arguments**
+
+- `val` — the text to process. [String](../data-types/string.md)
+
+**Returned value**
+
+- The first line of the text, or the whole text if there are no line breaks.
+
+Type: [String](../data-types/string.md)
+
+**Example**
+
+Query:
+
+```sql
+select firstLine('foo\nbar\nbaz');
+```
+
+Result:
+
+```result
+┌─firstLine('foo\nbar\nbaz')─┐
+│ foo                        │
+└────────────────────────────┘
+```
@@ -73,7 +73,7 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
To use `LIVE VIEW` and `WATCH` queries, enable the [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting.
:::
```sql
-CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
+CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
```
A `LIVE VIEW` stores the result of the [SELECT](../../../sql-reference/statements/select/index.md) query specified at creation time and is updated as soon as that result changes. The final query result, as well as the intermediate data needed to build it, is kept in memory, which gives high performance for repeated queries. Live views can send push notifications when the result of the underlying `SELECT` query changes; for that, use the [WATCH](../../../sql-reference/statements/watch.md) query.
@@ -79,7 +79,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U

- `*` — substitutes any number of any characters except `/`, including the empty string.
- `?` — substitutes exactly one arbitrary character.
-- `{some_string,another_string,yet_another_one}` — substitutes any of the strings `'some_string', 'another_string', 'yet_another_one'`.
+- `{some_string,another_string,yet_another_one}` — substitutes any of the strings `'some_string', 'another_string', 'yet_another_one'`; the string may contain `/`.
- `{N..M}` — substitutes any number in the range from `N` to `M`, inclusive (may contain leading zeros).

The `{}` construct is analogous to the [remote](remote.md) table function.
@@ -643,6 +643,8 @@ date_diff('unit', startdate, enddate, [timezone])
- `unit` — the unit of time for `value`. Type: [String](../../sql-reference/data-types/string.md).
    Possible values:

+   - `microsecond`
+   - `millisecond`
    - `second`
    - `minute`
    - `hour`
@@ -72,7 +72,7 @@ Materialized views in ClickHouse are more like insert triggers.
Enable live views and `WATCH` queries with the [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting. Run `set allow_experimental_live_view = 1`.

```sql
-CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
+CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
```

A live view stores the result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and is updated whenever that result changes. The query result, as well as the partial results needed to combine with new data, is stored in memory, giving higher performance for repeated queries. Live views can provide push notifications when the query result changes, via the [WATCH](../../../sql-reference/statements/watch.md) query.
@@ -887,6 +887,7 @@ try
#endif

    global_context->setRemoteHostFilter(config());
+   global_context->setHTTPHeaderFilter(config());

    std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH));
    fs::path path = path_str;
@@ -1200,6 +1201,7 @@ try
    }

    global_context->setRemoteHostFilter(*config);
+   global_context->setHTTPHeaderFilter(*config);

    global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop);
    global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop);
@@ -866,6 +866,14 @@
    -->
    <!--</remote_url_allow_hosts>-->

+   <!-- The list of HTTP headers forbidden to use in HTTP-related storage engines and table functions.
+        If this section is not present in configuration, all headers are allowed.
+     -->
+   <!-- <http_forbid_headers>
+            <header>exact_header</header>
+            <header_regexp>(?i)(case_insensitive_header)</header_regexp>
+        </http_forbid_headers> -->
+
    <!-- If element has 'incl' attribute, then for it's value will be used corresponding substitution from another file.
         By default, path to file with substitutions is /etc/metrika.xml. It could be changed in config in 'include_from' element.
         Values for substitutions are specified in /clickhouse/name_of_substitution elements in that file.
rust/skim/Cargo.lock (generated, 204 lines changed)
@@ -42,17 +42,6 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"

-[[package]]
-name = "atty"
-version = "0.2.14"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
-dependencies = [
- "hermit-abi 0.1.19",
- "libc",
- "winapi",
-]
-
[[package]]
name = "autocfg"
version = "1.1.0"

@@ -104,31 +93,6 @@ dependencies = [
 "winapi",
]

-[[package]]
-name = "clap"
-version = "3.2.25"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123"
-dependencies = [
- "atty",
- "bitflags",
- "clap_lex",
- "indexmap",
- "once_cell",
- "strsim",
- "termcolor",
- "textwrap",
-]
-
-[[package]]
-name = "clap_lex"
-version = "0.2.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
-dependencies = [
- "os_str_bytes",
-]
-
[[package]]
name = "codespan-reporting"
version = "0.11.1"

@@ -214,9 +178,9 @@ dependencies = [

[[package]]
name = "cxx"
-version = "1.0.97"
+version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e88abab2f5abbe4c56e8f1fb431b784d710b709888f35755a160e62e33fe38e8"
+checksum = "5032837c1384de3708043de9d4e97bb91290faca6c16529a28aa340592a78166"
dependencies = [
 "cc",
 "cxxbridge-flags",

@@ -226,9 +190,9 @@ dependencies = [

[[package]]
name = "cxx-build"
-version = "1.0.97"
+version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c0c11acd0e63bae27dcd2afced407063312771212b7a823b4fd72d633be30fb"
+checksum = "51368b3d0dbf356e10fcbfd455a038503a105ee556f7ee79b6bb8c53a7247456"
dependencies = [
 "cc",
 "codespan-reporting",

@@ -236,24 +200,24 @@ dependencies = [
 "proc-macro2",
 "quote",
 "scratch",
- "syn 2.0.23",
+ "syn 2.0.26",
]

[[package]]
name = "cxxbridge-flags"
-version = "1.0.97"
+version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8d3816ed957c008ccd4728485511e3d9aaf7db419aa321e3d2c5a2f3411e36c8"
+checksum = "0d9062157072e4aafc8e56ceaf8325ce850c5ae37578c852a0d4de2cecdded13"

[[package]]
name = "cxxbridge-macro"
-version = "1.0.97"
+version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a26acccf6f445af85ea056362561a24ef56cdc15fcc685f03aec50b9c702cb6d"
+checksum = "cf01e8a540f5a4e0f284595834f81cf88572f244b768f051724537afa99a2545"
dependencies = [
 "proc-macro2",
 "quote",
- "syn 2.0.23",
+ "syn 2.0.26",
]

[[package]]

@@ -359,19 +323,6 @@ version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"

-[[package]]
-name = "env_logger"
-version = "0.9.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7"
-dependencies = [
- "atty",
- "humantime",
- "log",
- "regex",
- "termcolor",
-]
-
[[package]]
name = "fnv"
version = "1.0.7"

@@ -398,32 +349,11 @@ dependencies = [
 "wasi 0.11.0+wasi-snapshot-preview1",
]

-[[package]]
-name = "hashbrown"
-version = "0.12.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
-
[[package]]
name = "hermit-abi"
-version = "0.1.19"
+version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
-dependencies = [
- "libc",
-]
-
-[[package]]
-name = "hermit-abi"
-version = "0.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286"
-
-[[package]]
-name = "humantime"
-version = "2.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
+checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b"

[[package]]
name = "iana-time-zone"

@@ -454,16 +384,6 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"

-[[package]]
-name = "indexmap"
-version = "1.9.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
-dependencies = [
- "autocfg",
- "hashbrown",
-]
-
[[package]]
name = "js-sys"
version = "0.3.64"

@@ -487,9 +407,9 @@ checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"

[[package]]
name = "link-cplusplus"
-version = "1.0.8"
+version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5"
+checksum = "9d240c6f7e1ba3a28b0249f774e6a9dd0175054b52dfbb61b16eb8505c3785c9"
dependencies = [
 "cc",
]

@@ -564,7 +484,7 @@ version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
dependencies = [
- "hermit-abi 0.3.1",
+ "hermit-abi",
 "libc",
]

@@ -574,12 +494,6 @@ version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"

-[[package]]
-name = "os_str_bytes"
-version = "6.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4d5d9eb14b174ee9aa2ef96dc2b94637a2d4b6e7cb873c7e171f0c20c6cf3eac"
-
[[package]]
name = "pin-utils"
version = "0.1.0"

@@ -588,18 +502,18 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"

[[package]]
name = "proc-macro2"
-version = "1.0.63"
+version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb"
+checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
dependencies = [
 "unicode-ident",
]

[[package]]
name = "quote"
-version = "1.0.29"
+version = "1.0.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
+checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0"
dependencies = [
 "proc-macro2",
]

@@ -648,9 +562,21 @@ dependencies = [

[[package]]
name = "regex"
-version = "1.8.4"
+version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f"
+checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
dependencies = [
 "aho-corasick",
 "memchr",
+ "regex-automata",
 "regex-syntax",
]

+[[package]]
+name = "regex-automata"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310"
+dependencies = [
+ "aho-corasick",
+ "memchr",

@@ -659,39 +585,33 @@ dependencies = [

[[package]]
name = "regex-syntax"
-version = "0.7.2"
+version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78"
+checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"

[[package]]
name = "rustversion"
-version = "1.0.12"
+version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06"
+checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"

[[package]]
name = "scopeguard"
-version = "1.1.0"
+version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"

[[package]]
name = "scratch"
-version = "1.0.5"
+version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1"
+checksum = "a3cf7c11c38cb994f3d40e8a8cde3bbd1f72a435e4c49e85d6553d8312306152"

[[package]]
name = "serde"
-version = "1.0.164"
+version = "1.0.171"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d"
-
-[[package]]
-name = "shlex"
-version = "1.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
+checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9"

[[package]]
name = "skim"

@@ -699,23 +619,19 @@ version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5d28de0a6cb2cdd83a076f1de9d965b973ae08b244df1aa70b432946dda0f32"
dependencies = [
- "atty",
 "beef",
 "bitflags",
 "chrono",
- "clap",
 "crossbeam",
 "defer-drop",
 "derive_builder",
- "env_logger",
 "fuzzy-matcher",
 "lazy_static",
 "log",
 "nix 0.25.1",
 "rayon",
 "regex",
- "shlex",
- "time 0.3.22",
+ "time 0.3.23",
 "timer",
 "tuikit",
 "unicode-width",

@@ -741,9 +657,9 @@ dependencies = [

[[package]]
name = "syn"
-version = "2.0.23"
+version = "2.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737"
+checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970"
dependencies = [
 "proc-macro2",
 "quote",

@@ -770,30 +686,24 @@ dependencies = [
 "winapi-util",
]

-[[package]]
-name = "textwrap"
-version = "0.16.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
-
[[package]]
name = "thiserror"
-version = "1.0.40"
+version = "1.0.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac"
+checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42"
dependencies = [
 "thiserror-impl",
]

[[package]]
name = "thiserror-impl"
-version = "1.0.40"
+version = "1.0.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
+checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f"
dependencies = [
 "proc-macro2",
 "quote",
- "syn 2.0.23",
+ "syn 2.0.26",
]

[[package]]

@@ -819,9 +729,9 @@ dependencies = [

[[package]]
name = "time"
-version = "0.3.22"
+version = "0.3.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ea9e1b3cf1243ae005d9e74085d4d542f3125458f3a81af210d901dcd7411efd"
+checksum = "59e399c068f43a5d116fedaf73b203fa4f9c519f17e2b34f63221d3792f81446"
dependencies = [
 "serde",
 "time-core",

@@ -858,9 +768,9 @@ dependencies = [

[[package]]
name = "unicode-ident"
-version = "1.0.9"
+version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0"
+checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"

[[package]]
name = "unicode-width"

@@ -928,7 +838,7 @@ dependencies = [
 "once_cell",
 "proc-macro2",
 "quote",
- "syn 2.0.23",
+ "syn 2.0.26",
 "wasm-bindgen-shared",
]

@@ -950,7 +860,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
dependencies = [
 "proc-macro2",
 "quote",
- "syn 2.0.23",
+ "syn 2.0.26",
 "wasm-bindgen-backend",
 "wasm-bindgen-shared",
]
@@ -6,7 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
-skim = "0.10.2"
+skim = { version = "0.10.2", default-features = false }
cxx = "1.0.83"
term = "0.7.0"
@@ -319,24 +319,21 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac
        throw Exception(ErrorCodes::NO_AVAILABLE_REPLICA, "Logical error: no available replica");

    Packet packet;
-   {
-       AsyncCallbackSetter async_setter(current_connection, std::move(async_callback));
-
-       try
-       {
-           packet = current_connection->receivePacket();
-       }
-       catch (Exception & e)
-       {
-           if (e.code() == ErrorCodes::UNKNOWN_PACKET_FROM_SERVER)
-           {
-               /// Exception may happen when packet is received, e.g. when got unknown packet.
-               /// In this case, invalidate replica, so that we would not read from it anymore.
-               current_connection->disconnect();
-               invalidateReplica(state);
-           }
-           throw;
-       }
-   }
+   try
+   {
+       AsyncCallbackSetter async_setter(current_connection, std::move(async_callback));
+       packet = current_connection->receivePacket();
+   }
+   catch (Exception & e)
+   {
+       if (e.code() == ErrorCodes::UNKNOWN_PACKET_FROM_SERVER)
+       {
+           /// Exception may happen when packet is received, e.g. when got unknown packet.
+           /// In this case, invalidate replica, so that we would not read from it anymore.
+           current_connection->disconnect();
+           invalidateReplica(state);
+       }
+       throw;
+   }

    switch (packet.type)
@@ -5,7 +5,6 @@ namespace DB

AsyncTaskExecutor::AsyncTaskExecutor(std::unique_ptr<AsyncTask> task_) : task(std::move(task_))
{
-   createFiber();
}

void AsyncTaskExecutor::resume()

@@ -13,6 +12,10 @@ void AsyncTaskExecutor::resume()
    if (routine_is_finished)
        return;

+   /// Create fiber lazily on first resume() call.
+   if (!fiber)
+       createFiber();
+
    if (!checkBeforeTaskResume())
        return;

@@ -22,6 +25,11 @@ void AsyncTaskExecutor::resume()
        return;

    resumeUnlocked();

+   /// Destroy fiber when it's finished.
+   if (routine_is_finished)
+       destroyFiber();
+
    if (exception)
        processException(exception);
}

@@ -46,9 +54,8 @@ void AsyncTaskExecutor::cancel()
void AsyncTaskExecutor::restart()
{
    std::lock_guard guard(fiber_lock);
-   if (fiber)
+   if (!routine_is_finished)
        destroyFiber();
-   createFiber();
    routine_is_finished = false;
}
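The change above moves fiber allocation from the constructor to the first resume() and frees the fiber as soon as the routine finishes, so executors that are created but never resumed stay cheap. A minimal standalone sketch of the same lazy-lifecycle idea, with hypothetical names standing in for the fiber machinery:

```cpp
#include <memory>

/// Hypothetical stand-in for a fiber resource that is expensive to
/// allocate (e.g. owns a dedicated stack).
struct Fiber { /* stack, context, ... */ };

class LazyExecutor
{
public:
    void resume()
    {
        if (finished)
            return;
        if (!fiber)                      // created on first resume(), not in the constructor
            fiber = std::make_unique<Fiber>();

        runOneStep();                    // assumed to set `finished` eventually

        if (finished)
            fiber.reset();               // free the stack as soon as the routine is done
    }

    void restart()
    {
        if (!finished)                   // a live fiber must be destroyed before restarting
            fiber.reset();
        finished = false;                // next resume() re-creates the fiber lazily
    }

private:
    void runOneStep() { finished = true; } // placeholder body
    std::unique_ptr<Fiber> fiber;
    bool finished = false;
};
```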
src/Common/HTTPHeaderFilter.cpp (new file, 56 lines)
@@ -0,0 +1,56 @@
#include <Common/HTTPHeaderFilter.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h>

#include <re2/re2.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
}

void HTTPHeaderFilter::checkHeaders(const HTTPHeaderEntries & entries) const
{
    std::lock_guard guard(mutex);

    for (const auto & entry : entries)
    {
        if (forbidden_headers.contains(entry.name))
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "HTTP header \"{}\" is forbidden in configuration file, "
                            "see <http_forbid_headers>", entry.name);

        for (const auto & header_regex : forbidden_headers_regexp)
            if (re2::RE2::FullMatch(entry.name, header_regex))
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "HTTP header \"{}\" is forbidden in configuration file, "
                                "see <http_forbid_headers>", entry.name);
    }
}

void HTTPHeaderFilter::setValuesFromConfig(const Poco::Util::AbstractConfiguration & config)
{
    std::lock_guard guard(mutex);

    if (config.has("http_forbid_headers"))
    {
        std::vector<std::string> keys;
        config.keys("http_forbid_headers", keys);

        for (const auto & key : keys)
        {
            if (startsWith(key, "header_regexp"))
                forbidden_headers_regexp.push_back(config.getString("http_forbid_headers." + key));
            else if (startsWith(key, "header"))
                forbidden_headers.insert(config.getString("http_forbid_headers." + key));
        }
    }
    else
    {
        forbidden_headers.clear();
        forbidden_headers_regexp.clear();
    }
}

}
src/Common/HTTPHeaderFilter.h (new file, 27 lines)
@@ -0,0 +1,27 @@
#pragma once

#include <IO/HTTPHeaderEntries.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <vector>
#include <unordered_set>
#include <mutex>


namespace DB
{

class HTTPHeaderFilter
{
public:

    void setValuesFromConfig(const Poco::Util::AbstractConfiguration & config);
    void checkHeaders(const HTTPHeaderEntries & entries) const;

private:
    std::unordered_set<std::string> forbidden_headers;
    std::vector<std::string> forbidden_headers_regexp;

    mutable std::mutex mutex;
};

}
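A short sketch of how this filter is driven, based on the calls visible in this commit (Server.cpp loads it from config; dictionary/storage code calls checkHeaders before issuing HTTP requests). The wiring is illustrative, and it assumes a loaded Poco configuration object:

```cpp
// Sketch only: wiring HTTPHeaderFilter the way this commit does in
// Server.cpp and HTTPDictionarySource.
#include <Common/HTTPHeaderFilter.h>
#include <IO/HTTPHeaderEntries.h>
#include <Poco/Util/AbstractConfiguration.h>

void example(const Poco::Util::AbstractConfiguration & server_config)
{
    DB::HTTPHeaderFilter filter;
    filter.setValuesFromConfig(server_config);   // reads <http_forbid_headers> if present

    DB::HTTPHeaderEntries entries;
    entries.emplace_back("exact_header", "some value");

    // Throws BAD_ARGUMENTS if "exact_header" is listed (exactly or by
    // regexp) in <http_forbid_headers>; passes silently otherwise.
    filter.checkHeaders(entries);
}
```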
@@ -41,9 +41,38 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config)
    initializeDisks(config);
}

+namespace
+{
+
+bool diskValidator(const Poco::Util::AbstractConfiguration & config, const std::string & disk_config_prefix)
+{
+    const auto disk_type = config.getString(disk_config_prefix + ".type", "local");
+
+    using namespace std::literals;
+    static constexpr std::array supported_disk_types
+    {
+        "s3"sv,
+        "s3_plain"sv,
+        "local"sv
+    };
+
+    if (std::all_of(
+            supported_disk_types.begin(),
+            supported_disk_types.end(),
+            [&](const auto supported_type) { return disk_type != supported_type; }))
+    {
+        LOG_INFO(&Poco::Logger::get("KeeperContext"), "Disk type '{}' is not supported for Keeper", disk_type);
+        return false;
+    }
+
+    return true;
+}
+
+}
+
void KeeperContext::initializeDisks(const Poco::Util::AbstractConfiguration & config)
{
-   disk_selector->initialize(config, "storage_configuration.disks", Context::getGlobalContextInstance());
+   disk_selector->initialize(config, "storage_configuration.disks", Context::getGlobalContextInstance(), diskValidator);

    log_storage = getLogsPathFromConfig(config);
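The `std::all_of` over inequalities reads as "no supported type equals disk_type". The same predicate phrased positively, as a standalone sketch that is not part of the commit:

```cpp
#include <algorithm>
#include <array>
#include <string_view>

bool isSupportedKeeperDiskType(std::string_view disk_type)
{
    using namespace std::literals;
    static constexpr std::array supported{"s3"sv, "s3_plain"sv, "local"sv};
    // Equivalent to the all_of(disk_type != t) form above:
    // accept the disk iff any supported type matches.
    return std::any_of(supported.begin(), supported.end(),
                       [&](std::string_view t) { return disk_type == t; });
}
```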
@@ -48,7 +48,11 @@ inline auto scaleMultiplier(UInt32 scale)

/** Components of DecimalX value:
 * whole - represents whole part of decimal, can be negative or positive.
- * fractional - for fractional part of decimal, always positive.
+ * fractional - for fractional part of decimal.
+ *
+ * 0.123 represents 0 / 0.123
+ * -0.123 represents 0 / -0.123
+ * -1.123 represents -1 / 0.123
 */
template <typename DecimalType>
struct DecimalComponents
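A small illustration of the sign convention documented in that comment: the fractional part carries the sign only when the whole part is zero. This is a sketch in plain integers, my reading of the comment rather than the actual DecimalUtils code:

```cpp
#include <cassert>
#include <cstdint>

struct Components { int64_t whole; int64_t fractional; };

/// Split a scaled decimal (scale multiplier 1000, i.e. three fractional
/// digits) into the documented whole/fractional representation.
Components split(int64_t value, int64_t scale_multiplier)
{
    int64_t whole = value / scale_multiplier;       // truncates toward zero
    int64_t fractional = value % scale_multiplier;  // carries value's sign
    if (whole != 0 && fractional < 0)
        fractional = -fractional;  // sign is already expressed by `whole`
    return {whole, fractional};
}

int main()
{
    assert(split(123, 1000).whole == 0 && split(123, 1000).fractional == 123);      // 0.123  -> 0 / 0.123
    assert(split(-123, 1000).whole == 0 && split(-123, 1000).fractional == -123);   // -0.123 -> 0 / -0.123
    assert(split(-1123, 1000).whole == -1 && split(-1123, 1000).fractional == 123); // -1.123 -> -1 / 0.123
}
```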
@@ -3,6 +3,7 @@
#if USE_MYSQL

#include <Databases/MySQL/MaterializedMySQLSyncThread.h>
+#include <Databases/MySQL/tryParseTableIDFromDDL.h>
#include <cstdlib>
#include <random>
#include <string_view>

@@ -151,61 +152,6 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection, const S
    }
}

-static std::tuple<String, String> tryExtractTableNameFromDDL(const String & ddl)
-{
-    String table_name;
-    String database_name;
-    if (ddl.empty()) return std::make_tuple(database_name, table_name);
-
-    bool parse_failed = false;
-    Tokens tokens(ddl.data(), ddl.data() + ddl.size());
-    IParser::Pos pos(tokens, 0);
-    Expected expected;
-    ASTPtr res;
-    ASTPtr table;
-    if (ParserKeyword("CREATE TEMPORARY TABLE").ignore(pos, expected) || ParserKeyword("CREATE TABLE").ignore(pos, expected))
-    {
-        ParserKeyword("IF NOT EXISTS").ignore(pos, expected);
-        if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
-            parse_failed = true;
-    }
-    else if (ParserKeyword("ALTER TABLE").ignore(pos, expected))
-    {
-        if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
-            parse_failed = true;
-    }
-    else if (ParserKeyword("DROP TABLE").ignore(pos, expected) || ParserKeyword("DROP TEMPORARY TABLE").ignore(pos, expected))
-    {
-        ParserKeyword("IF EXISTS").ignore(pos, expected);
-        if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
-            parse_failed = true;
-    }
-    else if (ParserKeyword("TRUNCATE").ignore(pos, expected))
-    {
-        ParserKeyword("TABLE").ignore(pos, expected);
-        if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
-            parse_failed = true;
-    }
-    else if (ParserKeyword("RENAME TABLE").ignore(pos, expected))
-    {
-        if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
-            parse_failed = true;
-    }
-    else
-    {
-        parse_failed = true;
-    }
-    if (!parse_failed)
-    {
-        if (auto table_id = table->as<ASTTableIdentifier>()->getTableId())
-        {
-            database_name = table_id.database_name;
-            table_name = table_id.table_name;
-        }
-    }
-    return std::make_tuple(database_name, table_name);
-}
-
MaterializedMySQLSyncThread::MaterializedMySQLSyncThread(
    ContextPtr context_,
    const String & database_name_,

@@ -868,14 +814,12 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_even
    String query = query_event.query;
    if (!materialized_tables_list.empty())
    {
-       auto [ddl_database_name, ddl_table_name] = tryExtractTableNameFromDDL(query_event.query);
-
-       if (!ddl_table_name.empty())
+       auto table_id = tryParseTableIDFromDDL(query, query_event.schema);
+       if (!table_id.table_name.empty())
        {
-           ddl_database_name = ddl_database_name.empty() ? query_event.schema: ddl_database_name;
-           if (ddl_database_name != mysql_database_name || !materialized_tables_list.contains(ddl_table_name))
+           if (table_id.database_name != mysql_database_name || !materialized_tables_list.contains(table_id.table_name))
            {
-               LOG_DEBUG(log, "Skip MySQL DDL: \n {}", query_event.query);
+               LOG_DEBUG(log, "Skip MySQL DDL for {}.{}:\n{}", table_id.database_name, table_id.table_name, query);
                return;
            }
        }
src/Databases/MySQL/tests/gtest_try_parse_table_id_from_ddl.cpp (new file, 185 lines)
@@ -0,0 +1,185 @@
#include "config.h"

#include <gtest/gtest.h>

#include <Databases/MySQL/tryParseTableIDFromDDL.h>

using namespace DB;

struct ParseTableIDFromDDLTestCase
{
    String query;
    String database_name;
    String table_name;

    ParseTableIDFromDDLTestCase(
        const String & query_,
        const String & database_name_,
        const String & table_name_)
        : query(query_)
        , database_name(database_name_)
        , table_name(table_name_)
    {
    }
};

std::ostream & operator<<(std::ostream & ostr, const ParseTableIDFromDDLTestCase & test_case)
{
    return ostr << '"' << test_case.query << "\" extracts `" << test_case.database_name << "`.`" << test_case.table_name << "`";
}

class ParseTableIDFromDDLTest : public ::testing::TestWithParam<ParseTableIDFromDDLTestCase>
{
};

TEST_P(ParseTableIDFromDDLTest, parse)
{
    const auto & [query, expected_database_name, expected_table_name] = GetParam();
    auto table_id = tryParseTableIDFromDDL(query, "default");
    EXPECT_EQ(expected_database_name, table_id.database_name);
    EXPECT_EQ(expected_table_name, table_id.table_name);
}

INSTANTIATE_TEST_SUITE_P(MaterializedMySQL, ParseTableIDFromDDLTest, ::testing::ValuesIn(std::initializer_list<ParseTableIDFromDDLTestCase>{
    {"SELECT * FROM db.table", "", ""},
    {"CREATE TEMPORARY TABLE db.table", "db", "table"},
    {"CREATE TEMPORARY TABLE IF NOT EXISTS db.table", "db", "table"},
    {"CREATE TEMPORARY TABLE table", "default", "table"},
    {"CREATE TEMPORARY TABLE IF NOT EXISTS table", "default", "table"},
    {"CREATE TABLE db.table", "db", "table"},
    {"CREATE TABLE IF NOT EXISTS db.table", "db", "table"},
    {"CREATE TABLE table", "default", "table"},
    {"CREATE TABLE IF NOT EXISTS table", "default", "table"},
    {"ALTER TABLE db.table", "db", "table"},
    {"ALTER TABLE table", "default", "table"},
    {"DROP TABLE db.table", "db", "table"},
    {"DROP TABLE IF EXISTS db.table", "db", "table"},
    {"DROP TABLE table", "default", "table"},
    {"DROP TABLE IF EXISTS table", "default", "table"},
    {"DROP TEMPORARY TABLE db.table", "db", "table"},
    {"DROP TEMPORARY TABLE IF EXISTS db.table", "db", "table"},
    {"DROP TEMPORARY TABLE table", "default", "table"},
    {"DROP TEMPORARY TABLE IF EXISTS table", "default", "table"},
    {"TRUNCATE db.table", "db", "table"},
    {"TRUNCATE TABLE db.table", "db", "table"},
    {"TRUNCATE table1", "default", "table1"},
    {"TRUNCATE TABLE table", "default", "table"},
    {"RENAME TABLE db.table", "db", "table"},
    {"RENAME TABLE table", "default", "table"},
    {"DROP DATABASE db", "", ""},
    {"DROP DATA`BASE db", "", ""},
    {"NOT A SQL", "", ""},
}));
src/Databases/MySQL/tryParseTableIDFromDDL.cpp (new file, 44 lines)
@@ -0,0 +1,44 @@
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/CommonParsers.h>
#include <Parsers/ExpressionElementParsers.h>

namespace DB
{

StorageID tryParseTableIDFromDDL(const String & query, const String & default_database_name)
{
    bool is_ddl = false;
    Tokens tokens(query.data(), query.data() + query.size());
    IParser::Pos pos(tokens, 0);
    Expected expected;
    if (ParserKeyword("CREATE TEMPORARY TABLE").ignore(pos, expected) || ParserKeyword("CREATE TABLE").ignore(pos, expected))
    {
        ParserKeyword("IF NOT EXISTS").ignore(pos, expected);
        is_ddl = true;
    }
    else if (ParserKeyword("ALTER TABLE").ignore(pos, expected) || ParserKeyword("RENAME TABLE").ignore(pos, expected))
    {
        is_ddl = true;
    }
    else if (ParserKeyword("DROP TABLE").ignore(pos, expected) || ParserKeyword("DROP TEMPORARY TABLE").ignore(pos, expected))
    {
        ParserKeyword("IF EXISTS").ignore(pos, expected);
        is_ddl = true;
    }
    else if (ParserKeyword("TRUNCATE").ignore(pos, expected))
    {
        ParserKeyword("TABLE").ignore(pos, expected);
        is_ddl = true;
    }

    ASTPtr table;
    if (!is_ddl || !ParserCompoundIdentifier(true).parse(pos, table, expected))
        return StorageID::createEmpty();
    auto table_id = table->as<ASTTableIdentifier>()->getTableId();
    if (table_id.database_name.empty())
        table_id.database_name = default_database_name;
    return table_id;
}

}
src/Databases/MySQL/tryParseTableIDFromDDL.h (new file, 11 lines)
@@ -0,0 +1,11 @@
#pragma once

#include <base/types.h>
#include <Storages/IStorage.h>

namespace DB
{

StorageID tryParseTableIDFromDDL(const String & query, const String & default_database_name);

}
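A quick illustration of the helper's contract, mirroring the unit tests above. The driver is a sketch that assumes a ClickHouse build environment:

```cpp
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
#include <iostream>

using namespace DB;

int main()
{
    // Database-qualified DDL keeps its own database name.
    auto id1 = tryParseTableIDFromDDL("ALTER TABLE db.table ADD COLUMN x INT", "default");
    std::cout << id1.database_name << "." << id1.table_name << "\n"; // db.table

    // Unqualified DDL falls back to the default database ("default" here,
    // the replicated schema in executeDDLAtomic above).
    auto id2 = tryParseTableIDFromDDL("TRUNCATE TABLE table1", "default");
    std::cout << id2.database_name << "." << id2.table_name << "\n"; // default.table1

    // Non-DDL statements yield an empty StorageID, so the caller skips them.
    auto id3 = tryParseTableIDFromDDL("SELECT 1", "default");
    std::cout << std::boolalpha << id3.table_name.empty() << "\n";   // true
}
```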
@@ -257,7 +257,6 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory)

    const auto & headers_prefix = settings_config_prefix + ".headers";
-
    if (config.has(headers_prefix))
    {
        Poco::Util::AbstractConfiguration::Keys config_keys;

@@ -297,7 +296,10 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory)
    auto context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix);

    if (created_from_ddl)
+   {
        context->getRemoteHostFilter().checkURL(Poco::URI(configuration.url));
+       context->getHTTPHeaderFilter().checkHeaders(configuration.header_entries);
+   }

    return std::make_unique<HTTPDictionarySource>(dict_struct, configuration, credentials, sample_block, context);
};
@@ -27,7 +27,7 @@ void DiskSelector::assertInitialized() const
}


-void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context)
+void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator)
{
    Poco::Util::AbstractConfiguration::Keys keys;
    config.keys(config_prefix, keys);

@@ -46,6 +46,9 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config,

        auto disk_config_prefix = config_prefix + "." + disk_name;

+       if (disk_validator && !disk_validator(config, disk_config_prefix))
+           continue;
+
        disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context, disks));
    }
    if (!has_default_disk)
@@ -23,7 +23,8 @@ public:
    DiskSelector() = default;
    DiskSelector(const DiskSelector & from) = default;

-   void initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context);
+   using DiskValidator = std::function<bool(const Poco::Util::AbstractConfiguration & config, const String & disk_config_prefix)>;
+   void initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator = {});

    DiskSelectorPtr updateFromConfig(
        const Poco::Util::AbstractConfiguration & config,
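The new hook makes disk filtering pluggable: callers pass a predicate and DiskSelector silently skips rejected disks, exactly as KeeperContext does above with diskValidator. A hedged sketch of a custom validator; the function name and the "local disks only" policy are illustrative:

```cpp
#include <Disks/DiskSelector.h>  // DiskSelector and DiskValidator from this commit

/// Sketch: admit only disks whose <type> is "local". DiskSelector consults
/// the predicate once per configured disk and skips any disk it rejects.
void initLocalOnlyDisks(const Poco::Util::AbstractConfiguration & config, DB::ContextPtr context)
{
    DB::DiskSelector selector;
    selector.initialize(config, "storage_configuration.disks", context,
        [](const Poco::Util::AbstractConfiguration & cfg, const std::string & prefix)
        {
            return cfg.getString(prefix + ".type", "local") == "local";
        });
}
```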
@@ -23,10 +23,6 @@ namespace ErrorCodes
    extern const int LOGICAL_ERROR;
}

-TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_)
-    : TemporaryFileOnDisk(disk_, "")
-{}
-
TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_, CurrentMetrics::Metric metric_scope)
    : TemporaryFileOnDisk(disk_)
{
@@ -16,9 +16,8 @@ using DiskPtr = std::shared_ptr<IDisk>;
class TemporaryFileOnDisk
{
public:
-   explicit TemporaryFileOnDisk(const DiskPtr & disk_);
    explicit TemporaryFileOnDisk(const DiskPtr & disk_, CurrentMetrics::Metric metric_scope);
-   explicit TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix);
+   explicit TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix = "tmp");

    ~TemporaryFileOnDisk();
@@ -19,6 +19,9 @@
namespace DB
{

+static constexpr auto microsecond_multiplier = 1000000;
+static constexpr auto millisecond_multiplier = 1000;
+
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
@@ -1377,6 +1380,36 @@ struct ToRelativeSecondNumImpl
    using FactorTransform = ZeroTransform;
};

+template <Int64 scale_multiplier>
+struct ToRelativeSubsecondNumImpl
+{
+    static constexpr auto name = "toRelativeSubsecondNumImpl";
+
+    static inline Int64 execute(const DateTime64 & t, DateTime64::NativeType scale, const DateLUTImpl &)
+    {
+        static_assert(scale_multiplier == 1000 || scale_multiplier == 1000000);
+        if (scale == scale_multiplier)
+            return t.value;
+        if (scale > scale_multiplier)
+            return t.value / (scale / scale_multiplier);
+        return t.value * (scale_multiplier / scale);
+    }
+    static inline Int64 execute(UInt32 t, const DateLUTImpl &)
+    {
+        return t * scale_multiplier;
+    }
+    static inline Int64 execute(Int32 d, const DateLUTImpl & time_zone)
+    {
+        return static_cast<Int64>(time_zone.fromDayNum(ExtendedDayNum(d))) * scale_multiplier;
+    }
+    static inline Int64 execute(UInt16 d, const DateLUTImpl & time_zone)
+    {
+        return static_cast<Int64>(time_zone.fromDayNum(DayNum(d)) * scale_multiplier);
+    }
+
+    using FactorTransform = ZeroTransform;
+};
+
struct ToYYYYMMImpl
{
    static constexpr auto name = "toYYYYMM";
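The scale handling in ToRelativeSubsecondNumImpl is ratio arithmetic between the column's tick rate and the target rate. A standalone sketch of the same conversion, outside ClickHouse:

```cpp
#include <cassert>
#include <cstdint>

/// `value` is a DateTime64 tick count at `scale` ticks per second;
/// `target` is 1'000 (milliseconds) or 1'000'000 (microseconds).
int64_t toSubseconds(int64_t value, int64_t scale, int64_t target)
{
    if (scale == target)
        return value;
    if (scale > target)
        return value / (scale / target);   // e.g. nanoseconds -> milliseconds
    return value * (target / scale);       // e.g. milliseconds -> microseconds
}

int main()
{
    // 1.5 s at millisecond scale is 1500 ticks; in microseconds: 1'500'000.
    assert(toSubseconds(1500, 1000, 1000000) == 1500000);
    // 1.5 s at nanosecond scale is 1'500'000'000 ticks; in milliseconds: 1500.
    assert(toSubseconds(1500000000, 1000000000, 1000) == 1500);
}
```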
@ -1476,25 +1509,47 @@ struct ToYYYYMMDDhhmmssImpl
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
|
||||
struct DateTimeComponentsWithFractionalPart : public DateLUTImpl::DateTimeComponents
|
||||
{
|
||||
UInt16 millisecond;
|
||||
UInt16 microsecond;
|
||||
};
|
||||
|
||||
struct ToDateTimeComponentsImpl
|
||||
{
|
||||
static constexpr auto name = "toDateTimeComponents";
|
||||
|
||||
static inline DateLUTImpl::DateTimeComponents execute(Int64 t, const DateLUTImpl & time_zone)
|
||||
static inline DateTimeComponentsWithFractionalPart execute(const DateTime64 & t, DateTime64::NativeType scale_multiplier, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.toDateTimeComponents(t);
|
||||
auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier);
|
||||
|
||||
if (t.value < 0 && components.fractional)
|
||||
{
|
||||
components.fractional = scale_multiplier + (components.whole ? Int64(-1) : Int64(1)) * components.fractional;
|
||||
--components.whole;
|
||||
}
|
||||
Int64 fractional = components.fractional;
|
||||
if (scale_multiplier > microsecond_multiplier)
|
||||
fractional = fractional / (scale_multiplier / microsecond_multiplier);
|
||||
else if (scale_multiplier < microsecond_multiplier)
|
||||
fractional = fractional * (microsecond_multiplier / scale_multiplier);
|
||||
|
||||
constexpr Int64 divider = microsecond_multiplier/ millisecond_multiplier;
|
||||
UInt16 millisecond = static_cast<UInt16>(fractional / divider);
|
||||
UInt16 microsecond = static_cast<UInt16>(fractional % divider);
|
||||
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(components.whole), millisecond, microsecond};
|
||||
}
|
||||
static inline DateLUTImpl::DateTimeComponents execute(UInt32 t, const DateLUTImpl & time_zone)
|
||||
static inline DateTimeComponentsWithFractionalPart execute(UInt32 t, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.toDateTimeComponents(static_cast<DateLUTImpl::Time>(t));
|
||||
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(static_cast<DateLUTImpl::Time>(t)), 0, 0};
|
||||
}
|
||||
static inline DateLUTImpl::DateTimeComponents execute(Int32 d, const DateLUTImpl & time_zone)
|
||||
static inline DateTimeComponentsWithFractionalPart execute(Int32 d, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.toDateTimeComponents(ExtendedDayNum(d));
|
||||
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(ExtendedDayNum(d)), 0, 0};
|
||||
}
|
||||
static inline DateLUTImpl::DateTimeComponents execute(UInt16 d, const DateLUTImpl & time_zone)
|
||||
static inline DateTimeComponentsWithFractionalPart execute(UInt16 d, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.toDateTimeComponents(DayNum(d));
|
||||
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(DayNum(d)), 0, 0};
|
||||
}
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
|
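The sign fix-up plus rescaling above is what turns a raw DateTime64 tick count into non-negative millisecond/microsecond fields. A self-contained sketch of the same arithmetic using plain floor division (names are illustrative):

#include <cstdint>
#include <iostream>

// Split ticks into whole seconds plus a non-negative remainder (floor
// semantics), then rescale the remainder to microseconds and cut it into
// millisecond / microsecond fields -- the effect the patch achieves with
// splitWithScaleMultiplier plus the negative-value fix-up.
struct Parts { int64_t whole; uint16_t millisecond; uint16_t microsecond; };

static Parts split_ticks(int64_t value, int64_t scale_multiplier)
{
    int64_t whole = value / scale_multiplier;
    int64_t frac = value % scale_multiplier;   // C++ '%' truncates toward zero
    if (frac < 0)
    {
        frac += scale_multiplier;              // push remainder into [0, scale)
        --whole;
    }
    if (scale_multiplier > 1000000)            // rescale remainder to microseconds
        frac /= scale_multiplier / 1000000;
    else
        frac *= 1000000 / scale_multiplier;
    return {whole, static_cast<uint16_t>(frac / 1000), static_cast<uint16_t>(frac % 1000)};
}

int main()
{
    Parts p = split_ticks(-1250, 1000);        // -1.25 s at DateTime64(3)
    std::cout << p.whole << ' ' << p.millisecond << ' ' << p.microsecond << '\n';   // -2 750 0
}

The `-2 750 0` output corresponds to -1.25 s: two whole seconds down, 750 ms back up.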
@@ -79,28 +79,51 @@ namespace impl
         UInt64 key1 = 0;
     };

-    static SipHashKey parseSipHashKey(const ColumnWithTypeAndName & key)
+    struct SipHashKeyColumns
     {
-        SipHashKey ret{};
+        ColumnPtr key0;
+        ColumnPtr key1;
+        bool is_const;

-        const auto * tuple = checkAndGetColumn<ColumnTuple>(key.column.get());
+        size_t size() const
+        {
+            assert(key0 && key1);
+            assert(key0->size() == key1->size());
+            return key0->size();
+        }
+        SipHashKey getKey(size_t i) const
+        {
+            if (is_const)
+                i = 0;
+            const auto & key0data = assert_cast<const ColumnUInt64 &>(*key0).getData();
+            const auto & key1data = assert_cast<const ColumnUInt64 &>(*key1).getData();
+            return {key0data[i], key1data[i]};
+        }
+    };
+
+    static SipHashKeyColumns parseSipHashKeyColumns(const ColumnWithTypeAndName & key)
+    {
+        const ColumnTuple * tuple = nullptr;
+        const auto * column = key.column.get();
+        bool is_const = false;
+        if (isColumnConst(*column))
+        {
+            is_const = true;
+            tuple = checkAndGetColumnConstData<ColumnTuple>(column);
+        }
+        else
+            tuple = checkAndGetColumn<ColumnTuple>(column);
         if (!tuple)
             throw Exception(ErrorCodes::NOT_IMPLEMENTED, "key must be a tuple");
-
         if (tuple->tupleSize() != 2)
             throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64");

-        if (tuple->empty())
-            return ret;
-
-        if (const auto * key0col = checkAndGetColumn<ColumnUInt64>(&(tuple->getColumn(0))))
-            ret.key0 = key0col->get64(0);
-        else
+        SipHashKeyColumns ret{tuple->getColumnPtr(0), tuple->getColumnPtr(1), is_const};
+        assert(ret.key0);
+        if (!checkColumn<ColumnUInt64>(*ret.key0))
             throw Exception(ErrorCodes::NOT_IMPLEMENTED, "first element of the key tuple is not UInt64");
-
-        if (const auto * key1col = checkAndGetColumn<ColumnUInt64>(&(tuple->getColumn(1))))
-            ret.key1 = key1col->get64(0);
-        else
+        assert(ret.key1);
+        if (!checkColumn<ColumnUInt64>(*ret.key1))
             throw Exception(ErrorCodes::NOT_IMPLEMENTED, "second element of the key tuple is not UInt64");

         return ret;
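The keyed SipHash variants now accept the key as a pair of full UInt64 columns rather than a single constant, so the key may differ per row. A standalone sketch of the per-row lookup with the const-key fast path (plain vectors stand in for ClickHouse column types):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Stand-in for impl::SipHashKeyColumns: two parallel UInt64 vectors plus a
// flag saying whether the key was a constant tuple (then only row 0 is valid).
struct SipHashKey { uint64_t key0; uint64_t key1; };

struct SipHashKeyColumns
{
    std::vector<uint64_t> key0;
    std::vector<uint64_t> key1;
    bool is_const;

    SipHashKey getKey(size_t i) const
    {
        if (is_const)
            i = 0;              // a constant key: every row shares row 0
        return {key0[i], key1[i]};
    }
};

int main()
{
    SipHashKeyColumns keys{{1, 2, 3}, {10, 20, 30}, /*is_const=*/false};
    std::cout << keys.getKey(2).key0 << '\n';   // 3: row 2 uses its own key
}

This is what allows a call such as sipHash64Keyed((key0, key1), value) to take key0/key1 from table columns rather than from literals.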
@@ -329,8 +352,10 @@ struct SipHash64KeyedImpl
     static constexpr auto name = "sipHash64Keyed";
     using ReturnType = UInt64;
     using Key = impl::SipHashKey;
+    using KeyColumns = impl::SipHashKeyColumns;

-    static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); }
+    static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
+    static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }

     static UInt64 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash64Keyed(key.key0, key.key1, begin, size); }

@@ -371,8 +396,10 @@ struct SipHash128KeyedImpl
     static constexpr auto name = "sipHash128Keyed";
     using ReturnType = UInt128;
     using Key = impl::SipHashKey;
+    using KeyColumns = impl::SipHashKeyColumns;

-    static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); }
+    static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
+    static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }

     static UInt128 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash128Keyed(key.key0, key.key1, begin, size); }
@@ -398,13 +425,43 @@ struct SipHash128ReferenceImpl

     using ReturnType = UInt128;

-    static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc<UInt128, SipHash128Impl>(h1, h2); }
+    static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc<UInt128, SipHash128ReferenceImpl>(h1, h2); }

     static UInt128 apply(const char * data, const size_t size) { return sipHash128Reference(data, size); }

     static constexpr bool use_int_hash_for_pods = false;
 };

+struct SipHash128ReferenceKeyedImpl
+{
+    static constexpr auto name = "sipHash128ReferenceKeyed";
+    using ReturnType = UInt128;
+    using Key = impl::SipHashKey;
+    using KeyColumns = impl::SipHashKeyColumns;
+
+    static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
+    static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
+
+    static UInt128 applyKeyed(const Key & key, const char * begin, size_t size)
+    {
+        return sipHash128ReferenceKeyed(key.key0, key.key1, begin, size);
+    }
+
+    static UInt128 combineHashesKeyed(const Key & key, UInt128 h1, UInt128 h2)
+    {
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+        UInt128 tmp;
+        reverseMemcpy(&tmp, &h1, sizeof(UInt128));
+        h1 = tmp;
+        reverseMemcpy(&tmp, &h2, sizeof(UInt128));
+        h2 = tmp;
+#endif
+        UInt128 hashes[] = {h1, h2};
+        return applyKeyed(key, reinterpret_cast<const char *>(hashes), 2 * sizeof(UInt128));
+    }
+
+    static constexpr bool use_int_hash_for_pods = false;
+};
+
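The combineHashesKeyed added above follows a simple pattern: the two 128-bit partial hashes are laid out back to back and the pair is hashed again with the same keyed function. A toy sketch of that shape (keyed_hash is a placeholder mixer, not the real SipHash reference rounds):

#include <cstddef>
#include <cstdint>
#include <iostream>

// Sketch of the combine step: hash the concatenation of the two halves with
// the same keyed function. Only the combining shape is the point here.
struct U128 { uint64_t lo, hi; };

static U128 keyed_hash(uint64_t k0, uint64_t k1, const char * data, size_t size)
{
    uint64_t h = k0 ^ (k1 << 1) ^ size;
    for (size_t i = 0; i < size; ++i)
        h = (h ^ static_cast<unsigned char>(data[i])) * 1099511628211ULL;
    return {h, ~h};
}

static U128 combine_keyed(uint64_t k0, uint64_t k1, U128 h1, U128 h2)
{
    U128 hashes[] = {h1, h2};   // big-endian builds byte-swap h1 and h2 first
    return keyed_hash(k0, k1, reinterpret_cast<const char *>(hashes), sizeof(hashes));
}

int main()
{
    U128 a = keyed_hash(1, 2, "foo", 3);
    U128 b = keyed_hash(1, 2, "bar", 3);
    std::cout << combine_keyed(1, 2, a, b).lo << '\n';
}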
 /** Why we need MurmurHash2?
   * MurmurHash2 is an outdated hash function, superseded by MurmurHash3 and subsequently by CityHash, xxHash, HighwayHash.
@@ -1023,7 +1080,7 @@ private:

 DECLARE_MULTITARGET_CODE(

-template <typename Impl, bool Keyed, typename KeyType>
+template <typename Impl, bool Keyed, typename KeyType, typename KeyColumnsType>
 class FunctionAnyHash : public IFunction
 {
 public:
@@ -1033,9 +1090,12 @@ private:
     using ToType = typename Impl::ReturnType;

     template <typename FromType, bool first>
-    void executeIntType(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
+    void executeIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
     {
         using ColVecType = ColumnVectorOrDecimal<FromType>;
+        KeyType key{};
+        if constexpr (Keyed)
+            key = Impl::getKey(key_cols, 0);

         if (const ColVecType * col_from = checkAndGetColumn<ColVecType>(column))
         {
@@ -1044,6 +1104,9 @@ private:
             for (size_t i = 0; i < size; ++i)
             {
                 ToType hash;
+                if constexpr (Keyed)
+                    if (!key_cols.is_const && i != 0)
+                        key = Impl::getKey(key_cols, i);

                 if constexpr (Impl::use_int_hash_for_pods)
                 {
@@ -1077,6 +1140,14 @@ private:
         }
         else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
         {
+            if constexpr (Keyed)
+            {
+                if (!key_cols.is_const)
+                {
+                    ColumnPtr full_column = col_from_const->convertToFullColumn();
+                    return executeIntType<FromType, first>(key_cols, full_column.get(), vec_to);
+                }
+            }
             auto value = col_from_const->template getValue<FromType>();
             ToType hash;

@@ -1107,8 +1178,15 @@ private:
             if constexpr (first)
                 vec_to.assign(size, hash);
             else
+            {
                 for (size_t i = 0; i < size; ++i)
+                {
+                    if constexpr (Keyed)
+                        if (!key_cols.is_const && i != 0)
+                            key = Impl::getKey(key_cols, i);
                     vec_to[i] = combineHashes(key, vec_to[i], hash);
+                }
+            }
         }
         else
             throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
@@ -1116,9 +1194,12 @@ private:
     }

     template <typename FromType, bool first>
-    void executeBigIntType(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
+    void executeBigIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
     {
         using ColVecType = ColumnVectorOrDecimal<FromType>;
+        KeyType key{};
+        if constexpr (Keyed)
+            key = Impl::getKey(key_cols, 0);

         if (const ColVecType * col_from = checkAndGetColumn<ColVecType>(column))
         {
@@ -1127,6 +1208,9 @@ private:
             for (size_t i = 0; i < size; ++i)
             {
                 ToType hash;
+                if constexpr (Keyed)
+                    if (!key_cols.is_const && i != 0)
+                        key = Impl::getKey(key_cols, i);
                 if constexpr (std::endian::native == std::endian::little)
                     hash = apply(key, reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
                 else
@@ -1143,6 +1227,14 @@ private:
         }
         else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
         {
+            if constexpr (Keyed)
+            {
+                if (!key_cols.is_const)
+                {
+                    ColumnPtr full_column = col_from_const->convertToFullColumn();
+                    return executeBigIntType<FromType, first>(key_cols, full_column.get(), vec_to);
+                }
+            }
             auto value = col_from_const->template getValue<FromType>();

             ToType hash;
@@ -1158,8 +1250,15 @@ private:
             if constexpr (first)
                 vec_to.assign(size, hash);
             else
+            {
                 for (size_t i = 0; i < size; ++i)
+                {
+                    if constexpr (Keyed)
+                        if (!key_cols.is_const && i != 0)
+                            key = Impl::getKey(key_cols, i);
                     vec_to[i] = combineHashes(key, vec_to[i], hash);
+                }
+            }
         }
         else
             throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
@@ -1167,10 +1266,16 @@ private:
     }

     template <bool first>
-    void executeGeneric(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
+    void executeGeneric(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
     {
+        KeyType key{};
+        if constexpr (Keyed)
+            key = Impl::getKey(key_cols, 0);
         for (size_t i = 0, size = column->size(); i < size; ++i)
         {
+            if constexpr (Keyed)
+                if (!key_cols.is_const && i != 0)
+                    key = Impl::getKey(key_cols, i);
             StringRef bytes = column->getDataAt(i);
             const ToType hash = apply(key, bytes.data, bytes.size);
             if constexpr (first)
@@ -1181,8 +1286,11 @@ private:
     }

     template <bool first>
-    void executeString(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
+    void executeString(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
     {
+        KeyType key{};
+        if constexpr (Keyed)
+            key = Impl::getKey(key_cols, 0);
         if (const ColumnString * col_from = checkAndGetColumn<ColumnString>(column))
         {
             const typename ColumnString::Chars & data = col_from->getChars();
@@ -1192,6 +1300,9 @@ private:
             ColumnString::Offset current_offset = 0;
             for (size_t i = 0; i < size; ++i)
             {
+                if constexpr (Keyed)
+                    if (!key_cols.is_const && i != 0)
+                        key = Impl::getKey(key_cols, i);
                 const ToType hash = apply(key,
                     reinterpret_cast<const char *>(&data[current_offset]),
                     offsets[i] - current_offset - 1);
@@ -1212,6 +1323,9 @@ private:

             for (size_t i = 0; i < size; ++i)
             {
+                if constexpr (Keyed)
+                    if (!key_cols.is_const && i != 0)
+                        key = Impl::getKey(key_cols, i);
                 const ToType hash = apply(key, reinterpret_cast<const char *>(&data[i * n]), n);
                 if constexpr (first)
                     vec_to[i] = hash;
@@ -1221,6 +1335,14 @@ private:
         }
         else if (const ColumnConst * col_from_const = checkAndGetColumnConstStringOrFixedString(column))
         {
+            if constexpr (Keyed)
+            {
+                if (!key_cols.is_const)
+                {
+                    ColumnPtr full_column = col_from_const->convertToFullColumn();
+                    return executeString<first>(key_cols, full_column.get(), vec_to);
+                }
+            }
             String value = col_from_const->getValue<String>();
             const ToType hash = apply(key, value.data(), value.size());
             const size_t size = vec_to.size();
@@ -1228,8 +1350,15 @@ private:
             if constexpr (first)
                 vec_to.assign(size, hash);
             else
+            {
                 for (size_t i = 0; i < size; ++i)
+                {
+                    if constexpr (Keyed)
+                        if (!key_cols.is_const && i != 0)
+                            key = Impl::getKey(key_cols, i);
                     vec_to[i] = combineHashes(key, vec_to[i], hash);
+                }
+            }
         }
         else
             throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
@@ -1237,7 +1366,7 @@ private:
     }

     template <bool first>
-    void executeArray(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
+    void executeArray(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
     {
         const IDataType * nested_type = typeid_cast<const DataTypeArray &>(*type).getNestedType().get();

@@ -1249,13 +1378,19 @@ private:

             typename ColumnVector<ToType>::Container vec_temp(nested_size);
             bool nested_is_first = true;
-            executeForArgument(key, nested_type, nested_column, vec_temp, nested_is_first);
+            executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first);

             const size_t size = offsets.size();

             ColumnArray::Offset current_offset = 0;
+            KeyType key{};
+            if constexpr (Keyed)
+                key = Impl::getKey(key_cols, 0);
             for (size_t i = 0; i < size; ++i)
             {
+                if constexpr (Keyed)
+                    if (!key_cols.is_const && i != 0)
+                        key = Impl::getKey(key_cols, i);
                 ColumnArray::Offset next_offset = offsets[i];

                 ToType hash;
@@ -1279,7 +1414,7 @@ private:
         {
             /// NOTE: here, of course, you can do without the materialization of the column.
             ColumnPtr full_column = col_from_const->convertToFullColumn();
-            executeArray<first>(key, type, full_column.get(), vec_to);
+            executeArray<first>(key_cols, type, full_column.get(), vec_to);
         }
         else
             throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
@@ -1287,7 +1422,7 @@ private:
     }

     template <bool first>
-    void executeAny(const KeyType & key, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector<ToType>::Container & vec_to) const
+    void executeAny(const KeyColumnsType & key_cols, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector<ToType>::Container & vec_to) const
     {
         WhichDataType which(from_type);

@@ -1295,40 +1430,45 @@ private:
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}",
                 icolumn->getName(), icolumn->size(), vec_to.size(), getName());

-        if (which.isUInt8()) executeIntType<UInt8, first>(key, icolumn, vec_to);
-        else if (which.isUInt16()) executeIntType<UInt16, first>(key, icolumn, vec_to);
-        else if (which.isUInt32()) executeIntType<UInt32, first>(key, icolumn, vec_to);
-        else if (which.isUInt64()) executeIntType<UInt64, first>(key, icolumn, vec_to);
-        else if (which.isUInt128()) executeBigIntType<UInt128, first>(key, icolumn, vec_to);
-        else if (which.isUInt256()) executeBigIntType<UInt256, first>(key, icolumn, vec_to);
-        else if (which.isInt8()) executeIntType<Int8, first>(key, icolumn, vec_to);
-        else if (which.isInt16()) executeIntType<Int16, first>(key, icolumn, vec_to);
-        else if (which.isInt32()) executeIntType<Int32, first>(key, icolumn, vec_to);
-        else if (which.isInt64()) executeIntType<Int64, first>(key, icolumn, vec_to);
-        else if (which.isInt128()) executeBigIntType<Int128, first>(key, icolumn, vec_to);
-        else if (which.isInt256()) executeBigIntType<Int256, first>(key, icolumn, vec_to);
-        else if (which.isUUID()) executeBigIntType<UUID, first>(key, icolumn, vec_to);
-        else if (which.isIPv4()) executeIntType<IPv4, first>(key, icolumn, vec_to);
-        else if (which.isIPv6()) executeBigIntType<IPv6, first>(key, icolumn, vec_to);
-        else if (which.isEnum8()) executeIntType<Int8, first>(key, icolumn, vec_to);
-        else if (which.isEnum16()) executeIntType<Int16, first>(key, icolumn, vec_to);
-        else if (which.isDate()) executeIntType<UInt16, first>(key, icolumn, vec_to);
-        else if (which.isDate32()) executeIntType<Int32, first>(key, icolumn, vec_to);
-        else if (which.isDateTime()) executeIntType<UInt32, first>(key, icolumn, vec_to);
+        if constexpr (Keyed)
+            if ((!key_cols.is_const && key_cols.size() != vec_to.size())
+                || (key_cols.is_const && key_cols.size() != 1))
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "Key column size {} doesn't match result column size {} of function {}", key_cols.size(), vec_to.size(), getName());
+
+        if (which.isUInt8()) executeIntType<UInt8, first>(key_cols, icolumn, vec_to);
+        else if (which.isUInt16()) executeIntType<UInt16, first>(key_cols, icolumn, vec_to);
+        else if (which.isUInt32()) executeIntType<UInt32, first>(key_cols, icolumn, vec_to);
+        else if (which.isUInt64()) executeIntType<UInt64, first>(key_cols, icolumn, vec_to);
+        else if (which.isUInt128()) executeBigIntType<UInt128, first>(key_cols, icolumn, vec_to);
+        else if (which.isUInt256()) executeBigIntType<UInt256, first>(key_cols, icolumn, vec_to);
+        else if (which.isInt8()) executeIntType<Int8, first>(key_cols, icolumn, vec_to);
+        else if (which.isInt16()) executeIntType<Int16, first>(key_cols, icolumn, vec_to);
+        else if (which.isInt32()) executeIntType<Int32, first>(key_cols, icolumn, vec_to);
+        else if (which.isInt64()) executeIntType<Int64, first>(key_cols, icolumn, vec_to);
+        else if (which.isInt128()) executeBigIntType<Int128, first>(key_cols, icolumn, vec_to);
+        else if (which.isInt256()) executeBigIntType<Int256, first>(key_cols, icolumn, vec_to);
+        else if (which.isUUID()) executeBigIntType<UUID, first>(key_cols, icolumn, vec_to);
+        else if (which.isIPv4()) executeIntType<IPv4, first>(key_cols, icolumn, vec_to);
+        else if (which.isIPv6()) executeBigIntType<IPv6, first>(key_cols, icolumn, vec_to);
+        else if (which.isEnum8()) executeIntType<Int8, first>(key_cols, icolumn, vec_to);
+        else if (which.isEnum16()) executeIntType<Int16, first>(key_cols, icolumn, vec_to);
+        else if (which.isDate()) executeIntType<UInt16, first>(key_cols, icolumn, vec_to);
+        else if (which.isDate32()) executeIntType<Int32, first>(key_cols, icolumn, vec_to);
+        else if (which.isDateTime()) executeIntType<UInt32, first>(key_cols, icolumn, vec_to);
         /// TODO: executeIntType() for Decimal32/64 leads to incompatible result
-        else if (which.isDecimal32()) executeBigIntType<Decimal32, first>(key, icolumn, vec_to);
-        else if (which.isDecimal64()) executeBigIntType<Decimal64, first>(key, icolumn, vec_to);
-        else if (which.isDecimal128()) executeBigIntType<Decimal128, first>(key, icolumn, vec_to);
-        else if (which.isDecimal256()) executeBigIntType<Decimal256, first>(key, icolumn, vec_to);
-        else if (which.isFloat32()) executeIntType<Float32, first>(key, icolumn, vec_to);
-        else if (which.isFloat64()) executeIntType<Float64, first>(key, icolumn, vec_to);
-        else if (which.isString()) executeString<first>(key, icolumn, vec_to);
-        else if (which.isFixedString()) executeString<first>(key, icolumn, vec_to);
-        else if (which.isArray()) executeArray<first>(key, from_type, icolumn, vec_to);
-        else executeGeneric<first>(key, icolumn, vec_to);
+        else if (which.isDecimal32()) executeBigIntType<Decimal32, first>(key_cols, icolumn, vec_to);
+        else if (which.isDecimal64()) executeBigIntType<Decimal64, first>(key_cols, icolumn, vec_to);
+        else if (which.isDecimal128()) executeBigIntType<Decimal128, first>(key_cols, icolumn, vec_to);
+        else if (which.isDecimal256()) executeBigIntType<Decimal256, first>(key_cols, icolumn, vec_to);
+        else if (which.isFloat32()) executeIntType<Float32, first>(key_cols, icolumn, vec_to);
+        else if (which.isFloat64()) executeIntType<Float64, first>(key_cols, icolumn, vec_to);
+        else if (which.isString()) executeString<first>(key_cols, icolumn, vec_to);
+        else if (which.isFixedString()) executeString<first>(key_cols, icolumn, vec_to);
+        else if (which.isArray()) executeArray<first>(key_cols, from_type, icolumn, vec_to);
+        else executeGeneric<first>(key_cols, icolumn, vec_to);
     }
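The validation added at the top of executeAny pins down the contract for non-constant keys: either one shared key (a constant tuple, logical size 1) or exactly one (key0, key1) pair per input row. Stated as a standalone check (a sketch; the exception type and message follow the diff only loosely):

#include <cstddef>
#include <iostream>
#include <stdexcept>

// Sketch of the keyed-hash argument contract enforced in executeAny above.
static void check_key_size(bool is_const, size_t key_rows, size_t data_rows)
{
    if ((!is_const && key_rows != data_rows) || (is_const && key_rows != 1))
        throw std::logic_error("key column size doesn't match result column size");
}

int main()
{
    check_key_size(true, 1, 100);    // one constant key for 100 rows: fine
    check_key_size(false, 100, 100); // per-row keys: sizes must match
    std::cout << "ok\n";
}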
-    void executeForArgument(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to, bool & is_first) const
+    void executeForArgument(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to, bool & is_first) const
     {
         /// Flattening of tuples.
         if (const ColumnTuple * tuple = typeid_cast<const ColumnTuple *>(column))
@@ -1337,7 +1477,7 @@ private:
             const DataTypes & tuple_types = typeid_cast<const DataTypeTuple &>(*type).getElements();
             size_t tuple_size = tuple_columns.size();
             for (size_t i = 0; i < tuple_size; ++i)
-                executeForArgument(key, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first);
+                executeForArgument(key_cols, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first);
         }
         else if (const ColumnTuple * tuple_const = checkAndGetColumnConstData<ColumnTuple>(column))
         {
@@ -1347,24 +1487,24 @@ private:
             for (size_t i = 0; i < tuple_size; ++i)
             {
                 auto tmp = ColumnConst::create(tuple_columns[i], column->size());
-                executeForArgument(key, tuple_types[i].get(), tmp.get(), vec_to, is_first);
+                executeForArgument(key_cols, tuple_types[i].get(), tmp.get(), vec_to, is_first);
             }
         }
         else if (const auto * map = checkAndGetColumn<ColumnMap>(column))
         {
             const auto & type_map = assert_cast<const DataTypeMap &>(*type);
-            executeForArgument(key, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first);
+            executeForArgument(key_cols, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first);
         }
         else if (const auto * const_map = checkAndGetColumnConst<ColumnMap>(column))
         {
-            executeForArgument(key, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first);
+            executeForArgument(key_cols, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first);
         }
         else
         {
             if (is_first)
-                executeAny<true>(key, type, column, vec_to);
+                executeAny<true>(key_cols, type, column, vec_to);
             else
-                executeAny<false>(key, type, column, vec_to);
+                executeAny<false>(key_cols, type, column, vec_to);
         }

         is_first = false;
@@ -1395,30 +1535,33 @@ public:
     {
         auto col_to = ColumnVector<ToType>::create(input_rows_count);

-        typename ColumnVector<ToType>::Container & vec_to = col_to->getData();
-
-        /// If using a "keyed" algorithm, the first argument is the key and
-        /// the data starts from the second argument.
-        /// Otherwise there is no key and all arguments are interpreted as data.
-        constexpr size_t first_data_argument = Keyed;
-
-        if (arguments.size() <= first_data_argument)
+        if (input_rows_count != 0)
         {
-            /// Return a fixed random-looking magic number when input is empty
-            vec_to.assign(input_rows_count, static_cast<ToType>(0xe28dbde7fe22e41c));
-        }
+            typename ColumnVector<ToType>::Container & vec_to = col_to->getData();

-        KeyType key{};
-        if constexpr (Keyed)
-            if (!arguments.empty())
-                key = Impl::parseKey(arguments[0]);
+            /// If using a "keyed" algorithm, the first argument is the key and
+            /// the data starts from the second argument.
+            /// Otherwise there is no key and all arguments are interpreted as data.
+            constexpr size_t first_data_argument = Keyed;

-        /// The function supports arbitrary number of arguments of arbitrary types.
-        bool is_first_argument = true;
-        for (size_t i = first_data_argument; i < arguments.size(); ++i)
-        {
-            const auto & col = arguments[i];
-            executeForArgument(key, col.type.get(), col.column.get(), vec_to, is_first_argument);
+            if (arguments.size() <= first_data_argument)
+            {
+                /// Return a fixed random-looking magic number when input is empty
+                vec_to.assign(input_rows_count, static_cast<ToType>(0xe28dbde7fe22e41c));
+            }
+
+            KeyColumnsType key_cols{};
+            if constexpr (Keyed)
+                if (!arguments.empty())
+                    key_cols = Impl::parseKeyColumns(arguments[0]);
+
+            /// The function supports arbitrary number of arguments of arbitrary types.
+            bool is_first_argument = true;
+            for (size_t i = first_data_argument; i < arguments.size(); ++i)
+            {
+                const auto & col = arguments[i];
+                executeForArgument(key_cols, col.type.get(), col.column.get(), vec_to, is_first_argument);
+            }
         }

         if constexpr (std::is_same_v<ToType, UInt128>) /// backward-compatible
@@ -1450,17 +1593,19 @@ public:

 ) // DECLARE_MULTITARGET_CODE

-template <typename Impl, bool Keyed = false, typename KeyType = char>
-class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType>
+template <typename Impl, bool Keyed = false, typename KeyType = char, typename KeyColumnsType = char>
+class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>
 {
 public:
     explicit FunctionAnyHash(ContextPtr context) : selector(context)
     {
-        selector.registerImplementation<TargetArch::Default, TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType>>();
+        selector
+            .registerImplementation<TargetArch::Default, TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();

 #if USE_MULTITARGET_CODE
-        selector.registerImplementation<TargetArch::AVX2, TargetSpecific::AVX2::FunctionAnyHash<Impl, Keyed, KeyType>>();
-        selector.registerImplementation<TargetArch::AVX512F, TargetSpecific::AVX512F::FunctionAnyHash<Impl, Keyed, KeyType>>();
+        selector.registerImplementation<TargetArch::AVX2, TargetSpecific::AVX2::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
+        selector
+            .registerImplementation<TargetArch::AVX512F, TargetSpecific::AVX512F::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
 #endif
     }
@@ -1696,7 +1841,7 @@ struct NameIntHash32 { static constexpr auto name = "intHash32"; };
 struct NameIntHash64 { static constexpr auto name = "intHash64"; };

 using FunctionSipHash64 = FunctionAnyHash<SipHash64Impl>;
-using FunctionSipHash64Keyed = FunctionAnyHash<SipHash64KeyedImpl, true, SipHash64KeyedImpl::Key>;
+using FunctionSipHash64Keyed = FunctionAnyHash<SipHash64KeyedImpl, true, SipHash64KeyedImpl::Key, SipHash64KeyedImpl::KeyColumns>;
 using FunctionIntHash32 = FunctionIntHash<IntHash32Impl, NameIntHash32>;
 using FunctionIntHash64 = FunctionIntHash<IntHash64Impl, NameIntHash64>;
 #if USE_SSL
@@ -1710,8 +1855,10 @@ using FunctionSHA384 = FunctionStringHashFixedString<SHA384Impl>;
 using FunctionSHA512 = FunctionStringHashFixedString<SHA512Impl>;
 #endif
 using FunctionSipHash128 = FunctionAnyHash<SipHash128Impl>;
-using FunctionSipHash128Keyed = FunctionAnyHash<SipHash128KeyedImpl, true, SipHash128KeyedImpl::Key>;
+using FunctionSipHash128Keyed = FunctionAnyHash<SipHash128KeyedImpl, true, SipHash128KeyedImpl::Key, SipHash128KeyedImpl::KeyColumns>;
 using FunctionSipHash128Reference = FunctionAnyHash<SipHash128ReferenceImpl>;
+using FunctionSipHash128ReferenceKeyed
+    = FunctionAnyHash<SipHash128ReferenceKeyedImpl, true, SipHash128ReferenceKeyedImpl::Key, SipHash128ReferenceKeyedImpl::KeyColumns>;
 using FunctionCityHash64 = FunctionAnyHash<ImplCityHash64>;
 using FunctionFarmFingerprint64 = FunctionAnyHash<ImplFarmFingerprint64>;
 using FunctionFarmHash64 = FunctionAnyHash<ImplFarmHash64>;
@@ -20,6 +20,11 @@ REGISTER_FUNCTION(Hashing)
         .examples{{"hash", "SELECT hex(sipHash128Reference('foo', '\\x01', 3))", ""}},
         .categories{"Hash"}
     });
+    factory.registerFunction<FunctionSipHash128ReferenceKeyed>(FunctionDocumentation{
+        .description = "Same as [sipHash128Reference](#hash_functions-siphash128reference) but additionally takes an explicit key argument "
+                       "instead of using a fixed key.",
+        .examples{{"hash", "SELECT hex(sipHash128ReferenceKeyed((506097522914230528, 1084818905618843912),'foo', '\\x01', 3));", ""}},
+        .categories{"Hash"}});
     factory.registerFunction<FunctionCityHash64>();
     factory.registerFunction<FunctionFarmFingerprint64>();
     factory.registerFunction<FunctionFarmHash64>();
@@ -7,8 +7,8 @@
 namespace DB
 {

-/** URL processing functions. See implementation in separate .cpp files.
-  * All functions are not strictly follow RFC, instead they are maximally simplified for performance reasons.
+/** These helpers are used by URL processing functions. See implementation in separate .cpp files.
+  * All functions do not strictly follow RFC, instead they are maximally simplified for performance reasons.
   *
   * Functions for extraction parts of URL.
   * If URL has nothing like, then empty string is returned.
@@ -101,7 +101,7 @@ struct ExtractSubstringImpl

     static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
     {
-        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by URL functions");
+        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function");
     }
 };

@@ -156,7 +156,7 @@ struct CutSubstringImpl

     static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
     {
-        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by URL functions");
+        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function");
     }
 };
@@ -5,7 +5,7 @@

 namespace DB
 {
-/** Tansform-type wrapper for DateTime64, simplifies DateTime64 support for given Transform.
+/** Transform-type wrapper for DateTime64, simplifies DateTime64 support for given Transform.
   *
   * Depending on what overloads of Transform::execute() are available, when called with DateTime64 value,
   * invokes Transform::execute() with either:
@@ -80,7 +80,10 @@ public:
         }
         else
         {
-            const auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier);
+            auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier);
+            if (t.value < 0 && components.fractional)
+                --components.whole;
+
             return wrapped_transform.execute(static_cast<Int64>(components.whole), std::forward<Args>(args)...);
         }
     }
@@ -1,8 +1,8 @@
 #pragma once

 #include <Functions/FunctionFactory.h>
-#include <Functions/URL/FunctionsURL.h>
 #include <Functions/FunctionHelpers.h>
+#include <Functions/StringHelpers.h>
 #include <DataTypes/DataTypeString.h>
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnFixedString.h>
@@ -1,7 +1,7 @@
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionStringToString.h>
+#include <Functions/StringHelpers.h>
 #include <base/find_symbols.h>
-#include "FunctionsURL.h"

 namespace DB
 {
@@ -1,7 +1,7 @@
 #pragma once

-#include "FunctionsURL.h"
 #include <base/find_symbols.h>
+#include <Functions/StringHelpers.h>

 namespace DB
 {
@@ -1,7 +1,7 @@
 #include <Common/StringUtils/StringUtils.h>
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionStringToString.h>
-#include <Functions/URL/FunctionsURL.h>
+#include <Functions/StringHelpers.h>


 namespace DB
@@ -154,4 +154,3 @@ REGISTER_FUNCTION(Netloc)
 }

 }
-
@@ -1,6 +1,6 @@
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionStringToString.h>
-#include "FunctionsURL.h"
+#include <Functions/StringHelpers.h>
 #include "path.h"
 #include <base/find_symbols.h>
@@ -1,7 +1,7 @@
 #pragma once

 #include <base/find_symbols.h>
-#include <Functions/URL/FunctionsURL.h>
+#include <Functions/StringHelpers.h>


 namespace DB
@@ -1,6 +1,6 @@
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionStringToString.h>
-#include "FunctionsURL.h"
+#include <Functions/StringHelpers.h>
 #include "path.h"
 #include <base/find_symbols.h>
@@ -1,7 +1,7 @@
 #pragma once

-#include "FunctionsURL.h"
 #include <Common/StringUtils/StringUtils.h>
+#include <Functions/StringHelpers.h>


 namespace DB
@@ -54,4 +54,3 @@ struct ExtractProtocol
 };

 }
-
@@ -1,7 +1,7 @@
 #pragma once

-#include "FunctionsURL.h"
 #include <base/find_symbols.h>
+#include <Functions/StringHelpers.h>


 namespace DB
@@ -1,7 +1,7 @@
 #pragma once

-#include "FunctionsURL.h"
 #include <base/find_symbols.h>
+#include <Functions/StringHelpers.h>


 namespace DB
@@ -34,4 +34,3 @@ struct ExtractQueryStringAndFragment
 };

 }
-
@@ -174,12 +174,13 @@ public:
     {
         auto res = static_cast<Int64>(transform_y.execute(y, timezone_y))
             - static_cast<Int64>(transform_x.execute(x, timezone_x));
-        DateLUTImpl::DateTimeComponents a_comp;
-        DateLUTImpl::DateTimeComponents b_comp;
+        DateTimeComponentsWithFractionalPart a_comp;
+        DateTimeComponentsWithFractionalPart b_comp;
         Int64 adjust_value;
-        auto x_seconds = TransformDateTime64<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(transform_x.getScaleMultiplier()).execute(x, timezone_x);
-        auto y_seconds = TransformDateTime64<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(transform_y.getScaleMultiplier()).execute(y, timezone_y);
-        if (x_seconds <= y_seconds)
+        auto x_microseconds = TransformDateTime64<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(transform_x.getScaleMultiplier()).execute(x, timezone_x);
+        auto y_microseconds = TransformDateTime64<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(transform_y.getScaleMultiplier()).execute(y, timezone_y);
+
+        if (x_microseconds <= y_microseconds)
         {
             a_comp = TransformDateTime64<ToDateTimeComponentsImpl>(transform_x.getScaleMultiplier()).execute(x, timezone_x);
             b_comp = TransformDateTime64<ToDateTimeComponentsImpl>(transform_y.getScaleMultiplier()).execute(y, timezone_y);
@@ -192,14 +193,16 @@ public:
             adjust_value = 1;
         }

         if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeYearNumImpl<ResultPrecision::Extended>>>)
         {
             if ((a_comp.date.month > b_comp.date.month)
                 || ((a_comp.date.month == b_comp.date.month) && ((a_comp.date.day > b_comp.date.day)
                     || ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour)
                         || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
-                            || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))
-                )))))
+                            || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
+                                || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
+                                    || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))))))
                 res += adjust_value;
         }
         else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeQuarterNumImpl<ResultPrecision::Extended>>>)
@@ -210,8 +213,9 @@ public:
                 || ((x_month_in_quarter == y_month_in_quarter) && ((a_comp.date.day > b_comp.date.day)
                     || ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour)
                         || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
-                            || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))
-                )))))
+                            || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
+                                || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
+                                    || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))))))
                 res += adjust_value;
         }
         else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeMonthNumImpl<ResultPrecision::Extended>>>)
@@ -219,8 +223,9 @@ public:
             if ((a_comp.date.day > b_comp.date.day)
                 || ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour)
                     || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
-                        || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))
-            )))
+                        || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
+                            || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
+                                || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))))
                 res += adjust_value;
         }
         else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeWeekNumImpl<ResultPrecision::Extended>>>)
@@ -230,25 +235,44 @@ public:
             if ((x_day_of_week > y_day_of_week)
                 || ((x_day_of_week == y_day_of_week) && (a_comp.time.hour > b_comp.time.hour))
                 || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
-                    || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))))
+                    || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
+                        || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
+                            || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))
                 res += adjust_value;
         }
         else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeDayNumImpl<ResultPrecision::Extended>>>)
         {
             if ((a_comp.time.hour > b_comp.time.hour)
                 || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
-                    || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))))
+                    || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
+                        || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
+                            || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))
                 res += adjust_value;
         }
         else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeHourNumImpl<ResultPrecision::Extended>>>)
         {
             if ((a_comp.time.minute > b_comp.time.minute)
-                || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))
+                || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
+                    || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
+                        || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))
                 res += adjust_value;
         }
         else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>>)
         {
-            if (a_comp.time.second > b_comp.time.second)
+            if ((a_comp.time.second > b_comp.time.second)
+                || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
+                    || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))
                 res += adjust_value;
         }
+        else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeSecondNumImpl<ResultPrecision::Extended>>>)
+        {
+            if ((a_comp.millisecond > b_comp.millisecond)
+                || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))
+                res += adjust_value;
+        }
+        else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeSubsecondNumImpl<1000>>>)
+        {
+            if (a_comp.microsecond > b_comp.microsecond)
+                res += adjust_value;
+        }
         return res;
@@ -373,6 +397,10 @@ public:
             impl.template dispatchForColumns<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
         else if (unit == "second" || unit == "ss" || unit == "s")
             impl.template dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
+        else if (unit == "millisecond" || unit == "ms")
+            impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<millisecond_multiplier>>(x, y, timezone_x, timezone_y, res->getData());
+        else if (unit == "microsecond" || unit == "us" || unit == "u")
+            impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(x, y, timezone_x, timezone_y, res->getData());
         else
             throw Exception(ErrorCodes::BAD_ARGUMENTS,
                 "Function {} does not support '{}' unit", getName(), unit);
src/Functions/firstLine.cpp (new file, 42 lines)
@@ -0,0 +1,42 @@
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionStringToString.h>
+#include <Functions/StringHelpers.h>
+#include <base/find_symbols.h>
+
+namespace DB
+{
+
+struct FirstLine
+{
+    static size_t getReserveLengthForElement() { return 16; }
+
+    static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
+    {
+        res_data = data;
+
+        const Pos end = data + size;
+        const Pos pos = find_first_symbols<'\r', '\n'>(data, end);
+        res_size = pos - data;
+    }
+};
+
+struct NameFirstLine
+{
+    static constexpr auto name = "firstLine";
+};
+
+using FunctionFirstLine = FunctionStringToString<ExtractSubstringImpl<FirstLine>, NameFirstLine>;
+
+REGISTER_FUNCTION(FirstLine)
+{
+    factory.registerFunction<FunctionFirstLine>(FunctionDocumentation{
+        .description = "Returns first line of a multi-line string.",
+        .syntax = "firstLine(string)",
+        .arguments = {{.name = "string", .description = "The string to process."}},
+        .returned_value = {"The first line of the string or the whole string if there is no line separators."},
+        .examples = {
+            {.name = "Return first line", .query = "firstLine('Hello\\nWorld')", .result = "'Hello'"},
+            {.name = "Return whole string", .query = "firstLine('Hello World')", .result = "'Hello World'"},
+        }});
+}
+}
@@ -119,7 +119,7 @@ public:

         if (!lhs_array->hasEqualOffsets(*rhs_array))
             throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
-                "The argument 1 and argument {} of function {} have different array offsets",
+                "The argument 2 and argument {} of function {} have different array offsets",
                 i + 1,
                 getName());
@@ -10,7 +10,6 @@
 #include <Functions/DateTimeTransforms.h>
 #include <Functions/FunctionFactory.h>
 #include <Functions/IFunction.h>
-#include <Functions/TransformDateTime64.h>
 #include <IO/WriteHelpers.h>
@@ -97,6 +97,10 @@ UInt128 AsynchronousInsertQueue::InsertQuery::calculateHash() const

     for (const auto & setting : settings.allChanged())
     {
+        /// We don't consider this setting because it is only for deduplication,
+        /// which means we can put two inserts with different tokens in the same block safely.
+        if (setting.getName() == "insert_deduplication_token")
+            continue;
         siphash.update(setting.getName());
         applyVisitor(FieldVisitorHash(siphash), setting.getValue());
     }
@@ -111,9 +115,10 @@ bool AsynchronousInsertQueue::InsertQuery::operator==(const InsertQuery & other)
     return query_str == other.query_str && settings == other.settings;
 }

-AsynchronousInsertQueue::InsertData::Entry::Entry(String && bytes_, String && query_id_, MemoryTracker * user_memory_tracker_)
+AsynchronousInsertQueue::InsertData::Entry::Entry(String && bytes_, String && query_id_, const String & async_dedup_token_, MemoryTracker * user_memory_tracker_)
     : bytes(std::move(bytes_))
     , query_id(std::move(query_id_))
+    , async_dedup_token(async_dedup_token_)
     , user_memory_tracker(user_memory_tracker_)
     , create_time(std::chrono::system_clock::now())
 {
@@ -227,7 +232,7 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
     /// to avoid buffering of huge amount of data in memory.

     auto read_buf = getReadBufferFromASTInsertQuery(query);
-    LimitReadBuffer limit_buf(*read_buf, settings.async_insert_max_data_size, /* trow_exception */ false, /* exact_limit */ {});
+    LimitReadBuffer limit_buf(*read_buf, settings.async_insert_max_data_size, /* throw_exception */ false, /* exact_limit */ {});

     WriteBufferFromString write_buf(bytes);
     copyData(limit_buf, write_buf);
@@ -253,7 +258,7 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
     if (auto quota = query_context->getQuota())
         quota->used(QuotaType::WRITTEN_BYTES, bytes.size());

-    auto entry = std::make_shared<InsertData::Entry>(std::move(bytes), query_context->getCurrentQueryId(), CurrentThread::getUserMemoryTracker());
+    auto entry = std::make_shared<InsertData::Entry>(std::move(bytes), query_context->getCurrentQueryId(), settings.insert_deduplication_token, CurrentThread::getUserMemoryTracker());

     InsertQuery key{query, settings};
     InsertDataPtr data_to_process;
@@ -517,7 +522,7 @@ try

     StreamingFormatExecutor executor(header, format, std::move(on_error), std::move(adding_defaults_transform));
     std::unique_ptr<ReadBuffer> last_buffer;
-    auto chunk_info = std::make_shared<ChunkOffsets>();
+    auto chunk_info = std::make_shared<AsyncInsertInfo>();
     for (const auto & entry : data->entries)
     {
         auto buffer = std::make_unique<ReadBufferFromString>(entry->bytes);
@@ -526,6 +531,7 @@ try
         size_t num_rows = executor.execute(*buffer);
         total_rows += num_rows;
         chunk_info->offsets.push_back(total_rows);
+        chunk_info->tokens.push_back(entry->async_dedup_token);

         /// Keep buffer, because it still can be used
         /// in destructor, while resetting buffer at next iteration.
@@ -69,10 +69,11 @@ private:
 public:
     String bytes;
     const String query_id;
+    const String async_dedup_token;
     MemoryTracker * const user_memory_tracker;
     const std::chrono::time_point<std::chrono::system_clock> create_time;

-    Entry(String && bytes_, String && query_id_, MemoryTracker * user_memory_tracker_);
+    Entry(String && bytes_, String && query_id_, const String & async_dedup_token, MemoryTracker * user_memory_tracker_);

     void finish(std::exception_ptr exception_ = nullptr);
     std::future<void> getFuture() { return promise.get_future(); }
@@ -98,6 +98,7 @@
 #include <Common/logger_useful.h>
 #include <base/EnumReflection.h>
 #include <Common/RemoteHostFilter.h>
+#include <Common/HTTPHeaderFilter.h>
 #include <Interpreters/AsynchronousInsertQueue.h>
 #include <Interpreters/DatabaseCatalog.h>
 #include <Interpreters/JIT/CompiledExpressionCache.h>
@@ -327,9 +328,10 @@ struct ContextSharedPart : boost::noncopyable
     OrdinaryBackgroundExecutorPtr fetch_executor;
     OrdinaryBackgroundExecutorPtr common_executor;

-    RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml
+    RemoteHostFilter remote_host_filter;            /// Allowed URL from config.xml
+    HTTPHeaderFilter http_header_filter;            /// Forbidden HTTP headers from config.xml

-    std::optional<TraceCollector> trace_collector; /// Thread collecting traces from threads executing queries
+    std::optional<TraceCollector> trace_collector;  /// Thread collecting traces from threads executing queries

     /// Clusters for distributed tables
     /// Initialized on demand (on distributed storages initialization) since Settings should be initialized
@@ -2963,6 +2965,16 @@ const RemoteHostFilter & Context::getRemoteHostFilter() const
     return shared->remote_host_filter;
 }

+void Context::setHTTPHeaderFilter(const Poco::Util::AbstractConfiguration & config)
+{
+    shared->http_header_filter.setValuesFromConfig(config);
+}
+
+const HTTPHeaderFilter & Context::getHTTPHeaderFilter() const
+{
+    return shared->http_header_filter;
+}
+
 UInt16 Context::getTCPPort() const
 {
     auto lock = getLock();
@@ -6,6 +6,7 @@
 #include <Common/isLocalAddress.h>
 #include <Common/MultiVersion.h>
 #include <Common/RemoteHostFilter.h>
+#include <Common/HTTPHeaderFilter.h>
 #include <Common/ThreadPool_fwd.h>
 #include <Common/Throttler_fwd.h>
 #include <Core/NamesAndTypes.h>
@@ -766,6 +767,10 @@ public:
     void setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config);
     const RemoteHostFilter & getRemoteHostFilter() const;

+    /// Storage of forbidden HTTP headers from config.xml
+    void setHTTPHeaderFilter(const Poco::Util::AbstractConfiguration & config);
+    const HTTPHeaderFilter & getHTTPHeaderFilter() const;
+
     /// The port that the server listens for executing SQL queries.
     UInt16 getTCPPort() const;
@@ -118,7 +118,10 @@ bool PredicateExpressionsOptimizer::tryRewritePredicatesToTables(ASTs & tables_e
         if (table_element->table_join && isLeft(table_element->table_join->as<ASTTableJoin>()->kind))
             continue;  /// Skip right table optimization

-        if (table_element->table_join && isFull(table_element->table_join->as<ASTTableJoin>()->kind))
+        if (table_element->table_join && (
+            isFull(table_element->table_join->as<ASTTableJoin>()->kind)
+            || table_element->table_join->as<ASTTableJoin>()->strictness == JoinStrictness::Asof
+            || table_element->table_join->as<ASTTableJoin>()->strictness == JoinStrictness::Anti))
             break;  /// Skip left and right table optimization

         is_rewrite_tables |= tryRewritePredicatesToTable(tables_element[table_pos], tables_predicates[table_pos],
@@ -114,16 +114,20 @@ private:

 using Chunks = std::vector<Chunk>;

-/// ChunkOffsets marks offsets of different sub-chunks, which will be used by async inserts.
-class ChunkOffsets : public ChunkInfo
+/// AsyncInsert needs two kinds of information:
+/// - offsets of different sub-chunks
+/// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`.
+class AsyncInsertInfo : public ChunkInfo
 {
 public:
-    ChunkOffsets() = default;
-    explicit ChunkOffsets(const std::vector<size_t> & offsets_) : offsets(offsets_) {}
+    AsyncInsertInfo() = default;
+    explicit AsyncInsertInfo(const std::vector<size_t> & offsets_, const std::vector<String> & tokens_) : offsets(offsets_), tokens(tokens_) {}

     std::vector<size_t> offsets;
+    std::vector<String> tokens;
 };

-using ChunkOffsetsPtr = std::shared_ptr<ChunkOffsets>;
+using AsyncInsertInfoPtr = std::shared_ptr<AsyncInsertInfo>;

 /// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults.
 class ChunkMissingValues : public ChunkInfo
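AsyncInsertInfo pairs each sub-chunk boundary with the deduplication token of the entry that produced it, so a flushed block can later be split back into per-entry pieces with per-entry tokens. A sketch of how the flush loop in AsynchronousInsertQueue fills it (the entry layout is simplified for illustration):

#include <iostream>
#include <string>
#include <vector>

// Simplified mirror of the AsyncInsertInfo chunk info declared above.
struct AsyncInsertInfo { std::vector<size_t> offsets; std::vector<std::string> tokens; };

int main()
{
    struct Entry { size_t rows; std::string token; };
    std::vector<Entry> entries = {{3, "a"}, {2, ""}, {5, "b"}};

    AsyncInsertInfo info;
    size_t total_rows = 0;
    for (const auto & e : entries)
    {
        total_rows += e.rows;                 // rows parsed from this entry
        info.offsets.push_back(total_rows);   // boundary of the sub-chunk
        info.tokens.push_back(e.token);       // insert_deduplication_token
    }
    std::cout << info.offsets.back() << '\n'; // 10
}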
@@ -75,7 +75,7 @@ public:
         {
             if (!allow_missing_columns)
                 throw Exception(
-                    ErrorCodes::THERE_IS_NO_COLUMN, "Not found field({}) in arrow schema:{}.", named_col.name, schema.ToString());
+                    ErrorCodes::THERE_IS_NO_COLUMN, "Not found field ({}) in the following Arrow schema:\n{}\n", named_col.name, schema.ToString());
             else
                 continue;
         }
@@ -168,4 +168,3 @@ private:
 };
 }
 #endif
-
@ -52,6 +52,8 @@
#include <Poco/Buffer.h>
#include <Poco/JSON/Object.h>
#include <Poco/JSON/Parser.h>
#include <Poco/Net/HTTPBasicCredentials.h>
#include <Poco/Net/HTTPCredentials.h>
#include <Poco/Net/HTTPRequest.h>
#include <Poco/Net/HTTPResponse.h>
#include <Poco/URI.h>
@ -934,6 +936,27 @@ private:
Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, url.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1);
request.setHost(url.getHost());

if (!url.getUserInfo().empty())
{
Poco::Net::HTTPCredentials http_credentials;
Poco::Net::HTTPBasicCredentials http_basic_credentials;

http_credentials.fromUserInfo(url.getUserInfo());

std::string decoded_username;
Poco::URI::decode(http_credentials.getUsername(), decoded_username);
http_basic_credentials.setUsername(decoded_username);

if (!http_credentials.getPassword().empty())
{
std::string decoded_password;
Poco::URI::decode(http_credentials.getPassword(), decoded_password);
http_basic_credentials.setPassword(decoded_password);
}

http_basic_credentials.authenticate(request);
}

auto session = makePooledHTTPSession(url, timeouts, 1);
session->sendRequest(request);
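The credential handling added above can be exercised in isolation with the same Poco calls; the host, port and path below are illustrative values, not taken from the change itself:

#include <iostream>
#include <Poco/Net/HTTPBasicCredentials.h>
#include <Poco/Net/HTTPCredentials.h>
#include <Poco/Net/HTTPRequest.h>
#include <Poco/URI.h>

int main()
{
    // A registry URL whose user name contains an encoded '/' (%2F),
    // matching the test_select_auth_encoded case further down.
    Poco::URI url("http://schemauser%2Fslash:letmein@schema-registry-auth:8081/subjects");

    Poco::Net::HTTPCredentials http_credentials;
    http_credentials.fromUserInfo(url.getUserInfo());

    std::string username, password;
    Poco::URI::decode(http_credentials.getUsername(), username); // "schemauser/slash"
    Poco::URI::decode(http_credentials.getPassword(), password); // "letmein"

    Poco::Net::HTTPBasicCredentials basic(username, password);
    Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, url.getPathAndQuery());
    basic.authenticate(request); // adds the "Authorization: Basic ..." header
    std::cout << request.get("Authorization") << '\n';
}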
@ -13,7 +13,8 @@ namespace ErrorCodes
extern const int CANNOT_SKIP_UNKNOWN_FIELD;
}

BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_)
template <bool with_defaults>
BinaryRowInputFormat<with_defaults>::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_)
: RowInputFormatWithNamesAndTypes(
header,
in_,
@ -22,16 +23,17 @@ BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, const Block & heade
with_names_,
with_types_,
format_settings_,
std::make_unique<BinaryFormatReader>(in_, format_settings_))
std::make_unique<BinaryFormatReader<with_defaults>>(in_, format_settings_))
{
}


BinaryFormatReader::BinaryFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesReader(in_, format_settings_)
template <bool with_defaults>
BinaryFormatReader<with_defaults>::BinaryFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesReader(in_, format_settings_)
{
}

std::vector<String> BinaryFormatReader::readHeaderRow()
template <bool with_defaults>
std::vector<String> BinaryFormatReader<with_defaults>::readHeaderRow()
{
std::vector<String> fields;
String field;
@ -43,13 +45,15 @@ std::vector<String> BinaryFormatReader::readHeaderRow()
return fields;
}

std::vector<String> BinaryFormatReader::readNames()
template <bool with_defaults>
std::vector<String> BinaryFormatReader<with_defaults>::readNames()
{
readVarUInt(read_columns, *in);
return readHeaderRow();
}

std::vector<String> BinaryFormatReader::readTypes()
template <bool with_defaults>
std::vector<String> BinaryFormatReader<with_defaults>::readTypes()
{
auto types = readHeaderRow();
for (const auto & type_name : types)
@ -57,26 +61,40 @@ std::vector<String> BinaryFormatReader::readTypes()
return types;
}

bool BinaryFormatReader::readField(IColumn & column, const DataTypePtr & /*type*/, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & /*column_name*/)
template <bool with_defaults>
bool BinaryFormatReader<with_defaults>::readField(IColumn & column, const DataTypePtr & /*type*/, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & /*column_name*/)
{
if constexpr (with_defaults)
{
UInt8 is_default;
readBinary(is_default, *in);
if (is_default)
{
column.insertDefault();
return false;
}
}
serialization->deserializeBinary(column, *in, format_settings);
return true;
}

void BinaryFormatReader::skipHeaderRow()
template <bool with_defaults>
void BinaryFormatReader<with_defaults>::skipHeaderRow()
{
String tmp;
for (size_t i = 0; i < read_columns; ++i)
readStringBinary(tmp, *in);
}

void BinaryFormatReader::skipNames()
template <bool with_defaults>
void BinaryFormatReader<with_defaults>::skipNames()
{
readVarUInt(read_columns, *in);
skipHeaderRow();
}

void BinaryFormatReader::skipTypes()
template <bool with_defaults>
void BinaryFormatReader<with_defaults>::skipTypes()
{
if (read_columns == 0)
{
@ -87,7 +105,8 @@ void BinaryFormatReader::skipTypes()
skipHeaderRow();
}

void BinaryFormatReader::skipField(size_t file_column)
template <bool with_defaults>
void BinaryFormatReader<with_defaults>::skipField(size_t file_column)
{
if (file_column >= read_data_types.size())
throw Exception(ErrorCodes::CANNOT_SKIP_UNKNOWN_FIELD,
@ -111,12 +130,21 @@ void registerInputFormatRowBinary(FormatFactory & factory)
const IRowInputFormat::Params & params,
const FormatSettings & settings)
{
return std::make_shared<BinaryRowInputFormat>(buf, sample, params, with_names, with_types, settings);
return std::make_shared<BinaryRowInputFormat<false>>(buf, sample, params, with_names, with_types, settings);
});
};

registerWithNamesAndTypes("RowBinary", register_func);
factory.registerFileExtension("bin", "RowBinary");

factory.registerInputFormat("RowBinaryWithDefaults", [](
ReadBuffer & buf,
const Block & sample,
const IRowInputFormat::Params & params,
const FormatSettings & settings)
{
return std::make_shared<BinaryRowInputFormat<true>>(buf, sample, params, false, false, settings);
});
}

void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory)
@ -125,6 +153,8 @@ void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory)
{
return std::make_shared<BinaryWithNamesAndTypesSchemaReader>(buf, settings);
});

}
@ -12,6 +12,7 @@ class ReadBuffer;

/** A stream for inputting data in a binary line-by-line format.
*/
template <bool with_defaults = false>
class BinaryRowInputFormat final : public RowInputFormatWithNamesAndTypes
{
public:
@ -25,6 +26,7 @@ public:
std::string getDiagnosticInfo() override { return {}; }
};

template <bool with_defaults = false>
class BinaryFormatReader final : public FormatWithNamesAndTypesReader
{
public:
@ -54,7 +56,7 @@ public:
BinaryWithNamesAndTypesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_);

private:
BinaryFormatReader reader;
BinaryFormatReader<false> reader;
};

}
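A hedged sketch of the framing that BinaryFormatReader<true>::readField implements above: one marker byte per field, 1 meaning "insert the column default", 0 meaning a serialized value follows. A fixed-width int32 stands in for ClickHouse's real per-type serialization:

#include <cstdint>
#include <iostream>
#include <optional>
#include <sstream>

// Write one field in the RowBinaryWithDefaults style: marker byte, then
// (optionally) the value.
static void writeField(std::ostream & out, std::optional<int32_t> value)
{
    uint8_t is_default = value.has_value() ? 0 : 1;
    out.write(reinterpret_cast<const char *>(&is_default), 1);
    if (value)
        out.write(reinterpret_cast<const char *>(&*value), sizeof(*value));
}

static int32_t readField(std::istream & in, int32_t column_default)
{
    uint8_t is_default = 0;
    in.read(reinterpret_cast<char *>(&is_default), 1);
    if (is_default)
        return column_default; // column.insertDefault() in the real reader
    int32_t value = 0;
    in.read(reinterpret_cast<char *>(&value), sizeof(value));
    return value;
}

int main()
{
    std::stringstream buf;
    writeField(buf, 42);           // explicit value
    writeField(buf, std::nullopt); // take the column default
    std::cout << readField(buf, -1) << ' ' << readField(buf, -1) << '\n'; // 42 -1
}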
@ -64,23 +64,131 @@ namespace ErrorCodes
}
namespace
{
/// Forward-declared to use in LSWithFoldedRegexpMatching w/o circular dependency.
std::vector<StorageHDFS::PathWithInfo> LSWithRegexpMatching(const String & path_for_ls,
const HDFSFSPtr & fs,
const String & for_match);

/*
* When `{...}` has any `/`s, it must be processed in a different way:
* Basically, a path with globs is processed by LSWithRegexpMatching. In case it detects a multi-dir glob {.../..., .../...},
* LSWithFoldedRegexpMatching is in charge from now on.
* It works a bit differently: it still recursively goes through subdirectories, but does not match every directory against the glob.
* Instead, it goes many levels down (until the approximate max_depth is reached) and compares this multi-dir path to the glob.
* StorageFile.cpp has the same logic.
*/
std::vector<StorageHDFS::PathWithInfo> LSWithFoldedRegexpMatching(const String & path_for_ls,
const HDFSFSPtr & fs,
const String & processed_suffix,
const String & suffix_with_globs,
re2::RE2 & matcher,
const size_t max_depth,
const size_t next_slash_after_glob_pos)
{
/// We don't need to go all the way down in every directory once max_depth is reached,
/// as it is an upper limit on depth obtained by simply counting the `/`s inside the curly braces.
if (!max_depth)
return {};

HDFSFileInfo ls;
ls.file_info = hdfsListDirectory(fs.get(), path_for_ls.data(), &ls.length);
if (ls.file_info == nullptr && errno != ENOENT) // NOLINT
{
// Ignore the file-not-found case but rethrow other errors; libhdfs3 has no function to get the exception type, so use errno.
throw Exception(
ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", path_for_ls, String(hdfsGetLastError()));
}

std::vector<StorageHDFS::PathWithInfo> result;

if (!ls.file_info && ls.length > 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null");

for (int i = 0; i < ls.length; ++i)
{
const String full_path = String(ls.file_info[i].mName);
const size_t last_slash = full_path.rfind('/');
const String dir_or_file_name = full_path.substr(last_slash);
const bool is_directory = ls.file_info[i].mKind == 'D';

if (re2::RE2::FullMatch(processed_suffix + dir_or_file_name, matcher))
{
if (next_slash_after_glob_pos == std::string::npos)
{
result.emplace_back(
String(ls.file_info[i].mName),
StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast<size_t>(ls.file_info[i].mSize)});
}
else
{
std::vector<StorageHDFS::PathWithInfo> result_part = LSWithRegexpMatching(
fs::path(full_path) / "" , fs, suffix_with_globs.substr(next_slash_after_glob_pos));
std::move(result_part.begin(), result_part.end(), std::back_inserter(result));
}
}
else if (is_directory)
{
std::vector<StorageHDFS::PathWithInfo> result_part = LSWithFoldedRegexpMatching(
fs::path(full_path), fs, processed_suffix + dir_or_file_name,
suffix_with_globs, matcher, max_depth - 1, next_slash_after_glob_pos);
std::move(result_part.begin(), result_part.end(), std::back_inserter(result));
}
}
return result;
}

/* Recursive directory listing with matched paths as a result.
* The same method exists in StorageFile.
*/
std::vector<StorageHDFS::PathWithInfo> LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, const String & for_match)
std::vector<StorageHDFS::PathWithInfo> LSWithRegexpMatching(
const String & path_for_ls,
const HDFSFSPtr & fs,
const String & for_match)
{
const size_t first_glob = for_match.find_first_of("*?{");
const size_t first_glob_pos = for_match.find_first_of("*?{");
const bool has_glob = first_glob_pos != std::string::npos;

const size_t end_of_path_without_globs = for_match.substr(0, first_glob).rfind('/');
const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/');
const String suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/'
const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/'

const size_t next_slash = suffix_with_globs.find('/', 1);
re2::RE2 matcher(makeRegexpPatternFromGlobs(suffix_with_globs.substr(0, next_slash)));
size_t slashes_in_glob = 0;
const size_t next_slash_after_glob_pos = [&]()
{
if (!has_glob)
return suffix_with_globs.find('/', 1);

size_t in_curly = 0;
for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++)
{
if (*it == '{')
++in_curly;
else if (*it == '/')
{
if (in_curly)
++slashes_in_glob;
else
return size_t(std::distance(suffix_with_globs.begin(), it));
}
else if (*it == '}')
--in_curly;
}
return std::string::npos;
}();

const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos);

re2::RE2 matcher(makeRegexpPatternFromGlobs(current_glob));
if (!matcher.ok())
throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
"Cannot compile regex from glob ({}): {}", for_match, matcher.error());

if (slashes_in_glob)
{
return LSWithFoldedRegexpMatching(fs::path(prefix_without_globs), fs, "", suffix_with_globs,
matcher, slashes_in_glob, next_slash_after_glob_pos);
}

HDFSFileInfo ls;
ls.file_info = hdfsListDirectory(fs.get(), prefix_without_globs.data(), &ls.length);
if (ls.file_info == nullptr && errno != ENOENT) // NOLINT
@ -97,7 +205,7 @@ namespace
const String full_path = String(ls.file_info[i].mName);
const size_t last_slash = full_path.rfind('/');
const String file_name = full_path.substr(last_slash);
const bool looking_for_directory = next_slash != std::string::npos;
const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos;
const bool is_directory = ls.file_info[i].mKind == 'D';
/// The kind of the current file_info tells what sort of path we are looking at in this iteration of ls.
if (!is_directory && !looking_for_directory)
@ -111,7 +219,7 @@ namespace
{
if (re2::RE2::FullMatch(file_name, matcher))
{
std::vector<StorageHDFS::PathWithInfo> result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, suffix_with_globs.substr(next_slash));
std::vector<StorageHDFS::PathWithInfo> result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, suffix_with_globs.substr(next_slash_after_glob_pos));
/// Recursion depth is limited by the pattern: '*' works only for depth = 1; for depth = 2 the pattern path is '*/*'. So we do not need an additional check.
std::move(result_part.begin(), result_part.end(), std::back_inserter(result));
}
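The slash-counting lambda above is easiest to see on a concrete path. This standalone sketch repeats the same scan with simplified types and shows how /pa{th1/a,th2/b}.csv yields slashes_in_glob = 2 with no slash left after the glob:

#include <cstddef>
#include <iostream>
#include <string>

// '/' characters inside {...} bump slashes_in_glob (an upper bound on the
// extra directory depth); the first '/' outside braces ends the current glob.
int main()
{
    const std::string suffix_with_globs = "/pa{th1/a,th2/b}.csv";

    size_t slashes_in_glob = 0;
    size_t next_slash_after_glob_pos = std::string::npos;
    size_t in_curly = 0;
    for (size_t i = 1; i < suffix_with_globs.size(); ++i)
    {
        char c = suffix_with_globs[i];
        if (c == '{')
            ++in_curly;
        else if (c == '}')
            --in_curly;
        else if (c == '/')
        {
            if (in_curly)
                ++slashes_in_glob;
            else
            {
                next_slash_after_glob_pos = i;
                break;
            }
        }
    }

    if (next_slash_after_glob_pos == std::string::npos)
        std::cout << "slashes_in_glob=" << slashes_in_glob << ", glob ends the path\n";
    else
        std::cout << "slashes_in_glob=" << slashes_in_glob << ", next '/' at " << next_slash_after_glob_pos << '\n';
    // Prints slashes_in_glob=2: the folded matcher descends up to 2 levels and
    // matches multi-dir paths like "th1/a" against the compiled regexp.
}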
@ -171,23 +171,23 @@ void MergeTreeDataWriter::TemporaryPart::finalize()
projection->getDataPartStorage().precommitTransaction();
}

std::vector<ChunkOffsetsPtr> scatterOffsetsBySelector(ChunkOffsetsPtr chunk_offsets, const IColumn::Selector & selector, size_t partition_num)
std::vector<AsyncInsertInfoPtr> scatterAsyncInsertInfoBySelector(AsyncInsertInfoPtr async_insert_info, const IColumn::Selector & selector, size_t partition_num)
{
if (nullptr == chunk_offsets)
if (nullptr == async_insert_info)
{
return {};
}
if (selector.empty())
{
return {chunk_offsets};
return {async_insert_info};
}
std::vector<ChunkOffsetsPtr> result(partition_num);
std::vector<AsyncInsertInfoPtr> result(partition_num);
std::vector<Int64> last_row_for_partition(partition_num, -1);
size_t offset_idx = 0;
for (size_t i = 0; i < selector.size(); ++i)
{
++last_row_for_partition[selector[i]];
if (i + 1 == chunk_offsets->offsets[offset_idx])
if (i + 1 == async_insert_info->offsets[offset_idx])
{
for (size_t part_id = 0; part_id < last_row_for_partition.size(); ++part_id)
{
@ -196,9 +196,12 @@ std::vector<ChunkOffsetsPtr> scatterOffsetsBySelector(ChunkOffs
continue;
size_t offset = static_cast<size_t>(last_row + 1);
if (result[part_id] == nullptr)
result[part_id] = std::make_shared<ChunkOffsets>();
result[part_id] = std::make_shared<AsyncInsertInfo>();
if (result[part_id]->offsets.empty() || offset > *result[part_id]->offsets.rbegin())
{
result[part_id]->offsets.push_back(offset);
result[part_id]->tokens.push_back(async_insert_info->tokens[offset_idx]);
}
}
++offset_idx;
}
@ -207,7 +210,7 @@ std::vector<ChunkOffsetsPtr> scatterOffsetsBySelector(ChunkOffs
}

BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(
const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, ChunkOffsetsPtr chunk_offsets)
const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, AsyncInsertInfoPtr async_insert_info)
{
BlocksWithPartition result;
if (!block || !block.rows())
@ -218,8 +221,11 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(
if (!metadata_snapshot->hasPartitionKey()) /// Table is not partitioned.
{
result.emplace_back(Block(block), Row{});
if (chunk_offsets != nullptr)
result[0].offsets = std::move(chunk_offsets->offsets);
if (async_insert_info != nullptr)
{
result[0].offsets = std::move(async_insert_info->offsets);
result[0].tokens = std::move(async_insert_info->tokens);
}
return result;
}

@ -236,7 +242,7 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(
IColumn::Selector selector;
buildScatterSelector(partition_columns, partition_num_to_first_row, selector, max_parts);

auto chunk_offsets_with_partition = scatterOffsetsBySelector(chunk_offsets, selector, partition_num_to_first_row.size());
auto async_insert_info_with_partition = scatterAsyncInsertInfoBySelector(async_insert_info, selector, partition_num_to_first_row.size());

size_t partitions_count = partition_num_to_first_row.size();
result.reserve(partitions_count);
@ -255,8 +261,11 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(
/// NOTE: returning a copy of the original block so that calculated partition key columns
/// do not interfere with possible calculated primary key columns of the same name.
result.emplace_back(Block(block), get_partition(0));
if (!chunk_offsets_with_partition.empty())
result[0].offsets = std::move(chunk_offsets_with_partition[0]->offsets);
if (!async_insert_info_with_partition.empty())
{
result[0].offsets = std::move(async_insert_info_with_partition[0]->offsets);
result[0].tokens = std::move(async_insert_info_with_partition[0]->tokens);
}
return result;
}

@ -270,8 +279,11 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(
result[i].block.getByPosition(col).column = std::move(scattered[i]);
}

for (size_t i = 0; i < chunk_offsets_with_partition.size(); ++i)
result[i].offsets = std::move(chunk_offsets_with_partition[i]->offsets);
for (size_t i = 0; i < async_insert_info_with_partition.size(); ++i)
{
result[i].offsets = std::move(async_insert_info_with_partition[i]->offsets);
result[i].tokens = std::move(async_insert_info_with_partition[i]->tokens);
}

return result;
}
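A miniature model of scatterAsyncInsertInfoBySelector, mirroring the first two-token case from the unit test below; std::vector stand-ins replace the ClickHouse types:

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Rows of one chunk are scattered into partitions by `selector`; each
// sub-chunk boundary is translated into per-partition offsets, carrying the
// sub-chunk's token along.
int main()
{
    std::vector<size_t> offsets = {5, 10};          // two sub-chunks of 5 rows
    std::vector<std::string> tokens = {"a", "b"};
    std::vector<size_t> selector = {0,1,0,1,0,1,0,1,0,1};
    size_t partition_num = 2;

    std::vector<std::vector<size_t>> part_offsets(partition_num);
    std::vector<std::vector<std::string>> part_tokens(partition_num);
    std::vector<long long> last_row(partition_num, -1);

    size_t offset_idx = 0;
    for (size_t i = 0; i < selector.size(); ++i)
    {
        ++last_row[selector[i]];
        if (i + 1 == offsets[offset_idx])
        {
            for (size_t p = 0; p < partition_num; ++p)
            {
                if (last_row[p] < 0)
                    continue;
                size_t offset = static_cast<size_t>(last_row[p] + 1);
                if (part_offsets[p].empty() || offset > part_offsets[p].back())
                {
                    part_offsets[p].push_back(offset);
                    part_tokens[p].push_back(tokens[offset_idx]);
                }
            }
            ++offset_idx;
        }
    }

    // Partition 0 ends up with offsets {3,5} / tokens {a,b}; partition 1 with {2,5} / {a,b}.
    for (size_t p = 0; p < partition_num; ++p)
    {
        std::cout << "part " << p << ":";
        for (size_t j = 0; j < part_offsets[p].size(); ++j)
            std::cout << " (" << part_offsets[p][j] << ",'" << part_tokens[p][j] << "')";
        std::cout << '\n';
    }
}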
@ -23,14 +23,15 @@ struct BlockWithPartition
Block block;
Row partition;
std::vector<size_t> offsets;
std::vector<String> tokens;

BlockWithPartition(Block && block_, Row && partition_)
: block(block_), partition(std::move(partition_))
{
}

BlockWithPartition(Block && block_, Row && partition_, std::vector<size_t> && offsets_)
: block(block_), partition(std::move(partition_)), offsets(std::move(offsets_))
BlockWithPartition(Block && block_, Row && partition_, std::vector<size_t> && offsets_, std::vector<String> && tokens_)
: block(block_), partition(std::move(partition_)), offsets(std::move(offsets_)), tokens(std::move(tokens_))
{
}
};
@ -51,7 +52,7 @@ public:
* (split rows by partition)
* Works deterministically: if the same block is passed, the function will return the same result in the same order.
*/
static BlocksWithPartition splitBlockIntoParts(const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, ChunkOffsetsPtr chunk_offsets = nullptr);
static BlocksWithPartition splitBlockIntoParts(const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, AsyncInsertInfoPtr async_insert_info = nullptr);

/// This structure contains a not completely written temporary part.
/// Some writes may happen asynchronously, e.g. for blob storages.
@ -78,7 +78,7 @@ struct ReplicatedMergeTreeSinkImpl<async_insert>::DelayedChunk
unmerged_block_with_partition(std::move(unmerged_block_with_partition_)),
part_counters(std::move(part_counters_))
{
initBlockIDMap();
initBlockIDMap();
}

void initBlockIDMap()
@ -209,8 +209,8 @@ std::vector<Int64> testSelfDeduplicate(std::vector<Int64> data, std::vector<size
column->insert(datum);
}
Block block({ColumnWithTypeAndName(std::move(column), DataTypePtr(new DataTypeInt64()), "a")});

BlockWithPartition block1(std::move(block), Row(), std::move(offsets));
std::vector<String> tokens(offsets.size());
BlockWithPartition block1(std::move(block), Row(), std::move(offsets), std::move(tokens));
ProfileEvents::Counters profile_counters;
ReplicatedMergeTreeSinkImpl<true>::DelayedChunk::Partition part(
&Poco::Logger::get("testSelfDeduplicate"), MergeTreeDataWriter::TemporaryPart(), 0, std::move(hashes), std::move(block1), std::nullopt, std::move(profile_counters));
@ -242,22 +242,29 @@ namespace
size_t start = 0;
auto cols = block.block.getColumns();
std::vector<String> block_id_vec;
for (auto offset : block.offsets)
for (size_t i = 0; i < block.offsets.size(); ++i)
{
SipHash hash;
for (size_t i = start; i < offset; ++i)
size_t offset = block.offsets[i];
std::string_view token = block.tokens[i];
if (token.empty())
{
for (const auto & col : cols)
col->updateHashWithValue(i, hash);
}
union
{
char bytes[16];
UInt64 words[2];
} hash_value;
hash.get128(hash_value.bytes);
SipHash hash;
for (size_t j = start; j < offset; ++j)
{
for (const auto & col : cols)
col->updateHashWithValue(j, hash);
}
union
{
char bytes[16];
UInt64 words[2];
} hash_value;
hash.get128(hash_value.bytes);

block_id_vec.push_back(partition_id + "_" + DB::toString(hash_value.words[0]) + "_" + DB::toString(hash_value.words[1]));
block_id_vec.push_back(partition_id + "_" + DB::toString(hash_value.words[0]) + "_" + DB::toString(hash_value.words[1]));
}
else
block_id_vec.push_back(partition_id + "_" + std::string(token));

start = offset;
}
@ -418,18 +425,18 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk chunk)
convertDynamicColumnsToTuples(block, storage_snapshot);

ChunkOffsetsPtr chunk_offsets;
AsyncInsertInfoPtr async_insert_info;

if constexpr (async_insert)
{
const auto & chunk_info = chunk.getChunkInfo();
if (const auto * chunk_offsets_ptr = typeid_cast<const ChunkOffsets *>(chunk_info.get()))
chunk_offsets = std::make_shared<ChunkOffsets>(chunk_offsets_ptr->offsets);
if (const auto * async_insert_info_ptr = typeid_cast<const AsyncInsertInfo *>(chunk_info.get()))
async_insert_info = std::make_shared<AsyncInsertInfo>(async_insert_info_ptr->offsets, async_insert_info_ptr->tokens);
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts");
}

auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context, chunk_offsets);
auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context, async_insert_info);

using DelayedPartition = typename ReplicatedMergeTreeSinkImpl<async_insert>::DelayedChunk::Partition;
using DelayedPartitions = std::vector<DelayedPartition>;
@ -453,7 +460,7 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk chunk)
{
/// We copy everything except the offsets, which we move because they are only used by async inserts.
if (settings.optimize_on_insert && storage.writer.getMergingMode() != MergeTreeData::MergingParams::Mode::Ordinary)
unmerged_block.emplace(Block(current_block.block), Row(current_block.partition), std::move(current_block.offsets));
unmerged_block.emplace(Block(current_block.block), Row(current_block.partition), std::move(current_block.offsets), std::move(current_block.tokens));
}

/// Write the part to the filesystem under a temporary name. Calculate a checksum.
@ -468,7 +475,6 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk chunk)

if constexpr (async_insert)
{
/// TODO consider insert_deduplication_token
block_id = getHashesForBlocks(unmerged_block.has_value() ? *unmerged_block : current_block, temp_part.part->info.partition_id);
LOG_TRACE(log, "async insert part, part id {}, block id {}, offsets {}, size {}", temp_part.part->info.partition_id, toString(block_id), toString(current_block.offsets), current_block.offsets.size());
}
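A sketch of the block-id scheme implemented above: an explicit insert_deduplication_token short-circuits hashing; otherwise the sub-chunk's rows are hashed. std::hash is an illustrative stand-in for ClickHouse's 128-bit SipHash, collapsed to a single word here:

#include <cstddef>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

static std::string blockId(const std::string & partition_id,
                           const std::vector<std::string> & rows,
                           const std::string & token)
{
    if (!token.empty())
        return partition_id + "_" + token; // client-pinned dedup token

    // Content-based id: combine row hashes (stand-in for SipHash over columns).
    size_t h = 0;
    for (const auto & row : rows)
        h ^= std::hash<std::string>{}(row) + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2);
    return partition_id + "_" + std::to_string(h);
}

int main()
{
    // Same rows yield the same id (content-based deduplication) ...
    std::cout << blockId("202307", {"r1", "r2"}, "") << '\n';
    std::cout << blockId("202307", {"r1", "r2"}, "") << '\n';
    // ... unless the client provided an explicit token.
    std::cout << blockId("202307", {"r1", "r2"}, "user-token-42") << '\n';
}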
@ -8,7 +8,7 @@

namespace DB {

std::vector<ChunkOffsetsPtr> scatterOffsetsBySelector(ChunkOffsetsPtr chunk_offsets, const IColumn::Selector & selector, size_t partition_num);
std::vector<AsyncInsertInfoPtr> scatterAsyncInsertInfoBySelector(AsyncInsertInfoPtr chunk_offsets, const IColumn::Selector & selector, size_t partition_num);

class AsyncInsertsTest : public ::testing::TestPartResult
{};
@ -16,31 +16,36 @@ class AsyncInsertsTest : public ::testing::TestPartResult

TEST(AsyncInsertsTest, testScatterOffsetsBySelector)
{
auto test_impl = [](std::vector<size_t> offsets, std::vector<size_t> selector_data, size_t part_num, std::vector<std::vector<size_t>> expected)
auto test_impl = [](std::vector<size_t> offsets, std::vector<size_t> selector_data, std::vector<String> tokens, size_t part_num, std::vector<std::vector<std::tuple<size_t, String>>> expected)
{
auto offset_ptr = std::make_shared<ChunkOffsets>(offsets);
auto offset_ptr = std::make_shared<AsyncInsertInfo>(offsets, tokens);
IColumn::Selector selector(selector_data.size());
size_t num_rows = selector_data.size();
for (size_t i = 0; i < num_rows; i++)
selector[i] = selector_data[i];

auto results = scatterOffsetsBySelector(offset_ptr, selector, part_num);
auto results = scatterAsyncInsertInfoBySelector(offset_ptr, selector, part_num);
ASSERT_EQ(results.size(), expected.size());
for (size_t i = 0; i < results.size(); i++)
{
auto result = results[i]->offsets;
auto result = results[i];
auto expect = expected[i];
ASSERT_EQ(result.size(), expect.size());
for (size_t j = 0; j < result.size(); j++)
ASSERT_EQ(result[j], expect[j]);
ASSERT_EQ(result->offsets.size(), expect.size());
ASSERT_EQ(result->tokens.size(), expect.size());
for (size_t j = 0; j < expect.size(); j++)
{
ASSERT_EQ(result->offsets[j], std::get<0>(expect[j]));
ASSERT_EQ(result->tokens[j], std::get<1>(expect[j]));
}
}
};

test_impl({5}, {0,1,0,1,0}, 2, {{3},{2}});
test_impl({5,10}, {0,1,0,1,0,1,0,1,0,1}, 2, {{3,5},{2,5}});
test_impl({4,8,12}, {0,1,0,1,0,2,0,2,1,2,1,2}, 3, {{2,4},{2,4},{2,4}});
test_impl({1,2,3,4,5}, {0,1,2,3,4}, 5, {{1},{1},{1},{1},{1}});
test_impl({3,6,10}, {1,1,1,2,2,2,0,0,0,0}, 3, {{4},{3},{3}});
test_impl({1}, {0}, {"a"}, 1, {{{1,"a"}}});
test_impl({5}, {0,1,0,1,0}, {"a"}, 2, {{{3,"a"}},{{2,"a"}}});
test_impl({5,10}, {0,1,0,1,0,1,0,1,0,1}, {"a", "b"}, 2, {{{3,"a"},{5,"b"}},{{2,"a"},{5,"b"}}});
test_impl({4,8,12}, {0,1,0,1,0,2,0,2,1,2,1,2}, {"a", "b", "c"}, 3, {{{2, "a"},{4, "b"}},{{2,"a"},{4,"c"}},{{2,"b"},{4,"c"}}});
test_impl({1,2,3,4,5}, {0,1,2,3,4}, {"a", "b", "c", "d", "e"}, 5, {{{1,"a"}},{{1,"b"}},{{1, "c"}},{{1, "d"}},{{1, "e"}}});
test_impl({3,6,10}, {1,1,1,2,2,2,0,0,0,0}, {"a", "b", "c"}, 3, {{{4, "c"}},{{3, "a"}},{{3, "b"}}});
}

std::vector<Int64> testSelfDeduplicate(std::vector<Int64> data, std::vector<size_t> offsets, std::vector<String> hashes);
@ -93,6 +93,65 @@ namespace ErrorCodes
namespace
{

/// Forward-declare to use in listFilesWithFoldedRegexpMatchingImpl()
void listFilesWithRegexpMatchingImpl(
const std::string & path_for_ls,
const std::string & for_match,
size_t & total_bytes_to_read,
std::vector<std::string> & result,
bool recursive = false);

/*
* When `{...}` has any `/`s, it must be processed in a different way:
* Basically, a path with globs is processed by listFilesWithRegexpMatchingImpl. In case it detects a multi-dir glob {.../..., .../...},
* listFilesWithFoldedRegexpMatchingImpl is in charge from now on.
* It works a bit differently: it still recursively goes through subdirectories, but does not match every directory against the glob.
* Instead, it goes many levels down (until the approximate max_depth is reached) and compares this multi-dir path to the glob.
* StorageHDFS.cpp has the same logic.
*/
void listFilesWithFoldedRegexpMatchingImpl(const std::string & path_for_ls,
const std::string & processed_suffix,
const std::string & suffix_with_globs,
re2::RE2 & matcher,
size_t & total_bytes_to_read,
const size_t max_depth,
const size_t next_slash_after_glob_pos,
std::vector<std::string> & result)
{
if (!max_depth)
return;

const fs::directory_iterator end;
for (fs::directory_iterator it(path_for_ls); it != end; ++it)
{
const std::string full_path = it->path().string();
const size_t last_slash = full_path.rfind('/');
const String dir_or_file_name = full_path.substr(last_slash);

if (re2::RE2::FullMatch(processed_suffix + dir_or_file_name, matcher))
{
if (next_slash_after_glob_pos == std::string::npos)
{
total_bytes_to_read += it->file_size();
result.push_back(it->path().string());
}
else
{
listFilesWithRegexpMatchingImpl(fs::path(full_path) / "" ,
suffix_with_globs.substr(next_slash_after_glob_pos),
total_bytes_to_read, result);
}
}
else if (it->is_directory())
{
listFilesWithFoldedRegexpMatchingImpl(fs::path(full_path), processed_suffix + dir_or_file_name,
suffix_with_globs, matcher, total_bytes_to_read,
max_depth - 1, next_slash_after_glob_pos, result);
}

}
}

/* Recursive directory listing with matched paths as a result.
* The same method exists in StorageHDFS.
*/
@ -101,15 +160,42 @@ void listFilesWithRegexpMatchingImpl(
const std::string & for_match,
size_t & total_bytes_to_read,
std::vector<std::string> & result,
bool recursive = false)
bool recursive)
{
const size_t first_glob = for_match.find_first_of("*?{");
const size_t first_glob_pos = for_match.find_first_of("*?{");
const bool has_glob = first_glob_pos != std::string::npos;

const size_t end_of_path_without_globs = for_match.substr(0, first_glob).rfind('/');
const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/');
const std::string suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/'

const size_t next_slash = suffix_with_globs.find('/', 1);
const std::string current_glob = suffix_with_globs.substr(0, next_slash);
/// The slashes_in_glob counter is an upper-bound estimate of the recursion depth,
/// needed to process complex cases when `/` is included into the glob, e.g. /pa{th1/a,th2/b}.csv
size_t slashes_in_glob = 0;
const size_t next_slash_after_glob_pos = [&]()
{
if (!has_glob)
return suffix_with_globs.find('/', 1);

size_t in_curly = 0;
for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++)
{
if (*it == '{')
++in_curly;
else if (*it == '/')
{
if (in_curly)
++slashes_in_glob;
else
return size_t(std::distance(suffix_with_globs.begin(), it));
}
else if (*it == '}')
--in_curly;
}
return std::string::npos;
}();

const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos);

auto regexp = makeRegexpPatternFromGlobs(current_glob);

re2::RE2 matcher(regexp);
@ -126,13 +212,22 @@ void listFilesWithRegexpMatchingImpl(
if (!fs::exists(prefix_without_globs))
return;

const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos;

if (slashes_in_glob)
{
listFilesWithFoldedRegexpMatchingImpl(fs::path(prefix_without_globs), "", suffix_with_globs,
matcher, total_bytes_to_read, slashes_in_glob,
next_slash_after_glob_pos, result);
return;
}

const fs::directory_iterator end;
for (fs::directory_iterator it(prefix_without_globs); it != end; ++it)
{
const std::string full_path = it->path().string();
const size_t last_slash = full_path.rfind('/');
const String file_name = full_path.substr(last_slash);
const bool looking_for_directory = next_slash != std::string::npos;

/// is_directory tells what kind of path we are looking at in this iteration of ls.
if (!it->is_directory() && !looking_for_directory)
@ -148,14 +243,12 @@ void listFilesWithRegexpMatchingImpl(
if (recursive)
{
listFilesWithRegexpMatchingImpl(fs::path(full_path).append(it->path().string()) / "" ,
looking_for_directory ? suffix_with_globs.substr(next_slash) : current_glob ,
looking_for_directory ? suffix_with_globs.substr(next_slash_after_glob_pos) : current_glob ,
total_bytes_to_read, result, recursive);
}
else if (looking_for_directory && re2::RE2::FullMatch(file_name, matcher))
{
/// Recursion depth is limited by the pattern: '*' works only for depth = 1; for depth = 2 the pattern path is '*/*'. So we do not need an additional check.
listFilesWithRegexpMatchingImpl(fs::path(full_path) / "", suffix_with_globs.substr(next_slash), total_bytes_to_read, result);
}
listFilesWithRegexpMatchingImpl(fs::path(full_path) / "", suffix_with_globs.substr(next_slash_after_glob_pos), total_bytes_to_read, result);
}
}
}
@ -1385,7 +1385,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks)
}

const UInt64 parts_to_fetch_blocks = std::accumulate(parts_to_fetch.cbegin(), parts_to_fetch.cend(), 0,
[&](UInt64 acc, const String& part_name)
[&](UInt64 acc, const String & part_name)
{
if (const auto part_info = MergeTreePartInfo::tryParsePartName(part_name, format_version))
return acc + part_info->getBlocksCount();
@ -974,6 +974,7 @@ StorageS3::StorageS3(

FormatFactory::instance().checkFormatName(configuration.format);
context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.url.uri);
context_->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration.headers_from_ast);

StorageInMemoryMetadata storage_metadata;
if (columns_.empty())
@ -44,6 +44,8 @@ StorageS3Cluster::StorageS3Cluster(
, s3_configuration{configuration_}
{
context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.url.uri);
context_->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration_.headers_from_ast);

StorageInMemoryMetadata storage_metadata;
updateConfigurationIfChanged(context_);
@ -1019,6 +1019,7 @@ StorageURL::StorageURL(
distributed_processing_)
{
context_->getRemoteHostFilter().checkURL(Poco::URI(uri));
context_->getHTTPHeaderFilter().checkHeaders(headers);
}
@ -48,6 +48,7 @@ StorageURLCluster::StorageURLCluster(
, uri(uri_)
{
context_->getRemoteHostFilter().checkURL(Poco::URI(uri));
context_->getHTTPHeaderFilter().checkHeaders(configuration_.headers);

StorageInMemoryMetadata storage_metadata;
@ -10,6 +10,7 @@

#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Formats/IInputFormat.h>
#include <Processors/Transforms/AddingDefaultsTransform.h>

#include <QueryPipeline/Pipe.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
@ -72,7 +73,17 @@ Block TableFunctionFormat::parseData(ColumnsDescription columns, ContextPtr cont

auto read_buf = std::make_unique<ReadBufferFromString>(data);
auto input_format = context->getInputFormat(format, *read_buf, block, context->getSettingsRef().max_block_size);
auto pipeline = std::make_unique<QueryPipeline>(input_format);
QueryPipelineBuilder builder;
builder.init(Pipe(input_format));
if (columns.hasDefaults())
{
builder.addSimpleTransform([&](const Block & header)
{
return std::make_shared<AddingDefaultsTransform>(header, columns, *input_format, context);
});
}

auto pipeline = std::make_unique<QueryPipeline>(QueryPipelineBuilder::getPipeline(std::move(builder)));
auto reader = std::make_unique<PullingPipelineExecutor>(*pipeline);

std::vector<Block> blocks;
@ -18,6 +18,8 @@
#include <Storages/NamedCollectionsHelpers.h>
#include <Formats/FormatFactory.h>
#include "registerTableFunctions.h"
#include <Analyzer/FunctionNode.h>
#include <Analyzer/TableFunctionNode.h>

#include <boost/algorithm/string.hpp>

@ -32,6 +34,24 @@ namespace ErrorCodes
}


std::vector<size_t> TableFunctionS3::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const
{
auto & table_function_node = query_node_table_function->as<TableFunctionNode &>();
auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes();
size_t table_function_arguments_size = table_function_arguments_nodes.size();

std::vector<size_t> result;

for (size_t i = 0; i < table_function_arguments_size; ++i)
{
auto * function_node = table_function_arguments_nodes[i]->as<FunctionNode>();
if (function_node && function_node->getFunctionName() == "headers")
result.push_back(i);
}

return result;
}

/// This is needed to avoid copy-paste, because s3Cluster arguments differ only in the additional (first) argument - the cluster name.
void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context)
{
@ -41,13 +61,14 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context
}
else
{
if (args.empty() || args.size() > 6)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature());

auto * header_it = StorageURL::collectHeaders(args, configuration.headers_from_ast, context);
if (header_it != args.end())
args.erase(header_it);

if (args.empty() || args.size() > 6)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature());

for (auto & arg : args)
arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);

@ -73,6 +73,10 @@ protected:

mutable StorageS3::Configuration configuration;
ColumnsDescription structure_hint;

private:

std::vector<size_t> skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override;
};

}
@ -246,6 +246,12 @@ def main():

if args.check_running_workflows:
workflows = get_workflows_for_head(repo, pr.head.sha)
logging.info(
"The PR #%s has the following workflows:\n%s",
pr.number,
"\n".join(f"{wf.html_url}: status is {wf.status}" for wf in workflows),
)

workflows_in_progress = [wf for wf in workflows if wf.status != "completed"]
# At most one workflow in progress is fine. We check that there are no
# cases when, e.g., PullRequestCI and DocsCheck are in progress at once
tests/config/config.d/forbidden_headers.xml (new file, 6 lines)
@ -0,0 +1,6 @@
<clickhouse>
<http_forbid_headers>
<header>exact_header</header>
<header_regexp>(?i)(case_insensitive_header)</header_regexp>
</http_forbid_headers>
</clickhouse>
@ -1,64 +1,14 @@
<clickhouse>
<storage_configuration>
<disks>
<!-- s3 disks -->
<s3_common_disk>
<s3_disk>
<type>s3</type>
<path>s3_common_disk/</path>
<path>s3_disk/</path>
<endpoint>http://localhost:11111/test/common/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_common_disk>
<s3_disk>
<type>s3</type>
<path>s3_disk/</path>
<endpoint>http://localhost:11111/test/00170_test/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_disk>
<s3_disk_2>
<type>s3</type>
<path>s3_disk_2/</path>
<endpoint>http://localhost:11111/test/00170_test/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_disk_2>
<s3_disk_3>
<type>s3</type>
<path>s3_disk_3/</path>
<endpoint>http://localhost:11111/test/00170_test/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_disk_3>
<s3_disk_4>
<type>s3</type>
<path>s3_disk_4/</path>
<endpoint>http://localhost:11111/test/00170_test/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_disk_4>
<s3_disk_5>
<type>s3</type>
<path>s3_disk_5/</path>
<endpoint>http://localhost:11111/test/00170_test/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_disk_5>
<s3_disk_6>
<type>s3</type>
<path>s3_disk_6/</path>
<endpoint>http://localhost:11111/test/00170_test/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_disk_6>
<!-- cache for s3 disks -->
<s3_cache>
<type>cache</type>
<disk>s3_disk</disk>
@ -67,65 +17,6 @@
<cache_on_write_operations>1</cache_on_write_operations>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache>
<s3_cache_2>
<type>cache</type>
<disk>s3_disk_2</disk>
<path>s3_cache_2/</path>
<max_size>128Mi</max_size>
<max_file_segment_size>100Mi</max_file_segment_size>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_2>
<s3_cache_3>
<type>cache</type>
<disk>s3_disk_3</disk>
<path>s3_disk_3_cache/</path>
<max_size>128Mi</max_size>
<data_cache_max_size>22548578304</data_cache_max_size>
<cache_on_write_operations>1</cache_on_write_operations>
<enable_cache_hits_threshold>1</enable_cache_hits_threshold>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_3>
<s3_cache_4>
<type>cache</type>
<disk>s3_disk_4</disk>
<path>s3_cache_4/</path>
<max_size>128Mi</max_size>
<cache_on_write_operations>1</cache_on_write_operations>
<enable_filesystem_query_cache_limit>1</enable_filesystem_query_cache_limit>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_4>
<s3_cache_5>
<type>cache</type>
<disk>s3_disk_5</disk>
<path>s3_cache_5/</path>
<max_size>128Mi</max_size>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_5>
<s3_cache_6>
<type>cache</type>
<disk>s3_disk_6</disk>
<path>s3_cache_6/</path>
<max_size>128Mi</max_size>
<enable_bypass_cache_with_threashold>1</enable_bypass_cache_with_threashold>
<bypass_cache_threashold>100</bypass_cache_threashold>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_6>
<s3_cache_small>
<type>cache</type>
<disk>s3_disk_6</disk>
<path>s3_cache_small/</path>
<max_size>1000</max_size>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_small>
<s3_cache_small_segment_size>
<type>cache</type>
<disk>s3_disk_6</disk>
<path>s3_cache_small_segment_size/</path>
<max_size>128Mi</max_size>
<max_file_segment_size>10Ki</max_file_segment_size>
<cache_on_write_operations>1</cache_on_write_operations>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_small_segment_size>
<!-- local disks -->
<local_disk>
<type>local_blob_storage</type>
@ -167,7 +58,7 @@
<!-- multi layer cache -->
<s3_cache_multi>
<type>cache</type>
<disk>s3_cache_5</disk>
<disk>s3_cache</disk>
<path>s3_cache_multi/</path>
<max_size>22548578304</max_size>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
@ -188,34 +79,6 @@
</main>
</volumes>
</s3_cache>
<s3_cache_2>
<volumes>
<main>
<disk>s3_cache_2</disk>
</main>
</volumes>
</s3_cache_2>
<s3_cache_3>
<volumes>
<main>
<disk>s3_cache_3</disk>
</main>
</volumes>
</s3_cache_3>
<s3_cache_4>
<volumes>
<main>
<disk>s3_cache_4</disk>
</main>
</volumes>
</s3_cache_4>
<s3_cache_6>
<volumes>
<main>
<disk>s3_cache_6</disk>
</main>
</volumes>
</s3_cache_6>
<s3_cache_multi>
<volumes>
<main>
@ -223,13 +86,6 @@
</main>
</volumes>
</s3_cache_multi>
<s3_cache_small>
<volumes>
<main>
<disk>s3_cache_small</disk>
</main>
</volumes>
</s3_cache_small>
<local_cache>
<volumes>
<main>
@ -251,13 +107,6 @@
</main>
</volumes>
</local_cache_3>
<s3_cache_small_segment_size>
<volumes>
<main>
<disk>s3_cache_small_segment_size</disk>
</main>
</volumes>
</s3_cache_small_segment_size>
</policies>
</storage_configuration>
</clickhouse>
@ -51,6 +51,7 @@ ln -sf $SRC_PATH/config.d/session_log.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/system_unfreeze.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/enable_zero_copy_replication.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/nlp.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/forbidden_headers.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/enable_keeper_map.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/custom_disks_base_path.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/display_name.xml $DEST_SERVER_PATH/config.d/
@ -486,6 +486,8 @@ class ClickHouseCluster:
self.kafka_docker_id = None
self.schema_registry_host = "schema-registry"
self._schema_registry_port = 0
self.schema_registry_auth_host = "schema-registry-auth"
self._schema_registry_auth_port = 0
self.kafka_docker_id = self.get_instance_docker_id(self.kafka_host)

self.coredns_host = "coredns"
@ -657,6 +659,13 @@ class ClickHouseCluster:
self._schema_registry_port = get_free_port()
return self._schema_registry_port

@property
def schema_registry_auth_port(self):
if self._schema_registry_auth_port:
return self._schema_registry_auth_port
self._schema_registry_auth_port = get_free_port()
return self._schema_registry_auth_port

@property
def kerberized_kafka_port(self):
if self._kerberized_kafka_port:
@ -1163,8 +1172,11 @@ class ClickHouseCluster:
self.with_kafka = True
env_variables["KAFKA_HOST"] = self.kafka_host
env_variables["KAFKA_EXTERNAL_PORT"] = str(self.kafka_port)
env_variables["SCHEMA_REGISTRY_DIR"] = instance.path + "/"
env_variables["SCHEMA_REGISTRY_EXTERNAL_PORT"] = str(self.schema_registry_port)
env_variables["SCHEMA_REGISTRY_INTERNAL_PORT"] = "8081"
env_variables["SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT"] = str(
self.schema_registry_auth_port
)
self.base_cmd.extend(
["--file", p.join(docker_compose_yml_dir, "docker_compose_kafka.yml")]
)
@ -1498,6 +1510,7 @@ class ClickHouseCluster:
with_kafka=False,
with_kerberized_kafka=False,
with_kerberos_kdc=False,
with_secrets=False,
with_rabbitmq=False,
with_nats=False,
clickhouse_path_dir=None,
@ -1604,6 +1617,10 @@ class ClickHouseCluster:
with_nats=with_nats,
with_nginx=with_nginx,
with_kerberized_hdfs=with_kerberized_hdfs,
with_secrets=with_secrets
or with_kerberized_hdfs
or with_kerberos_kdc
or with_kerberized_kafka,
with_mongo=with_mongo or with_mongo_secure,
with_meili=with_meili,
with_redis=with_redis,
@ -2493,20 +2510,27 @@ class ClickHouseCluster:
raise Exception("Can't wait Azurite to start")

def wait_schema_registry_to_start(self, timeout=180):
sr_client = CachedSchemaRegistryClient(
{"url": "http://localhost:{}".format(self.schema_registry_port)}
)
start = time.time()
while time.time() - start < timeout:
try:
sr_client._send_request(sr_client.url)
logging.debug("Connected to SchemaRegistry")
return sr_client
except Exception as ex:
logging.debug(("Can't connect to SchemaRegistry: %s", str(ex)))
time.sleep(1)
for port in self.schema_registry_port, self.schema_registry_auth_port:
reg_url = "http://localhost:{}".format(port)
arg = {"url": reg_url}
sr_client = CachedSchemaRegistryClient(arg)

raise Exception("Can't wait Schema Registry to start")
start = time.time()
sr_started = False
sr_auth_started = False
while time.time() - start < timeout:
try:
sr_client._send_request(sr_client.url)
logging.debug("Connected to SchemaRegistry")
# don't care about possible auth errors
sr_started = True
break
except Exception as ex:
logging.debug(("Can't connect to SchemaRegistry: %s", str(ex)))
time.sleep(1)

if not sr_started:
raise Exception("Can't wait Schema Registry to start")

def wait_cassandra_to_start(self, timeout=180):
self.cassandra_ip = self.get_instance_ip(self.cassandra_host)
@ -3135,6 +3159,7 @@ class ClickHouseInstance:
with_nats,
with_nginx,
with_kerberized_hdfs,
with_secrets,
with_mongo,
with_meili,
with_redis,
@ -3197,7 +3222,7 @@ class ClickHouseInstance:
if clickhouse_path_dir
else None
)
self.kerberos_secrets_dir = p.abspath(p.join(base_path, "secrets"))
self.secrets_dir = p.abspath(p.join(base_path, "secrets"))
self.macros = macros if macros is not None else {}
self.with_zookeeper = with_zookeeper
self.zookeeper_config_path = zookeeper_config_path
@ -3220,6 +3245,7 @@ class ClickHouseInstance:
self.with_nats = with_nats
self.with_nginx = with_nginx
self.with_kerberized_hdfs = with_kerberized_hdfs
self.with_secrets = with_secrets
self.with_mongo = with_mongo
self.with_meili = with_meili
self.with_redis = with_redis
@ -4217,17 +4243,16 @@ class ClickHouseInstance:
if self.with_zookeeper:
shutil.copy(self.zookeeper_config_path, conf_d_dir)

if (
self.with_kerberized_kafka
or self.with_kerberized_hdfs
or self.with_kerberos_kdc
):
if self.with_secrets:
if self.with_kerberos_kdc:
base_secrets_dir = self.cluster.instances_dir
else:
base_secrets_dir = self.path
from_dir = self.secrets_dir
to_dir = p.abspath(p.join(base_secrets_dir, "secrets"))
logging.debug(f"Copy secret from {from_dir} to {to_dir}")
shutil.copytree(
self.kerberos_secrets_dir,
self.secrets_dir,
p.abspath(p.join(base_secrets_dir, "secrets")),
dirs_exist_ok=True,
)
@ -0,0 +1,3 @@
schemauser: MD5:0d107d09f5bbe40cade3de5c71e9e9b7,user
schemauser/slash: MD5:0d107d09f5bbe40cade3de5c71e9e9b7,user
complexschemauser: MD5:fcaeda86837fcd37755044e7258edc5d,user
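The digests above follow Jetty's PropertyFileLoginModule format, user: MD5:<hex>,role. The first two entries appear to be the MD5 of the password "letmein" used by the tests further down; a short OpenSSL sketch can verify this (assumes OpenSSL headers are available):

#include <openssl/evp.h>
#include <cstdio>
#include <cstring>

int main()
{
    // Hash the test password and print it as lowercase hex.
    const char * password = "letmein";
    unsigned char md[EVP_MAX_MD_SIZE];
    unsigned int md_len = 0;
    EVP_Digest(password, std::strlen(password), md, &md_len, EVP_md5(), nullptr);
    for (unsigned int i = 0; i < md_len; ++i)
        std::printf("%02x", md[i]);
    std::printf("\n"); // expected: 0d107d09f5bbe40cade3de5c71e9e9b7
}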
@ -0,0 +1,5 @@
RealmFooBar {
org.eclipse.jetty.jaas.spi.PropertyFileLoginModule required
file="/etc/schema-registry/secrets/password"
debug="true";
};
@ -1,5 +1,6 @@
import io
import logging
import time

import avro.schema
import pytest
@ -8,13 +9,14 @@ from confluent_kafka.avro.cached_schema_registry_client import (
)
from confluent_kafka.avro.serializer.message_serializer import MessageSerializer
from helpers.cluster import ClickHouseCluster, ClickHouseInstance
from urllib import parse


@pytest.fixture(scope="module")
def started_cluster():
try:
cluster = ClickHouseCluster(__file__)
cluster.add_instance("dummy", with_kafka=True)
cluster.add_instance("dummy", with_kafka=True, with_secrets=True)
logging.info("Starting cluster...")
cluster.start()
logging.info("Cluster started")
@ -40,14 +42,15 @@ def run_query(instance, query, data=None, settings=None):
def test_select(started_cluster):
# type: (ClickHouseCluster) -> None

schema_registry_client = CachedSchemaRegistryClient(
"http://localhost:{}".format(started_cluster.schema_registry_port)
)
reg_url = "http://localhost:{}".format(started_cluster.schema_registry_port)
arg = {"url": reg_url}

schema_registry_client = CachedSchemaRegistryClient(arg)
serializer = MessageSerializer(schema_registry_client)

schema = avro.schema.make_avsc_object(
{
"name": "test_record",
"name": "test_record1",
"type": "record",
"fields": [{"name": "value", "type": "long"}],
}
@ -56,14 +59,14 @@ def test_select(started_cluster):
buf = io.BytesIO()
for x in range(0, 3):
message = serializer.encode_record_with_schema(
"test_subject", schema, {"value": x}
"test_subject1", schema, {"value": x}
)
buf.write(message)
data = buf.getvalue()

instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
schema_registry_url = "http://{}:{}".format(
started_cluster.schema_registry_host, 8081
started_cluster.schema_registry_host, started_cluster.schema_registry_port
)

run_query(instance, "create table avro_data(value Int64) engine = Memory()")
@ -75,3 +78,164 @@ def test_select(started_cluster):
["1"],
["2"],
]


def test_select_auth(started_cluster):
# type: (ClickHouseCluster) -> None

reg_url = "http://localhost:{}".format(started_cluster.schema_registry_auth_port)
arg = {
"url": reg_url,
"basic.auth.credentials.source": "USER_INFO",
"basic.auth.user.info": "schemauser:letmein",
}

schema_registry_client = CachedSchemaRegistryClient(arg)
serializer = MessageSerializer(schema_registry_client)

schema = avro.schema.make_avsc_object(
{
"name": "test_record_auth",
"type": "record",
"fields": [{"name": "value", "type": "long"}],
}
)

buf = io.BytesIO()
for x in range(0, 3):
message = serializer.encode_record_with_schema(
"test_subject_auth", schema, {"value": x}
)
buf.write(message)
data = buf.getvalue()

instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
schema_registry_url = "http://{}:{}@{}:{}".format(
"schemauser",
"letmein",
started_cluster.schema_registry_auth_host,
started_cluster.schema_registry_auth_port,
)

run_query(instance, "create table avro_data_auth(value Int64) engine = Memory()")
settings = {"format_avro_schema_registry_url": schema_registry_url}
run_query(
instance, "insert into avro_data_auth format AvroConfluent", data, settings
)
stdout = run_query(instance, "select * from avro_data_auth")
assert list(map(str.split, stdout.splitlines())) == [
["0"],
["1"],
["2"],
]


def test_select_auth_encoded(started_cluster):
# type: (ClickHouseCluster) -> None

reg_url = "http://localhost:{}".format(started_cluster.schema_registry_auth_port)
arg = {
"url": reg_url,
"basic.auth.credentials.source": "USER_INFO",
"basic.auth.user.info": "schemauser:letmein",
}

schema_registry_client = CachedSchemaRegistryClient(arg)
serializer = MessageSerializer(schema_registry_client)

schema = avro.schema.make_avsc_object(
{
"name": "test_record_auth_encoded",
"type": "record",
"fields": [{"name": "value", "type": "long"}],
}
)

buf = io.BytesIO()
for x in range(0, 3):
message = serializer.encode_record_with_schema(
"test_subject_auth_encoded", schema, {"value": x}
)
buf.write(message)
data = buf.getvalue()

instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
schema_registry_url = "http://{}:{}@{}:{}".format(
parse.quote_plus("schemauser/slash"),
parse.quote_plus("letmein"),
started_cluster.schema_registry_auth_host,
started_cluster.schema_registry_auth_port,
|
||||
)
|
||||
|
||||
run_query(
|
||||
instance, "create table avro_data_auth_encoded(value Int64) engine = Memory()"
|
||||
)
|
||||
settings = {"format_avro_schema_registry_url": schema_registry_url}
|
||||
run_query(
|
||||
instance,
|
||||
"insert into avro_data_auth_encoded format AvroConfluent",
|
||||
data,
|
||||
settings,
|
||||
)
|
||||
stdout = run_query(instance, "select * from avro_data_auth_encoded")
|
||||
assert list(map(str.split, stdout.splitlines())) == [
|
||||
["0"],
|
||||
["1"],
|
||||
["2"],
|
||||
]
|
||||
|
||||
|
||||
def test_select_auth_encoded_complex(started_cluster):
|
||||
# type: (ClickHouseCluster) -> None
|
||||
|
||||
reg_url = "http://localhost:{}".format(started_cluster.schema_registry_auth_port)
|
||||
arg = {
|
||||
"url": reg_url,
|
||||
"basic.auth.credentials.source": "USER_INFO",
|
||||
"basic.auth.user.info": "schemauser:letmein",
|
||||
}
|
||||
|
||||
schema_registry_client = CachedSchemaRegistryClient(arg)
|
||||
serializer = MessageSerializer(schema_registry_client)
|
||||
|
||||
schema = avro.schema.make_avsc_object(
|
||||
{
|
||||
"name": "test_record_auth_encoded_complex",
|
||||
"type": "record",
|
||||
"fields": [{"name": "value", "type": "long"}],
|
||||
}
|
||||
)
|
||||
|
||||
buf = io.BytesIO()
|
||||
for x in range(0, 3):
|
||||
message = serializer.encode_record_with_schema(
|
||||
"test_subject_auth_encoded_complex", schema, {"value": x}
|
||||
)
|
||||
buf.write(message)
|
||||
data = buf.getvalue()
|
||||
|
||||
instance = started_cluster.instances["dummy"] # type: ClickHouseInstance
|
||||
schema_registry_url = "http://{}:{}@{}:{}".format(
|
||||
parse.quote_plus("complexschemauser"),
|
||||
parse.quote_plus("letmein%@:/"),
|
||||
started_cluster.schema_registry_auth_host,
|
||||
started_cluster.schema_registry_auth_port,
|
||||
)
|
||||
|
||||
run_query(
|
||||
instance,
|
||||
"create table avro_data_auth_encoded_complex(value Int64) engine = Memory()",
|
||||
)
|
||||
settings = {"format_avro_schema_registry_url": schema_registry_url}
|
||||
run_query(
|
||||
instance,
|
||||
"insert into avro_data_auth_encoded_complex format AvroConfluent",
|
||||
data,
|
||||
settings,
|
||||
)
|
||||
stdout = run_query(instance, "select * from avro_data_auth_encoded_complex")
|
||||
assert list(map(str.split, stdout.splitlines())) == [
|
||||
["0"],
|
||||
["1"],
|
||||
["2"],
|
||||
]
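
Note: the three auth tests above differ mainly in how the credentials are embedded in format_avro_schema_registry_url. A bare user:password pair works only when neither part contains URL metacharacters; otherwise both must be percent-encoded first, which is what parse.quote_plus does in the tests:

from urllib import parse

# Percent-encoding of the credentials used in the tests above;
# '/', '%', '@' and ':' would otherwise break URL parsing.
print(parse.quote_plus("schemauser/slash"))  # schemauser%2Fslash
print(parse.quote_plus("letmein%@:/"))       # letmein%25%40%3A%2F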

@ -1,6 +1,10 @@
<clickhouse>
    <storage_configuration>
        <disks>
            <disk_hdfs>
                <type>hdfs</type>
                <endpoint>hdfs://hdfs1:9000/</endpoint>
            </disk_hdfs>
            <log_local>
                <type>local</type>
                <path>/var/lib/clickhouse/coordination/logs/</path>

@ -9,7 +9,11 @@ import os
CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__))
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance(
    "node", main_configs=["configs/enable_keeper.xml"], stay_alive=True, with_minio=True
    "node",
    main_configs=["configs/enable_keeper.xml"],
    stay_alive=True,
    with_minio=True,
    with_hdfs=True,
)

from kazoo.client import KazooClient, KazooState
@ -117,6 +121,12 @@ def get_local_snapshots():
    return get_local_files("/var/lib/clickhouse/coordination/snapshots")


def test_supported_disk_types(started_cluster):
    node.stop_clickhouse()
    node.start_clickhouse()
    assert node.contains_in_log("Disk type 'hdfs' is not supported for Keeper")


def test_logs_with_disks(started_cluster):
    setup_local_storage(started_cluster)

@ -39,257 +39,261 @@ def test_lost_part_same_replica(start_cluster):
    node1.query("DROP TABLE IF EXISTS mt0 SYNC")
    node2.query("DROP TABLE IF EXISTS mt0 SYNC")

    for node in [node1, node2]:
        node.query(
            f"CREATE TABLE mt0 (id UInt64, date Date) ENGINE ReplicatedMergeTree('/clickhouse/tables/t', '{node.name}') ORDER BY tuple() PARTITION BY date "
            "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0,"
            "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000"
        )
    try:
        for node in [node1, node2]:
            node.query(
                f"CREATE TABLE mt0 (id UInt64, date Date) ENGINE ReplicatedMergeTree('/clickhouse/tables/t', '{node.name}') ORDER BY tuple() PARTITION BY date "
                "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0,"
                "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000"
            )

        node1.query("SYSTEM STOP MERGES mt0")
        node2.query("SYSTEM STOP REPLICATION QUEUES")
        node1.query("SYSTEM STOP MERGES mt0")
        node2.query("SYSTEM STOP REPLICATION QUEUES")

        for i in range(5):
            node1.query(f"INSERT INTO mt0 VALUES ({i}, toDate('2020-10-01'))")
        for i in range(5):
            node1.query(f"INSERT INTO mt0 VALUES ({i}, toDate('2020-10-01'))")

        for i in range(20):
            parts_to_merge = node1.query(
                "SELECT parts_to_merge FROM system.replication_queue WHERE table='mt0' AND length(parts_to_merge) > 0"
            )
            if parts_to_merge:
                parts_list = list(sorted(ast.literal_eval(parts_to_merge)))
                print("Got parts list", parts_list)
                if len(parts_list) < 3:
                    raise Exception(f"Got too small parts list {parts_list}")
                break
            time.sleep(1)
        for i in range(20):
            parts_to_merge = node1.query(
                "SELECT parts_to_merge FROM system.replication_queue WHERE table='mt0' AND length(parts_to_merge) > 0"
            )
            if parts_to_merge:
                parts_list = list(sorted(ast.literal_eval(parts_to_merge)))
                print("Got parts list", parts_list)
                if len(parts_list) < 3:
                    raise Exception(f"Got too small parts list {parts_list}")
                break
            time.sleep(1)

        victim_part_from_the_middle = random.choice(parts_list[1:-1])
        print("Will corrupt part", victim_part_from_the_middle)
        victim_part_from_the_middle = random.choice(parts_list[1:-1])
        print("Will corrupt part", victim_part_from_the_middle)

        remove_part_from_disk(node1, "mt0", victim_part_from_the_middle)
        remove_part_from_disk(node1, "mt0", victim_part_from_the_middle)

        node1.query("DETACH TABLE mt0")
        node1.query("DETACH TABLE mt0")

        node1.query("ATTACH TABLE mt0")
        node1.query("ATTACH TABLE mt0")

        node1.query("SYSTEM START MERGES mt0")
        res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt0")
        print("result: ", res)
        print("error: ", err)
        node1.query("SYSTEM START MERGES mt0")
        res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt0")
        print("result: ", res)
        print("error: ", err)

        for i in range(10):
            result = node1.query("SELECT count() FROM system.replication_queue")
            if int(result) == 0:
                break
            time.sleep(1)
        else:
            assert False, "Still have something in replication queue:\n" + node1.query(
                "SELECT count() FROM system.replication_queue FORMAT Vertical"
            )
        for i in range(10):
            result = node1.query("SELECT count() FROM system.replication_queue")
            if int(result) == 0:
                break
            time.sleep(1)
        else:
            assert False, "Still have something in replication queue:\n" + node1.query(
                "SELECT count() FROM system.replication_queue FORMAT Vertical"
            )

        assert node1.contains_in_log(
            "Created empty part"
        ), f"Seems like empty part {victim_part_from_the_middle} is not created or log message changed"
        assert node1.contains_in_log(
            "Created empty part"
        ), f"Seems like empty part {victim_part_from_the_middle} is not created or log message changed"

        assert node1.query("SELECT COUNT() FROM mt0") == "4\n"
        assert node1.query("SELECT COUNT() FROM mt0") == "4\n"

        node2.query("SYSTEM START REPLICATION QUEUES")
        node2.query("SYSTEM START REPLICATION QUEUES")

        assert_eq_with_retry(node2, "SELECT COUNT() FROM mt0", "4")
        assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0")

        node1.query("DROP TABLE IF EXISTS mt0 SYNC")
        node2.query("DROP TABLE IF EXISTS mt0 SYNC")
        assert_eq_with_retry(node2, "SELECT COUNT() FROM mt0", "4")
        assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0")
    finally:
        node1.query("DROP TABLE IF EXISTS mt0 SYNC")
        node2.query("DROP TABLE IF EXISTS mt0 SYNC")

def test_lost_part_other_replica(start_cluster):
    node1.query("DROP TABLE IF EXISTS mt1 SYNC")
    node2.query("DROP TABLE IF EXISTS mt1 SYNC")

    for node in [node1, node2]:
        node.query(
            f"CREATE TABLE mt1 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t1', '{node.name}') ORDER BY tuple() "
            "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0,"
            "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000"
    try:
        for node in [node1, node2]:
            node.query(
                f"CREATE TABLE mt1 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t1', '{node.name}') ORDER BY tuple() "
                "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0,"
                "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000"
            )

        node1.query("SYSTEM STOP MERGES mt1")
        node2.query("SYSTEM STOP REPLICATION QUEUES")

        for i in range(5):
            node1.query(f"INSERT INTO mt1 VALUES ({i})")

        for i in range(20):
            parts_to_merge = node1.query(
                "SELECT parts_to_merge FROM system.replication_queue WHERE table='mt1' AND length(parts_to_merge) > 0"
            )
            if parts_to_merge:
                parts_list = list(sorted(ast.literal_eval(parts_to_merge)))
                print("Got parts list", parts_list)
                if len(parts_list) < 3:
                    raise Exception("Got too small parts list {}".format(parts_list))
                break
            time.sleep(1)

        victim_part_from_the_middle = random.choice(parts_list[1:-1])
        print("Will corrupt part", victim_part_from_the_middle)

        remove_part_from_disk(node1, "mt1", victim_part_from_the_middle)

        # other way to detect broken parts
        node1.query("CHECK TABLE mt1")

        node2.query("SYSTEM START REPLICATION QUEUES")
        res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt1")
        print("result: ", res)
        print("error: ", err)

        for i in range(10):
            result = node2.query("SELECT count() FROM system.replication_queue")
            if int(result) == 0:
                break
            time.sleep(1)
        else:
            assert False, "Still have something in replication queue:\n" + node2.query(
                "SELECT * FROM system.replication_queue FORMAT Vertical"
            )

        assert node1.contains_in_log(
            "Created empty part"
        ), "Seems like empty part {} is not created or log message changed".format(
            victim_part_from_the_middle
        )

        node1.query("SYSTEM STOP MERGES mt1")
        node2.query("SYSTEM STOP REPLICATION QUEUES")
        assert_eq_with_retry(node2, "SELECT COUNT() FROM mt1", "4")
        assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0")

        for i in range(5):
            node1.query(f"INSERT INTO mt1 VALUES ({i})")
        node1.query("SYSTEM START MERGES mt1")

        for i in range(20):
            parts_to_merge = node1.query(
                "SELECT parts_to_merge FROM system.replication_queue WHERE table='mt1' AND length(parts_to_merge) > 0"
            )
            if parts_to_merge:
                parts_list = list(sorted(ast.literal_eval(parts_to_merge)))
                print("Got parts list", parts_list)
                if len(parts_list) < 3:
                    raise Exception("Got too small parts list {}".format(parts_list))
                break
            time.sleep(1)

        victim_part_from_the_middle = random.choice(parts_list[1:-1])
        print("Will corrupt part", victim_part_from_the_middle)

        remove_part_from_disk(node1, "mt1", victim_part_from_the_middle)

        # other way to detect broken parts
        node1.query("CHECK TABLE mt1")

        node2.query("SYSTEM START REPLICATION QUEUES")
        res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt1")
        print("result: ", res)
        print("error: ", err)

        for i in range(10):
            result = node2.query("SELECT count() FROM system.replication_queue")
            if int(result) == 0:
                break
            time.sleep(1)
        else:
            assert False, "Still have something in replication queue:\n" + node2.query(
                "SELECT * FROM system.replication_queue FORMAT Vertical"
            )

        assert node1.contains_in_log(
            "Created empty part"
        ), "Seems like empty part {} is not created or log message changed".format(
            victim_part_from_the_middle
        )

        assert_eq_with_retry(node2, "SELECT COUNT() FROM mt1", "4")
        assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0")

        node1.query("SYSTEM START MERGES mt1")

        assert_eq_with_retry(node1, "SELECT COUNT() FROM mt1", "4")
        assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0")

        node1.query("DROP TABLE IF EXISTS mt1 SYNC")
        node2.query("DROP TABLE IF EXISTS mt1 SYNC")
        assert_eq_with_retry(node1, "SELECT COUNT() FROM mt1", "4")
        assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0")
    finally:
        node1.query("DROP TABLE IF EXISTS mt1 SYNC")
        node2.query("DROP TABLE IF EXISTS mt1 SYNC")

def test_lost_part_mutation(start_cluster):
    node1.query("DROP TABLE IF EXISTS mt2 SYNC")
    node2.query("DROP TABLE IF EXISTS mt2 SYNC")

    for node in [node1, node2]:
        node.query(
            f"CREATE TABLE mt2 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t2', '{node.name}') ORDER BY tuple() "
            "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0,"
            "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000"
    try:
        for node in [node1, node2]:
            node.query(
                f"CREATE TABLE mt2 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t2', '{node.name}') ORDER BY tuple() "
                "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0,"
                "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000"
            )

        node1.query("SYSTEM STOP MERGES mt2")
        node2.query("SYSTEM STOP REPLICATION QUEUES")

        for i in range(2):
            node1.query(f"INSERT INTO mt2 VALUES ({i})")

        node1.query(
            "ALTER TABLE mt2 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"}
        )

        node1.query("SYSTEM STOP MERGES mt2")
        node2.query("SYSTEM STOP REPLICATION QUEUES")
        for i in range(20):
            parts_to_mutate = node1.query(
                "SELECT count() FROM system.replication_queue WHERE table='mt2'"
            )
            # two mutations for both replicas
            if int(parts_to_mutate) == 4:
                break
            time.sleep(1)

        for i in range(2):
            node1.query(f"INSERT INTO mt2 VALUES ({i})")
        remove_part_from_disk(node1, "mt2", "all_1_1_0")

        node1.query(
            "ALTER TABLE mt2 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"}
        )
        # other way to detect broken parts
        node1.query("CHECK TABLE mt2")

        for i in range(20):
            parts_to_mutate = node1.query(
                "SELECT count() FROM system.replication_queue WHERE table='mt2'"
            )
            # two mutations for both replicas
            if int(parts_to_mutate) == 4:
                break
            time.sleep(1)
        node1.query("SYSTEM START MERGES mt2")
        res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt2")
        print("result: ", res)
        print("error: ", err)

        remove_part_from_disk(node1, "mt2", "all_1_1_0")
        for i in range(10):
            result = node1.query("SELECT count() FROM system.replication_queue")
            if int(result) == 0:
                break
            time.sleep(1)
        else:
            assert False, "Still have something in replication queue:\n" + node1.query(
                "SELECT * FROM system.replication_queue FORMAT Vertical"
            )

        # other way to detect broken parts
        node1.query("CHECK TABLE mt2")
        assert_eq_with_retry(node1, "SELECT COUNT() FROM mt2", "1")
        assert_eq_with_retry(node1, "SELECT SUM(id) FROM mt2", "777")
        assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0")

        node1.query("SYSTEM START MERGES mt2")
        res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt2")
        print("result: ", res)
        print("error: ", err)
        node2.query("SYSTEM START REPLICATION QUEUES")

        for i in range(10):
            result = node1.query("SELECT count() FROM system.replication_queue")
            if int(result) == 0:
                break
            time.sleep(1)
        else:
            assert False, "Still have something in replication queue:\n" + node1.query(
                "SELECT * FROM system.replication_queue FORMAT Vertical"
            )

        assert_eq_with_retry(node1, "SELECT COUNT() FROM mt2", "1")
        assert_eq_with_retry(node1, "SELECT SUM(id) FROM mt2", "777")
        assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0")

        node2.query("SYSTEM START REPLICATION QUEUES")

        assert_eq_with_retry(node2, "SELECT COUNT() FROM mt2", "1")
        assert_eq_with_retry(node2, "SELECT SUM(id) FROM mt2", "777")
        assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0")

        node1.query("DROP TABLE IF EXISTS mt2 SYNC")
        node2.query("DROP TABLE IF EXISTS mt2 SYNC")
        assert_eq_with_retry(node2, "SELECT COUNT() FROM mt2", "1")
        assert_eq_with_retry(node2, "SELECT SUM(id) FROM mt2", "777")
        assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0")
    finally:
        node1.query("DROP TABLE IF EXISTS mt2 SYNC")
        node2.query("DROP TABLE IF EXISTS mt2 SYNC")

def test_lost_last_part(start_cluster):
    node1.query("DROP TABLE IF EXISTS mt3 SYNC")
    node2.query("DROP TABLE IF EXISTS mt3 SYNC")

    for node in [node1, node2]:
        node.query(
            f"CREATE TABLE mt3 (id UInt64, p String) ENGINE ReplicatedMergeTree('/clickhouse/tables/t3', '{node.name}') "
            "ORDER BY tuple() PARTITION BY p SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0,"
            "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000"
    try:
        for node in [node1, node2]:
            node.query(
                f"CREATE TABLE mt3 (id UInt64, p String) ENGINE ReplicatedMergeTree('/clickhouse/tables/t3', '{node.name}') "
                "ORDER BY tuple() PARTITION BY p SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0,"
                "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000"
            )

        node1.query("SYSTEM STOP MERGES mt3")
        node2.query("SYSTEM STOP REPLICATION QUEUES")

        for i in range(1):
            node1.query(f"INSERT INTO mt3 VALUES ({i}, 'x')")

        # actually not important
        node1.query(
            "ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"}
        )

        node1.query("SYSTEM STOP MERGES mt3")
        node2.query("SYSTEM STOP REPLICATION QUEUES")
        partition_id = node1.query("select partitionId('x')").strip()
        remove_part_from_disk(node1, "mt3", f"{partition_id}_0_0_0")

        for i in range(1):
            node1.query(f"INSERT INTO mt3 VALUES ({i}, 'x')")
        # other way to detect broken parts
        node1.query("CHECK TABLE mt3")

        # actually not important
        node1.query(
            "ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"}
        )
        node1.query("SYSTEM START MERGES mt3")

        partition_id = node1.query("select partitionId('x')").strip()
        remove_part_from_disk(node1, "mt3", f"{partition_id}_0_0_0")
        for i in range(100):
            result = node1.query(
                "SELECT count() FROM system.replication_queue WHERE table='mt3'"
            )
            assert int(result) <= 2, "Have a lot of entries in queue {}".format(
                node1.query("SELECT * FROM system.replication_queue FORMAT Vertical")
            )
            if node1.contains_in_log(
                "Cannot create empty part"
            ) and node1.contains_in_log("DROP/DETACH PARTITION"):
                break
            if node1.contains_in_log(
                "Created empty part 8b8f0fede53df97513a9fb4cb19dc1e4_0_0_0 "
            ):
                break
            time.sleep(0.5)
        else:
            assert False, "Don't have required messages in node1 log"

        # other way to detect broken parts
        node1.query("CHECK TABLE mt3")
        node1.query(f"ALTER TABLE mt3 DROP PARTITION ID '{partition_id}'")

        node1.query("SYSTEM START MERGES mt3")

        for i in range(10):
            result = node1.query(
                "SELECT count() FROM system.replication_queue WHERE table='mt3'"
            )
            assert int(result) <= 2, "Have a lot of entries in queue {}".format(
                node1.query("SELECT * FROM system.replication_queue FORMAT Vertical")
            )
            if node1.contains_in_log("Cannot create empty part") and node1.contains_in_log(
                "DROP/DETACH PARTITION"
            ):
                break
            if node1.contains_in_log(
                "Created empty part 8b8f0fede53df97513a9fb4cb19dc1e4_0_0_0 "
            ):
                break
            time.sleep(1)
        else:
            assert False, "Don't have required messages in node1 log"

        node1.query(f"ALTER TABLE mt3 DROP PARTITION ID '{partition_id}'")

        assert_eq_with_retry(node1, "SELECT COUNT() FROM mt3", "0")
        assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0")

        node1.query("DROP TABLE IF EXISTS mt3 SYNC")
        node2.query("DROP TABLE IF EXISTS mt3 SYNC")
        assert_eq_with_retry(node1, "SELECT COUNT() FROM mt3", "0")
        assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0")
    finally:
        node1.query("DROP TABLE IF EXISTS mt3 SYNC")
        node2.query("DROP TABLE IF EXISTS mt3 SYNC")

@ -85,6 +85,32 @@ def test_read_write_storage_with_globs(started_cluster):
        assert "in readonly mode" in str(ex)


def test_storage_with_multidirectory_glob(started_cluster):
    hdfs_api = started_cluster.hdfs_api
    for i in ["1", "2"]:
        hdfs_api.write_data(
            f"/multiglob/p{i}/path{i}/postfix/data{i}", f"File{i}\t{i}{i}\n"
        )
        assert (
            hdfs_api.read_data(f"/multiglob/p{i}/path{i}/postfix/data{i}")
            == f"File{i}\t{i}{i}\n"
        )

    r = node1.query(
        "SELECT * FROM hdfs('hdfs://hdfs1:9000/multiglob/{p1/path1,p2/path2}/postfix/data{1,2}', TSV)"
    )
    assert (r == f"File1\t11\nFile2\t22\n") or (r == f"File2\t22\nFile1\t11\n")

    try:
        node1.query(
            "SELECT * FROM hdfs('hdfs://hdfs1:9000/multiglob/{p4/path1,p2/path3}/postfix/data{1,2}.nonexist', TSV)"
        )
        assert False, "Exception has to be thrown"
    except Exception as ex:
        print(ex)
        assert "no files" in str(ex)
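
Note: each {a,b} brace group in the glob above is a comma-separated list of alternatives that may span directory separators, and the match set is the cross product of all groups. A small illustrative sketch of that expansion in Python (not ClickHouse's actual implementation):

import re

def expand_braces(pattern: str) -> list:
    # Expand the first {...} group and recurse into the remainder.
    m = re.search(r"\{([^{}]*)\}", pattern)
    if not m:
        return [pattern]
    head, tail = pattern[: m.start()], pattern[m.end():]
    return [p for alt in m.group(1).split(",") for p in expand_braces(head + alt + tail)]

print(expand_braces("/multiglob/{p1/path1,p2/path2}/postfix/data{1,2}"))
# ['/multiglob/p1/path1/postfix/data1', '/multiglob/p1/path1/postfix/data2',
#  '/multiglob/p2/path2/postfix/data1', '/multiglob/p2/path2/postfix/data2']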


def test_read_write_table(started_cluster):
    hdfs_api = started_cluster.hdfs_api

@ -762,7 +762,7 @@ def test_kafka_formats(kafka_cluster):
            ),
        ],
        "extra_settings": ", format_avro_schema_registry_url='http://{}:{}'".format(
            kafka_cluster.schema_registry_host, 8081
            kafka_cluster.schema_registry_host, kafka_cluster.schema_registry_port
        ),
        "supports_empty_value": True,
    },
@ -4339,7 +4339,7 @@ def test_row_based_formats(kafka_cluster):
        f"""
        DROP TABLE IF EXISTS test.view;
        DROP TABLE IF EXISTS test.kafka;

        CREATE TABLE test.kafka (key UInt64, value UInt64)
            ENGINE = Kafka
            SETTINGS kafka_broker_list = 'kafka1:19092',
@ -4347,10 +4347,10 @@ def test_row_based_formats(kafka_cluster):
                     kafka_group_name = '{format_name}',
                     kafka_format = '{format_name}',
                     kafka_max_rows_per_message = 5;

        CREATE MATERIALIZED VIEW test.view Engine=Log AS
            SELECT key, value FROM test.kafka;

        INSERT INTO test.kafka SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows});
        """
    )
@ -4459,17 +4459,17 @@ def test_block_based_formats_2(kafka_cluster):
        f"""
        DROP TABLE IF EXISTS test.view;
        DROP TABLE IF EXISTS test.kafka;

        CREATE TABLE test.kafka (key UInt64, value UInt64)
            ENGINE = Kafka
            SETTINGS kafka_broker_list = 'kafka1:19092',
                     kafka_topic_list = '{format_name}',
                     kafka_group_name = '{format_name}',
                     kafka_format = '{format_name}';

        CREATE MATERIALIZED VIEW test.view Engine=Log AS
            SELECT key, value FROM test.kafka;

        INSERT INTO test.kafka SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}) settings max_block_size=12, optimize_trivial_insert_select=0;
        """
    )

0 tests/integration/test_temporary_data/__init__.py Normal file
57 tests/integration/test_temporary_data/test.py Normal file
@ -0,0 +1,57 @@
# pylint: disable=unused-argument
# pylint: disable=redefined-outer-name

import pytest
import time

from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)

node = cluster.add_instance(
    "node",
    stay_alive=True,
)


@pytest.fixture(scope="module")
def start_cluster():
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()


def test_tmp_data_no_leftovers(start_cluster):
    q = node.get_query_request

    settings = {
        "max_bytes_before_external_group_by": "10K",
        "max_bytes_before_external_sort": "10K",
        "join_algorithm": "grace_hash",
        "max_bytes_in_join": "10K",
        "grace_hash_join_initial_buckets": "16",
    }

    # Run some queries in the background to generate temporary data
    q(
        "SELECT ignore(*) FROM numbers(10 * 1024 * 1024) ORDER BY sipHash64(number)",
        settings=settings,
    )
    q("SELECT * FROM system.numbers GROUP BY ALL", settings=settings)
    q(
        "SELECT * FROM system.numbers as t1 JOIN system.numbers as t2 USING (number)",
        settings=settings,
    )

    # Wait a bit to make sure the temporary data is written to disk
    time.sleep(5)

    # Hard restart the node
    node.restart_clickhouse(kill=True)
    path_to_data = "/var/lib/clickhouse/"

    # Check that there are no temporary files left
    result = node.exec_in_container(["ls", path_to_data + "tmp/"])
    assert result == ""
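
Note: the fixed time.sleep(5) above makes the test timing-sensitive. A sketch of a polling alternative, built on the same exec_in_container helper the test already uses (the helper below is hypothetical, not part of the test):

import time

def wait_for_tmp_files(node, path="/var/lib/clickhouse/tmp/", timeout=30):
    # Poll the tmp directory until spill files show up instead of
    # sleeping for a fixed interval.
    deadline = time.time() + timeout
    while time.time() < deadline:
        if node.exec_in_container(["ls", path]) != "":
            return
        time.sleep(0.5)
    raise AssertionError(f"no temporary files appeared under {path}")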

@ -36,7 +36,7 @@ ${CLICKHOUSE_CLIENT} --query="SYSTEM SYNC REPLICA kill_mutation_r1"
${CLICKHOUSE_CLIENT} --query="SYSTEM SYNC REPLICA kill_mutation_r2"

# Should be empty, but in case of problems we will see some diagnostics
${CLICKHOUSE_CLIENT} --query="SELECT * FROM system.replication_queue WHERE table like 'kill_mutation_r%'"
${CLICKHOUSE_CLIENT} --query="SELECT * FROM system.replication_queue WHERE database = '$CLICKHOUSE_DATABASE' AND table like 'kill_mutation_r%'"

${CLICKHOUSE_CLIENT} --query="ALTER TABLE kill_mutation_r1 DELETE WHERE toUInt32(s) = 1"

@ -57,6 +57,14 @@ $CLICKHOUSE_CLIENT --query="SELECT count() FROM system.mutations WHERE database

${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = '$CLICKHOUSE_DATABASE' AND table = 'kill_mutation_r1' AND mutation_id = '0000000001'"

# Wait for the 1st mutation to be actually killed and the 2nd to finish
query_result=$($CLICKHOUSE_CLIENT --query="$check_query1" 2>&1)
while [ "$query_result" != "0" ]
do
    query_result=$($CLICKHOUSE_CLIENT --query="$check_query1" 2>&1)
    sleep 0.5
done

${CLICKHOUSE_CLIENT} --query="SYSTEM SYNC REPLICA kill_mutation_r1"
${CLICKHOUSE_CLIENT} --query="SYSTEM SYNC REPLICA kill_mutation_r2"

@ -33,3 +33,4 @@
1 3 1 4
2 1 2 3
2 2 2 3
1 2 1 2

@ -23,5 +23,10 @@ SELECT count() FROM A ASOF JOIN B ON A.a == B.b AND A.t != B.t; -- { serverError

SELECT A.a, A.t, B.b, B.t FROM A ASOF JOIN B ON A.a == B.b AND A.t < B.t OR A.a == B.b + 1 ORDER BY (A.a, A.t); -- { serverError 48 }

SELECT A.a, A.t, B.b, B.t FROM A
ASOF INNER JOIN (SELECT * FROM B UNION ALL SELECT 1, 3) AS B ON B.t <= A.t AND A.a == B.b
WHERE B.t != 3 ORDER BY (A.a, A.t)
;

DROP TABLE A;
DROP TABLE B;

@ -138,8 +138,13 @@ while true ; do
done

for i in $(seq $REPLICAS); do
    $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA concurrent_mutate_mt_$i"
    $CLICKHOUSE_CLIENT --query "CHECK TABLE concurrent_mutate_mt_$i" &> /dev/null # if we will remove something the output of select will be wrong
    $CLICKHOUSE_CLIENT --query "SELECT SUM(toUInt64(value1)) > $INITIAL_SUM FROM concurrent_mutate_mt_$i"
    $CLICKHOUSE_CLIENT --query "SELECT COUNT() FROM system.mutations WHERE table='concurrent_mutate_mt_$i' and is_done=0" # all mutations have to be done
    $CLICKHOUSE_CLIENT --query "SELECT * FROM system.mutations WHERE table='concurrent_mutate_mt_$i' and is_done=0" # for verbose output
done

for i in $(seq $REPLICAS); do
    $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS concurrent_mutate_mt_$i"
done

@ -0,0 +1,12 @@
select 1
\N 1232 Johny
select 2
\N 1232 Johny
select 3
\N 1232 Johny
select 4
\N 1232 Johny
select 5
\N 1232 Johny
select 6
\N 1232 Johny

@ -0,0 +1,58 @@
drop table if EXISTS l;
drop table if EXISTS r;

CREATE TABLE l (luid Nullable(Int16), name String)
ENGINE=MergeTree order by luid settings allow_nullable_key=1 as
select * from VALUES ((1231, 'John'),(6666, 'Ksenia'),(Null, '---'));

CREATE TABLE r (ruid Nullable(Int16), name String)
ENGINE=MergeTree order by ruid settings allow_nullable_key=1 as
select * from VALUES ((1231, 'John'),(1232, 'Johny'));

select 'select 1';
SELECT * FROM l full outer join r on l.luid = r.ruid
where luid is null
and ruid is not null;

select 'select 2';
select * from (
SELECT * FROM l full outer join r on l.luid = r.ruid)
where luid is null
and ruid is not null;

select 'select 3';
select * from (
SELECT * FROM l full outer join r on l.luid = r.ruid
limit 100000000)
where luid is null
and ruid is not null;

drop table l;
drop table r;

CREATE TABLE l (luid Nullable(Int16), name String) ENGINE=MergeTree order by tuple() as
select * from VALUES ((1231, 'John'),(6666, 'Ksenia'),(Null, '---'));

CREATE TABLE r (ruid Nullable(Int16), name String) ENGINE=MergeTree order by tuple() as
select * from VALUES ((1231, 'John'),(1232, 'Johny'));

select 'select 4';
SELECT * FROM l full outer join r on l.luid = r.ruid
where luid is null
and ruid is not null;

select 'select 5';
select * from (
SELECT * FROM l full outer join r on l.luid = r.ruid)
where luid is null
and ruid is not null;

select 'select 6';
select * from (
SELECT * FROM l full outer join r on l.luid = r.ruid
limit 100000000)
where luid is null
and ruid is not null;

drop table l;
drop table r;

@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CUR_DIR"/../shell_config.sh

${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata"
${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata (data JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1
${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_object_type 1

cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata FORMAT JSONAsObject"

@ -9,9 +9,9 @@ ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_2"
${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_2_string"
${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_2_from_string"

${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2 (data JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1
${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2_string (data String) ENGINE = MergeTree ORDER BY tuple()"
${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2_from_string (data JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1
${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2 (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_object_type 1
${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2_string (data String) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'"
${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2_from_string (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_object_type 1

cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata_2 FORMAT JSONAsObject"
cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata_2_string FORMAT JSONAsString"