Merge branch 'master' into legacy-tuple-name

This commit is contained in:
mergify[bot] 2021-08-07 16:42:20 +00:00 committed by GitHub
commit 589505e76b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
268 changed files with 3929 additions and 1652 deletions

View File

@ -7,6 +7,6 @@ assignees: ''
---
Make sure to check documentation https://clickhouse.yandex/docs/en/ first. If the question is concise and probably has a short answer, asking it in Telegram chat https://telegram.me/clickhouse_en is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse
> Make sure to check documentation https://clickhouse.yandex/docs/en/ first. If the question is concise and probably has a short answer, asking it in Telegram chat https://telegram.me/clickhouse_en is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse
If you still prefer GitHub issues, remove all this text and ask your question here.
> If you still prefer GitHub issues, remove all this text and ask your question here.

View File

@ -7,16 +7,20 @@ assignees: ''
---
(you don't have to strictly follow this form)
> (you don't have to strictly follow this form)
**Use case**
A clear and concise description of what is the intended usage scenario is.
> A clear and concise description of what is the intended usage scenario is.
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
> A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
> A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Add any other context or screenshots about the feature request here.
> Add any other context or screenshots about the feature request here.

View File

@ -7,11 +7,11 @@ assignees: ''
---
You have to provide the following information whenever possible.
> You have to provide the following information whenever possible.
**Describe the bug**
A clear and concise description of what works not as it is supposed to.
> A clear and concise description of what works not as it is supposed to.
**Does it reproduce on recent release?**
@ -19,7 +19,7 @@ A clear and concise description of what works not as it is supposed to.
**Enable crash reporting**
If possible, change "enabled" to true in "send_crash_reports" section in `config.xml`:
> If possible, change "enabled" to true in "send_crash_reports" section in `config.xml`:
```
<send_crash_reports>
@ -39,12 +39,12 @@ If possible, change "enabled" to true in "send_crash_reports" section in `config
**Expected behavior**
A clear and concise description of what you expected to happen.
> A clear and concise description of what you expected to happen.
**Error message and/or stacktrace**
If applicable, add screenshots to help explain your problem.
> If applicable, add screenshots to help explain your problem.
**Additional context**
Add any other context about the problem here.
> Add any other context about the problem here.

View File

@ -7,10 +7,11 @@ assignees: ''
---
Make sure that `git diff` result is empty and you've just pulled fresh master. Try cleaning up cmake cache. Just in case, official build instructions are published here: https://clickhouse.yandex/docs/en/development/build/
> Make sure that `git diff` result is empty and you've just pulled fresh master. Try cleaning up cmake cache. Just in case, official build instructions are published here: https://clickhouse.yandex/docs/en/development/build/
**Operating system**
OS kind or distribution, specific version/release, non-standard kernel if any. If you are trying to build inside virtual machine, please mention it too.
> OS kind or distribution, specific version/release, non-standard kernel if any. If you are trying to build inside virtual machine, please mention it too.
**Cmake version**

2
contrib/AMQP-CPP vendored

@ -1 +1 @@
Subproject commit 03781aaff0f10ef41f902b8cf865fe0067180c10
Subproject commit 1a6c51f4ac51ac56610fa95081bd2f349911375a

View File

@ -10,11 +10,12 @@ set (SRCS
"${LIBRARY_DIR}/src/deferredconsumer.cpp"
"${LIBRARY_DIR}/src/deferredextreceiver.cpp"
"${LIBRARY_DIR}/src/deferredget.cpp"
"${LIBRARY_DIR}/src/deferredpublisher.cpp"
"${LIBRARY_DIR}/src/deferredrecall.cpp"
"${LIBRARY_DIR}/src/deferredreceiver.cpp"
"${LIBRARY_DIR}/src/field.cpp"
"${LIBRARY_DIR}/src/flags.cpp"
"${LIBRARY_DIR}/src/linux_tcp/openssl.cpp"
"${LIBRARY_DIR}/src/linux_tcp/sslerrorprinter.cpp"
"${LIBRARY_DIR}/src/linux_tcp/tcpconnection.cpp"
"${LIBRARY_DIR}/src/inbuffer.cpp"
"${LIBRARY_DIR}/src/receivedframe.cpp"

2
contrib/arrow vendored

@ -1 +1 @@
Subproject commit debf751a129bdda9ff4d1e895e08957ff77000a1
Subproject commit 078e21bad344747b7656ef2d7a4f7410a0a303eb

View File

@ -194,9 +194,18 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/cast.cc"
"${LIBRARY_DIR}/compute/exec.cc"
"${LIBRARY_DIR}/compute/function.cc"
"${LIBRARY_DIR}/compute/function_internal.cc"
"${LIBRARY_DIR}/compute/kernel.cc"
"${LIBRARY_DIR}/compute/registry.cc"
"${LIBRARY_DIR}/compute/exec/exec_plan.cc"
"${LIBRARY_DIR}/compute/exec/expression.cc"
"${LIBRARY_DIR}/compute/exec/key_compare.cc"
"${LIBRARY_DIR}/compute/exec/key_encode.cc"
"${LIBRARY_DIR}/compute/exec/key_hash.cc"
"${LIBRARY_DIR}/compute/exec/key_map.cc"
"${LIBRARY_DIR}/compute/exec/util.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_quantile.cc"
@ -207,6 +216,7 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/kernels/scalar_arithmetic.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_boolean.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_dictionary.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_nested.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_numeric.cc"
@ -214,15 +224,18 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/kernels/scalar_cast_temporal.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_compare.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_fill_null.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_nested.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_set_lookup.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_string.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_temporal.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_validity.cc"
"${LIBRARY_DIR}/compute/kernels/util_internal.cc"
"${LIBRARY_DIR}/compute/kernels/vector_hash.cc"
"${LIBRARY_DIR}/compute/kernels/vector_nested.cc"
"${LIBRARY_DIR}/compute/kernels/vector_replace.cc"
"${LIBRARY_DIR}/compute/kernels/vector_selection.cc"
"${LIBRARY_DIR}/compute/kernels/vector_sort.cc"
"${LIBRARY_DIR}/compute/kernels/util_internal.cc"
"${LIBRARY_DIR}/csv/chunker.cc"
"${LIBRARY_DIR}/csv/column_builder.cc"
@ -231,6 +244,7 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/csv/options.cc"
"${LIBRARY_DIR}/csv/parser.cc"
"${LIBRARY_DIR}/csv/reader.cc"
"${LIBRARY_DIR}/csv/writer.cc"
"${LIBRARY_DIR}/ipc/dictionary.cc"
"${LIBRARY_DIR}/ipc/feather.cc"
@ -247,6 +261,7 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/io/interfaces.cc"
"${LIBRARY_DIR}/io/memory.cc"
"${LIBRARY_DIR}/io/slow.cc"
"${LIBRARY_DIR}/io/stdio.cc"
"${LIBRARY_DIR}/io/transform.cc"
"${LIBRARY_DIR}/tensor/coo_converter.cc"
@ -257,9 +272,9 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/util/bit_block_counter.cc"
"${LIBRARY_DIR}/util/bit_run_reader.cc"
"${LIBRARY_DIR}/util/bit_util.cc"
"${LIBRARY_DIR}/util/bitmap.cc"
"${LIBRARY_DIR}/util/bitmap_builders.cc"
"${LIBRARY_DIR}/util/bitmap_ops.cc"
"${LIBRARY_DIR}/util/bitmap.cc"
"${LIBRARY_DIR}/util/bpacking.cc"
"${LIBRARY_DIR}/util/cancel.cc"
"${LIBRARY_DIR}/util/compression.cc"

2
contrib/zlib-ng vendored

@ -1 +1 @@
Subproject commit db232d30b4c72fd58e6d7eae2d12cebf9c3d90db
Subproject commit 6a5e93b9007782115f7f7e5235dedc81c4f1facb

View File

@ -0,0 +1,13 @@
version: '2.3'
services:
mongo1:
image: mongo:3.6
restart: always
environment:
MONGO_INITDB_ROOT_USERNAME: root
MONGO_INITDB_ROOT_PASSWORD: clickhouse
volumes:
- ${MONGO_CONFIG_PATH}:/mongo/
ports:
- ${MONGO_EXTERNAL_PORT}:${MONGO_INTERNAL_PORT}
command: --config /mongo/mongo_secure.conf --profile=2 --verbose

View File

@ -183,6 +183,10 @@ for conn_index, c in enumerate(all_connections):
# requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings
# (https://github.com/mymarilyn/clickhouse-driver/pull/142)
c.settings[s.tag] = s.text
# We have to perform a query to make sure the settings work. Otherwise an
# unknown setting will lead to failing precondition check, and we will skip
# the test, which is wrong.
c.execute("select 1")
reportStageEnd('settings')

View File

@ -28,7 +28,7 @@ RUN apt-get update --yes \
ENV PKG_VERSION="pvs-studio-latest"
RUN set -x \
&& export PUBKEY_HASHSUM="486a0694c7f92e96190bbfac01c3b5ac2cb7823981db510a28f744c99eabbbf17a7bcee53ca42dc6d84d4323c2742761" \
&& export PUBKEY_HASHSUM="686e5eb8b3c543a5c54442c39ec876b6c2d912fe8a729099e600017ae53c877dda3368fe38ed7a66024fe26df6b5892a" \
&& wget -nv https://files.viva64.com/etc/pubkey.txt -O /tmp/pubkey.txt \
&& echo "${PUBKEY_HASHSUM} /tmp/pubkey.txt" | sha384sum -c \
&& apt-key add /tmp/pubkey.txt \

View File

@ -15,7 +15,7 @@ Supports table structure modifications (`ALTER TABLE ... ADD|DROP COLUMN`). If `
``` sql
CREATE DATABASE test_database
ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `use_table_cache`]);
ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `schema`, `use_table_cache`]);
```
**Engine Parameters**
@ -24,6 +24,7 @@ ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `use_table_cac
- `database` — Remote database name.
- `user` — PostgreSQL user.
- `password` — User password.
- `schema` — PostgreSQL schema.
- `use_table_cache` — Defines if the database table structure is cached or not. Optional. Default value: `0`.
## Data Types Support {#data_types-support}

View File

@ -15,7 +15,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name
name1 [type1],
name2 [type2],
...
) ENGINE = MongoDB(host:port, database, collection, user, password);
) ENGINE = MongoDB(host:port, database, collection, user, password [, options]);
```
**Engine Parameters**
@ -30,9 +30,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name
- `password` — User password.
- `options` — MongoDB connection string options (optional parameter).
## Usage Example {#usage-example}
Table in ClickHouse which allows to read data from MongoDB collection:
Create a table in ClickHouse which allows to read data from MongoDB collection:
``` text
CREATE TABLE mongo_table
@ -42,6 +44,16 @@ CREATE TABLE mongo_table
) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse');
```
To read from an SSL secured MongoDB server:
``` text
CREATE TABLE mongo_table_ssl
(
key UInt64,
data String
) ENGINE = MongoDB('mongo2:27017', 'test', 'simple_table', 'testuser', 'clickhouse', 'ssl=true');
```
Query:
``` sql

View File

@ -84,6 +84,8 @@ Features:
- Table data preview.
- Full-text search.
By default, DBeaver does not connect using a session (unlike, for example, the CLI). If you require session support (for example, to set settings for your session), edit the driver connection properties and set `session_id` to a random string (DBeaver uses the HTTP connection under the hood). Then you can use any setting from the query window.
### clickhouse-cli {#clickhouse-cli}
[clickhouse-cli](https://github.com/hatarist/clickhouse-cli) is an alternative command-line client for ClickHouse, written in Python 3.

View File

@ -28,7 +28,7 @@ Structure of the `users` section:
<profile>profile_name</profile>
<quota>default</quota>
<default_database>default</default_database>
<databases>
<database_name>
<table_name>

View File

@ -82,6 +82,7 @@ The next 4 columns have a non-zero value only where there is an active session w
- `absolute_delay` (`UInt64`) - How big lag in seconds the current replica has.
- `total_replicas` (`UInt8`) - The total number of known replicas of this table.
- `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ZooKeeper (i.e., the number of functioning replicas).
- `replica_is_active` ([Map(String, UInt8)](../../sql-reference/data-types/map.md)) — Map between the replica name and whether the replica is active.
If you request all the columns, the table may work a bit slowly, since several reads from ZooKeeper are made for each row.
If you do not request the last 4 columns (log_max_index, log_pointer, total_replicas, active_replicas), the table works quickly.

View File

@ -275,9 +275,13 @@ The dictionary is stored in a cache that has a fixed number of cells. These cell
When searching for a dictionary, the cache is searched first. For each block of data, all keys that are not found in the cache or are outdated are requested from the source using `SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. The received data is then written to the cache.
For cache dictionaries, the expiration [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cells value is not used, and it is re-requested the next time it needs to be used.
If keys are not found in the dictionary, an update cache task is created and added to the update queue. Update queue properties can be controlled with the settings `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates`.
For cache dictionaries, the expiration [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell's value is not used and the key becomes expired. The key is re-requested the next time it needs to be used. This behaviour can be configured with the setting `allow_read_expired_keys`.
This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the `system.dictionaries` table.
If the setting `allow_read_expired_keys` is set to 1 (the default is 0), the dictionary supports asynchronous updates. If a client requests keys and all of them are in the cache, but some of them are expired, the dictionary returns the expired keys to the client and requests them asynchronously from the source.
To improve cache performance, use a subquery with `LIMIT`, and call the function with the dictionary externally.
Supported [sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md): MySQL, ClickHouse, executable, HTTP.
@ -289,6 +293,16 @@ Example of settings:
<cache>
<!-- The size of the cache, in number of cells. Rounded up to a power of two. -->
<size_in_cells>1000000000</size_in_cells>
<!-- Allows to read expired keys. -->
<allow_read_expired_keys>0</allow_read_expired_keys>
<!-- Max size of update queue. -->
<max_update_queue_size>100000</max_update_queue_size>
<!-- Max timeout in milliseconds for push update task into queue. -->
<update_queue_push_timeout_milliseconds>10</update_queue_push_timeout_milliseconds>
<!-- Max wait timeout in milliseconds for update task to complete. -->
<query_wait_timeout_milliseconds>60000</query_wait_timeout_milliseconds>
<!-- Max threads for cache dictionary update. -->
<max_threads_for_updates>4</max_threads_for_updates>
</cache>
</layout>
```
@ -315,7 +329,7 @@ This type of storage is for use with composite [keys](../../../sql-reference/dic
### ssd_cache {#ssd-cache}
Similar to `cache`, but stores data on SSD and index in RAM.
Similar to `cache`, but stores data on SSD and index in RAM. All cache dictionary settings related to update queue can also be applied to SSD cache dictionaries.
``` xml
<layout>

View File

@ -3,11 +3,14 @@ toc_priority: 67
toc_title: NLP
---
# Natural Language Processing functions {#nlp-functions}
# [experimental] Natural Language Processing functions {#nlp-functions}
!!! warning "Warning"
This is an experimental feature that is currently in development and is not ready for general use. It will change in unpredictable backwards-incompatible ways in future releases. Set `allow_experimental_nlp_functions = 1` to enable it.
## stem {#stem}
Performs stemming on a previously tokenized text.
Performs stemming on a given word.
**Syntax**
@ -38,7 +41,7 @@ Result:
## lemmatize {#lemmatize}
Performs lemmatization on a given word.
Performs lemmatization on a given word. Needs dictionaries to operate, which can be obtained [here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models).
**Syntax**
@ -79,7 +82,11 @@ Configuration:
## synonyms {#synonyms}
Finds synonyms to a given word.
Finds synonyms to a given word. There are two types of synonym extensions: `plain` and `wordnet`.
With the `plain` extension type we need to provide a path to a simple text file, where each line corresponds to a certain synonym set. Words in this line must be separated with space or tab characters.
With the `wordnet` extension type we need to provide a path to a directory with WordNet thesaurus in it. Thesaurus must contain a WordNet sense index.
**Syntax**
@ -89,7 +96,7 @@ synonyms('extension_name', word)
**Arguments**
- `extension_name` — Name of the extention in which search will be performed. [String](../../sql-reference/data-types/string.md#string).
- `extension_name` — Name of the extension in which search will be performed. [String](../../sql-reference/data-types/string.md#string).
- `word` — Word that will be searched in extension. [String](../../sql-reference/data-types/string.md#string).
**Examples**
@ -122,4 +129,4 @@ Configuration:
<path>en/</path>
</extension>
</synonyms_extensions>
```
```

View File

@ -2138,3 +2138,52 @@ Result:
- [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port)
## currentProfiles {#current-profiles}
Returns a list of the current [settings profiles](../../operations/access-rights.md#settings-profiles-management) for the current user.
The command [SET PROFILE](../../sql-reference/statements/set.md#query-set) could be used to change the current setting profile. If the command `SET PROFILE` was not used the function returns the profiles specified at the current user's definition (see [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)).
**Syntax**
``` sql
currentProfiles()
```
**Returned value**
- List of the current user settings profiles.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
## enabledProfiles {#enabled-profiles}
Returns settings profiles, assigned to the current user both explicitly and implicitly. Explicitly assigned profiles are the same as returned by the [currentProfiles](#current-profiles) function. Implicitly assigned profiles include parent profiles of other assigned profiles, profiles assigned via granted roles, profiles assigned via their own settings, and the main default profile (see the `default_profile` section in the main server configuration file).
**Syntax**
``` sql
enabledProfiles()
```
**Returned value**
- List of the enabled settings profiles.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
## defaultProfiles {#default-profiles}
Returns all the profiles specified at the current user's definition (see [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement) statement).
**Syntax**
``` sql
defaultProfiles()
```
**Returned value**
- List of the default settings profiles.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).

View File

@ -11,7 +11,7 @@ Arranges `key:value` pairs into [Map(key, value)](../../sql-reference/data-types
**Syntax**
``` sql
```sql
map(key1, value1[, key2, value2, ...])
```
@ -30,7 +30,7 @@ Type: [Map(key, value)](../../sql-reference/data-types/map.md).
Query:
``` sql
```sql
SELECT map('key1', number, 'key2', number * 2) FROM numbers(3);
```
@ -46,7 +46,7 @@ Result:
Query:
``` sql
```sql
CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = MergeTree() ORDER BY a;
INSERT INTO table_map SELECT map('key1', number, 'key2', number * 2) FROM numbers(3);
SELECT a['key2'] FROM table_map;
@ -54,7 +54,7 @@ SELECT a['key2'] FROM table_map;
Result:
``` text
```text
┌─arrayElement(a, 'key2')─┐
│ 0 │
│ 2 │
@ -72,7 +72,7 @@ Collect all the keys and sum corresponding values.
**Syntax**
``` sql
```sql
mapAdd(arg1, arg2 [, ...])
```
@ -88,13 +88,13 @@ Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sq
Query with a tuple map:
``` sql
```sql
SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTypeName(res) as type;
```
Result:
``` text
```text
┌─res───────────┬─type───────────────────────────────┐
│ ([1,2],[2,2]) │ Tuple(Array(UInt8), Array(UInt64)) │
└───────────────┴────────────────────────────────────┘
@ -102,7 +102,16 @@ Result:
Query with `Map` type:
``` sql
```sql
SELECT mapAdd(map(1,1), map(1,1));
```
Result:
```text
┌─mapAdd(map(1, 1), map(1, 1))─┐
│ {1:2} │
└──────────────────────────────┘
```
## mapSubtract {#function-mapsubtract}
@ -111,21 +120,21 @@ Collect all the keys and subtract corresponding values.
**Syntax**
``` sql
```sql
mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...])
```
**Arguments**
Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array.
Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for each key. All key arrays should have the same type, and all value arrays should contain items which are promoted to one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array.
**Returned value**
- Returns one [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
**Example**
Query:
Query with a tuple map:
```sql
SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt32(2), 1])) as res, toTypeName(res) as type;
@ -139,32 +148,54 @@ Result:
└────────────────┴───────────────────────────────────┘
```
Query with `Map` type:
```sql
SELECT mapSubtract(map(1,1), map(1,1));
```
Result:
```text
┌─mapSubtract(map(1, 1), map(1, 1))─┐
│ {1:0} │
└───────────────────────────────────┘
```
## mapPopulateSeries {#function-mappopulateseries}
Fills missing keys in the maps (key and value array pair), where keys are integers. Also, it supports specifying the max key, which is used to extend the keys array.
Arguments are [maps](../../sql-reference/data-types/map.md) or two [arrays](../../sql-reference/data-types/array.md#data-type-array), where the first array represents keys, and the second array contains values for each key.
For array arguments the number of elements in `keys` and `values` must be the same for each row.
**Syntax**
``` sql
```sql
mapPopulateSeries(keys, values[, max])
mapPopulateSeries(map[, max])
```
Generates a map, where keys are a series of numbers, from minimum to maximum keys (or `max` argument if it specified) taken from `keys` array with a step size of one, and corresponding values taken from `values` array. If the value is not specified for the key, then it uses the default value in the resulting map. For repeated keys, only the first value (in order of appearing) gets associated with the key.
The number of elements in `keys` and `values` must be the same for each row.
Generates a map (a tuple with two arrays or a value of `Map` type, depending on the arguments), where keys are a series of numbers, from minimum to maximum keys (or the `max` argument if it is specified) taken from the map with a step size of one, and corresponding values. If the value is not specified for the key, then it uses the default value in the resulting map. For repeated keys, only the first value (in order of appearing) gets associated with the key.
**Arguments**
Mapped arrays:
- `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
- `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
or
- `map` — Map with integer keys. [Map](../../sql-reference/data-types/map.md).
**Returned value**
- Returns a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys.
- Depending on the arguments returns a [map](../../sql-reference/data-types/map.md) or a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys.
**Example**
Query:
Query with mapped arrays:
```sql
select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type;
@ -178,13 +209,27 @@ Result:
└──────────────────────────────┴───────────────────────────────────┘
```
Query with `Map` type:
```sql
SELECT mapPopulateSeries(map(1, 10, 5, 20), 6);
```
Result:
```text
┌─mapPopulateSeries(map(1, 10, 5, 20), 6)─┐
│ {1:10,2:0,3:0,4:0,5:20,6:0} │
└─────────────────────────────────────────┘
```
## mapContains {#mapcontains}
Determines whether the `map` contains the `key` parameter.
**Syntax**
``` sql
```sql
mapContains(map, key)
```

View File

@ -15,6 +15,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']}]
[HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
[DEFAULT ROLE role [,...]]
[DEFAULT DATABASE database | NONE]
[GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY | WRITABLE] | PROFILE 'profile_name'] [,...]
```

View File

@ -274,28 +274,28 @@ This modifier also can be combined with [LIMIT … WITH TIES modifier](../../../
`WITH FILL` modifier can be set after `ORDER BY expr` with optional `FROM expr`, `TO expr` and `STEP expr` parameters.
All missed values of `expr` column will be filled sequentially and other columns will be filled as defaults.
Use following syntax for filling multiple columns add `WITH FILL` modifier with optional parameters after each field name in `ORDER BY` section.
To fill multiple columns, add `WITH FILL` modifier with optional parameters after each field name in `ORDER BY` section.
``` sql
ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_expr], ... exprN [WITH FILL] [FROM expr] [TO expr] [STEP numeric_expr]
```
`WITH FILL` can be applied only for fields with Numeric (all kind of float, decimal, int) or Date/DateTime types.
`WITH FILL` can be applied for fields with Numeric (all kinds of float, decimal, int) or Date/DateTime types. When applied for `String` fields, missed values are filled with empty strings.
When `FROM const_expr` not defined sequence of filling use minimal `expr` field value from `ORDER BY`.
When `TO const_expr` not defined sequence of filling use maximum `expr` field value from `ORDER BY`.
When `STEP const_numeric_expr` defined then `const_numeric_expr` interprets `as is` for numeric types as `days` for Date type and as `seconds` for DateTime type.
When `STEP const_numeric_expr` omitted then sequence of filling use `1.0` for numeric type, `1 day` for Date type and `1 second` for DateTime type.
For example, the following query
Example of a query without `WITH FILL`:
``` sql
SELECT n, source FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source
FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n
) ORDER BY n;
```
returns
Result:
``` text
┌─n─┬─source───┐
@ -305,16 +305,16 @@ returns
└───┴──────────┘
```
but after apply `WITH FILL` modifier
Same query after applying `WITH FILL` modifier:
``` sql
SELECT n, source FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source
FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5
) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5;
```
returns
Result:
``` text
┌───n─┬─source───┐
@ -334,7 +334,7 @@ returns
└─────┴──────────┘
```
For the case when we have multiple fields `ORDER BY field2 WITH FILL, field1 WITH FILL` order of filling will follow the order of fields in `ORDER BY` clause.
For the case with multiple fields `ORDER BY field2 WITH FILL, field1 WITH FILL` order of filling will follow the order of fields in the `ORDER BY` clause.
Example:
@ -350,7 +350,7 @@ ORDER BY
d1 WITH FILL STEP 5;
```
returns
Result:
``` text
┌───d1───────┬───d2───────┬─source───┐
@ -364,9 +364,9 @@ returns
└────────────┴────────────┴──────────┘
```
Field `d1` does not fill and use default value cause we do not have repeated values for `d2` value, and sequence for `d1` cant be properly calculated.
Field `d1` is not filled in and uses the default value because we do not have repeated values for the `d2` value, and the sequence for `d1` can't be properly calculated.
The following query with a changed field in `ORDER BY`
The following query with the changed field in `ORDER BY`:
``` sql
SELECT
@ -380,7 +380,7 @@ ORDER BY
d2 WITH FILL;
```
returns
Result:
``` text
┌───d1───────┬───d2───────┬─source───┐

View File

@ -5,9 +5,6 @@ toc_title: Window Functions
# [experimental] Window Functions
!!! warning "Warning"
This is an experimental feature that is currently in development and is not ready for general use. It will change in unpredictable backwards-incompatible ways in the future releases. Set `allow_experimental_window_functions = 1` to enable it.
ClickHouse supports the standard grammar for defining windows and window functions. The following features are currently supported:
| Feature | Support or workaround |

View File

@ -87,7 +87,7 @@ toc_title: "Введение"
Виртуальный столбец — это неотъемлемый атрибут движка таблиц, определенный в исходном коде движка.
Виртуальные столбцы не надо указывать в запросе `CREATE TABLE` и их не отображаются в результатах запросов `SHOW CREATE TABLE` и `DESCRIBE TABLE`. Также виртуальные столбцы доступны только для чтения, поэтому вы не можете вставлять в них данные.
Виртуальные столбцы не надо указывать в запросе `CREATE TABLE` и они не отображаются в результатах запросов `SHOW CREATE TABLE` и `DESCRIBE TABLE`. Также виртуальные столбцы доступны только для чтения, поэтому вы не можете вставлять в них данные.
Чтобы получить данные из виртуального столбца, необходимо указать его название в запросе `SELECT`. `SELECT *` не отображает данные из виртуальных столбцов.

View File

@ -134,7 +134,7 @@ default
- `regexp` шаблон имени метрики.
- `age` минимальный возраст данных в секундах.
- `precision` точность определения возраста данных в секундах. Должен быть делителем для 86400 (количество секунд в сутках).
- `function` имя агрегирующей функции, которую следует применить к данным, чей возраст оказался в интервале `[age, age + precision]`.
- `function` имя агрегирующей функции, которую следует применить к данным, чей возраст оказался в интервале `[age, age + precision]`. Допустимые функции: min/max/any/avg. Avg вычисляется неточно, как среднее от средних.
### Пример конфигурации {#configuration-example}
@ -171,3 +171,6 @@ default
</graphite_rollup>
```
!!! warning "Внимание"
Прореживание данных производится во время слияний. Обычно для старых партиций слияния не запускаются, поэтому для прореживания надо инициировать незапланированное слияние, используя [optimize](../../../sql-reference/statements/optimize/). Или использовать дополнительные инструменты, например [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer).

View File

@ -111,7 +111,7 @@ toc_title: "Визуальные интерфейсы от сторонних р
### DataGrip {#datagrip}
[DataGrip](https://www.jetbrains.com/datagrip/) — это IDE для баз данных о JetBrains с выделенной поддержкой ClickHouse. Он также встроен в другие инструменты на основе IntelliJ: PyCharm, IntelliJ IDEA, GoLand, PhpStorm и другие.
[DataGrip](https://www.jetbrains.com/datagrip/) — это IDE для баз данных от JetBrains с выделенной поддержкой ClickHouse. Он также встроен в другие инструменты на основе IntelliJ: PyCharm, IntelliJ IDEA, GoLand, PhpStorm и другие.
Основные возможности:

View File

@ -3,7 +3,10 @@ toc_priority: 67
toc_title: NLP
---
# Функции для работы с ествественным языком {#nlp-functions}
# [экспериментально] Функции для работы с естественным языком {#nlp-functions}
!!! warning "Предупреждение"
Сейчас использование функций для работы с естественным языком является экспериментальной возможностью. Чтобы использовать данные функции, включите настройку `allow_experimental_nlp_functions = 1`.
## stem {#stem}
@ -38,7 +41,7 @@ Result:
## lemmatize {#lemmatize}
Данная функция проводит лемматизацию для заданного слова.
Данная функция проводит лемматизацию для заданного слова. Для работы лемматизатора необходимы словари, которые можно найти [здесь](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models).
**Синтаксис**
@ -79,7 +82,11 @@ SELECT lemmatize('en', 'wolves');
## synonyms {#synonyms}
Находит синонимы к заданному слову.
Находит синонимы к заданному слову. Представлены два типа расширений словарей: `plain` и `wordnet`.
Для работы расширения типа `plain` необходимо указать путь до простого текстового файла, где каждая строка соответствует одному набору синонимов. Слова в данной строке должны быть разделены с помощью пробела или знака табуляции.
Для работы расширения типа `wordnet` необходимо указать путь до WordNet тезауруса. Тезаурус должен содержать WordNet sense index.
**Синтаксис**

View File

@ -2088,3 +2088,52 @@ SELECT tcpPort();
- [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port)
## currentProfiles {#current-profiles}
Возвращает список [профилей настроек](../../operations/access-rights.md#settings-profiles-management) для текущего пользователя.
Для изменения текущего профиля настроек может быть использована команда SET PROFILE. Если команда `SET PROFILE` не применялась, функция возвращает профили, указанные при определении текущего пользователя (см. [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)).
**Синтаксис**
``` sql
currentProfiles()
```
**Возвращаемое значение**
- Список профилей настроек для текущего пользователя.
Тип: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
## enabledProfiles {#enabled-profiles}
Возвращает профили настроек, назначенные пользователю как явно, так и неявно. Явно назначенные профили — это те же профили, которые возвращает функция [currentProfiles](#current-profiles). Неявно назначенные профили включают родительские профили других назначенных профилей; профили, назначенные с помощью предоставленных ролей; профили, назначенные с помощью собственных настроек; основной профиль по умолчанию (см. секцию `default_profile` в основном конфигурационном файле сервера).
**Синтаксис**
``` sql
enabledProfiles()
```
**Возвращаемое значение**
- Список доступных профилей для текущего пользователя.
Тип: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
## defaultProfiles {#default-profiles}
Возвращает все профили, указанные при объявлении текущего пользователя (см. [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)).
**Синтаксис**
``` sql
defaultProfiles()
```
**Возвращаемое значение**
- Список профилей по умолчанию.
Тип: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).

View File

@ -271,8 +271,8 @@ SELECT * FROM collate_test ORDER BY s ASC COLLATE 'en';
Этот модификатор также может быть скомбинирован с модификатором [LIMIT ... WITH TIES](../../../sql-reference/statements/select/limit.md#limit-with-ties).
`WITH FILL` модификатор может быть установлен после `ORDER BY expr` с опциональными параметрами `FROM expr`, `TO expr` и `STEP expr`.
Все пропущенные значнеия для колонки `expr` будут заполненые значениями соответсвующими предполагаемой последовательности значений колонки, другие колонки будут заполнены значенями по умолчанию.
Модификатор `WITH FILL` может быть установлен после `ORDER BY expr` с опциональными параметрами `FROM expr`, `TO expr` и `STEP expr`.
Все пропущенные значения для колонки `expr` будут заполнены значениями, соответствующими предполагаемой последовательности значений колонки, другие колонки будут заполнены значениями по умолчанию.
Используйте следующую конструкцию для заполнения нескольких колонок с модификатором `WITH FILL` с необязательными параметрами после каждого имени поля в секции `ORDER BY`.
@ -280,22 +280,22 @@ SELECT * FROM collate_test ORDER BY s ASC COLLATE 'en';
ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_expr], ... exprN [WITH FILL] [FROM expr] [TO expr] [STEP numeric_expr]
```
`WITH FILL` может быть применене только к полям с числовыми (все разновидности float, int, decimal) или временными (все разновидности Date, DateTime) типами.
`WITH FILL` может быть применен к полям с числовыми (все разновидности float, int, decimal) или временными (все разновидности Date, DateTime) типами. В случае применения к полям типа `String` недостающие значения заполняются пустой строкой.
Когда не определен `FROM const_expr`, последовательность заполнения использует минимальное значение поля `expr` из `ORDER BY`.
Когда не определен `TO const_expr`, последовательность заполнения использует максимальное значение поля `expr` из `ORDER BY`.
Когда `STEP const_numeric_expr` определен, тогда `const_numeric_expr` интерпретируется `как есть` для числовых типов, как `дни` для типа Date и как `секунды` для типа DateTime.
Когда `STEP const_numeric_expr` определен, `const_numeric_expr` интерпретируется "как есть" для числовых типов, как "дни" для типа `Date` и как "секунды" для типа `DateTime`.
Когда `STEP const_numeric_expr` не указан, тогда используется `1.0` для числовых типов, `1 день` для типа Date и `1 секунда` для типа DateTime.
Для примера, следующий запрос
Пример запроса без использования `WITH FILL`:
```sql
SELECT n, source FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source
FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n
) ORDER BY n;
```
возвращает
Результат:
```text
┌─n─┬─source───┐
│ 1 │ original │
@ -304,7 +304,7 @@ SELECT n, source FROM (
└───┴──────────┘
```
но после применения модификатора `WITH FILL`
Тот же запрос после применения модификатора `WITH FILL`:
```sql
SELECT n, source FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source
@ -312,7 +312,8 @@ SELECT n, source FROM (
) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5
```
возвращает
Результат:
```text
┌───n─┬─source───┐
│ 0 │ │
@ -331,13 +332,13 @@ SELECT n, source FROM (
└─────┴──────────┘
```
Для случая когда у нас есть несколько полей `ORDER BY field2 WITH FILL, field1 WITH FILL` порядок заполнения будет следовать порядку полей в секции `ORDER BY`.
Для случая с несколькими полями `ORDER BY field2 WITH FILL, field1 WITH FILL` порядок заполнения будет соответствовать порядку полей в секции `ORDER BY`.
Пример:
```sql
SELECT
toDate((number * 10) * 86400) AS d1,
toDate(number * 86400) AS d2,
```sql
SELECT
toDate((number * 10) * 86400) AS d1,
toDate(number * 86400) AS d2,
'original' AS source
FROM numbers(10)
WHERE (number % 3) = 1
@ -346,7 +347,7 @@ ORDER BY
d1 WITH FILL STEP 5;
```
возвращает
Результат:
```text
┌───d1───────┬───d2───────┬─source───┐
│ 1970-01-11 │ 1970-01-02 │ original │
@ -359,9 +360,9 @@ ORDER BY
└────────────┴────────────┴──────────┘
```
Поле `d1` не заполняет и используется значение по умолчанию поскольку у нас нет повторяющихся значения для `d2` поэтому мы не можем правильно рассчитать последователность заполнения для`d1`.
Поле `d1` не заполняется и использует значение по умолчанию. Поскольку у нас нет повторяющихся значений для `d2`, мы не можем правильно рассчитать последовательность заполнения для `d1`.
едующий запрос (с измененым порядком в ORDER BY)
Следующий запрос (с измененным порядком в `ORDER BY`):
```sql
SELECT
toDate((number * 10) * 86400) AS d1,
@ -374,7 +375,7 @@ ORDER BY
d2 WITH FILL;
```
возвращает
Результат:
```text
┌───d1───────┬───d2───────┬─source───┐
│ 1970-01-11 │ 1970-01-02 │ original │

View File

@ -197,7 +197,7 @@ private:
std::unique_ptr<ShellCommand> pager_cmd;
/// The user can specify to redirect query output to a file.
std::optional<WriteBufferFromFile> out_file_buf;
std::unique_ptr<WriteBuffer> out_file_buf;
BlockOutputStreamPtr block_out_stream;
/// The user could specify special file for server logs (stderr by default)
@ -1452,7 +1452,12 @@ private:
"Error while reconnecting to the server: {}\n",
getCurrentExceptionMessage(true));
assert(!connection->isConnected());
// The reconnection might fail, but we'll still be connected
// in the sense of `connection->isConnected() = true`,
// in case when the requested database doesn't exist.
// Disconnect manually now, so that the following code doesn't
// have any doubts, and the connection state is predictable.
connection->disconnect();
}
}
@ -2238,8 +2243,11 @@ private:
const auto & out_file_node = query_with_output->out_file->as<ASTLiteral &>();
const auto & out_file = out_file_node.value.safeGet<std::string>();
out_file_buf.emplace(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT);
out_buf = &*out_file_buf;
out_file_buf = wrapWriteBufferWithCompressionMethod(
std::make_unique<WriteBufferFromFile>(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT),
chooseCompressionMethod(out_file, ""),
/* compression level = */ 3
);
// We are writing to file, so default format is the same as in non-interactive mode.
if (is_interactive && is_default_format)
@ -2259,9 +2267,9 @@ private:
/// It is not clear how to write progress with parallel formatting. It may increase code complexity significantly.
if (!need_render_progress)
block_out_stream = context->getOutputStreamParallelIfPossible(current_format, *out_buf, block);
block_out_stream = context->getOutputStreamParallelIfPossible(current_format, out_file_buf ? *out_file_buf : *out_buf, block);
else
block_out_stream = context->getOutputStream(current_format, *out_buf, block);
block_out_stream = context->getOutputStream(current_format, out_file_buf ? *out_file_buf : *out_buf, block);
block_out_stream->writePrefix();
}

View File

@ -12,8 +12,8 @@ namespace DB
Poco::URI uri{request.getURI()};
LOG_DEBUG(log, "Request URI: {}", uri.toString());
if (uri == "/ping" && request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
return std::make_unique<PingHandler>(keep_alive_timeout);
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
return std::make_unique<LibraryExistsHandler>(keep_alive_timeout, getContext());
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
return std::make_unique<LibraryRequestHandler>(keep_alive_timeout, getContext());

View File

@ -17,8 +17,24 @@
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_REQUEST_PARAMETER;
}
namespace
{
/// Report a request-level failure to the HTTP client and to the log.
/// Sets the response status to HTTP 500 (Internal Server Error), writes `message`
/// to the response body only if `response.sent()` reports that nothing has been
/// sent yet, and logs `message` as a warning under the "LibraryBridge" logger.
void processError(HTTPServerResponse & response, const std::string & message)
{
response.setStatusAndReason(HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
/// Skip writing the body if the response has already been sent.
if (!response.sent())
*response.send() << message << std::endl;
LOG_WARNING(&Poco::Logger::get("LibraryBridge"), message);
}
std::shared_ptr<Block> parseColumns(std::string && column_string)
{
auto sample_block = std::make_shared<Block>();
@ -30,9 +46,8 @@ namespace
return sample_block;
}
std::vector<uint64_t> parseIdsFromBinary(const std::string & ids_string)
std::vector<uint64_t> parseIdsFromBinary(ReadBuffer & buf)
{
ReadBufferFromString buf(ids_string);
std::vector<uint64_t> ids;
readVectorBinary(ids, buf);
return ids;
@ -67,13 +82,36 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
std::string method = params.get("method");
std::string dictionary_id = params.get("dictionary_id");
LOG_TRACE(log, "Library method: '{}', dictionary id: {}", method, dictionary_id);
LOG_TRACE(log, "Library method: '{}', dictionary id: {}", method, dictionary_id);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
try
{
if (method == "libNew")
bool lib_new = (method == "libNew");
/// libClone: register a copy of the handler stored under `from_dictionary_id`
/// as `dictionary_id`. When the factory reports that nothing was cloned,
/// `lib_new` is set so that the libNew branch creates the handler instead.
if (method == "libClone")
{
    if (!params.has("from_dictionary_id"))
    {
        processError(response, "No 'from_dictionary_id' in request URL");
        return;
    }

    std::string from_dictionary_id = params.get("from_dictionary_id");
    const bool cloned = SharedLibraryHandlerFactory::instance().clone(from_dictionary_id, dictionary_id);

    if (cloned)
    {
        writeStringBinary("1", out);
    }
    else
    {
        /// The format string has a `{}` placeholder, so the source dictionary id must be
        /// passed explicitly; otherwise the id never reaches the log.
        LOG_TRACE(log, "Cannot clone from dictionary with id: {}, will call libNew instead", from_dictionary_id);
        lib_new = true;
    }
}
if (lib_new)
{
auto & read_buf = request.getStream();
params.read(read_buf);
@ -92,6 +130,8 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
std::string library_path = params.get("library_path");
const auto & settings_string = params.get("library_settings");
LOG_DEBUG(log, "Parsing library settings from binary string");
std::vector<std::string> library_settings = parseNamesFromBinary(settings_string);
/// Needed for library dictionary
@ -102,6 +142,8 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
}
const auto & attributes_string = params.get("attributes_names");
LOG_DEBUG(log, "Parsing attributes names from binary string");
std::vector<std::string> attributes_names = parseNamesFromBinary(attributes_string);
/// Needed to parse block from binary string format
@ -140,54 +182,63 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
SharedLibraryHandlerFactory::instance().create(dictionary_id, library_path, library_settings, sample_block_with_nulls, attributes_names);
writeStringBinary("1", out);
}
else if (method == "libClone")
{
if (!params.has("from_dictionary_id"))
{
processError(response, "No 'from_dictionary_id' in request URL");
return;
}
std::string from_dictionary_id = params.get("from_dictionary_id");
LOG_TRACE(log, "Calling libClone from {} to {}", from_dictionary_id, dictionary_id);
SharedLibraryHandlerFactory::instance().clone(from_dictionary_id, dictionary_id);
writeStringBinary("1", out);
}
else if (method == "libDelete")
{
SharedLibraryHandlerFactory::instance().remove(dictionary_id);
auto deleted = SharedLibraryHandlerFactory::instance().remove(dictionary_id);
/// Do not throw, a warning is ok.
if (!deleted)
LOG_WARNING(log, "Cannot delete library for with dictionary id: {}, because such id was not found.", dictionary_id);
writeStringBinary("1", out);
}
else if (method == "isModified")
{
auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
if (!library_handler)
throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER, "Not found dictionary with id: {}", dictionary_id);
bool res = library_handler->isModified();
writeStringBinary(std::to_string(res), out);
}
else if (method == "supportsSelectiveLoad")
{
auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
if (!library_handler)
throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER, "Not found dictionary with id: {}", dictionary_id);
bool res = library_handler->supportsSelectiveLoad();
writeStringBinary(std::to_string(res), out);
}
else if (method == "loadAll")
{
auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
if (!library_handler)
throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER, "Not found dictionary with id: {}", dictionary_id);
const auto & sample_block = library_handler->getSampleBlock();
LOG_DEBUG(log, "Calling loadAll() for dictionary id: {}", dictionary_id);
auto input = library_handler->loadAll();
LOG_DEBUG(log, "Started sending result data for dictionary id: {}", dictionary_id);
BlockOutputStreamPtr output = FormatFactory::instance().getOutputStream(FORMAT, out, sample_block, getContext());
copyData(*input, *output);
}
else if (method == "loadIds")
{
LOG_DEBUG(log, "Getting diciontary ids for dictionary with id: {}", dictionary_id);
String ids_string;
readString(ids_string, request.getStream());
std::vector<uint64_t> ids = parseIdsFromBinary(ids_string);
std::vector<uint64_t> ids = parseIdsFromBinary(request.getStream());
auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
if (!library_handler)
throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER, "Not found dictionary with id: {}", dictionary_id);
const auto & sample_block = library_handler->getSampleBlock();
LOG_DEBUG(log, "Calling loadIds() for dictionary id: {}", dictionary_id);
auto input = library_handler->loadIds(ids);
LOG_DEBUG(log, "Started sending result data for dictionary id: {}", dictionary_id);
BlockOutputStreamPtr output = FormatFactory::instance().getOutputStream(FORMAT, out, sample_block, getContext());
copyData(*input, *output);
}
@ -219,8 +270,14 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
auto block = reader->read();
auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
if (!library_handler)
throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER, "Not found dictionary with id: {}", dictionary_id);
const auto & sample_block = library_handler->getSampleBlock();
LOG_DEBUG(log, "Calling loadKeys() for dictionary id: {}", dictionary_id);
auto input = library_handler->loadKeys(block.getColumns());
LOG_DEBUG(log, "Started sending result data for dictionary id: {}", dictionary_id);
BlockOutputStreamPtr output = FormatFactory::instance().getOutputStream(FORMAT, out, sample_block, getContext());
copyData(*input, *output);
}
@ -228,8 +285,9 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
catch (...)
{
auto message = getCurrentExceptionMessage(true);
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR, message); // can't call process_error, because of too soon response sending
LOG_ERROR(log, "Failed to process request for dictionary_id: {}. Error: {}", dictionary_id, message);
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR, message); // can't call process_error, because of too soon response sending
try
{
writeStringBinary(message, out);
@ -239,8 +297,6 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
{
tryLogCurrentException(log);
}
tryLogCurrentException(log);
}
try
@ -254,24 +310,30 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
}
void LibraryRequestHandler::processError(HTTPServerResponse & response, const std::string & message)
{
response.setStatusAndReason(HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
if (!response.sent())
*response.send() << message << std::endl;
LOG_WARNING(log, message);
}
void PingHandler::handleRequest(HTTPServerRequest & /* request */, HTTPServerResponse & response)
void LibraryExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
try
{
LOG_TRACE(log, "Request URI: {}", request.getURI());
HTMLForm params(getContext()->getSettingsRef(), request);
if (!params.has("dictionary_id"))
{
processError(response, "No 'dictionary_id' in request URL");
return;
}
std::string dictionary_id = params.get("dictionary_id");
auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
String res;
if (library_handler)
res = "1";
else
res = "0";
setResponseDefaultHeaders(response, keep_alive_timeout);
const char * data = "Ok.\n";
response.sendBuffer(data, strlen(data));
LOG_TRACE(log, "Senging ping response: {} (dictionary id: {})", res, dictionary_id);
response.sendBuffer(res.data(), res.size());
}
catch (...)
{

View File

@ -22,8 +22,7 @@ class LibraryRequestHandler : public HTTPRequestHandler, WithContext
public:
LibraryRequestHandler(
size_t keep_alive_timeout_,
ContextPtr context_)
size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(&Poco::Logger::get("LibraryRequestHandler"))
, keep_alive_timeout(keep_alive_timeout_)
@ -35,18 +34,18 @@ public:
private:
static constexpr inline auto FORMAT = "RowBinary";
void processError(HTTPServerResponse & response, const std::string & message);
Poco::Logger * log;
size_t keep_alive_timeout;
};
class PingHandler : public HTTPRequestHandler
class LibraryExistsHandler : public HTTPRequestHandler, WithContext
{
public:
explicit PingHandler(size_t keep_alive_timeout_)
: keep_alive_timeout(keep_alive_timeout_)
explicit LibraryExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, keep_alive_timeout(keep_alive_timeout_)
, log(&Poco::Logger::get("LibraryRequestHandler"))
{
}
@ -54,6 +53,8 @@ public:
private:
const size_t keep_alive_timeout;
Poco::Logger * log;
};
}

View File

@ -4,12 +4,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}
SharedLibraryHandlerPtr SharedLibraryHandlerFactory::get(const std::string & dictionary_id)
{
std::lock_guard lock(mutex);
@ -18,7 +12,7 @@ SharedLibraryHandlerPtr SharedLibraryHandlerFactory::get(const std::string & dic
if (library_handler != library_handlers.end())
return library_handler->second;
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not found dictionary with id: {}", dictionary_id);
return nullptr;
}
@ -30,32 +24,32 @@ void SharedLibraryHandlerFactory::create(
const std::vector<std::string> & attributes_names)
{
std::lock_guard lock(mutex);
library_handlers[dictionary_id] = std::make_shared<SharedLibraryHandler>(library_path, library_settings, sample_block, attributes_names);
if (!library_handlers.count(dictionary_id))
library_handlers.emplace(std::make_pair(dictionary_id, std::make_shared<SharedLibraryHandler>(library_path, library_settings, sample_block, attributes_names)));
else
LOG_WARNING(&Poco::Logger::get("SharedLibraryHandlerFactory"), "Library handler with dictionary id {} already exists", dictionary_id);
}
void SharedLibraryHandlerFactory::clone(const std::string & from_dictionary_id, const std::string & to_dictionary_id)
bool SharedLibraryHandlerFactory::clone(const std::string & from_dictionary_id, const std::string & to_dictionary_id)
{
std::lock_guard lock(mutex);
auto from_library_handler = library_handlers.find(from_dictionary_id);
/// This is not supposed to happen as libClone is called from copy constructor of LibraryDictionarySource
/// object, and shared library handler of from_dictionary is removed only in its destructor.
/// And if for from_dictionary there was no shared library handler, it would have received and exception in
/// its constructor, so no libClone would be made from it.
if (from_library_handler == library_handlers.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "No shared library handler found");
return false;
/// libClone method will be called in copy constructor
library_handlers[to_dictionary_id] = std::make_shared<SharedLibraryHandler>(*from_library_handler->second);
return true;
}
void SharedLibraryHandlerFactory::remove(const std::string & dictionary_id)
bool SharedLibraryHandlerFactory::remove(const std::string & dictionary_id)
{
std::lock_guard lock(mutex);
/// libDelete is called in destructor.
library_handlers.erase(dictionary_id);
return library_handlers.erase(dictionary_id);
}

View File

@ -24,9 +24,9 @@ public:
const Block & sample_block,
const std::vector<std::string> & attributes_names);
void clone(const std::string & from_dictionary_id, const std::string & to_dictionary_id);
bool clone(const std::string & from_dictionary_id, const std::string & to_dictionary_id);
void remove(const std::string & dictionary_id);
bool remove(const std::string & dictionary_id);
private:
/// map: dict_id -> sharedLibraryHandler

View File

@ -361,23 +361,22 @@
function postImpl(posted_request_num, query)
{
/// TODO: Check if URL already contains query string (append parameters).
const user = document.getElementById('user').value;
const password = document.getElementById('password').value;
let user = document.getElementById('user').value;
let password = document.getElementById('password').value;
const server_address = document.getElementById('url').value;
let server_address = document.getElementById('url').value;
let url = server_address +
const url = server_address +
(server_address.indexOf('?') >= 0 ? '&' : '?') +
/// Ask server to allow cross-domain requests.
'?add_http_cors_header=1' +
'add_http_cors_header=1' +
'&user=' + encodeURIComponent(user) +
'&password=' + encodeURIComponent(password) +
'&default_format=JSONCompact' +
/// Safety settings to prevent results that browser cannot display.
'&max_result_rows=1000&max_result_bytes=10000000&result_overflow_mode=break';
let xhr = new XMLHttpRequest;
const xhr = new XMLHttpRequest;
xhr.open('POST', url, true);
@ -391,12 +390,12 @@
/// The query is saved in browser history (in state JSON object)
/// as well as in URL fragment identifier.
if (query != previous_query) {
let state = {
const state = {
query: query,
status: this.status,
response: this.response.length > 100000 ? null : this.response /// Lower than the browser's limit.
};
let title = "ClickHouse Query: " + query;
const title = "ClickHouse Query: " + query;
let history_url = window.location.pathname + '?user=' + encodeURIComponent(user);
if (server_address != location.origin) {

View File

@ -33,24 +33,9 @@ Poco::URI IBridgeHelper::getPingURI() const
}
bool IBridgeHelper::checkBridgeIsRunning() const
void IBridgeHelper::startBridgeSync()
{
try
{
ReadWriteBufferFromHTTP buf(
getPingURI(), Poco::Net::HTTPRequest::HTTP_GET, {}, ConnectionTimeouts::getHTTPTimeouts(getContext()));
return checkString(PING_OK_ANSWER, buf);
}
catch (...)
{
return false;
}
}
void IBridgeHelper::startBridgeSync() const
{
if (!checkBridgeIsRunning())
if (!bridgeHandShake())
{
LOG_TRACE(getLog(), "{} is not running, will try to start it", serviceAlias());
startBridge(startBridgeCommand());
@ -64,7 +49,7 @@ void IBridgeHelper::startBridgeSync() const
++counter;
LOG_TRACE(getLog(), "Checking {} is running, try {}", serviceAlias(), counter);
if (checkBridgeIsRunning())
if (bridgeHandShake())
{
started = true;
break;
@ -81,7 +66,7 @@ void IBridgeHelper::startBridgeSync() const
}
std::unique_ptr<ShellCommand> IBridgeHelper::startBridgeCommand() const
std::unique_ptr<ShellCommand> IBridgeHelper::startBridgeCommand()
{
if (startBridgeManually())
throw Exception(serviceAlias() + " is not running. Please, start it manually", ErrorCodes::EXTERNAL_SERVER_IS_NOT_RESPONDING);

View File

@ -28,16 +28,19 @@ public:
static const inline std::string MAIN_METHOD = Poco::Net::HTTPRequest::HTTP_POST;
explicit IBridgeHelper(ContextPtr context_) : WithContext(context_) {}
virtual ~IBridgeHelper() = default;
void startBridgeSync() const;
virtual ~IBridgeHelper() = default;
Poco::URI getMainURI() const;
Poco::URI getPingURI() const;
void startBridgeSync();
protected:
/// Check bridge is running. Can also check something else in the mean time.
virtual bool bridgeHandShake() = 0;
/// clickhouse-odbc-bridge, clickhouse-library-bridge
virtual String serviceAlias() const = 0;
@ -61,9 +64,7 @@ protected:
private:
bool checkBridgeIsRunning() const;
std::unique_ptr<ShellCommand> startBridgeCommand() const;
std::unique_ptr<ShellCommand> startBridgeCommand();
};
}

View File

@ -1,6 +1,5 @@
#include "LibraryBridgeHelper.h"
#include <IO/ReadHelpers.h>
#include <DataStreams/OneBlockInputStream.h>
#include <DataStreams/OwningBlockInputStream.h>
#include <DataStreams/formatBlock.h>
@ -8,6 +7,8 @@
#include <Processors/Formats/InputStreamFromInputFormat.h>
#include <IO/WriteBufferFromOStream.h>
#include <IO/WriteBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Formats/FormatFactory.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Common/ShellCommand.h>
@ -20,16 +21,25 @@
namespace DB
{
namespace ErrorCodes
{
extern const int EXTERNAL_LIBRARY_ERROR;
extern const int LOGICAL_ERROR;
}
LibraryBridgeHelper::LibraryBridgeHelper(
ContextPtr context_,
const Block & sample_block_,
const Field & dictionary_id_)
const Field & dictionary_id_,
const LibraryInitData & library_data_)
: IBridgeHelper(context_->getGlobalContext())
, log(&Poco::Logger::get("LibraryBridgeHelper"))
, sample_block(sample_block_)
, config(context_->getConfigRef())
, http_timeout(context_->getGlobalContext()->getSettingsRef().http_receive_timeout.value)
, library_data(library_data_)
, dictionary_id(dictionary_id_)
, http_timeouts(ConnectionTimeouts::getHTTPTimeouts(context_))
{
bridge_port = config.getUInt("library_bridge.port", DEFAULT_PORT);
bridge_host = config.getString("library_bridge.host", DEFAULT_HOST);
@ -61,26 +71,91 @@ void LibraryBridgeHelper::startBridge(std::unique_ptr<ShellCommand> cmd) const
}
bool LibraryBridgeHelper::initLibrary(const std::string & library_path, const std::string library_settings, const std::string attributes_names)
/// Pings the library bridge and reconciles the bridge's answer with the state kept by this helper.
///
/// Returns false when the ping request throws, or when the request that re-creates the library
/// handler throws (that exception is logged). Returns true otherwise.
///
/// Throws LOGICAL_ERROR when the ping answer is not a single character that parses to 0 or 1, or when
/// the bridge reports a handler for dictionary_id while library_initialized is still false here.
/// Throws EXTERNAL_LIBRARY_ERROR when the re-creation request completes but reports failure.
bool LibraryBridgeHelper::bridgeHandShake()
{
/// NOTE(review): the next two statements look like leftovers of the previous initLibrary() body in this
/// diff rendering; this `uri` is not used again at function scope -- confirm against the final file.
startBridgeSync();
auto uri = createRequestURI(LIB_NEW_METHOD);
String result;
try
{
/// Any exception while pinging is reported as a failed handshake instead of being propagated.
ReadWriteBufferFromHTTP buf(createRequestURI(PING), Poco::Net::HTTPRequest::HTTP_GET, {}, http_timeouts);
readString(result, buf);
}
catch (...)
{
return false;
}
/*
* When pinging bridge we also pass current dictionary_id. The bridge will check if there is such
* dictionary. It is possible that such dictionary_id is not present only in two cases:
* 1. It is dictionary source creation and initialization of library handler on bridge side did not happen yet.
* 2. Bridge crashed or restarted for some reason while server did not.
**/
/// The answer must be exactly one character.
if (result.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected message from library bridge: {}. Check bridge and server have the same version.", result);
UInt8 dictionary_id_exists;
auto parsed = tryParse<UInt8>(dictionary_id_exists, result);
/// Only 0 (bridge has no handler for this dictionary) and 1 (it has one) are accepted.
if (!parsed || (dictionary_id_exists != 0 && dictionary_id_exists != 1))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected message from library bridge: {} ({}). Check bridge and server have the same version.",
result, parsed ? toString(dictionary_id_exists) : "failed to parse");
LOG_TRACE(log, "dictionary_id: {}, dictionary_id_exists on bridge side: {}, library confirmed to be initialized on server side: {}",
toString(dictionary_id), toString(dictionary_id_exists), library_initialized);
/// The bridge knows the dictionary although this helper never completed initialization: inconsistent state.
if (dictionary_id_exists && !library_initialized)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Library was not initialized, but bridge responded to already have dictionary id: {}", dictionary_id);
/// Here we want to tell the bridge to recreate a new library handler for the current dictionary,
/// because it responded to have lost it, but we know that it has already been created. (It is a direct result of bridge crash).
if (!dictionary_id_exists && library_initialized)
{
LOG_WARNING(log, "Library bridge does not have library handler with dictionaty id: {}. It will be reinitialized.", dictionary_id);
bool reinitialized = false;
try
{
/// Same request as initLibrary(), but without startBridgeSync() and without touching library_initialized.
auto uri = createRequestURI(LIB_NEW_METHOD);
reinitialized = executeRequest(uri, getInitLibraryCallback());
}
catch (...)
{
/// A failing request is logged and reported as a failed handshake.
tryLogCurrentException(log);
return false;
}
/// The request went through, but the bridge answered that it could not create the handler.
if (!reinitialized)
throw Exception(ErrorCodes::EXTERNAL_LIBRARY_ERROR,
"Failed to reinitialize library handler on bridge side for dictionary with id: {}", dictionary_id);
}
return true;
}
/// Builds the callback that writes the request body used to create a library handler on the bridge.
/// The body is a sequence of `key=value` pairs joined by `&`; every value goes through escapeForFileName().
/// The returned lambda captures `this` (it reads library_data and sample_block through it), so it must
/// only be invoked while this helper object is alive.
/// NOTE(review): in this diff rendering the lines of the previous lambda (`out_stream_callback`, the
/// `library_path` / `library_settings` / `attributes_names` locals and the trailing executeRequest call)
/// are interleaved with the new ones -- confirm which lines survive in the final file.
ReadWriteBufferFromHTTP::OutStreamCallback LibraryBridgeHelper::getInitLibraryCallback() const
{
/// Sample block must contain null values
/// The sample block is formatted once into a string here; the lambda captures that string by value.
WriteBufferFromOwnString out;
auto output_stream = getContext()->getOutputStream(LibraryBridgeHelper::DEFAULT_FORMAT, out, sample_block);
formatBlock(output_stream, sample_block);
auto block_string = out.str();
auto out_stream_callback = [library_path, library_settings, attributes_names, block_string, this](std::ostream & os)
return [block_string, this](std::ostream & os)
{
os << "library_path=" << escapeForFileName(library_path) << "&";
os << "library_settings=" << escapeForFileName(library_settings) << "&";
os << "attributes_names=" << escapeForFileName(attributes_names) << "&";
os << "library_path=" << escapeForFileName(library_data.library_path) << "&";
os << "library_settings=" << escapeForFileName(library_data.library_settings) << "&";
os << "attributes_names=" << escapeForFileName(library_data.dict_attributes) << "&";
/// Column names and types of the sample block, followed by the formatted block itself.
os << "sample_block=" << escapeForFileName(sample_block.getNamesAndTypesList().toString()) << "&";
os << "null_values=" << escapeForFileName(block_string);
};
return executeRequest(uri, out_stream_callback);
}
/// Runs startBridgeSync() and then sends the LIB_NEW_METHOD request, with the body produced by
/// getInitLibraryCallback(), to the bridge.
/// The bridge's boolean answer is stored in library_initialized and returned. If the request throws,
/// library_initialized keeps its previous value.
bool LibraryBridgeHelper::initLibrary()
{
    startBridgeSync();

    const auto request_uri = createRequestURI(LIB_NEW_METHOD);
    const bool handler_created = executeRequest(request_uri, getInitLibraryCallback());

    library_initialized = handler_created;
    return handler_created;
}
@ -89,15 +164,23 @@ bool LibraryBridgeHelper::cloneLibrary(const Field & other_dictionary_id)
startBridgeSync();
auto uri = createRequestURI(LIB_CLONE_METHOD);
uri.addQueryParameter("from_dictionary_id", toString(other_dictionary_id));
return executeRequest(uri);
/// We also pass the initialization settings so that the bridge can create a library handler
/// if from_dictionary_id does not exist on the bridge side (possible after a bridge crash).
library_initialized = executeRequest(uri, getInitLibraryCallback());
return library_initialized;
}
/// Asks the bridge to drop the library handler of this dictionary (LIB_DELETE_METHOD).
/// NOTE(review): in this diff rendering the previous body (startBridgeSync + unconditional request)
/// is interleaved with the new handshake-guarded body, which makes the latter unreachable as shown --
/// confirm which statements survive in the final file.
bool LibraryBridgeHelper::removeLibrary()
{
startBridgeSync();
auto uri = createRequestURI(LIB_DELETE_METHOD);
return executeRequest(uri);
/// Do not force bridge restart if it is not running in case of removeLibrary
/// because in this case after restart it will not have this dictionary id in memory anyway.
if (bridgeHandShake())
{
auto uri = createRequestURI(LIB_DELETE_METHOD);
return executeRequest(uri);
}
/// Handshake failed: no request is sent and the removal is reported as successful.
return true;
}
@ -125,10 +208,12 @@ BlockInputStreamPtr LibraryBridgeHelper::loadAll()
}
BlockInputStreamPtr LibraryBridgeHelper::loadIds(const std::string ids_string)
/// Sends a LOAD_IDS_METHOD request for `ids` and returns the stream produced by loadBase().
/// The ids are serialized by getDictIdsString() and written as the request body; their count is
/// additionally passed as the `ids_num` query parameter.
BlockInputStreamPtr LibraryBridgeHelper::loadIds(const std::vector<uint64_t> & ids)
{
    startBridgeSync();

    auto request_uri = createRequestURI(LOAD_IDS_METHOD);
    /// Not used parameter, but helpful
    request_uri.addQueryParameter("ids_num", toString(ids.size()));

    /// The serialized ids are owned by the callback, so it stays valid after this function returns.
    return loadBase(request_uri, [payload = getDictIdsString(ids)](std::ostream & os) { os << payload; });
}
@ -149,13 +234,13 @@ BlockInputStreamPtr LibraryBridgeHelper::loadKeys(const Block & requested_block)
}
bool LibraryBridgeHelper::executeRequest(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback)
bool LibraryBridgeHelper::executeRequest(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback) const
{
ReadWriteBufferFromHTTP buf(
uri,
Poco::Net::HTTPRequest::HTTP_POST,
std::move(out_stream_callback),
ConnectionTimeouts::getHTTPTimeouts(getContext()));
http_timeouts);
bool res;
readBoolText(res, buf);
@ -169,7 +254,7 @@ BlockInputStreamPtr LibraryBridgeHelper::loadBase(const Poco::URI & uri, ReadWri
uri,
Poco::Net::HTTPRequest::HTTP_POST,
std::move(out_stream_callback),
ConnectionTimeouts::getHTTPTimeouts(getContext()),
http_timeouts,
0,
Poco::Net::HTTPBasicCredentials{},
DBMS_DEFAULT_BUFFER_SIZE,
@ -179,4 +264,13 @@ BlockInputStreamPtr LibraryBridgeHelper::loadBase(const Poco::URI & uri, ReadWri
return std::make_shared<OwningBlockInputStream<ReadWriteBufferFromHTTP>>(input_stream, std::move(read_buf_ptr));
}
/// Serializes `ids` with writeVectorBinary() into an in-memory buffer and returns the buffer contents.
String LibraryBridgeHelper::getDictIdsString(const std::vector<UInt64> & ids)
{
    WriteBufferFromOwnString serialized_ids;
    writeVectorBinary(ids, serialized_ids);
    return serialized_ids.str();
}
}

View File

@ -15,11 +15,18 @@ class LibraryBridgeHelper : public IBridgeHelper
{
public:
/// Parameters needed to (re)create a library handler on the bridge side.
/// The helper stores them in `library_data` and sends them in the body built by getInitLibraryCallback().
struct LibraryInitData
{
/// Sent as the `library_path` request field.
String library_path;
/// Sent as the `library_settings` request field.
String library_settings;
/// Sent as the `attributes_names` request field.
String dict_attributes;
};
static constexpr inline size_t DEFAULT_PORT = 9012;
LibraryBridgeHelper(ContextPtr context_, const Block & sample_block, const Field & dictionary_id_);
LibraryBridgeHelper(ContextPtr context_, const Block & sample_block, const Field & dictionary_id_, const LibraryInitData & library_data_);
bool initLibrary(const std::string & library_path, std::string library_settings, std::string attributes_names);
bool initLibrary();
bool cloneLibrary(const Field & other_dictionary_id);
@ -31,16 +38,19 @@ public:
BlockInputStreamPtr loadAll();
BlockInputStreamPtr loadIds(std::string ids_string);
BlockInputStreamPtr loadIds(const std::vector<uint64_t> & ids);
BlockInputStreamPtr loadKeys(const Block & requested_block);
BlockInputStreamPtr loadBase(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = {});
bool executeRequest(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = {});
bool executeRequest(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = {}) const;
LibraryInitData getLibraryData() const { return library_data; }
protected:
bool bridgeHandShake() override;
void startBridge(std::unique_ptr<ShellCommand> cmd) const override;
String serviceAlias() const override { return "clickhouse-library-bridge"; }
@ -61,6 +71,8 @@ protected:
Poco::URI createBaseURI() const override;
ReadWriteBufferFromHTTP::OutStreamCallback getInitLibraryCallback() const;
private:
static constexpr inline auto LIB_NEW_METHOD = "libNew";
static constexpr inline auto LIB_CLONE_METHOD = "libClone";
@ -69,18 +81,24 @@ private:
static constexpr inline auto LOAD_IDS_METHOD = "loadIds";
static constexpr inline auto LOAD_KEYS_METHOD = "loadKeys";
static constexpr inline auto IS_MODIFIED_METHOD = "isModified";
static constexpr inline auto PING = "ping";
static constexpr inline auto SUPPORTS_SELECTIVE_LOAD_METHOD = "supportsSelectiveLoad";
Poco::URI createRequestURI(const String & method) const;
static String getDictIdsString(const std::vector<UInt64> & ids);
Poco::Logger * log;
const Block sample_block;
const Poco::Util::AbstractConfiguration & config;
const Poco::Timespan http_timeout;
LibraryInitData library_data;
Field dictionary_id;
std::string bridge_host;
size_t bridge_port;
bool library_initialized = false;
ConnectionTimeouts http_timeouts;
};
}

View File

@ -60,20 +60,33 @@ public:
static constexpr inline auto SCHEMA_ALLOWED_HANDLER = "/schema_allowed";
XDBCBridgeHelper(
ContextPtr context_,
Poco::Timespan http_timeout_,
const std::string & connection_string_)
: IXDBCBridgeHelper(context_->getGlobalContext())
, log(&Poco::Logger::get(BridgeHelperMixin::getName() + "BridgeHelper"))
, connection_string(connection_string_)
, http_timeout(http_timeout_)
, config(context_->getGlobalContext()->getConfigRef())
{
bridge_host = config.getString(BridgeHelperMixin::configPrefix() + ".host", DEFAULT_HOST);
bridge_port = config.getUInt(BridgeHelperMixin::configPrefix() + ".port", DEFAULT_PORT);
}
ContextPtr context_,
Poco::Timespan http_timeout_,
const std::string & connection_string_)
: IXDBCBridgeHelper(context_->getGlobalContext())
, log(&Poco::Logger::get(BridgeHelperMixin::getName() + "BridgeHelper"))
, connection_string(connection_string_)
, http_timeout(http_timeout_)
, config(context_->getGlobalContext()->getConfigRef())
{
bridge_host = config.getString(BridgeHelperMixin::configPrefix() + ".host", DEFAULT_HOST);
bridge_port = config.getUInt(BridgeHelperMixin::configPrefix() + ".port", DEFAULT_PORT);
}
protected:
bool bridgeHandShake() override
{
try
{
ReadWriteBufferFromHTTP buf(getPingURI(), Poco::Net::HTTPRequest::HTTP_GET, {}, ConnectionTimeouts::getHTTPTimeouts(getContext()));
return checkString(PING_OK_ANSWER, buf);
}
catch (...)
{
return false;
}
}
auto getConnectionString() const { return connection_string; }
String getName() const override { return BridgeHelperMixin::getName(); }

View File

@ -132,6 +132,10 @@ void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_size = source[0];
if (bytes_size == 0)
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_to_skip = uncompressed_size % bytes_size;
if (UInt32(2 + bytes_to_skip) > source_size)

View File

@ -502,6 +502,10 @@ void CompressionCodecDoubleDelta::doDecompressData(const char * source, UInt32 s
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_size = source[0];
if (bytes_size == 0)
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_to_skip = uncompressed_size % bytes_size;
if (UInt32(2 + bytes_to_skip) > source_size)

View File

@ -410,6 +410,10 @@ void CompressionCodecGorilla::doDecompressData(const char * source, UInt32 sourc
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_size = source[0];
if (bytes_size == 0)
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_to_skip = uncompressed_size % bytes_size;
if (UInt32(2 + bytes_to_skip) > source_size)

View File

@ -62,6 +62,7 @@ private:
namespace ErrorCodes
{
extern const int CANNOT_COMPRESS;
extern const int CANNOT_DECOMPRESS;
extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
extern const int ILLEGAL_CODEC_PARAMETER;
}
@ -93,7 +94,10 @@ UInt32 CompressionCodecLZ4::doCompressData(const char * source, UInt32 source_si
/// Decompresses `source_size` bytes from `source` into `dest`, which is expected to receive
/// `uncompressed_size` bytes, using LZ4::decompress with this codec's `lz4_stat`.
/// Throws CANNOT_DECOMPRESS when LZ4::decompress reports failure.
/// NOTE(review): the first, unchecked call below appears to be the removed pre-change line of this
/// diff rendering; as shown the data would be decompressed twice -- confirm against the final file.
void CompressionCodecLZ4::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
{
LZ4::decompress(source, dest, source_size, uncompressed_size, lz4_stat);
bool success = LZ4::decompress(source, dest, source_size, uncompressed_size, lz4_stat);
if (!success)
throw Exception("Cannot decompress", ErrorCodes::CANNOT_DECOMPRESS);
}
void registerCodecLZ4(CompressionCodecFactory & factory)

View File

@ -412,13 +412,16 @@ template <> void inline copyOverlap<32, false>(UInt8 * op, const UInt8 *& match,
/// See also https://stackoverflow.com/a/30669632
template <size_t copy_amount, bool use_shuffle>
void NO_INLINE decompressImpl(
bool NO_INLINE decompressImpl(
const char * const source,
char * const dest,
size_t source_size,
size_t dest_size)
{
const UInt8 * ip = reinterpret_cast<const UInt8 *>(source);
UInt8 * op = reinterpret_cast<UInt8 *>(dest);
const UInt8 * const input_end = ip + source_size;
UInt8 * const output_begin = op;
UInt8 * const output_end = op + dest_size;
/// Unrolling with clang is doing >10% performance degrade.
@ -461,13 +464,19 @@ void NO_INLINE decompressImpl(
/// output: xyzHello, w
/// ^-op (we will overwrite excessive bytes on next iteration)
wildCopy<copy_amount>(op, ip, copy_end); /// Here we can write up to copy_amount - 1 bytes after buffer.
{
auto * target = std::min(copy_end, output_end);
wildCopy<copy_amount>(op, ip, target); /// Here we can write up to copy_amount - 1 bytes after buffer.
if (target == output_end)
return true;
}
ip += length;
op = copy_end;
if (copy_end >= output_end)
return;
if (unlikely(ip > input_end))
return false;
/// Get match offset.
@ -475,6 +484,9 @@ void NO_INLINE decompressImpl(
ip += 2;
const UInt8 * match = op - offset;
if (unlikely(match < output_begin))
return false;
/// Get match length.
length = token & 0x0F;
@ -515,7 +527,10 @@ void NO_INLINE decompressImpl(
copy<copy_amount>(op, match); /// copy_amount + copy_amount - 1 - 4 * 2 bytes after buffer.
if (length > copy_amount * 2)
wildCopy<copy_amount>(op + copy_amount, match + copy_amount, copy_end);
{
auto * target = std::min(copy_end, output_end);
wildCopy<copy_amount>(op + copy_amount, match + copy_amount, target);
}
op = copy_end;
}
@ -524,7 +539,7 @@ void NO_INLINE decompressImpl(
}
void decompress(
bool decompress(
const char * const source,
char * const dest,
size_t source_size,
@ -532,7 +547,7 @@ void decompress(
PerformanceStatistics & statistics [[maybe_unused]])
{
if (source_size == 0 || dest_size == 0)
return;
return true;
/// Don't run timer if the block is too small.
if (dest_size >= 32768)
@ -542,24 +557,27 @@ void decompress(
/// Run the selected method and measure time.
Stopwatch watch;
bool success = true;
if (best_variant == 0)
decompressImpl<16, true>(source, dest, dest_size);
success = decompressImpl<16, true>(source, dest, source_size, dest_size);
if (best_variant == 1)
decompressImpl<16, false>(source, dest, dest_size);
success = decompressImpl<16, false>(source, dest, source_size, dest_size);
if (best_variant == 2)
decompressImpl<8, true>(source, dest, dest_size);
success = decompressImpl<8, true>(source, dest, source_size, dest_size);
if (best_variant == 3)
decompressImpl<32, false>(source, dest, dest_size);
success = decompressImpl<32, false>(source, dest, source_size, dest_size);
watch.stop();
/// Update performance statistics.
statistics.data[best_variant].update(watch.elapsedSeconds(), dest_size);
return success;
}
else
{
decompressImpl<8, false>(source, dest, dest_size);
return decompressImpl<8, false>(source, dest, source_size, dest_size);
}
}

View File

@ -122,14 +122,14 @@ struct PerformanceStatistics
return choose_method;
}
PerformanceStatistics() {}
PerformanceStatistics(ssize_t choose_method_) : choose_method(choose_method_) {}
PerformanceStatistics() = default;
explicit PerformanceStatistics(ssize_t choose_method_) : choose_method(choose_method_) {}
};
/** This method dispatch to one of different implementations depending on performance statistics.
*/
void decompress(
bool decompress(
const char * const source,
char * const dest,
size_t source_size,

View File

@ -449,7 +449,6 @@ class IColumn;
M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \
M(Bool, allow_experimental_map_type, true, "Obsolete setting, does nothing.", 0) \
M(Bool, allow_experimental_window_functions, false, "Allow experimental window functions", 0) \
M(Bool, allow_experimental_projection_optimization, false, "Enable projection optimization when processing SELECT queries", 0) \
M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \
M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \

View File

@ -3,6 +3,7 @@
#include <Common/ProfileEvents.h>
#include <Common/CurrentThread.h>
#include <IO/WriteHelpers.h>
#include <Common/Stopwatch.h>
#include <common/sleep.h>
namespace ProfileEvents
@ -104,14 +105,18 @@ static bool handleOverflowMode(OverflowMode mode, const String & message, int co
}
}
bool ExecutionSpeedLimits::checkTimeLimit(UInt64 elapsed_ns, OverflowMode overflow_mode) const
bool ExecutionSpeedLimits::checkTimeLimit(const Stopwatch & stopwatch, OverflowMode overflow_mode) const
{
if (max_execution_time != 0
&& elapsed_ns > static_cast<UInt64>(max_execution_time.totalMicroseconds()) * 1000)
return handleOverflowMode(overflow_mode,
if (max_execution_time != 0)
{
auto elapsed_ns = stopwatch.elapsed();
if (elapsed_ns > static_cast<UInt64>(max_execution_time.totalMicroseconds()) * 1000)
return handleOverflowMode(overflow_mode,
"Timeout exceeded: elapsed " + toString(static_cast<double>(elapsed_ns) / 1000000000ULL)
+ " seconds, maximum: " + toString(max_execution_time.totalMicroseconds() / 1000000.0),
ErrorCodes::TIMEOUT_EXCEEDED);
}
return true;
}

View File

@ -3,6 +3,7 @@
#include <Poco/Timespan.h>
#include <common/types.h>
#include <DataStreams/SizeLimits.h>
#include <Common/Stopwatch.h>
namespace DB
{
@ -25,7 +26,7 @@ public:
/// Pause execution in case if speed limits were exceeded.
void throttle(size_t read_rows, size_t read_bytes, size_t total_rows_to_read, UInt64 total_elapsed_microseconds) const;
bool checkTimeLimit(UInt64 elapsed_ns, OverflowMode overflow_mode) const;
bool checkTimeLimit(const Stopwatch & stopwatch, OverflowMode overflow_mode) const;
};
}

View File

@ -201,7 +201,7 @@ void IBlockInputStream::updateExtremes(Block & block)
/// Delegates the execution-time check to limits.speed_limits, using this stream's total stopwatch
/// and the configured timeout overflow mode; returns the delegate's result.
/// NOTE(review): both the old call (passing the elapsed value) and the new call (passing the stopwatch
/// itself) are present in this diff rendering; only the first return is reachable as shown -- confirm.
bool IBlockInputStream::checkTimeLimit() const
{
return limits.speed_limits.checkTimeLimit(info.total_stopwatch.elapsed(), limits.timeout_overflow_mode);
return limits.speed_limits.checkTimeLimit(info.total_stopwatch, limits.timeout_overflow_mode);
}

View File

@ -83,7 +83,7 @@ TEST(MergingSortedTest, SimpleBlockSizeTest)
EXPECT_EQ(pipe.numOutputPorts(), 3);
auto transform = std::make_shared<MergingSortedTransform>(pipe.getHeader(), pipe.numOutputPorts(), sort_description,
DEFAULT_MERGE_BLOCK_SIZE, 0, nullptr, false, true);
DEFAULT_MERGE_BLOCK_SIZE, 0, false, nullptr, false, true);
pipe.addTransform(std::move(transform));
@ -128,7 +128,7 @@ TEST(MergingSortedTest, MoreInterestingBlockSizes)
EXPECT_EQ(pipe.numOutputPorts(), 3);
auto transform = std::make_shared<MergingSortedTransform>(pipe.getHeader(), pipe.numOutputPorts(), sort_description,
DEFAULT_MERGE_BLOCK_SIZE, 0, nullptr, false, true);
DEFAULT_MERGE_BLOCK_SIZE, 0, false, nullptr, false, true);
pipe.addTransform(std::move(transform));

View File

@ -103,9 +103,11 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
const String & engine_name = engine_define->engine->name;
const UUID & uuid = create.uuid;
bool engine_may_have_arguments = engine_name == "MySQL" || engine_name == "MaterializeMySQL" || engine_name == "MaterializedMySQL" ||
engine_name == "Lazy" || engine_name == "Replicated" || engine_name == "PostgreSQL" ||
engine_name == "MaterializedPostgreSQL" || engine_name == "SQLite";
static const std::unordered_set<std::string_view> engines_with_arguments{"MySQL", "MaterializeMySQL", "MaterializedMySQL",
"Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite"};
bool engine_may_have_arguments = engines_with_arguments.contains(engine_name);
if (engine_define->engine->arguments && !engine_may_have_arguments)
throw Exception("Database engine " + engine_name + " cannot have arguments", ErrorCodes::BAD_ARGUMENTS);
@ -113,6 +115,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
engine_define->primary_key || engine_define->order_by ||
engine_define->sample_by;
bool may_have_settings = endsWith(engine_name, "MySQL") || engine_name == "Replicated" || engine_name == "MaterializedPostgreSQL";
if (has_unexpected_element || (!may_have_settings && engine_define->settings))
throw Exception("Database engine " + engine_name + " cannot have parameters, primary_key, order_by, sample_by, settings",
ErrorCodes::UNKNOWN_ELEMENT_IN_AST);
@ -233,11 +236,10 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
{
const ASTFunction * engine = engine_define->engine;
if (!engine->arguments || engine->arguments->children.size() < 4 || engine->arguments->children.size() > 5)
throw Exception(fmt::format(
"{} Database require host:port, database_name, username, password arguments "
"[, use_table_cache = 0].", engine_name),
ErrorCodes::BAD_ARGUMENTS);
if (!engine->arguments || engine->arguments->children.size() < 4 || engine->arguments->children.size() > 6)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"{} Database require `host:port`, `database_name`, `username`, `password` [, `schema` = "", `use_table_cache` = 0].",
engine_name);
ASTs & engine_args = engine->arguments->children;
@ -249,9 +251,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
const auto & username = safeGetLiteralValue<String>(engine_args[2], engine_name);
const auto & password = safeGetLiteralValue<String>(engine_args[3], engine_name);
String schema;
if (engine->arguments->children.size() >= 5)
schema = safeGetLiteralValue<String>(engine_args[4], engine_name);
auto use_table_cache = 0;
if (engine->arguments->children.size() == 5)
use_table_cache = safeGetLiteralValue<UInt64>(engine_args[4], engine_name);
if (engine->arguments->children.size() >= 6)
use_table_cache = safeGetLiteralValue<UInt8>(engine_args[5], engine_name);
/// Split into replicas if needed.
size_t max_addresses = context->getSettingsRef().glob_expansion_max_elements;
@ -266,7 +272,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
context->getSettingsRef().postgresql_connection_pool_wait_timeout);
return std::make_shared<DatabasePostgreSQL>(
context, metadata_path, engine_define, database_name, postgres_database_name, connection_pool, use_table_cache);
context, metadata_path, engine_define, database_name, postgres_database_name, schema, connection_pool, use_table_cache);
}
else if (engine_name == "MaterializedPostgreSQL")
{
@ -274,9 +280,9 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
if (!engine->arguments || engine->arguments->children.size() != 4)
{
throw Exception(
fmt::format("{} Database require host:port, database_name, username, password arguments ", engine_name),
ErrorCodes::BAD_ARGUMENTS);
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"{} Database require `host:port`, `database_name`, `username`, `password`.",
engine_name);
}
ASTs & engine_args = engine->arguments->children;

View File

@ -40,6 +40,7 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
extern const int INCORRECT_QUERY;
extern const int ALL_CONNECTION_TRIES_FAILED;
extern const int NO_ACTIVE_REPLICAS;
}
static constexpr const char * DROPPED_MARK = "DROPPED";
@ -137,7 +138,9 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const
Coordination::Stat stat;
hosts = zookeeper->getChildren(zookeeper_path + "/replicas", &stat);
if (hosts.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "No hosts found");
throw Exception(ErrorCodes::NO_ACTIVE_REPLICAS, "No replicas of database {} found. "
"It's possible if the first replica is not fully created yet "
"or if the last replica was just dropped or due to logical error", database_name);
Int32 cversion = stat.cversion;
std::sort(hosts.begin(), hosts.end());
@ -514,6 +517,19 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
}
}
/// Creates a separate query context for each internal operation performed during replica recovery.
/// Every call copies the database's context and attaches a new ZooKeeperMetadataTransaction to the copy.
/// `current_zookeeper` is captured by value; `this` is captured to reach getContext(), database_name
/// and zookeeper_path.
auto make_query_context = [this, current_zookeeper]()
{
auto query_context = Context::createCopy(getContext());
query_context->makeQueryContext();
/// Mark the context as a secondary query issued internally by the replicated database.
query_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
query_context->getClientInfo().is_replicated_database_internal = true;
query_context->setCurrentDatabase(database_name);
/// NOTE(review): an empty id presumably makes the context generate a random query_id -- confirm.
query_context->setCurrentQueryId("");
auto txn = std::make_shared<ZooKeeperMetadataTransaction>(current_zookeeper, zookeeper_path, false, "");
query_context->initZooKeeperMetadataTransaction(txn);
return query_context;
};
String db_name = getDatabaseName();
String to_db_name = getDatabaseName() + BROKEN_TABLES_SUFFIX;
if (total_tables * db_settings.max_broken_tables_ratio < tables_to_detach.size())
@ -548,7 +564,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
dropped_dictionaries += table->isDictionary();
table->flushAndShutdown();
DatabaseAtomic::dropTable(getContext(), table_name, true);
DatabaseAtomic::dropTable(make_query_context(), table_name, true);
}
else
{
@ -558,7 +574,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
assert(db_name < to_db_name);
DDLGuardPtr to_table_guard = DatabaseCatalog::instance().getDDLGuard(to_db_name, to_name);
auto to_db_ptr = DatabaseCatalog::instance().getDatabase(to_db_name);
DatabaseAtomic::renameTable(getContext(), table_name, *to_db_ptr, to_name, false, false);
DatabaseAtomic::renameTable(make_query_context(), table_name, *to_db_ptr, to_name, false, false);
++moved_tables;
}
}
@ -577,7 +593,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
/// TODO Maybe we should do it in two steps: rename all tables to temporary names and then rename them to actual names?
DDLGuardPtr table_guard = DatabaseCatalog::instance().getDDLGuard(db_name, std::min(from, to));
DDLGuardPtr to_table_guard = DatabaseCatalog::instance().getDDLGuard(db_name, std::max(from, to));
DatabaseAtomic::renameTable(getContext(), from, *this, to, false, false);
DatabaseAtomic::renameTable(make_query_context(), from, *this, to, false, false);
}
for (const auto & id : dropped_tables)
@ -592,15 +608,9 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
}
auto query_ast = parseQueryFromMetadataInZooKeeper(name_and_meta.first, name_and_meta.second);
auto query_context = Context::createCopy(getContext());
query_context->makeQueryContext();
query_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
query_context->setCurrentDatabase(database_name);
query_context->setCurrentQueryId(""); // generate random query_id
LOG_INFO(log, "Executing {}", serializeAST(*query_ast));
InterpreterCreateQuery(query_ast, query_context).execute();
auto create_query_context = make_query_context();
InterpreterCreateQuery(query_ast, create_query_context).execute();
}
current_zookeeper->set(replica_path + "/log_ptr", toString(max_log_ptr));

View File

@ -60,12 +60,13 @@ void DatabaseReplicatedDDLWorker::initializeReplication()
/// Check if we need to recover replica.
/// Invariant: replica is lost if its log_ptr value is less than max_log_ptr - logs_to_keep.
String log_ptr_str = current_zookeeper->get(database->replica_path + "/log_ptr");
auto zookeeper = getAndSetZooKeeper();
String log_ptr_str = zookeeper->get(database->replica_path + "/log_ptr");
UInt32 our_log_ptr = parse<UInt32>(log_ptr_str);
UInt32 max_log_ptr = parse<UInt32>(current_zookeeper->get(database->zookeeper_path + "/max_log_ptr"));
logs_to_keep = parse<UInt32>(current_zookeeper->get(database->zookeeper_path + "/logs_to_keep"));
UInt32 max_log_ptr = parse<UInt32>(zookeeper->get(database->zookeeper_path + "/max_log_ptr"));
logs_to_keep = parse<UInt32>(zookeeper->get(database->zookeeper_path + "/logs_to_keep"));
if (our_log_ptr == 0 || our_log_ptr + logs_to_keep < max_log_ptr)
database->recoverLostReplica(current_zookeeper, our_log_ptr, max_log_ptr);
database->recoverLostReplica(zookeeper, our_log_ptr, max_log_ptr);
else
last_skipped_entry_name.emplace(DDLTaskBase::getLogEntryName(our_log_ptr));
}
@ -198,7 +199,7 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na
}
}
UInt32 our_log_ptr = parse<UInt32>(current_zookeeper->get(fs::path(database->replica_path) / "log_ptr"));
UInt32 our_log_ptr = parse<UInt32>(zookeeper->get(fs::path(database->replica_path) / "log_ptr"));
UInt32 entry_num = DatabaseReplicatedTask::getLogEntryNumber(entry_name);
if (entry_num <= our_log_ptr)

View File

@ -43,7 +43,7 @@ private:
mutable std::mutex mutex;
std::condition_variable wait_current_task_change;
String current_task;
UInt32 logs_to_keep = std::numeric_limits<UInt32>::max();
std::atomic<UInt32> logs_to_keep = std::numeric_limits<UInt32>::max();
};
}

View File

@ -39,14 +39,16 @@ DatabasePostgreSQL::DatabasePostgreSQL(
const String & metadata_path_,
const ASTStorage * database_engine_define_,
const String & dbname_,
const String & postgres_dbname,
const String & postgres_dbname_,
const String & postgres_schema_,
postgres::PoolWithFailoverPtr pool_,
bool cache_tables_)
: IDatabase(dbname_)
, WithContext(context_->getGlobalContext())
, metadata_path(metadata_path_)
, database_engine_define(database_engine_define_->clone())
, dbname(postgres_dbname)
, postgres_dbname(postgres_dbname_)
, postgres_schema(postgres_schema_)
, pool(std::move(pool_))
, cache_tables(cache_tables_)
{
@ -55,12 +57,28 @@ DatabasePostgreSQL::DatabasePostgreSQL(
}
/// Returns a dot-separated, unquoted name for use in messages:
/// `<postgres_dbname>.<table_name>` when no schema is configured,
/// `<postgres_dbname>.<postgres_schema>.<table_name>` otherwise.
String DatabasePostgreSQL::getTableNameForLogs(const String & table_name) const
{
    return postgres_schema.empty()
        ? fmt::format("{}.{}", postgres_dbname, table_name)
        : fmt::format("{}.{}.{}", postgres_dbname, postgres_schema, table_name);
}
/// Returns the table name passed through doubleQuoteString(). When a schema is configured, the
/// result is the schema (also passed through doubleQuoteString()), a dot, and that table name.
String DatabasePostgreSQL::formatTableName(const String & table_name) const
{
    auto quoted_table = doubleQuoteString(table_name);

    if (!postgres_schema.empty())
        return fmt::format("{}.{}", doubleQuoteString(postgres_schema), quoted_table);

    return quoted_table;
}
bool DatabasePostgreSQL::empty() const
{
std::lock_guard<std::mutex> lock(mutex);
auto connection_holder = pool->get();
auto tables_list = fetchPostgreSQLTablesList(connection_holder->get());
auto tables_list = fetchPostgreSQLTablesList(connection_holder->get(), postgres_schema);
for (const auto & table_name : tables_list)
if (!detached_or_dropped.count(table_name))
@ -76,7 +94,7 @@ DatabaseTablesIteratorPtr DatabasePostgreSQL::getTablesIterator(ContextPtr local
Tables tables;
auto connection_holder = pool->get();
auto table_names = fetchPostgreSQLTablesList(connection_holder->get());
auto table_names = fetchPostgreSQLTablesList(connection_holder->get(), postgres_schema);
for (const auto & table_name : table_names)
if (!detached_or_dropped.count(table_name))
@ -104,8 +122,11 @@ bool DatabasePostgreSQL::checkPostgresTable(const String & table_name) const
pqxx::result result = tx.exec(fmt::format(
"SELECT '{}'::regclass, tablename "
"FROM pg_catalog.pg_tables "
"WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema' "
"AND tablename = '{}'", table_name, table_name));
"WHERE schemaname != 'pg_catalog' AND {} "
"AND tablename = '{}'",
formatTableName(table_name),
(postgres_schema.empty() ? "schemaname != 'information_schema'" : "schemaname = " + quoteString(postgres_schema)),
formatTableName(table_name)));
}
catch (pqxx::undefined_table const &)
{
@ -151,14 +172,14 @@ StoragePtr DatabasePostgreSQL::fetchTable(const String & table_name, ContextPtr
return StoragePtr{};
auto connection_holder = pool->get();
auto columns = fetchPostgreSQLTableStructure(connection_holder->get(), doubleQuoteString(table_name)).columns;
auto columns = fetchPostgreSQLTableStructure(connection_holder->get(), formatTableName(table_name)).columns;
if (!columns)
return StoragePtr{};
auto storage = StoragePostgreSQL::create(
StorageID(database_name, table_name), pool, table_name,
ColumnsDescription{*columns}, ConstraintsDescription{}, String{}, local_context);
ColumnsDescription{*columns}, ConstraintsDescription{}, String{}, local_context, postgres_schema);
if (cache_tables)
cached_tables[table_name] = storage;
@ -182,10 +203,14 @@ void DatabasePostgreSQL::attachTable(const String & table_name, const StoragePtr
std::lock_guard<std::mutex> lock{mutex};
if (!checkPostgresTable(table_name))
throw Exception(fmt::format("Cannot attach table {}.{} because it does not exist", database_name, table_name), ErrorCodes::UNKNOWN_TABLE);
throw Exception(ErrorCodes::UNKNOWN_TABLE,
"Cannot attach PostgreSQL table {} because it does not exist in PostgreSQL",
getTableNameForLogs(table_name), database_name);
if (!detached_or_dropped.count(table_name))
throw Exception(fmt::format("Cannot attach table {}.{}. It already exists", database_name, table_name), ErrorCodes::TABLE_ALREADY_EXISTS);
throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS,
"Cannot attach PostgreSQL table {} because it already exists",
getTableNameForLogs(table_name), database_name);
if (cache_tables)
cached_tables[table_name] = storage;
@ -203,10 +228,10 @@ StoragePtr DatabasePostgreSQL::detachTable(const String & table_name)
std::lock_guard<std::mutex> lock{mutex};
if (detached_or_dropped.count(table_name))
throw Exception(fmt::format("Cannot detach table {}.{}. It is already dropped/detached", database_name, table_name), ErrorCodes::TABLE_IS_DROPPED);
throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Cannot detach table {}. It is already dropped/detached", getTableNameForLogs(table_name));
if (!checkPostgresTable(table_name))
throw Exception(fmt::format("Cannot detach table {}.{} because it does not exist", database_name, table_name), ErrorCodes::UNKNOWN_TABLE);
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Cannot detach table {}, because it does not exist", getTableNameForLogs(table_name));
if (cache_tables)
cached_tables.erase(table_name);
@ -234,10 +259,10 @@ void DatabasePostgreSQL::dropTable(ContextPtr, const String & table_name, bool /
std::lock_guard<std::mutex> lock{mutex};
if (!checkPostgresTable(table_name))
throw Exception(fmt::format("Cannot drop table {}.{} because it does not exist", database_name, table_name), ErrorCodes::UNKNOWN_TABLE);
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Cannot drop table {} because it does not exist", getTableNameForLogs(table_name));
if (detached_or_dropped.count(table_name))
throw Exception(fmt::format("Table {}.{} is already dropped/detached", database_name, table_name), ErrorCodes::TABLE_IS_DROPPED);
throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {} is already dropped/detached", getTableNameForLogs(table_name));
fs::path mark_table_removed = fs::path(getMetadataPath()) / (escapeForFileName(table_name) + suffix);
FS::createFile(mark_table_removed);
@ -281,7 +306,7 @@ void DatabasePostgreSQL::removeOutdatedTables()
{
std::lock_guard<std::mutex> lock{mutex};
auto connection_holder = pool->get();
auto actual_tables = fetchPostgreSQLTablesList(connection_holder->get());
auto actual_tables = fetchPostgreSQLTablesList(connection_holder->get(), postgres_schema);
if (cache_tables)
{
@ -334,7 +359,7 @@ ASTPtr DatabasePostgreSQL::getCreateTableQueryImpl(const String & table_name, Co
if (!storage)
{
if (throw_on_error)
throw Exception(fmt::format("PostgreSQL table {}.{} does not exist", database_name, table_name), ErrorCodes::UNKNOWN_TABLE);
throw Exception(ErrorCodes::UNKNOWN_TABLE, "PostgreSQL table {} does not exist", getTableNameForLogs(table_name));
return nullptr;
}
@ -367,9 +392,9 @@ ASTPtr DatabasePostgreSQL::getCreateTableQueryImpl(const String & table_name, Co
ASTs storage_children = ast_storage->children;
auto storage_engine_arguments = ast_storage->engine->arguments;
/// Remove extra engine argument (`use_table_cache`)
if (storage_engine_arguments->children.size() > 4)
storage_engine_arguments->children.resize(storage_engine_arguments->children.size() - 1);
/// Remove extra engine arguments (`schema` and `use_table_cache`)
if (storage_engine_arguments->children.size() >= 5)
storage_engine_arguments->children.resize(4);
/// Add table_name to engine arguments
assert(storage_engine_arguments->children.size() >= 2);

View File

@ -32,7 +32,8 @@ public:
const String & metadata_path_,
const ASTStorage * database_engine_define,
const String & dbname_,
const String & postgres_dbname,
const String & postgres_dbname_,
const String & postgres_schema_,
postgres::PoolWithFailoverPtr pool_,
bool cache_tables_);
@ -69,7 +70,8 @@ protected:
private:
String metadata_path;
ASTPtr database_engine_define;
String dbname;
String postgres_dbname;
String postgres_schema;
postgres::PoolWithFailoverPtr pool;
const bool cache_tables;
@ -77,6 +79,10 @@ private:
std::unordered_set<std::string> detached_or_dropped;
BackgroundSchedulePool::TaskHolder cleaner_task;
String getTableNameForLogs(const String & table_name) const;
String formatTableName(const String & table_name) const;
bool checkPostgresTable(const String & table_name) const;
StoragePtr fetchTable(const String & table_name, ContextPtr context, const bool table_checked) const;

View File

@ -27,11 +27,12 @@ namespace ErrorCodes
template<typename T>
std::unordered_set<std::string> fetchPostgreSQLTablesList(T & tx)
std::unordered_set<std::string> fetchPostgreSQLTablesList(T & tx, const String & postgres_schema)
{
std::unordered_set<std::string> tables;
std::string query = "SELECT tablename FROM pg_catalog.pg_tables "
"WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema'";
std::string query = fmt::format("SELECT tablename FROM pg_catalog.pg_tables "
"WHERE schemaname != 'pg_catalog' AND {}",
postgres_schema.empty() ? "schemaname != 'information_schema'" : "schemaname = " + quoteString(postgres_schema));
for (auto table_name : tx.template stream<std::string>(query))
tables.insert(std::get<0>(table_name));
@ -270,10 +271,10 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(pqxx::connection & connec
}
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::connection & connection)
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::connection & connection, const String & postgres_schema)
{
pqxx::ReadTransaction tx(connection);
auto result = fetchPostgreSQLTablesList(tx);
auto result = fetchPostgreSQLTablesList(tx, postgres_schema);
tx.commit();
return result;
}
@ -290,10 +291,10 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(
bool with_primary_key, bool with_replica_identity_index);
template
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::work & tx);
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::work & tx, const String & postgres_schema);
template
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::ReadTransaction & tx);
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::ReadTransaction & tx, const String & postgres_schema);
}

View File

@ -21,7 +21,7 @@ struct PostgreSQLTableStructure
using PostgreSQLTableStructurePtr = std::unique_ptr<PostgreSQLTableStructure>;
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::connection & connection);
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::connection & connection, const String & postgres_schema);
PostgreSQLTableStructure fetchPostgreSQLTableStructure(
pqxx::connection & connection, const String & postgres_table_name, bool use_nulls = true);
@ -32,7 +32,7 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(
bool with_primary_key = false, bool with_replica_identity_index = false);
template<typename T>
std::unordered_set<std::string> fetchPostgreSQLTablesList(T & tx);
std::unordered_set<std::string> fetchPostgreSQLTablesList(T & tx, const String & postgres_schema);
}

View File

@ -41,6 +41,9 @@ LibraryDictionarySource::LibraryDictionarySource(
, sample_block{sample_block_}
, context(Context::createCopy(context_))
{
if (fs::path(path).is_relative())
path = fs::canonical(path);
if (created_from_ddl && !pathStartsWith(path, context->getDictionariesLibPath()))
throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", path, context->getDictionariesLibPath());
@ -48,17 +51,32 @@ LibraryDictionarySource::LibraryDictionarySource(
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "LibraryDictionarySource: Can't load library {}: file doesn't exist", path);
description.init(sample_block);
bridge_helper = std::make_shared<LibraryBridgeHelper>(context, description.sample_block, dictionary_id);
auto res = bridge_helper->initLibrary(path, getLibrarySettingsString(config, config_prefix + ".settings"), getDictAttributesString());
if (!res)
LibraryBridgeHelper::LibraryInitData library_data
{
.library_path = path,
.library_settings = getLibrarySettingsString(config, config_prefix + ".settings"),
.dict_attributes = getDictAttributesString()
};
bridge_helper = std::make_shared<LibraryBridgeHelper>(context, description.sample_block, dictionary_id, library_data);
if (!bridge_helper->initLibrary())
throw Exception(ErrorCodes::EXTERNAL_LIBRARY_ERROR, "Failed to create shared library from path: {}", path);
}
LibraryDictionarySource::~LibraryDictionarySource()
{
bridge_helper->removeLibrary();
try
{
bridge_helper->removeLibrary();
}
catch (...)
{
tryLogCurrentException("LibraryDictionarySource");
}
}
@ -72,8 +90,9 @@ LibraryDictionarySource::LibraryDictionarySource(const LibraryDictionarySource &
, context(other.context)
, description{other.description}
{
bridge_helper = std::make_shared<LibraryBridgeHelper>(context, description.sample_block, dictionary_id);
bridge_helper->cloneLibrary(other.dictionary_id);
bridge_helper = std::make_shared<LibraryBridgeHelper>(context, description.sample_block, dictionary_id, other.bridge_helper->getLibraryData());
if (!bridge_helper->cloneLibrary(other.dictionary_id))
throw Exception(ErrorCodes::EXTERNAL_LIBRARY_ERROR, "Failed to clone library");
}
@ -99,7 +118,7 @@ BlockInputStreamPtr LibraryDictionarySource::loadAll()
BlockInputStreamPtr LibraryDictionarySource::loadIds(const std::vector<UInt64> & ids)
{
LOG_TRACE(log, "loadIds {} size = {}", toString(), ids.size());
return bridge_helper->loadIds(getDictIdsString(ids));
return bridge_helper->loadIds(ids);
}
@ -147,14 +166,6 @@ String LibraryDictionarySource::getLibrarySettingsString(const Poco::Util::Abstr
}
/// Serializes `ids` with writeVectorBinary() into an in-memory buffer
/// and returns the contents of that buffer as a string.
String LibraryDictionarySource::getDictIdsString(const std::vector<UInt64> & ids)
{
    WriteBufferFromOwnString serialized_ids;
    writeVectorBinary(ids, serialized_ids);
    return serialized_ids.str();
}
String LibraryDictionarySource::getDictAttributesString()
{
std::vector<String> attributes_names(dict_struct.attributes.size());

View File

@ -70,8 +70,6 @@ public:
std::string toString() const override;
private:
static String getDictIdsString(const std::vector<UInt64> & ids);
String getDictAttributesString();
static String getLibrarySettingsString(const Poco::Util::AbstractConfiguration & config, const std::string & config_root);
@ -82,7 +80,7 @@ private:
const DictionaryStructure dict_struct;
const std::string config_prefix;
const std::string path;
std::string path;
const Field dictionary_id;
Block sample_block;

View File

@ -31,6 +31,56 @@ std::mutex DiskLocal::reservation_mutex;
using DiskLocalPtr = std::shared_ptr<DiskLocal>;
/// Reads the settings of a local disk from `config` under `config_prefix` and validates them.
///
/// Outputs:
///   path                  - the disk path. For the disk named "default" it is taken from
///                           context->getPath() (setting `<config_prefix>.path` is an error);
///                           for any other disk it is `<config_prefix>.path`, which must be
///                           non-empty and end with '/'.
///   keep_free_space_bytes - `<config_prefix>.keep_free_space_bytes` (0 when absent), or, when
///                           `<config_prefix>.keep_free_space_ratio` is given, the total space of
///                           the disk at `path` multiplied by that ratio.
///
/// Throws:
///   UNKNOWN_ELEMENT_IN_CONFIG   - invalid path setting;
///   PATH_ACCESS_DENIED          - `path` is not both readable and writable;
///   EXCESSIVE_ELEMENT_IN_CONFIG - both free space settings are given, or the ratio is not in [0, 1].
static void loadDiskLocalConfig(const String & name,
                      const Poco::Util::AbstractConfiguration & config,
                      const String & config_prefix,
                      ContextPtr context,
                      String & path,
                      UInt64 & keep_free_space_bytes)
{
    path = config.getString(config_prefix + ".path", "");
    if (name == "default")
    {
        if (!path.empty())
            throw Exception(
                "\"default\" disk path should be provided in <path> not in <storage_configuration>",
                ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
        path = context->getPath();
    }
    else
    {
        if (path.empty())
            throw Exception("Disk path can not be empty. Disk " + name, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
        if (path.back() != '/')
            throw Exception("Disk path must end with /. Disk " + name, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
    }

    if (!FS::canRead(path) || !FS::canWrite(path))
        throw Exception("There is no RW access to the disk " + name + " (" + path + ")", ErrorCodes::PATH_ACCESS_DENIED);

    const bool has_space_ratio = config.has(config_prefix + ".keep_free_space_ratio");

    if (config.has(config_prefix + ".keep_free_space_bytes") && has_space_ratio)
        throw Exception(
            "Only one of 'keep_free_space_bytes' and 'keep_free_space_ratio' can be specified",
            ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);

    keep_free_space_bytes = config.getUInt64(config_prefix + ".keep_free_space_bytes", 0);

    if (has_space_ratio)
    {
        const auto ratio = config.getDouble(config_prefix + ".keep_free_space_ratio");

        /// Written as a negated range check on purpose: every comparison with NaN is false,
        /// so `ratio < 0 || ratio > 1` would let NaN through and the conversion to UInt64 below
        /// would then be undefined behaviour.
        if (!(ratio >= 0 && ratio <= 1))
            throw Exception("'keep_free_space_ratio' have to be between 0 and 1", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);

        /// `path` is final at this point (already replaced by context->getPath() for the default disk),
        /// so it is used directly. Create tmp disk for getting total disk space.
        keep_free_space_bytes = static_cast<UInt64>(DiskLocal("tmp", path, 0).getTotalSpace() * ratio);
    }
}
class DiskLocalReservation : public IReservation
{
public:
@ -317,6 +367,21 @@ SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const
return std::make_unique<LocalDirectorySyncGuard>(fs::path(disk_path) / path);
}
/// Re-reads this disk's settings from `config` via loadDiskLocalConfig() and applies them.
/// A disk path that differs from the current one is rejected with UNKNOWN_ELEMENT_IN_CONFIG;
/// `keep_free_space_bytes` is overwritten only when the freshly loaded value differs.
void DiskLocal::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap &)
{
    String reloaded_path;
    UInt64 reloaded_keep_free_space_bytes;
    loadDiskLocalConfig(name, config, config_prefix, context, reloaded_path, reloaded_keep_free_space_bytes);

    const bool path_changed = (disk_path != reloaded_path);
    if (path_changed)
        throw Exception("Disk path can't be updated from config " + name, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);

    const bool free_space_changed = (keep_free_space_bytes != reloaded_keep_free_space_bytes);
    if (free_space_changed)
        keep_free_space_bytes = reloaded_keep_free_space_bytes;
}
DiskPtr DiskLocalReservation::getDisk(size_t i) const
{
if (i != 0)
@ -334,7 +399,6 @@ void DiskLocalReservation::update(UInt64 new_size)
disk->reserved_bytes += size;
}
DiskLocalReservation::~DiskLocalReservation()
{
try
@ -369,48 +433,9 @@ void registerDiskLocal(DiskFactory & factory)
const String & config_prefix,
ContextPtr context,
const DisksMap & /*map*/) -> DiskPtr {
String path = config.getString(config_prefix + ".path", "");
if (name == "default")
{
if (!path.empty())
throw Exception(
"\"default\" disk path should be provided in <path> not it <storage_configuration>",
ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
path = context->getPath();
}
else
{
if (path.empty())
throw Exception("Disk path can not be empty. Disk " + name, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
if (path.back() != '/')
throw Exception("Disk path must end with /. Disk " + name, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
}
if (!FS::canRead(path) || !FS::canWrite(path))
throw Exception("There is no RW access to the disk " + name + " (" + path + ")", ErrorCodes::PATH_ACCESS_DENIED);
bool has_space_ratio = config.has(config_prefix + ".keep_free_space_ratio");
if (config.has(config_prefix + ".keep_free_space_bytes") && has_space_ratio)
throw Exception(
"Only one of 'keep_free_space_bytes' and 'keep_free_space_ratio' can be specified",
ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
UInt64 keep_free_space_bytes = config.getUInt64(config_prefix + ".keep_free_space_bytes", 0);
if (has_space_ratio)
{
auto ratio = config.getDouble(config_prefix + ".keep_free_space_ratio");
if (ratio < 0 || ratio > 1)
throw Exception("'keep_free_space_ratio' have to be between 0 and 1", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
String tmp_path = path;
if (tmp_path.empty())
tmp_path = context->getPath();
// Create tmp disk for getting total disk space.
keep_free_space_bytes = static_cast<UInt64>(DiskLocal("tmp", tmp_path, 0).getTotalSpace() * ratio);
}
String path;
UInt64 keep_free_space_bytes;
loadDiskLocalConfig(name, config, config_prefix, context, path, keep_free_space_bytes);
return std::make_shared<DiskLocal>(name, path, keep_free_space_bytes);
};
factory.registerDiskType("local", creator);

View File

@ -5,6 +5,7 @@
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadBufferFromFileBase.h>
#include <IO/WriteBufferFromFile.h>
#include <Poco/Util/AbstractConfiguration.h>
namespace DB
@ -104,13 +105,15 @@ public:
SyncGuardPtr getDirectorySyncGuard(const String & path) const override;
void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap &) override;
private:
bool tryReserve(UInt64 bytes);
private:
const String name;
const String disk_path;
const UInt64 keep_free_space_bytes;
std::atomic<UInt64> keep_free_space_bytes;
UInt64 reserved_bytes = 0;
UInt64 reservation_count = 0;
@ -120,4 +123,5 @@ private:
Poco::Logger * log = &Poco::Logger::get("DiskLocal");
};
}

View File

@ -32,7 +32,7 @@ public:
/// Get all disks with names
const DisksMap & getDisksMap() const { return disks; }
void addToDiskMap(String name, DiskPtr disk)
void addToDiskMap(const String & name, DiskPtr disk)
{
disks.emplace(name, disk);
}

View File

@ -13,9 +13,9 @@
#include <mutex>
#include <utility>
#include <boost/noncopyable.hpp>
#include "Poco/Util/AbstractConfiguration.h"
#include <Poco/Timestamp.h>
#include <filesystem>
#include "Poco/Util/AbstractConfiguration.h"
namespace fs = std::filesystem;

View File

@ -363,7 +363,8 @@ int DiskS3::readSchemaVersion(const String & source_bucket, const String & sourc
settings->client,
source_bucket,
source_path + SCHEMA_VERSION_OBJECT,
settings->s3_max_single_read_retries);
settings->s3_max_single_read_retries,
DBMS_DEFAULT_BUFFER_SIZE);
readIntText(version, buffer);

View File

@ -19,6 +19,7 @@ public:
virtual ~ProxyConfiguration() = default;
/// Returns proxy configuration on each HTTP request.
virtual Aws::Client::ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) = 0;
virtual void errorReport(const Aws::Client::ClientConfigurationPerRequest & config) = 0;
};
}

View File

@ -20,6 +20,7 @@ class ProxyListConfiguration : public ProxyConfiguration
public:
explicit ProxyListConfiguration(std::vector<Poco::URI> proxies_);
Aws::Client::ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) override;
void errorReport(const Aws::Client::ClientConfigurationPerRequest &) override {}
private:
/// List of configured proxies.

View File

@ -16,8 +16,10 @@ namespace DB::ErrorCodes
namespace DB::S3
{
ProxyResolverConfiguration::ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_)
: endpoint(endpoint_), proxy_scheme(std::move(proxy_scheme_)), proxy_port(proxy_port_)
ProxyResolverConfiguration::ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_
, unsigned proxy_port_, unsigned cache_ttl_)
: endpoint(endpoint_), proxy_scheme(std::move(proxy_scheme_)), proxy_port(proxy_port_), cache_ttl(cache_ttl_)
{
}
@ -25,16 +27,25 @@ Aws::Client::ClientConfigurationPerRequest ProxyResolverConfiguration::getConfig
{
LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Obtain proxy using resolver: {}", endpoint.toString());
std::unique_lock lock(cache_mutex);
std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
if (cache_ttl.count() && cache_valid && now <= cache_timestamp + cache_ttl && now >= cache_timestamp)
{
LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Use cached proxy: {}://{}:{}", Aws::Http::SchemeMapper::ToString(cached_config.proxyScheme), cached_config.proxyHost, cached_config.proxyPort);
return cached_config;
}
/// 1 second is enough for now.
/// TODO: Make timeouts configurable.
ConnectionTimeouts timeouts(
Poco::Timespan(1000000), /// Connection timeout.
Poco::Timespan(1000000), /// Send timeout.
Poco::Timespan(1000000) /// Receive timeout.
Poco::Timespan(1000000) /// Receive timeout.
);
auto session = makeHTTPSession(endpoint, timeouts);
Aws::Client::ClientConfigurationPerRequest cfg;
try
{
/// It should be just empty GET request.
@ -53,20 +64,41 @@ Aws::Client::ClientConfigurationPerRequest ProxyResolverConfiguration::getConfig
LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Use proxy: {}://{}:{}", proxy_scheme, proxy_host, proxy_port);
cfg.proxyScheme = Aws::Http::SchemeMapper::FromString(proxy_scheme.c_str());
cfg.proxyHost = proxy_host;
cfg.proxyPort = proxy_port;
cached_config.proxyScheme = Aws::Http::SchemeMapper::FromString(proxy_scheme.c_str());
cached_config.proxyHost = proxy_host;
cached_config.proxyPort = proxy_port;
cache_timestamp = std::chrono::system_clock::now();
cache_valid = true;
return cfg;
return cached_config;
}
catch (...)
{
tryLogCurrentException("AWSClient", "Failed to obtain proxy");
/// Don't use proxy if it can't be obtained.
Aws::Client::ClientConfigurationPerRequest cfg;
return cfg;
}
}
/// Handles an error reported for a request that used the per-request configuration `config`.
/// The cached proxy is marked invalid only when caching is enabled (non-zero TTL), the cache is
/// currently valid, and `config` names exactly the cached proxy (scheme, host and port all match).
/// A report with an empty proxy host is ignored without taking the lock.
void ProxyResolverConfiguration::errorReport(const Aws::Client::ClientConfigurationPerRequest & config)
{
    if (config.proxyHost.empty())
        return;

    std::unique_lock lock(cache_mutex);

    const bool cache_in_use = cache_ttl.count() && cache_valid;
    if (!cache_in_use)
        return;

    const bool is_cached_proxy = cached_config.proxyScheme == config.proxyScheme
        && cached_config.proxyHost == config.proxyHost
        && cached_config.proxyPort == config.proxyPort;

    /// Invalidate cached proxy when got error with this proxy
    if (is_cached_proxy)
        cache_valid = false;
}
}
#endif

View File

@ -8,6 +8,8 @@
#include "ProxyConfiguration.h"
#include <mutex>
namespace DB::S3
{
/**
@ -18,8 +20,9 @@ namespace DB::S3
class ProxyResolverConfiguration : public ProxyConfiguration
{
public:
ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_);
ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_, unsigned cache_ttl_);
Aws::Client::ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) override;
void errorReport(const Aws::Client::ClientConfigurationPerRequest & config) override;
private:
/// Endpoint to obtain a proxy host.
@ -28,6 +31,12 @@ private:
const String proxy_scheme;
/// Port for obtained proxy.
const unsigned proxy_port;
std::mutex cache_mutex;
bool cache_valid = false;
std::chrono::time_point<std::chrono::system_clock> cache_timestamp;
const std::chrono::seconds cache_ttl{0};
Aws::Client::ClientConfigurationPerRequest cached_config;
};
}

View File

@ -56,11 +56,12 @@ std::shared_ptr<S3::ProxyResolverConfiguration> getProxyResolverConfiguration(
if (proxy_scheme != "http" && proxy_scheme != "https")
throw Exception("Only HTTP/HTTPS schemas allowed in proxy resolver config: " + proxy_scheme, ErrorCodes::BAD_ARGUMENTS);
auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port");
auto cache_ttl = proxy_resolver_config.getUInt(prefix + ".proxy_cache_time", 10);
LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy resolver: {}, Scheme: {}, Port: {}",
endpoint.toString(), proxy_scheme, proxy_port);
return std::make_shared<S3::ProxyResolverConfiguration>(endpoint, proxy_scheme, proxy_port);
return std::make_shared<S3::ProxyResolverConfiguration>(endpoint, proxy_scheme, proxy_port, cache_ttl);
}
std::shared_ptr<S3::ProxyListConfiguration> getProxyListConfiguration(
@ -128,8 +129,12 @@ getClient(const Poco::Util::AbstractConfiguration & config, const String & confi
auto proxy_config = getProxyConfiguration(config_prefix, config);
if (proxy_config)
{
client_configuration.perRequestConfiguration
= [proxy_config](const auto & request) { return proxy_config->getConfiguration(request); };
client_configuration.error_report
= [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); };
}
client_configuration.retryStrategy
= std::make_shared<Aws::Client::DefaultRetryStrategy>(config.getUInt(config_prefix + ".retry_attempts", 10));

View File

@ -1218,17 +1218,36 @@ public:
{
return res;
}
else if ((isColumnedAsDecimal(left_type) || isColumnedAsDecimal(right_type))
// Comparing Date and DateTime64 requires implicit conversion,
// otherwise Date is treated as number.
&& !(date_and_datetime && (isDate(left_type) || isDate(right_type))))
else if ((isColumnedAsDecimal(left_type) || isColumnedAsDecimal(right_type)))
{
// compare
if (!allowDecimalComparison(left_type, right_type) && !date_and_datetime)
throw Exception("No operation " + getName() + " between " + left_type->getName() + " and " + right_type->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
// Comparing Date and DateTime64 requires implicit conversion,
if (date_and_datetime && (isDate(left_type) || isDate(right_type)))
{
DataTypePtr common_type = getLeastSupertype({left_type, right_type});
ColumnPtr c0_converted = castColumn(col_with_type_and_name_left, common_type);
ColumnPtr c1_converted = castColumn(col_with_type_and_name_right, common_type);
return executeDecimal({c0_converted, common_type, "left"}, {c1_converted, common_type, "right"});
}
else
{
// compare
if (!allowDecimalComparison(left_type, right_type) && !date_and_datetime)
throw Exception(
"No operation " + getName() + " between " + left_type->getName() + " and " + right_type->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return executeDecimal(col_with_type_and_name_left, col_with_type_and_name_right);
}
return executeDecimal(col_with_type_and_name_left, col_with_type_and_name_right);
}
else if (date_and_datetime)
{
DataTypePtr common_type = getLeastSupertype({left_type, right_type});
ColumnPtr c0_converted = castColumn(col_with_type_and_name_left, common_type);
ColumnPtr c1_converted = castColumn(col_with_type_and_name_right, common_type);
if (!((res = executeNumLeftType<UInt32>(c0_converted.get(), c1_converted.get()))
|| (res = executeNumLeftType<UInt64>(c0_converted.get(), c1_converted.get()))))
throw Exception("Date related common types can only be UInt32 or UInt64", ErrorCodes::LOGICAL_ERROR);
return res;
}
else if (left_type->equals(*right_type))
{

View File

@ -42,6 +42,8 @@ struct MultiSearchFirstIndexImpl
}
++iteration;
}
if (iteration == 0)
std::fill(res.begin(), res.end(), 0);
}
};

View File

@ -51,6 +51,8 @@ struct MultiSearchFirstPositionImpl
}
++iteration;
}
if (iteration == 0)
std::fill(res.begin(), res.end(), 0);
}
};

View File

@ -41,6 +41,8 @@ struct MultiSearchImpl
}
++iteration;
}
if (iteration == 0)
std::fill(res.begin(), res.end(), 0);
}
};

View File

@ -113,12 +113,34 @@ namespace MultiRegexps
ScratchPtr scratch;
};
/// Builds a Regexps object on demand and hands out a pointer to the stored instance.
/// The factory is supplied through setConstructor(). operator() takes the internal mutex,
/// invokes the factory only when no value is stored yet, and returns the address of the stored value.
class RegexpsConstructor
{
public:
    RegexpsConstructor() = default;

    /// Stores the factory that operator() will call to produce the Regexps value.
    void setConstructor(std::function<Regexps()> constructor_) { constructor = std::move(constructor_); }

    /// Returns a pointer to the stored Regexps, creating it first if it does not exist yet.
    Regexps * operator()()
    {
        std::unique_lock lock(mutex);
        if (!regexp)
            regexp = constructor();
        return &*regexp;
    }

private:
    std::function<Regexps()> constructor;
    std::optional<Regexps> regexp;
    std::mutex mutex;
};
struct Pool
{
/// Mutex for finding in map.
std::mutex mutex;
/// Patterns + possible edit_distance to database and scratch.
std::map<std::pair<std::vector<String>, std::optional<UInt32>>, Regexps> storage;
std::map<std::pair<std::vector<String>, std::optional<UInt32>>, RegexpsConstructor> storage;
};
template <bool save_indices, bool CompileForEditDistance>
@ -250,15 +272,19 @@ namespace MultiRegexps
/// If not found, compile and let other threads wait.
if (known_regexps.storage.end() == it)
{
it = known_regexps.storage
.emplace(
std::pair{str_patterns, edit_distance},
constructRegexps<save_indices, CompileForEditDistance>(str_patterns, edit_distance))
.emplace(std::piecewise_construct, std::make_tuple(std::move(str_patterns), edit_distance), std::make_tuple())
.first;
/// If found, unlock and return the database.
lock.unlock();
it->second.setConstructor([&str_patterns = it->first.first, edit_distance]()
{
return constructRegexps<save_indices, CompileForEditDistance>(str_patterns, edit_distance);
});
}
return &it->second;
/// Unlock before possible construction.
lock.unlock();
return it->second();
}
}

View File

@ -1,4 +1,5 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypeArray.h>
@ -7,6 +8,7 @@
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include "Core/ColumnWithTypeAndName.h"
#include "DataTypes/DataTypeMap.h"
#include "DataTypes/IDataType.h"
namespace DB
@ -32,85 +34,211 @@ private:
bool isVariadic() const override { return true; }
bool useDefaultImplementationForConstants() const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
void checkTypes(const DataTypePtr & key_type, const DataTypePtr max_key_type) const
{
WhichDataType which_key(key_type);
if (!(which_key.isInt() || which_key.isUInt()))
{
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Keys for {} function should be of integer type (signed or unsigned)", getName());
}
if (max_key_type)
{
WhichDataType which_max_key(max_key_type);
if (which_max_key.isNullable())
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Max key argument in arguments of function " + getName() + " can not be Nullable");
if (key_type->getTypeId() != max_key_type->getTypeId())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Max key type in {} should be same as keys type", getName());
}
}
DataTypePtr getReturnTypeForTuple(const DataTypes & arguments) const
{
if (arguments.size() < 2)
throw Exception{getName() + " accepts at least two arrays for key and value", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} accepts at least two arrays for key and value", getName());
if (arguments.size() > 3)
throw Exception{"too many arguments in " + getName() + " call", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Too many arguments in {} call", getName());
const DataTypeArray * key_array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get());
const DataTypeArray * val_array_type = checkAndGetDataType<DataTypeArray>(arguments[1].get());
if (!key_array_type || !val_array_type)
throw Exception{getName() + " accepts two arrays for key and value", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} accepts two arrays for key and value", getName());
DataTypePtr keys_type = key_array_type->getNestedType();
WhichDataType which_key(keys_type);
if (!(which_key.isNativeInt() || which_key.isNativeUInt()))
{
throw Exception(
"Keys for " + getName() + " should be of native integer type (signed or unsigned)", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
const auto & key_type = key_array_type->getNestedType();
if (arguments.size() == 3)
{
DataTypePtr max_key_type = arguments[2];
WhichDataType which_max_key(max_key_type);
if (which_max_key.isNullable())
throw Exception(
"Max key argument in arguments of function " + getName() + " can not be Nullable",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (keys_type->getTypeId() != max_key_type->getTypeId())
throw Exception("Max key type in " + getName() + " should be same as keys type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
this->checkTypes(key_type, arguments[2]);
else
this->checkTypes(key_type, nullptr);
return std::make_shared<DataTypeTuple>(DataTypes{arguments[0], arguments[1]});
}
template <typename KeyType, typename ValType>
ColumnPtr execute2(ColumnPtr key_column, ColumnPtr val_column, ColumnPtr max_key_column, const DataTypeTuple & res_type) const
DataTypePtr getReturnTypeForMap(const DataTypes & arguments) const
{
MutableColumnPtr res_tuple = res_type.createColumn();
const auto * map = assert_cast<const DataTypeMap *>(arguments[0].get());
if (arguments.size() == 1)
this->checkTypes(map->getKeyType(), nullptr);
else if (arguments.size() == 2)
this->checkTypes(map->getKeyType(), arguments[1]);
else
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Too many arguments in {} call", getName());
auto * to_tuple = assert_cast<ColumnTuple *>(res_tuple.get());
auto & to_keys_arr = assert_cast<ColumnArray &>(to_tuple->getColumn(0));
auto & to_keys_data = to_keys_arr.getData();
auto & to_keys_offsets = to_keys_arr.getOffsets();
return std::make_shared<DataTypeMap>(map->getKeyType(), map->getValueType());
}
auto & to_vals_arr = assert_cast<ColumnArray &>(to_tuple->getColumn(1));
auto & to_values_data = to_vals_arr.getData();
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, getName() + " accepts at least one map or two arrays");
bool max_key_is_const = false, key_is_const = false, val_is_const = false;
if (arguments[0]->getTypeId() == TypeIndex::Array)
return getReturnTypeForTuple(arguments);
else if (arguments[0]->getTypeId() == TypeIndex::Map)
return getReturnTypeForMap(arguments);
else
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Function {} only accepts one map or arrays, but got {}",
getName(),
arguments[0]->getName());
}
const auto * keys_array = checkAndGetColumn<ColumnArray>(key_column.get());
if (!keys_array)
// Struct holds input and output columns references,
// Both arrays and maps have similar columns to work with but extracted differently
// (see getInOutDataFromArrays and getInOutDataFromMap, which build this struct).
// All buffers are held by reference or pointer, not copied, so the columns they were
// taken from must stay alive for as long as the struct is used.
// The member order matters: both builders return the struct via positional brace-initialization.
template <typename KeyType, typename ValType>
struct ColumnsInOut
{
// inputs
// Flat key values of all rows.
const PaddedPODArray<KeyType> & in_keys_data;
// Flat values of all rows.
const PaddedPODArray<ValType> & in_vals_data;
// Per-row end offsets into in_keys_data (row i spans [offsets[i - 1], offsets[i])).
const IColumn::Offsets & in_key_offsets;
// Per-row end offsets into in_vals_data; for a map argument this is the same offsets array as in_key_offsets.
const IColumn::Offsets & in_val_offsets;
// Number of rows execute2 iterates over.
size_t row_count;
// Set when the keys (resp. values) argument was a constant array column;
// execute2 then reuses the first row's offsets for every row. Always false for a map argument.
bool key_is_const;
bool val_is_const;
// outputs
// Flat key / value buffers of the result column; execute2 appends to them.
PaddedPODArray<KeyType> & out_keys_data;
PaddedPODArray<ValType> & out_vals_data;
// Offsets of the result keys; one entry is pushed per processed row.
IColumn::Offsets & out_keys_offsets;
// with map argument this field will not be used
// (it is nullptr then; when non-null, execute2 copies out_keys_offsets into it after the row loop)
IColumn::Offsets * out_vals_offsets;
};
template <typename KeyType, typename ValType>
ColumnsInOut<KeyType, ValType> getInOutDataFromArrays(MutableColumnPtr & res_column, ColumnPtr * arg_columns) const
{
auto * out_tuple = assert_cast<ColumnTuple *>(res_column.get());
auto & out_keys_array = assert_cast<ColumnArray &>(out_tuple->getColumn(0));
auto & out_vals_array = assert_cast<ColumnArray &>(out_tuple->getColumn(1));
const auto * key_column = arg_columns[0].get();
const auto * in_keys_array = checkAndGetColumn<ColumnArray>(key_column);
bool key_is_const = false, val_is_const = false;
if (!in_keys_array)
{
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(key_column.get());
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(key_column);
if (!const_array)
throw Exception("Expected array column, found " + key_column->getName(), ErrorCodes::ILLEGAL_COLUMN);
throw Exception(
ErrorCodes::ILLEGAL_COLUMN, "Expected array column in function {}, found {}", getName(), key_column->getName());
keys_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
in_keys_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
key_is_const = true;
}
const auto * values_array = checkAndGetColumn<ColumnArray>(val_column.get());
if (!values_array)
const auto * val_column = arg_columns[1].get();
const auto * in_values_array = checkAndGetColumn<ColumnArray>(val_column);
if (!in_values_array)
{
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(val_column.get());
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(val_column);
if (!const_array)
throw Exception("Expected array column, found " + val_column->getName(), ErrorCodes::ILLEGAL_COLUMN);
throw Exception(
ErrorCodes::ILLEGAL_COLUMN, "Expected array column in function {}, found {}", getName(), val_column->getName());
values_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
in_values_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
val_is_const = true;
}
if (!keys_array || !values_array)
if (!in_keys_array || !in_values_array)
/* something went wrong */
throw Exception{"Illegal columns in arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName());
const auto & in_keys_data = assert_cast<const ColumnVector<KeyType> &>(in_keys_array->getData()).getData();
const auto & in_values_data = assert_cast<const ColumnVector<ValType> &>(in_values_array->getData()).getData();
const auto & in_keys_offsets = in_keys_array->getOffsets();
const auto & in_vals_offsets = in_values_array->getOffsets();
auto & out_keys_data = assert_cast<ColumnVector<KeyType> &>(out_keys_array.getData()).getData();
auto & out_vals_data = assert_cast<ColumnVector<ValType> &>(out_vals_array.getData()).getData();
auto & out_keys_offsets = out_keys_array.getOffsets();
size_t row_count = key_is_const ? in_values_array->size() : in_keys_array->size();
IColumn::Offsets * out_vals_offsets = &out_vals_array.getOffsets();
return {
in_keys_data,
in_values_data,
in_keys_offsets,
in_vals_offsets,
row_count,
key_is_const,
val_is_const,
out_keys_data,
out_vals_data,
out_keys_offsets,
out_vals_offsets};
}
/// Builds the ColumnsInOut view for the Map form of the arguments.
/// Input buffers are taken from arg_columns[0] and output buffers from res_column;
/// both are cast to ColumnMap, and their nested tuple elements 0 and 1 are cast to
/// ColumnVector<KeyType> and ColumnVector<ValType> respectively.
/// A map has a single offsets array, so it is used for both the key and the value offsets.
template <typename KeyType, typename ValType>
ColumnsInOut<KeyType, ValType> getInOutDataFromMap(MutableColumnPtr & res_column, ColumnPtr * arg_columns) const
{
    /// Source side: the map passed as the first argument.
    const auto * src_map = assert_cast<const ColumnMap *>(arg_columns[0].get());
    const auto & src_array = src_map->getNestedColumn();
    const auto & src_tuple = src_map->getNestedData();
    const auto & src_keys = assert_cast<const ColumnVector<KeyType> &>(src_tuple.getColumn(0)).getData();
    const auto & src_vals = assert_cast<const ColumnVector<ValType> &>(src_tuple.getColumn(1)).getData();
    const auto & src_offsets = src_array.getOffsets();

    /// Destination side: the freshly created result map.
    auto * dst_map = assert_cast<ColumnMap *>(res_column.get());
    auto & dst_array = dst_map->getNestedColumn();
    auto & dst_tuple = dst_map->getNestedData();
    auto & dst_keys = assert_cast<ColumnVector<KeyType> &>(dst_tuple.getColumn(0)).getData();
    auto & dst_vals = assert_cast<ColumnVector<ValType> &>(dst_tuple.getColumn(1)).getData();
    auto & dst_offsets = dst_array.getOffsets();

    return ColumnsInOut<KeyType, ValType>{
        src_keys,
        src_vals,
        src_offsets, /// key offsets
        src_offsets, /// value offsets: the same array for a map
        src_array.size(), /// row count
        false, /// keys are not treated as constant
        false, /// values are not treated as constant
        dst_keys,
        dst_vals,
        dst_offsets,
        nullptr}; /// no separate value offsets to fill for a map result
}
template <typename KeyType, typename ValType>
ColumnPtr execute2(ColumnPtr * arg_columns, ColumnPtr max_key_column, const DataTypePtr & res_type) const
{
MutableColumnPtr res_column = res_type->createColumn();
bool max_key_is_const = false;
auto columns = res_column->getDataType() == TypeIndex::Tuple ? getInOutDataFromArrays<KeyType, ValType>(res_column, arg_columns)
: getInOutDataFromMap<KeyType, ValType>(res_column, arg_columns);
KeyType max_key_const{0};
@ -121,49 +249,43 @@ private:
max_key_is_const = true;
}
auto & keys_data = assert_cast<const ColumnVector<KeyType> &>(keys_array->getData()).getData();
auto & values_data = assert_cast<const ColumnVector<ValType> &>(values_array->getData()).getData();
// Original offsets
const IColumn::Offsets & key_offsets = keys_array->getOffsets();
const IColumn::Offsets & val_offsets = values_array->getOffsets();
IColumn::Offset offset{0};
size_t row_count = key_is_const ? values_array->size() : keys_array->size();
std::map<KeyType, ValType> res_map;
//Iterate through two arrays and fill result values.
for (size_t row = 0; row < row_count; ++row)
for (size_t row = 0; row < columns.row_count; ++row)
{
size_t key_offset = 0, val_offset = 0, array_size = key_offsets[0], val_array_size = val_offsets[0];
size_t key_offset = 0, val_offset = 0, items_count = columns.in_key_offsets[0], val_array_size = columns.in_val_offsets[0];
res_map.clear();
if (!key_is_const)
if (!columns.key_is_const)
{
key_offset = row > 0 ? key_offsets[row - 1] : 0;
array_size = key_offsets[row] - key_offset;
key_offset = row > 0 ? columns.in_key_offsets[row - 1] : 0;
items_count = columns.in_key_offsets[row] - key_offset;
}
if (!val_is_const)
if (!columns.val_is_const)
{
val_offset = row > 0 ? val_offsets[row - 1] : 0;
val_array_size = val_offsets[row] - val_offset;
val_offset = row > 0 ? columns.in_val_offsets[row - 1] : 0;
val_array_size = columns.in_val_offsets[row] - val_offset;
}
if (array_size != val_array_size)
throw Exception("Key and value array should have same amount of elements", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (items_count != val_array_size)
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Key and value array should have same amount of elements in function {}",
getName());
if (array_size == 0)
if (items_count == 0)
{
to_keys_offsets.push_back(offset);
columns.out_keys_offsets.push_back(offset);
continue;
}
for (size_t i = 0; i < array_size; ++i)
for (size_t i = 0; i < items_count; ++i)
{
res_map.insert({keys_data[key_offset + i], values_data[val_offset + i]});
res_map.insert({columns.in_keys_data[key_offset + i], columns.in_vals_data[val_offset + i]});
}
auto min_key = res_map.begin()->first;
@ -184,7 +306,7 @@ private:
/* no need to add anything, max key is less than the first key */
if (max_key < min_key)
{
to_keys_offsets.push_back(offset);
columns.out_keys_offsets.push_back(offset);
continue;
}
}
@ -197,16 +319,16 @@ private:
KeyType key;
for (key = min_key;; ++key)
{
to_keys_data.insert(key);
columns.out_keys_data.push_back(key);
auto it = res_map.find(key);
if (it != res_map.end())
{
to_values_data.insert(it->second);
columns.out_vals_data.push_back(it->second);
}
else
{
to_values_data.insertDefault();
columns.out_vals_data.push_back(0);
}
++offset;
@ -214,80 +336,112 @@ private:
break;
}
to_keys_offsets.push_back(offset);
columns.out_keys_offsets.push_back(offset);
}
to_vals_arr.getOffsets().insert(to_keys_offsets.begin(), to_keys_offsets.end());
return res_tuple;
if (columns.out_vals_offsets)
columns.out_vals_offsets->insert(columns.out_keys_offsets.begin(), columns.out_keys_offsets.end());
return res_column;
}
template <typename KeyType>
ColumnPtr execute1(ColumnPtr key_column, ColumnPtr val_column, ColumnPtr max_key_column, const DataTypeTuple & res_type) const
ColumnPtr execute1(ColumnPtr * arg_columns, ColumnPtr max_key_column, const DataTypePtr & res_type, const DataTypePtr & val_type) const
{
const auto & val_type = (assert_cast<const DataTypeArray *>(res_type.getElements()[1].get()))->getNestedType();
switch (val_type->getTypeId())
{
case TypeIndex::Int8:
return execute2<KeyType, Int8>(key_column, val_column, max_key_column, res_type);
return execute2<KeyType, Int8>(arg_columns, max_key_column, res_type);
case TypeIndex::Int16:
return execute2<KeyType, Int16>(key_column, val_column, max_key_column, res_type);
return execute2<KeyType, Int16>(arg_columns, max_key_column, res_type);
case TypeIndex::Int32:
return execute2<KeyType, Int32>(key_column, val_column, max_key_column, res_type);
return execute2<KeyType, Int32>(arg_columns, max_key_column, res_type);
case TypeIndex::Int64:
return execute2<KeyType, Int64>(key_column, val_column, max_key_column, res_type);
return execute2<KeyType, Int64>(arg_columns, max_key_column, res_type);
case TypeIndex::Int128:
return execute2<KeyType, Int128>(arg_columns, max_key_column, res_type);
case TypeIndex::Int256:
return execute2<KeyType, Int256>(arg_columns, max_key_column, res_type);
case TypeIndex::UInt8:
return execute2<KeyType, UInt8>(key_column, val_column, max_key_column, res_type);
return execute2<KeyType, UInt8>(arg_columns, max_key_column, res_type);
case TypeIndex::UInt16:
return execute2<KeyType, UInt16>(key_column, val_column, max_key_column, res_type);
return execute2<KeyType, UInt16>(arg_columns, max_key_column, res_type);
case TypeIndex::UInt32:
return execute2<KeyType, UInt32>(key_column, val_column, max_key_column, res_type);
return execute2<KeyType, UInt32>(arg_columns, max_key_column, res_type);
case TypeIndex::UInt64:
return execute2<KeyType, UInt64>(key_column, val_column, max_key_column, res_type);
return execute2<KeyType, UInt64>(arg_columns, max_key_column, res_type);
case TypeIndex::UInt128:
return execute2<KeyType, UInt128>(arg_columns, max_key_column, res_type);
case TypeIndex::UInt256:
return execute2<KeyType, UInt256>(arg_columns, max_key_column, res_type);
default:
throw Exception{"Illegal columns in arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName());
}
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
{
auto col1 = arguments[0];
auto col2 = arguments[1];
const auto * k = assert_cast<const DataTypeArray *>(col1.type.get());
const auto * v = assert_cast<const DataTypeArray *>(col2.type.get());
/* determine output type */
const DataTypeTuple & res_type = DataTypeTuple(
DataTypes{std::make_shared<DataTypeArray>(k->getNestedType()), std::make_shared<DataTypeArray>(v->getNestedType())});
DataTypePtr res_type, key_type, val_type;
ColumnPtr max_key_column = nullptr;
ColumnPtr arg_columns[] = {arguments[0].column, nullptr};
if (arguments.size() == 3)
if (arguments[0].type->getTypeId() == TypeIndex::Array)
{
/* max key provided */
max_key_column = arguments[2].column;
key_type = assert_cast<const DataTypeArray *>(arguments[0].type.get())->getNestedType();
val_type = assert_cast<const DataTypeArray *>(arguments[1].type.get())->getNestedType();
res_type = getReturnTypeImpl(DataTypes{arguments[0].type, arguments[1].type});
arg_columns[1] = arguments[1].column;
if (arguments.size() == 3)
{
/* max key provided */
max_key_column = arguments[2].column;
}
}
else
{
assert(arguments[0].type->getTypeId() == TypeIndex::Map);
const auto * map_type = assert_cast<const DataTypeMap *>(arguments[0].type.get());
res_type = getReturnTypeImpl(DataTypes{arguments[0].type});
key_type = map_type->getKeyType();
val_type = map_type->getValueType();
if (arguments.size() == 2)
{
/* max key provided */
max_key_column = arguments[1].column;
}
}
switch (k->getNestedType()->getTypeId())
switch (key_type->getTypeId())
{
case TypeIndex::Int8:
return execute1<Int8>(col1.column, col2.column, max_key_column, res_type);
return execute1<Int8>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::Int16:
return execute1<Int16>(col1.column, col2.column, max_key_column, res_type);
return execute1<Int16>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::Int32:
return execute1<Int32>(col1.column, col2.column, max_key_column, res_type);
return execute1<Int32>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::Int64:
return execute1<Int64>(col1.column, col2.column, max_key_column, res_type);
return execute1<Int64>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::Int128:
return execute1<Int128>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::Int256:
return execute1<Int256>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::UInt8:
return execute1<UInt8>(col1.column, col2.column, max_key_column, res_type);
return execute1<UInt8>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::UInt16:
return execute1<UInt16>(col1.column, col2.column, max_key_column, res_type);
return execute1<UInt16>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::UInt32:
return execute1<UInt32>(col1.column, col2.column, max_key_column, res_type);
return execute1<UInt32>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::UInt64:
return execute1<UInt64>(col1.column, col2.column, max_key_column, res_type);
return execute1<UInt64>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::UInt128:
return execute1<UInt128>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::UInt256:
return execute1<UInt256>(arg_columns, max_key_column, res_type, val_type);
default:
throw Exception{"Illegal columns in arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName());
}
}
};
@ -296,5 +450,4 @@ void registerFunctionMapPopulateSeries(FunctionFactory & factory)
{
factory.registerFunction<FunctionMapPopulateSeries>();
}
}

View File

@ -43,7 +43,7 @@ public:
const String & bucket_,
const String & key_,
UInt64 max_single_read_retries_,
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE);
size_t buffer_size_);
bool nextImpl() override;

View File

@ -403,7 +403,6 @@ bool tryReadIntText(T & x, ReadBuffer & buf) // -V1071
* Differs in following:
* - for numbers starting with zero, parsed only zero;
* - symbol '+' before number is not supported;
* - symbols :;<=>? are parsed as some numbers.
*/
template <typename T, bool throw_on_error = true>
void readIntTextUnsafe(T & x, ReadBuffer & buf)
@ -437,15 +436,12 @@ void readIntTextUnsafe(T & x, ReadBuffer & buf)
while (!buf.eof())
{
/// This check is suddenly faster than
/// unsigned char c = *buf.position() - '0';
/// if (c < 10)
/// for unknown reason on Xeon E5645.
unsigned char value = *buf.position() - '0';
if ((*buf.position() & 0xF0) == 0x30) /// It makes sense to have this condition inside loop.
if (value < 10)
{
res *= 10;
res += *buf.position() & 0x0F;
res += value;
++buf.position();
}
else

View File

@ -89,6 +89,7 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion()
PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & clientConfiguration)
: per_request_configuration(clientConfiguration.perRequestConfiguration)
, error_report(clientConfiguration.error_report)
, timeouts(ConnectionTimeouts(
Poco::Timespan(clientConfiguration.connectTimeoutMs * 1000), /// connection timeout.
Poco::Timespan(clientConfiguration.requestTimeoutMs * 1000), /// send timeout.
@ -296,6 +297,8 @@ void PocoHTTPClient::makeRequestInternal(
else if (status_code >= 300)
{
ProfileEvents::increment(select_metric(S3MetricType::Errors));
if (status_code >= 500 && error_report)
error_report(request_configuration);
}
response->SetResponseBody(response_body_stream, session);

View File

@ -37,6 +37,8 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration
void updateSchemeAndRegion();
std::function<void(const Aws::Client::ClientConfigurationPerRequest &)> error_report;
private:
PocoHTTPClientConfiguration(const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_);
@ -95,6 +97,7 @@ private:
Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const;
std::function<Aws::Client::ClientConfigurationPerRequest(const Aws::Http::HttpRequest &)> per_request_configuration;
std::function<void(const Aws::Client::ClientConfigurationPerRequest &)> error_report;
ConnectionTimeouts timeouts;
const RemoteHostFilter & remote_host_filter;
unsigned int s3_max_redirects;

View File

@ -5,7 +5,7 @@ LIBRARY()
ADDINCL(
contrib/libs/zstd/include
contrib/restricted/fast_float
contrib/restricted/fast_float/include
)
PEERDIR(

View File

@ -4,7 +4,7 @@ LIBRARY()
ADDINCL(
contrib/libs/zstd/include
contrib/restricted/fast_float
contrib/restricted/fast_float/include
)
PEERDIR(

View File

@ -77,6 +77,7 @@ AsynchronousMetrics::AsynchronousMetrics(
, update_period(update_period_seconds)
, servers_to_start_before_tables(servers_to_start_before_tables_)
, servers(servers_)
, log(&Poco::Logger::get("AsynchronousMetrics"))
{
#if defined(OS_LINUX)
openFileIfExists("/proc/meminfo", meminfo);
@ -174,26 +175,39 @@ AsynchronousMetrics::AsynchronousMetrics(
edac.back().second = openFileIfExists(edac_uncorrectable_file);
}
if (std::filesystem::exists("/sys/block"))
{
for (const auto & device_dir : std::filesystem::directory_iterator("/sys/block"))
{
String device_name = device_dir.path().filename();
/// We are not interested in loopback devices.
if (device_name.starts_with("loop"))
continue;
std::unique_ptr<ReadBufferFromFilePRead> file = openFileIfExists(device_dir.path() / "stat");
if (!file)
continue;
block_devs[device_name] = std::move(file);
}
}
openBlockDevices();
#endif
}
#if defined(OS_LINUX)
void AsynchronousMetrics::openBlockDevices()
{
LOG_TRACE(log, "Scanning /sys/block");
if (!std::filesystem::exists("/sys/block"))
return;
block_devices_rescan_delay.restart();
block_devs.clear();
for (const auto & device_dir : std::filesystem::directory_iterator("/sys/block"))
{
String device_name = device_dir.path().filename();
/// We are not interested in loopback devices.
if (device_name.starts_with("loop"))
continue;
std::unique_ptr<ReadBufferFromFilePRead> file = openFileIfExists(device_dir.path() / "stat");
if (!file)
continue;
block_devs[device_name] = std::move(file);
}
}
#endif
void AsynchronousMetrics::start()
{
/// Update once right now, to make metrics available just after server start
@ -550,7 +564,7 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
/// Log only if difference is high. This is for convenience. The threshold is arbitrary.
if (difference >= 1048576 || difference <= -1048576)
LOG_TRACE(&Poco::Logger::get("AsynchronousMetrics"),
LOG_TRACE(log,
"MemoryTracking: was {}, peak {}, will set to {} (RSS), difference: {}",
ReadableSize(amount),
ReadableSize(peak),
@ -765,43 +779,60 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
uint64_t kb = 0;
readText(kb, *meminfo);
if (kb)
if (!kb)
{
skipWhitespaceIfAny(*meminfo, true);
assertString("kB", *meminfo);
skipToNextLineOrEOF(*meminfo);
continue;
}
uint64_t bytes = kb * 1024;
skipWhitespaceIfAny(*meminfo, true);
if (name == "MemTotal:")
{
new_values["OSMemoryTotal"] = bytes;
}
else if (name == "MemFree:")
{
/// We cannot simply name this metric "Free", because it confuses users.
/// See https://www.linuxatemyram.com/
/// For convenience we also provide OSMemoryFreePlusCached, that should be somewhat similar to OSMemoryAvailable.
/**
* Not all entries in /proc/meminfo contain the kB suffix, e.g.
* HugePages_Total: 0
* HugePages_Free: 0
* We simply skip such entries as they're not needed
*/
if (*meminfo->position() == '\n')
{
skipToNextLineOrEOF(*meminfo);
continue;
}
free_plus_cached_bytes += bytes;
new_values["OSMemoryFreeWithoutCached"] = bytes;
}
else if (name == "MemAvailable:")
{
new_values["OSMemoryAvailable"] = bytes;
}
else if (name == "Buffers:")
{
new_values["OSMemoryBuffers"] = bytes;
}
else if (name == "Cached:")
{
free_plus_cached_bytes += bytes;
new_values["OSMemoryCached"] = bytes;
}
else if (name == "SwapCached:")
{
new_values["OSMemorySwapCached"] = bytes;
}
assertString("kB", *meminfo);
uint64_t bytes = kb * 1024;
if (name == "MemTotal:")
{
new_values["OSMemoryTotal"] = bytes;
}
else if (name == "MemFree:")
{
/// We cannot simply name this metric "Free", because it confuses users.
/// See https://www.linuxatemyram.com/
/// For convenience we also provide OSMemoryFreePlusCached, that should be somewhat similar to OSMemoryAvailable.
free_plus_cached_bytes += bytes;
new_values["OSMemoryFreeWithoutCached"] = bytes;
}
else if (name == "MemAvailable:")
{
new_values["OSMemoryAvailable"] = bytes;
}
else if (name == "Buffers:")
{
new_values["OSMemoryBuffers"] = bytes;
}
else if (name == "Cached:")
{
free_plus_cached_bytes += bytes;
new_values["OSMemoryCached"] = bytes;
}
else if (name == "SwapCached:")
{
new_values["OSMemorySwapCached"] = bytes;
}
skipToNextLineOrEOF(*meminfo);
@ -877,9 +908,14 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
}
}
for (auto & [name, device] : block_devs)
/// Update list of block devices periodically
/// (i.e. someone may add new disk to RAID array)
if (block_devices_rescan_delay.elapsedSeconds() >= 300)
openBlockDevices();
try
{
try
for (auto & [name, device] : block_devs)
{
device->rewind();
@ -928,10 +964,20 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
new_values["BlockQueueTimePerOp_" + name] = delta_values.time_in_queue * time_multiplier / delta_values.in_flight_ios;
}
}
}
catch (...)
{
/// Try to reopen block devices in case of error
/// (e.g. ENOENT means that some disk had been replaced, and it may appear with a new name)
try
{
openBlockDevices();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
tryLogCurrentException(__PRETTY_FUNCTION__);
}
if (net_dev)
@ -1303,9 +1349,9 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
new_values["AsynchronousMetricsCalculationTimeSpent"] = watch.elapsedSeconds();
/// Log the new metrics.
if (auto log = getContext()->getAsynchronousMetricLog())
if (auto asynchronous_metric_log = getContext()->getAsynchronousMetricLog())
{
log->addValues(new_values);
asynchronous_metric_log->addValues(new_values);
}
first_run = false;

View File

@ -3,6 +3,7 @@
#include <Interpreters/Context_fwd.h>
#include <Common/MemoryStatisticsOS.h>
#include <Common/ThreadPool.h>
#include <Common/Stopwatch.h>
#include <IO/ReadBufferFromFile.h>
#include <condition_variable>
@ -15,6 +16,11 @@
#include <unordered_map>
namespace Poco
{
class Logger;
}
namespace DB
{
@ -175,12 +181,17 @@ private:
std::unordered_map<String /* device name */, NetworkInterfaceStatValues> network_interface_stats;
Stopwatch block_devices_rescan_delay;
void openBlockDevices();
#endif
std::unique_ptr<ThreadFromGlobalPool> thread;
void run();
void update(std::chrono::system_clock::time_point update_time);
Poco::Logger * log;
};
}

View File

@ -100,6 +100,8 @@ public:
UInt64 distributed_depth = 0;
bool is_replicated_database_internal = false;
/// Returns true when query_kind is still QueryKind::NO_QUERY.
bool empty() const { return query_kind == QueryKind::NO_QUERY; }
/** Serialization and deserialization.

View File

@ -2796,6 +2796,13 @@ ZooKeeperMetadataTransactionPtr Context::getZooKeeperMetadataTransaction() const
return metadata_transaction;
}
/// Detaches the ZooKeeper metadata transaction from this context by setting it to nullptr.
/// Asserts that a transaction is currently attached and that a query context exists.
/// NOTE(review): no lock is taken here, unlike getPartUUIDs() below, which calls getLock() — confirm this is intended.
void Context::resetZooKeeperMetadataTransaction()
{
assert(metadata_transaction);
assert(hasQueryContext());
metadata_transaction = nullptr;
}
PartUUIDsPtr Context::getPartUUIDs() const
{
auto lock = getLock();

View File

@ -819,6 +819,8 @@ public:
void initZooKeeperMetadataTransaction(ZooKeeperMetadataTransactionPtr txn, bool attach_existing = false);
/// Returns context of current distributed DDL query or nullptr.
ZooKeeperMetadataTransactionPtr getZooKeeperMetadataTransaction() const;
/// Removes context of current distributed DDL.
void resetZooKeeperMetadataTransaction();
PartUUIDsPtr getPartUUIDs() const;
PartUUIDsPtr getIgnoredPartUUIDs() const;

View File

@ -22,6 +22,7 @@ namespace ErrorCodes
extern const int UNKNOWN_FORMAT_VERSION;
extern const int UNKNOWN_TYPE_OF_QUERY;
extern const int INCONSISTENT_CLUSTER_DEFINITION;
extern const int LOGICAL_ERROR;
}
HostID HostID::fromString(const String & host_port_str)
@ -359,9 +360,10 @@ ContextMutablePtr DatabaseReplicatedTask::makeQueryContext(ContextPtr from_conte
{
auto query_context = DDLTaskBase::makeQueryContext(from_context, zookeeper);
query_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
query_context->getClientInfo().is_replicated_database_internal = true;
query_context->setCurrentDatabase(database->getDatabaseName());
auto txn = std::make_shared<ZooKeeperMetadataTransaction>(zookeeper, database->zookeeper_path, is_initial_query);
auto txn = std::make_shared<ZooKeeperMetadataTransaction>(zookeeper, database->zookeeper_path, is_initial_query, entry_path);
query_context->initZooKeeperMetadataTransaction(txn);
if (is_initial_query)
@ -401,7 +403,8 @@ UInt32 DDLTaskBase::getLogEntryNumber(const String & log_entry_name)
void ZooKeeperMetadataTransaction::commit()
{
assert(state == CREATED);
if (state != CREATED)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect state ({}), it's a bug", state);
state = FAILED;
current_zookeeper->multi(ops);
state = COMMITTED;

View File

@ -20,6 +20,11 @@ namespace fs = std::filesystem;
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
class ASTQueryWithOnCluster;
using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
using ClusterPtr = std::shared_ptr<Cluster>;
@ -164,13 +169,15 @@ class ZooKeeperMetadataTransaction
ZooKeeperPtr current_zookeeper;
String zookeeper_path;
bool is_initial_query;
String task_path;
Coordination::Requests ops;
public:
ZooKeeperMetadataTransaction(const ZooKeeperPtr & current_zookeeper_, const String & zookeeper_path_, bool is_initial_query_)
ZooKeeperMetadataTransaction(const ZooKeeperPtr & current_zookeeper_, const String & zookeeper_path_, bool is_initial_query_, const String & task_path_)
: current_zookeeper(current_zookeeper_)
, zookeeper_path(zookeeper_path_)
, is_initial_query(is_initial_query_)
, task_path(task_path_)
{
}
@ -180,15 +187,21 @@ public:
String getDatabaseZooKeeperPath() const { return zookeeper_path; }
String getTaskZooKeeperPath() const { return task_path; }
ZooKeeperPtr getZooKeeper() const { return current_zookeeper; }
/// Appends one ZooKeeper request to the list of operations of this transaction.
/// Takes ownership of `op`: the pointer is moved into `ops`, so the caller's object is left empty.
/// Throws LOGICAL_ERROR if the transaction has already been executed.
void addOp(Coordination::RequestPtr && op)
{
    assert(!isExecuted());
    if (isExecuted())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add ZooKeeper operation because query is executed. It's a bug.");
    /// `op` is a named rvalue reference and therefore an lvalue here:
    /// without std::move the pointer would be copied instead of moved.
    ops.emplace_back(std::move(op));
}
void moveOpsTo(Coordination::Requests & other_ops)
{
assert(!isExecuted());
if (isExecuted())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add ZooKeeper operation because query is executed. It's a bug.");
std::move(ops.begin(), ops.end(), std::back_inserter(other_ops));
ops.clear();
state = COMMITTED;
@ -196,7 +209,7 @@ public:
void commit();
~ZooKeeperMetadataTransaction() { assert(isExecuted() || std::uncaught_exceptions()); }
~ZooKeeperMetadataTransaction() { assert(isExecuted() || std::uncaught_exceptions() || ops.empty()); }
};
ClusterPtr tryGetReplicatedDatabaseCluster(const String & cluster_name);

View File

@ -613,18 +613,6 @@ void makeWindowDescriptionFromAST(const Context & context,
void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
{
// Convenient to check here because at least we have the Context.
if (!syntax->window_function_asts.empty() &&
!getContext()->getSettingsRef().allow_experimental_window_functions)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"The support for window functions is experimental and will change"
" in backwards-incompatible ways in the future releases. Set"
" allow_experimental_window_functions = 1 to enable it."
" While processing '{}'",
syntax->window_function_asts[0]->formatForErrorMessage());
}
// Window definitions from the WINDOW clause
const auto * select_query = query->as<ASTSelectQuery>();
if (select_query && select_query->window())

View File

@ -63,7 +63,7 @@ public:
return;
bool is_table = false;
ASTPtr subquery_or_table_name = ast; /// ASTTableIdentifier | ASTSubquery | ASTTableExpression
ASTPtr subquery_or_table_name; /// ASTTableIdentifier | ASTSubquery | ASTTableExpression
if (const auto * ast_table_expr = ast->as<ASTTableExpression>())
{
@ -76,7 +76,14 @@ public:
}
}
else if (ast->as<ASTTableIdentifier>())
{
subquery_or_table_name = ast;
is_table = true;
}
else if (ast->as<ASTSubquery>())
{
subquery_or_table_name = ast;
}
if (!subquery_or_table_name)
throw Exception("Global subquery requires subquery or table name", ErrorCodes::WRONG_GLOBAL_SUBQUERY);

View File

@ -37,7 +37,7 @@ public:
virtual size_t getTotalRowCount() const = 0;
virtual size_t getTotalByteCount() const = 0;
virtual bool alwaysReturnsEmptySet() const { return false; }
virtual bool alwaysReturnsEmptySet() const = 0;
/// StorageJoin/Dictionary is already filled. No need to call addJoinedBlock.
/// Different query plan is used for such joins.

View File

@ -54,7 +54,7 @@ BlockIO InterpreterAlterQuery::execute()
DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name);
if (typeid_cast<DatabaseReplicated *>(database.get())
&& getContext()->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY)
&& !getContext()->getClientInfo().is_replicated_database_internal)
{
auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name);
guard->releaseTableLock();
@ -100,7 +100,8 @@ BlockIO InterpreterAlterQuery::execute()
if (typeid_cast<DatabaseReplicated *>(database.get()))
{
int command_types_count = !mutation_commands.empty() + !partition_commands.empty() + !live_view_commands.empty() + !alter_commands.empty();
if (1 < command_types_count)
bool mixed_settings_amd_metadata_alter = alter_commands.hasSettingsAlterCommand() && !alter_commands.isSettingsAlter();
if (1 < command_types_count || mixed_settings_amd_metadata_alter)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "For Replicated databases it's not allowed "
"to execute ALTERs of different types in single query");
}

View File

@ -8,6 +8,7 @@
#include <Common/Macros.h>
#include <Common/randomSeed.h>
#include <Common/renameat2.h>
#include <Common/hex.h>
#include <Core/Defines.h>
#include <Core/Settings.h>
@ -31,7 +32,9 @@
#include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
#include <Interpreters/executeQuery.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/DDLTask.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/InterpreterCreateQuery.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
@ -84,7 +87,6 @@ namespace ErrorCodes
extern const int UNKNOWN_DATABASE;
extern const int PATH_ACCESS_DENIED;
extern const int NOT_IMPLEMENTED;
extern const int UNKNOWN_TABLE;
}
namespace fs = std::filesystem;
@ -803,36 +805,6 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data
create.uuid = UUIDHelpers::Nil;
create.to_inner_uuid = UUIDHelpers::Nil;
}
if (create.replace_table)
{
if (database->getUUID() == UUIDHelpers::Nil)
throw Exception(ErrorCodes::INCORRECT_QUERY,
"{} query is supported only for Atomic databases",
create.create_or_replace ? "CREATE OR REPLACE TABLE" : "REPLACE TABLE");
UUID uuid_of_table_to_replace;
if (create.create_or_replace)
{
uuid_of_table_to_replace = getContext()->tryResolveStorageID(StorageID(create.database, create.table)).uuid;
if (uuid_of_table_to_replace == UUIDHelpers::Nil)
{
/// Convert to usual CREATE
create.replace_table = false;
assert(!database->isTableExist(create.table, getContext()));
}
else
create.table = "_tmp_replace_" + toString(uuid_of_table_to_replace);
}
else
{
uuid_of_table_to_replace = getContext()->resolveStorageID(StorageID(create.database, create.table)).uuid;
if (uuid_of_table_to_replace == UUIDHelpers::Nil)
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist",
backQuoteIfNeed(create.database), backQuoteIfNeed(create.table));
create.table = "_tmp_replace_" + toString(uuid_of_table_to_replace);
}
}
}
@ -856,7 +828,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
auto guard = DatabaseCatalog::instance().getDDLGuard(database_name, create.table);
if (auto* ptr = typeid_cast<DatabaseReplicated *>(database.get());
ptr && getContext()->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY)
ptr && !getContext()->getClientInfo().is_replicated_database_internal)
{
create.database = database_name;
guard->releaseTableLock();
@ -950,7 +922,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
auto guard = DatabaseCatalog::instance().getDDLGuard(create.database, create.table);
if (auto * ptr = typeid_cast<DatabaseReplicated *>(database.get());
ptr && getContext()->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY)
ptr && !getContext()->getClientInfo().is_replicated_database_internal)
{
assertOrSetUUID(create, database);
guard->releaseTableLock();
@ -1110,23 +1082,72 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
/// Handles [CREATE OR] REPLACE TABLE: the new table is created under a generated
/// `_tmp_replace_*` name, filled, swapped with the target through a RENAME/EXCHANGE query,
/// and the table that ends up under the temporary name is then dropped.
/// NOTE(review): this span reads like a rendered diff without +/- markers. Statements of the
/// superseded and of the replacement implementation are interleaved (two `try` blocks,
/// `replaced` vs `renamed`, two DROP calls), and the hunk header in the middle hides part of
/// the `elem` initializer, so the text is not compilable as shown.
BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
const InterpreterCreateQuery::TableProperties & properties)
{
/// Replicated database requires separate contexts for each DDL query
ContextPtr current_context = getContext();
ContextMutablePtr create_context = Context::createCopy(current_context);
create_context->setQueryContext(std::const_pointer_cast<Context>(current_context));
/// Builds a fresh query context for a DROP issued by this function.
/// With on_error == true the plain copy is returned. Otherwise, when the current context
/// carries a ZooKeeper metadata transaction, the copy receives its own transaction object
/// constructed from the same ZooKeeper handle and paths.
auto make_drop_context = [&](bool on_error) -> ContextMutablePtr
{
ContextMutablePtr drop_context = Context::createCopy(current_context);
drop_context->makeQueryContext();
if (on_error)
return drop_context;
if (auto txn = current_context->getZooKeeperMetadataTransaction())
{
/// Execute drop as separate query, because [CREATE OR] REPLACE query can be considered as
/// successfully executed after RENAME/EXCHANGE query.
drop_context->resetZooKeeperMetadataTransaction();
auto drop_txn = std::make_shared<ZooKeeperMetadataTransaction>(txn->getZooKeeper(), txn->getDatabaseZooKeeperPath(),
txn->isInitialQuery(), txn->getTaskZooKeeperPath());
drop_context->initZooKeeperMetadataTransaction(drop_txn);
}
return drop_context;
};
auto ast_drop = std::make_shared<ASTDropQuery>();
/// Remember the user-visible name; create.table is overwritten with the temporary name below.
String table_to_replace_name = create.table;
bool created = false;
bool replaced = false;
try
{
[[maybe_unused]] bool done = doCreateTable(create, properties);
assert(done);
auto database = DatabaseCatalog::instance().getDatabase(create.database);
/// A Nil database UUID is rejected here with the "only for Atomic databases" error.
if (database->getUUID() == UUIDHelpers::Nil)
throw Exception(ErrorCodes::INCORRECT_QUERY,
"{} query is supported only for Atomic databases",
create.create_or_replace ? "CREATE OR REPLACE TABLE" : "REPLACE TABLE");
/// Temporary name is `_tmp_replace_<hex(sipHash64(database + table))>_<hex(suffix)>`.
/// The suffix is taken from thread_local_rng(), or from a hash of the task's ZooKeeper
/// path when a metadata transaction is present.
UInt64 name_hash = sipHash64(create.database + create.table);
UInt16 random_suffix = thread_local_rng();
if (auto txn = current_context->getZooKeeperMetadataTransaction())
{
/// Avoid different table name on database replicas
random_suffix = sipHash64(txn->getTaskZooKeeperPath());
}
create.table = fmt::format("_tmp_replace_{}_{}",
getHexUIntLowercase(name_hash),
getHexUIntLowercase(random_suffix));
/// The DROP query targets whatever table carries the temporary name at the time it runs.
ast_drop->table = create.table;
ast_drop->is_dictionary = create.is_dictionary;
ast_drop->database = create.database;
ast_drop->kind = ASTDropQuery::Drop;
created = true;
if (!create.replace_table)
return fillTableIfNeeded(create);
}
bool created = false;
bool renamed = false;
try
{
/// Create temporary table (random name will be generated)
[[maybe_unused]] bool done = InterpreterCreateQuery(query_ptr, create_context).doCreateTable(create, properties);
assert(done);
created = true;
/// Try to fill temporary table
BlockIO fill_io = fillTableIfNeeded(create);
executeTrivialBlockIO(fill_io, getContext());
/// Replace target table with created one
auto ast_rename = std::make_shared<ASTRenameQuery>();
ASTRenameQuery::Element elem
{
@ -1135,22 +1156,44 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
};
ast_rename->elements.push_back(std::move(elem));
ast_rename->exchange = true;
ast_rename->dictionary = create.is_dictionary;
if (create.create_or_replace)
{
/// CREATE OR REPLACE TABLE
/// Will execute ordinary RENAME instead of EXCHANGE if the target table does not exist
ast_rename->rename_if_cannot_exchange = true;
ast_rename->exchange = false;
}
else
{
/// REPLACE TABLE
/// Will execute EXCHANGE query and fail if the target table does not exist
ast_rename->exchange = true;
}
InterpreterRenameQuery(ast_rename, getContext()).execute();
replaced = true;
InterpreterRenameQuery interpreter_rename{ast_rename, current_context};
interpreter_rename.execute();
renamed = true;
InterpreterDropQuery(ast_drop, getContext()).execute();
/// After a plain rename nothing is left under the temporary name, so the DROP is skipped.
if (!interpreter_rename.renamedInsteadOfExchange())
{
/// Target table was replaced with new one, drop old table
auto drop_context = make_drop_context(false);
InterpreterDropQuery(ast_drop, drop_context).execute();
}
create.table = table_to_replace_name;
return fillTableIfNeeded(create);
return {};
}
catch (...)
{
if (created && create.replace_table && !replaced)
InterpreterDropQuery(ast_drop, getContext()).execute();
/// Drop temporary table if it was successfully created, but was not renamed to target name
/// NOTE(review): if this cleanup DROP itself throws, that exception leaves the handler and
/// the `throw;` below is never reached, so the original error would be lost — confirm
/// whether that is acceptable.
if (created && !renamed)
{
auto drop_context = make_drop_context(true);
InterpreterDropQuery(ast_drop, drop_context).execute();
}
throw;
}
}

View File

@ -133,7 +133,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ASTDropQuery & query, DatabaseP
/// Prevents recursive drop from drop database query. The original query must specify a table.
bool is_drop_or_detach_database = query_ptr->as<ASTDropQuery>()->table.empty();
bool is_replicated_ddl_query = typeid_cast<DatabaseReplicated *>(database.get()) &&
getContext()->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY &&
!getContext()->getClientInfo().is_replicated_database_internal &&
!is_drop_or_detach_database;
AccessFlags drop_storage;
@ -426,6 +426,7 @@ void InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind kind, ContextPtr
if (auto txn = current_context->getZooKeeperMetadataTransaction())
{
/// For Replicated database
drop_context->getClientInfo().is_replicated_database_internal = true;
drop_context->setQueryContext(std::const_pointer_cast<Context>(current_context));
drop_context->initZooKeeperMetadataTransaction(txn, true);
}

View File

@ -72,16 +72,31 @@ BlockIO InterpreterRenameQuery::execute()
BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, const RenameDescriptions & descriptions, TableGuards & ddl_guards)
{
assert(!rename.rename_if_cannot_exchange || descriptions.size() == 1);
assert(!(rename.rename_if_cannot_exchange && rename.exchange));
auto & database_catalog = DatabaseCatalog::instance();
for (const auto & elem : descriptions)
{
if (!rename.exchange)
bool exchange_tables;
if (rename.exchange)
{
exchange_tables = true;
}
else if (rename.rename_if_cannot_exchange)
{
exchange_tables = database_catalog.isTableExist(StorageID(elem.to_database_name, elem.to_table_name), getContext());
renamed_instead_of_exchange = !exchange_tables;
}
else
{
exchange_tables = false;
database_catalog.assertTableDoesntExist(StorageID(elem.to_database_name, elem.to_table_name), getContext());
}
DatabasePtr database = database_catalog.getDatabase(elem.from_database_name);
if (typeid_cast<DatabaseReplicated *>(database.get())
&& getContext()->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY)
&& !getContext()->getClientInfo().is_replicated_database_internal)
{
if (1 < descriptions.size())
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Database {} is Replicated, "
@ -100,7 +115,7 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c
elem.from_table_name,
*database_catalog.getDatabase(elem.to_database_name),
elem.to_table_name,
rename.exchange,
exchange_tables,
rename.dictionary);
}
}

View File

@ -55,6 +55,8 @@ public:
BlockIO execute() override;
void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr) const override;
/// Returns true when executeToTables handled a `rename_if_cannot_exchange` query whose
/// destination table did not exist, i.e. a plain rename was selected instead of an exchange.
bool renamedInsteadOfExchange() const { return renamed_instead_of_exchange; }
private:
BlockIO executeToTables(const ASTRenameQuery & rename, const RenameDescriptions & descriptions, TableGuards & ddl_guards);
static BlockIO executeToDatabase(const ASTRenameQuery & rename, const RenameDescriptions & descriptions);
@ -62,6 +64,7 @@ private:
AccessRightsElements getRequiredAccess() const;
ASTPtr query_ptr;
/// Written by executeToTables for `rename_if_cannot_exchange` queries: true if the
/// destination table was absent. Stays false otherwise.
bool renamed_instead_of_exchange{false};
};
}

Some files were not shown because too many files have changed in this diff Show More