Merge branch 'master' into roaring-memory-tracker

This commit is contained in:
Alexey Milovidov 2021-08-07 19:01:31 +03:00
commit c9d66defa0
211 changed files with 2783 additions and 1042 deletions

2
contrib/AMQP-CPP vendored

@ -1 +1 @@
Subproject commit 03781aaff0f10ef41f902b8cf865fe0067180c10
Subproject commit 1a6c51f4ac51ac56610fa95081bd2f349911375a

View File

@ -10,11 +10,12 @@ set (SRCS
"${LIBRARY_DIR}/src/deferredconsumer.cpp"
"${LIBRARY_DIR}/src/deferredextreceiver.cpp"
"${LIBRARY_DIR}/src/deferredget.cpp"
"${LIBRARY_DIR}/src/deferredpublisher.cpp"
"${LIBRARY_DIR}/src/deferredrecall.cpp"
"${LIBRARY_DIR}/src/deferredreceiver.cpp"
"${LIBRARY_DIR}/src/field.cpp"
"${LIBRARY_DIR}/src/flags.cpp"
"${LIBRARY_DIR}/src/linux_tcp/openssl.cpp"
"${LIBRARY_DIR}/src/linux_tcp/sslerrorprinter.cpp"
"${LIBRARY_DIR}/src/linux_tcp/tcpconnection.cpp"
"${LIBRARY_DIR}/src/inbuffer.cpp"
"${LIBRARY_DIR}/src/receivedframe.cpp"

2
contrib/arrow vendored

@ -1 +1 @@
Subproject commit debf751a129bdda9ff4d1e895e08957ff77000a1
Subproject commit 078e21bad344747b7656ef2d7a4f7410a0a303eb

View File

@ -194,9 +194,18 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/cast.cc"
"${LIBRARY_DIR}/compute/exec.cc"
"${LIBRARY_DIR}/compute/function.cc"
"${LIBRARY_DIR}/compute/function_internal.cc"
"${LIBRARY_DIR}/compute/kernel.cc"
"${LIBRARY_DIR}/compute/registry.cc"
"${LIBRARY_DIR}/compute/exec/exec_plan.cc"
"${LIBRARY_DIR}/compute/exec/expression.cc"
"${LIBRARY_DIR}/compute/exec/key_compare.cc"
"${LIBRARY_DIR}/compute/exec/key_encode.cc"
"${LIBRARY_DIR}/compute/exec/key_hash.cc"
"${LIBRARY_DIR}/compute/exec/key_map.cc"
"${LIBRARY_DIR}/compute/exec/util.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_quantile.cc"
@ -207,6 +216,7 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/kernels/scalar_arithmetic.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_boolean.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_dictionary.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_nested.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_numeric.cc"
@ -214,15 +224,18 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/kernels/scalar_cast_temporal.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_compare.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_fill_null.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_nested.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_set_lookup.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_string.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_temporal.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_validity.cc"
"${LIBRARY_DIR}/compute/kernels/util_internal.cc"
"${LIBRARY_DIR}/compute/kernels/vector_hash.cc"
"${LIBRARY_DIR}/compute/kernels/vector_nested.cc"
"${LIBRARY_DIR}/compute/kernels/vector_replace.cc"
"${LIBRARY_DIR}/compute/kernels/vector_selection.cc"
"${LIBRARY_DIR}/compute/kernels/vector_sort.cc"
"${LIBRARY_DIR}/compute/kernels/util_internal.cc"
"${LIBRARY_DIR}/csv/chunker.cc"
"${LIBRARY_DIR}/csv/column_builder.cc"
@ -231,6 +244,7 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/csv/options.cc"
"${LIBRARY_DIR}/csv/parser.cc"
"${LIBRARY_DIR}/csv/reader.cc"
"${LIBRARY_DIR}/csv/writer.cc"
"${LIBRARY_DIR}/ipc/dictionary.cc"
"${LIBRARY_DIR}/ipc/feather.cc"
@ -247,6 +261,7 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/io/interfaces.cc"
"${LIBRARY_DIR}/io/memory.cc"
"${LIBRARY_DIR}/io/slow.cc"
"${LIBRARY_DIR}/io/stdio.cc"
"${LIBRARY_DIR}/io/transform.cc"
"${LIBRARY_DIR}/tensor/coo_converter.cc"
@ -257,9 +272,9 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/util/bit_block_counter.cc"
"${LIBRARY_DIR}/util/bit_run_reader.cc"
"${LIBRARY_DIR}/util/bit_util.cc"
"${LIBRARY_DIR}/util/bitmap.cc"
"${LIBRARY_DIR}/util/bitmap_builders.cc"
"${LIBRARY_DIR}/util/bitmap_ops.cc"
"${LIBRARY_DIR}/util/bitmap.cc"
"${LIBRARY_DIR}/util/bpacking.cc"
"${LIBRARY_DIR}/util/cancel.cc"
"${LIBRARY_DIR}/util/compression.cc"

2
contrib/zlib-ng vendored

@ -1 +1 @@
Subproject commit db232d30b4c72fd58e6d7eae2d12cebf9c3d90db
Subproject commit 6a5e93b9007782115f7f7e5235dedc81c4f1facb

View File

@ -183,6 +183,10 @@ for conn_index, c in enumerate(all_connections):
# requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings
# (https://github.com/mymarilyn/clickhouse-driver/pull/142)
c.settings[s.tag] = s.text
# We have to perform a query to make sure the settings work. Otherwise an
# unknown setting will lead to failing precondition check, and we will skip
# the test, which is wrong.
c.execute("select 1")
reportStageEnd('settings')

View File

@ -28,7 +28,7 @@ RUN apt-get update --yes \
ENV PKG_VERSION="pvs-studio-latest"
RUN set -x \
&& export PUBKEY_HASHSUM="486a0694c7f92e96190bbfac01c3b5ac2cb7823981db510a28f744c99eabbbf17a7bcee53ca42dc6d84d4323c2742761" \
&& export PUBKEY_HASHSUM="686e5eb8b3c543a5c54442c39ec876b6c2d912fe8a729099e600017ae53c877dda3368fe38ed7a66024fe26df6b5892a" \
&& wget -nv https://files.viva64.com/etc/pubkey.txt -O /tmp/pubkey.txt \
&& echo "${PUBKEY_HASHSUM} /tmp/pubkey.txt" | sha384sum -c \
&& apt-key add /tmp/pubkey.txt \

View File

@ -15,7 +15,7 @@ Supports table structure modifications (`ALTER TABLE ... ADD|DROP COLUMN`). If `
``` sql
CREATE DATABASE test_database
ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `use_table_cache`]);
ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `schema`, `use_table_cache`]);
```
**Engine Parameters**
@ -24,6 +24,7 @@ ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `use_table_cac
- `database` — Remote database name.
- `user` — PostgreSQL user.
- `password` — User password.
- `schema` — PostgreSQL schema.
- `use_table_cache` — Defines if the database table structure is cached or not. Optional. Default value: `0`.
## Data Types Support {#data_types-support}

View File

@ -84,6 +84,8 @@ Features:
- Table data preview.
- Full-text search.
By default, DBeaver does not connect using a session (the CLI, for example, does). If you require session support (for example, to set settings for your session), edit the driver connection properties and set `session_id` to a random string (it uses the HTTP connection under the hood). Then you can use any setting from the query window.
### clickhouse-cli {#clickhouse-cli}
[clickhouse-cli](https://github.com/hatarist/clickhouse-cli) is an alternative command-line client for ClickHouse, written in Python 3.

View File

@ -82,6 +82,7 @@ The next 4 columns have a non-zero value only where there is an active session w
- `absolute_delay` (`UInt64`) - How big lag in seconds the current replica has.
- `total_replicas` (`UInt8`) - The total number of known replicas of this table.
- `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ZooKeeper (i.e., the number of functioning replicas).
- `replica_is_active` ([Map(String, UInt8)](../../sql-reference/data-types/map.md)) — Map from replica name to a flag showing whether that replica is active.
If you request all the columns, the table may work a bit slowly, since several reads from ZooKeeper are made for each row.
If you do not request the last 4 columns (log_max_index, log_pointer, total_replicas, active_replicas), the table works quickly.

View File

@ -2138,3 +2138,52 @@ Result:
- [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port)
## currentProfiles {#current-profiles}
Returns a list of the current [settings profiles](../../operations/access-rights.md#settings-profiles-management) for the current user.
The command [SET PROFILE](../../sql-reference/statements/set.md#query-set) could be used to change the current setting profile. If the command `SET PROFILE` was not used the function returns the profiles specified at the current user's definition (see [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)).
**Syntax**
``` sql
currentProfiles()
```
**Returned value**
- List of the current user settings profiles.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
## enabledProfiles {#enabled-profiles}
Returns settings profiles, assigned to the current user both explicitly and implicitly. Explicitly assigned profiles are the same as returned by the [currentProfiles](#current-profiles) function. Implicitly assigned profiles include parent profiles of other assigned profiles, profiles assigned via granted roles, profiles assigned via their own settings, and the main default profile (see the `default_profile` section in the main server configuration file).
**Syntax**
``` sql
enabledProfiles()
```
**Returned value**
- List of the enabled settings profiles.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
## defaultProfiles {#default-profiles}
Returns all the profiles specified at the current user's definition (see [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement) statement).
**Syntax**
``` sql
defaultProfiles()
```
**Returned value**
- List of the default settings profiles.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).

View File

@ -11,7 +11,7 @@ Arranges `key:value` pairs into [Map(key, value)](../../sql-reference/data-types
**Syntax**
``` sql
```sql
map(key1, value1[, key2, value2, ...])
```
@ -30,7 +30,7 @@ Type: [Map(key, value)](../../sql-reference/data-types/map.md).
Query:
``` sql
```sql
SELECT map('key1', number, 'key2', number * 2) FROM numbers(3);
```
@ -46,7 +46,7 @@ Result:
Query:
``` sql
```sql
CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = MergeTree() ORDER BY a;
INSERT INTO table_map SELECT map('key1', number, 'key2', number * 2) FROM numbers(3);
SELECT a['key2'] FROM table_map;
@ -54,7 +54,7 @@ SELECT a['key2'] FROM table_map;
Result:
``` text
```text
┌─arrayElement(a, 'key2')─┐
│ 0 │
│ 2 │
@ -72,7 +72,7 @@ Collect all the keys and sum corresponding values.
**Syntax**
``` sql
```sql
mapAdd(arg1, arg2 [, ...])
```
@ -88,13 +88,13 @@ Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sq
Query with a tuple map:
``` sql
```sql
SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTypeName(res) as type;
```
Result:
``` text
```text
┌─res───────────┬─type───────────────────────────────┐
│ ([1,2],[2,2]) │ Tuple(Array(UInt8), Array(UInt64)) │
└───────────────┴────────────────────────────────────┘
@ -102,7 +102,16 @@ Result:
Query with `Map` type:
``` sql
```sql
SELECT mapAdd(map(1,1), map(1,1));
```
Result:
```text
┌─mapAdd(map(1, 1), map(1, 1))─┐
│ {1:2} │
└──────────────────────────────┘
```
## mapSubtract {#function-mapsubtract}
@ -111,21 +120,21 @@ Collect all the keys and subtract corresponding values.
**Syntax**
``` sql
```sql
mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...])
```
**Arguments**
Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array.
Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for each key. All key arrays should have the same type, and all value arrays should contain items which are promoted to one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as the type for the result array.
**Returned value**
- Returns one [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
**Example**
Query:
Query with a tuple map:
```sql
SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt32(2), 1])) as res, toTypeName(res) as type;
@ -139,32 +148,54 @@ Result:
└────────────────┴───────────────────────────────────┘
```
Query with `Map` type:
```sql
SELECT mapSubtract(map(1,1), map(1,1));
```
Result:
```text
┌─mapSubtract(map(1, 1), map(1, 1))─┐
│ {1:0} │
└───────────────────────────────────┘
```
## mapPopulateSeries {#function-mappopulateseries}
Fills missing keys in the maps (key and value array pair), where keys are integers. Also, it supports specifying the max key, which is used to extend the keys array.
Arguments are [maps](../../sql-reference/data-types/map.md) or two [arrays](../../sql-reference/data-types/array.md#data-type-array), where the first array represents keys, and the second array contains values for each key.
For array arguments the number of elements in `keys` and `values` must be the same for each row.
**Syntax**
``` sql
```sql
mapPopulateSeries(keys, values[, max])
mapPopulateSeries(map[, max])
```
Generates a map, where keys are a series of numbers, from minimum to maximum keys (or `max` argument if it specified) taken from `keys` array with a step size of one, and corresponding values taken from `values` array. If the value is not specified for the key, then it uses the default value in the resulting map. For repeated keys, only the first value (in order of appearing) gets associated with the key.
The number of elements in `keys` and `values` must be the same for each row.
Generates a map (a tuple with two arrays or a value of `Map` type, depending on the arguments), where keys are a series of numbers, from minimum to maximum keys (or the `max` argument if it is specified) taken from the map with a step size of one, and corresponding values. If the value is not specified for the key, then it uses the default value in the resulting map. For repeated keys, only the first value (in order of appearing) gets associated with the key.
**Arguments**
Mapped arrays:
- `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
- `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
or
- `map` — Map with integer keys. [Map](../../sql-reference/data-types/map.md).
**Returned value**
- Returns a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys.
- Depending on the arguments, returns a [map](../../sql-reference/data-types/map.md) or a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and values for the corresponding keys.
**Example**
Query:
Query with mapped arrays:
```sql
select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type;
@ -178,13 +209,27 @@ Result:
└──────────────────────────────┴───────────────────────────────────┘
```
Query with `Map` type:
```sql
SELECT mapPopulateSeries(map(1, 10, 5, 20), 6);
```
Result:
```text
┌─mapPopulateSeries(map(1, 10, 5, 20), 6)─┐
│ {1:10,2:0,3:0,4:0,5:20,6:0} │
└─────────────────────────────────────────┘
```
## mapContains {#mapcontains}
Determines whether the `map` contains the `key` parameter.
**Syntax**
``` sql
```sql
mapContains(map, key)
```

View File

@ -5,9 +5,6 @@ toc_title: Window Functions
# [experimental] Window Functions
!!! warning "Warning"
This is an experimental feature that is currently in development and is not ready for general use. It will change in unpredictable backwards-incompatible ways in the future releases. Set `allow_experimental_window_functions = 1` to enable it.
ClickHouse supports the standard grammar for defining windows and window functions. The following features are currently supported:
| Feature | Support or workaround |

View File

@ -87,7 +87,7 @@ toc_title: "Введение"
Виртуальный столбец — это неотъемлемый атрибут движка таблиц, определенный в исходном коде движка.
Виртуальные столбцы не надо указывать в запросе `CREATE TABLE` и их не отображаются в результатах запросов `SHOW CREATE TABLE` и `DESCRIBE TABLE`. Также виртуальные столбцы доступны только для чтения, поэтому вы не можете вставлять в них данные.
Виртуальные столбцы не надо указывать в запросе `CREATE TABLE` и они не отображаются в результатах запросов `SHOW CREATE TABLE` и `DESCRIBE TABLE`. Также виртуальные столбцы доступны только для чтения, поэтому вы не можете вставлять в них данные.
Чтобы получить данные из виртуального столбца, необходимо указать его название в запросе `SELECT`. `SELECT *` не отображает данные из виртуальных столбцов.

View File

@ -134,7 +134,7 @@ default
- `regexp` шаблон имени метрики.
- `age` минимальный возраст данных в секундах.
- `precision` точность определения возраста данных в секундах. Должен быть делителем для 86400 (количество секунд в сутках).
- `function` имя агрегирующей функции, которую следует применить к данным, чей возраст оказался в интервале `[age, age + precision]`.
- `function` имя агрегирующей функции, которую следует применить к данным, чей возраст оказался в интервале `[age, age + precision]`. Допустимые функции: min/max/any/avg. Avg вычисляется неточно, как среднее от средних.
### Пример конфигурации {#configuration-example}
@ -171,3 +171,6 @@ default
</graphite_rollup>
```
!!! warning "Внимание"
Прореживание данных производится во время слияний. Обычно для старых партиций слияния не запускаются, поэтому для прореживания надо инициировать незапланированное слияние, используя [optimize](../../../sql-reference/statements/optimize/). Или использовать дополнительные инструменты, например [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer).

View File

@ -111,7 +111,7 @@ toc_title: "Визуальные интерфейсы от сторонних р
### DataGrip {#datagrip}
[DataGrip](https://www.jetbrains.com/datagrip/) — это IDE для баз данных о JetBrains с выделенной поддержкой ClickHouse. Он также встроен в другие инструменты на основе IntelliJ: PyCharm, IntelliJ IDEA, GoLand, PhpStorm и другие.
[DataGrip](https://www.jetbrains.com/datagrip/) — это IDE для баз данных от JetBrains с выделенной поддержкой ClickHouse. Он также встроен в другие инструменты на основе IntelliJ: PyCharm, IntelliJ IDEA, GoLand, PhpStorm и другие.
Основные возможности:

View File

@ -2088,3 +2088,52 @@ SELECT tcpPort();
- [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port)
## currentProfiles {#current-profiles}
Возвращает список [профилей настроек](../../operations/access-rights.md#settings-profiles-management) для текущего пользователя.
Для изменения текущего профиля настроек может быть использована команда SET PROFILE. Если команда `SET PROFILE` не применялась, функция возвращает профили, указанные при определении текущего пользователя (см. [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)).
**Синтаксис**
``` sql
currentProfiles()
```
**Возвращаемое значение**
- Список профилей настроек для текущего пользователя.
Тип: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
## enabledProfiles {#enabled-profiles}
Возвращает профили настроек, назначенные пользователю как явно, так и неявно. Явно назначенные профили — это те же профили, которые возвращает функция [currentProfiles](#current-profiles). Неявно назначенные профили включают родительские профили других назначенных профилей; профили, назначенные с помощью предоставленных ролей; профили, назначенные с помощью собственных настроек; основной профиль по умолчанию (см. секцию `default_profile` в основном конфигурационном файле сервера).
**Синтаксис**
``` sql
enabledProfiles()
```
**Возвращаемое значение**
- Список доступных профилей для текущего пользователя.
Тип: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
## defaultProfiles {#default-profiles}
Возвращает все профили, указанные при объявлении текущего пользователя (см. [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)).
**Синтаксис**
``` sql
defaultProfiles()
```
**Возвращаемое значение**
- Список профилей по умолчанию.
Тип: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).

View File

@ -197,7 +197,7 @@ private:
std::unique_ptr<ShellCommand> pager_cmd;
/// The user can specify to redirect query output to a file.
std::optional<WriteBufferFromFile> out_file_buf;
std::unique_ptr<WriteBuffer> out_file_buf;
BlockOutputStreamPtr block_out_stream;
/// The user could specify special file for server logs (stderr by default)
@ -1452,7 +1452,12 @@ private:
"Error while reconnecting to the server: {}\n",
getCurrentExceptionMessage(true));
assert(!connection->isConnected());
// The reconnection might fail, but we'll still be connected
// in the sense of `connection->isConnected() = true`,
// in case when the requested database doesn't exist.
// Disconnect manually now, so that the following code doesn't
// have any doubts, and the connection state is predictable.
connection->disconnect();
}
}
@ -2238,8 +2243,11 @@ private:
const auto & out_file_node = query_with_output->out_file->as<ASTLiteral &>();
const auto & out_file = out_file_node.value.safeGet<std::string>();
out_file_buf.emplace(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT);
out_buf = &*out_file_buf;
out_file_buf = wrapWriteBufferWithCompressionMethod(
std::make_unique<WriteBufferFromFile>(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT),
chooseCompressionMethod(out_file, ""),
/* compression level = */ 3
);
// We are writing to file, so default format is the same as in non-interactive mode.
if (is_interactive && is_default_format)
@ -2259,9 +2267,9 @@ private:
/// It is not clear how to write progress with parallel formatting. It may increase code complexity significantly.
if (!need_render_progress)
block_out_stream = context->getOutputStreamParallelIfPossible(current_format, *out_buf, block);
block_out_stream = context->getOutputStreamParallelIfPossible(current_format, out_file_buf ? *out_file_buf : *out_buf, block);
else
block_out_stream = context->getOutputStream(current_format, *out_buf, block);
block_out_stream = context->getOutputStream(current_format, out_file_buf ? *out_file_buf : *out_buf, block);
block_out_stream->writePrefix();
}

View File

@ -12,8 +12,8 @@ namespace DB
Poco::URI uri{request.getURI()};
LOG_DEBUG(log, "Request URI: {}", uri.toString());
if (uri == "/ping" && request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
return std::make_unique<PingHandler>(keep_alive_timeout);
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
return std::make_unique<LibraryExistsHandler>(keep_alive_timeout, getContext());
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
return std::make_unique<LibraryRequestHandler>(keep_alive_timeout, getContext());

View File

@ -17,8 +17,24 @@
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_REQUEST_PARAMETER;
}
namespace
{
/// Reports a request-level failure both to the HTTP client and to the log.
/// Sets the response status to HTTP_INTERNAL_SERVER_ERROR, writes `message` as the
/// response body when the response has not been sent yet, and logs `message` as a
/// warning under the "LibraryBridge" logger.
void processError(HTTPServerResponse & response, const std::string & message)
{
/// The status is assigned before the sent() check below.
/// NOTE(review): presumably this has no effect on a response that was already sent — confirm.
response.setStatusAndReason(HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
/// Only start writing the body if nothing has been sent so far.
if (!response.sent())
*response.send() << message << std::endl;
LOG_WARNING(&Poco::Logger::get("LibraryBridge"), message);
}
std::shared_ptr<Block> parseColumns(std::string && column_string)
{
auto sample_block = std::make_shared<Block>();
@ -30,9 +46,8 @@ namespace
return sample_block;
}
std::vector<uint64_t> parseIdsFromBinary(const std::string & ids_string)
std::vector<uint64_t> parseIdsFromBinary(ReadBuffer & buf)
{
ReadBufferFromString buf(ids_string);
std::vector<uint64_t> ids;
readVectorBinary(ids, buf);
return ids;
@ -67,13 +82,36 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
std::string method = params.get("method");
std::string dictionary_id = params.get("dictionary_id");
LOG_TRACE(log, "Library method: '{}', dictionary id: {}", method, dictionary_id);
LOG_TRACE(log, "Library method: '{}', dictionary id: {}", method, dictionary_id);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
try
{
if (method == "libNew")
bool lib_new = (method == "libNew");
if (method == "libClone")
{
if (!params.has("from_dictionary_id"))
{
processError(response, "No 'from_dictionary_id' in request URL");
return;
}
std::string from_dictionary_id = params.get("from_dictionary_id");
bool cloned = false;
cloned = SharedLibraryHandlerFactory::instance().clone(from_dictionary_id, dictionary_id);
if (cloned)
{
writeStringBinary("1", out);
}
else
{
LOG_TRACE(log, "Cannot clone from dictionary with id: {}, will call libNew instead");
lib_new = true;
}
}
if (lib_new)
{
auto & read_buf = request.getStream();
params.read(read_buf);
@ -92,6 +130,8 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
std::string library_path = params.get("library_path");
const auto & settings_string = params.get("library_settings");
LOG_DEBUG(log, "Parsing library settings from binary string");
std::vector<std::string> library_settings = parseNamesFromBinary(settings_string);
/// Needed for library dictionary
@ -102,6 +142,8 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
}
const auto & attributes_string = params.get("attributes_names");
LOG_DEBUG(log, "Parsing attributes names from binary string");
std::vector<std::string> attributes_names = parseNamesFromBinary(attributes_string);
/// Needed to parse block from binary string format
@ -140,54 +182,63 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
SharedLibraryHandlerFactory::instance().create(dictionary_id, library_path, library_settings, sample_block_with_nulls, attributes_names);
writeStringBinary("1", out);
}
else if (method == "libClone")
{
if (!params.has("from_dictionary_id"))
{
processError(response, "No 'from_dictionary_id' in request URL");
return;
}
std::string from_dictionary_id = params.get("from_dictionary_id");
LOG_TRACE(log, "Calling libClone from {} to {}", from_dictionary_id, dictionary_id);
SharedLibraryHandlerFactory::instance().clone(from_dictionary_id, dictionary_id);
writeStringBinary("1", out);
}
else if (method == "libDelete")
{
SharedLibraryHandlerFactory::instance().remove(dictionary_id);
auto deleted = SharedLibraryHandlerFactory::instance().remove(dictionary_id);
/// Do not throw, a warning is ok.
if (!deleted)
LOG_WARNING(log, "Cannot delete library for with dictionary id: {}, because such id was not found.", dictionary_id);
writeStringBinary("1", out);
}
else if (method == "isModified")
{
auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
if (!library_handler)
throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER, "Not found dictionary with id: {}", dictionary_id);
bool res = library_handler->isModified();
writeStringBinary(std::to_string(res), out);
}
else if (method == "supportsSelectiveLoad")
{
auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
if (!library_handler)
throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER, "Not found dictionary with id: {}", dictionary_id);
bool res = library_handler->supportsSelectiveLoad();
writeStringBinary(std::to_string(res), out);
}
else if (method == "loadAll")
{
auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
if (!library_handler)
throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER, "Not found dictionary with id: {}", dictionary_id);
const auto & sample_block = library_handler->getSampleBlock();
LOG_DEBUG(log, "Calling loadAll() for dictionary id: {}", dictionary_id);
auto input = library_handler->loadAll();
LOG_DEBUG(log, "Started sending result data for dictionary id: {}", dictionary_id);
BlockOutputStreamPtr output = FormatFactory::instance().getOutputStream(FORMAT, out, sample_block, getContext());
copyData(*input, *output);
}
else if (method == "loadIds")
{
LOG_DEBUG(log, "Getting diciontary ids for dictionary with id: {}", dictionary_id);
String ids_string;
readString(ids_string, request.getStream());
std::vector<uint64_t> ids = parseIdsFromBinary(ids_string);
std::vector<uint64_t> ids = parseIdsFromBinary(request.getStream());
auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
if (!library_handler)
throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER, "Not found dictionary with id: {}", dictionary_id);
const auto & sample_block = library_handler->getSampleBlock();
LOG_DEBUG(log, "Calling loadIds() for dictionary id: {}", dictionary_id);
auto input = library_handler->loadIds(ids);
LOG_DEBUG(log, "Started sending result data for dictionary id: {}", dictionary_id);
BlockOutputStreamPtr output = FormatFactory::instance().getOutputStream(FORMAT, out, sample_block, getContext());
copyData(*input, *output);
}
@ -219,8 +270,14 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
auto block = reader->read();
auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
if (!library_handler)
throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER, "Not found dictionary with id: {}", dictionary_id);
const auto & sample_block = library_handler->getSampleBlock();
LOG_DEBUG(log, "Calling loadKeys() for dictionary id: {}", dictionary_id);
auto input = library_handler->loadKeys(block.getColumns());
LOG_DEBUG(log, "Started sending result data for dictionary id: {}", dictionary_id);
BlockOutputStreamPtr output = FormatFactory::instance().getOutputStream(FORMAT, out, sample_block, getContext());
copyData(*input, *output);
}
@ -228,8 +285,9 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
catch (...)
{
auto message = getCurrentExceptionMessage(true);
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR, message); // can't call process_error, because of too soon response sending
LOG_ERROR(log, "Failed to process request for dictionary_id: {}. Error: {}", dictionary_id, message);
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR, message); // can't call process_error, because of too soon response sending
try
{
writeStringBinary(message, out);
@ -239,8 +297,6 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
{
tryLogCurrentException(log);
}
tryLogCurrentException(log);
}
try
@ -254,24 +310,30 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
}
void LibraryRequestHandler::processError(HTTPServerResponse & response, const std::string & message)
{
response.setStatusAndReason(HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
if (!response.sent())
*response.send() << message << std::endl;
LOG_WARNING(log, message);
}
void PingHandler::handleRequest(HTTPServerRequest & /* request */, HTTPServerResponse & response)
void LibraryExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
try
{
LOG_TRACE(log, "Request URI: {}", request.getURI());
HTMLForm params(getContext()->getSettingsRef(), request);
if (!params.has("dictionary_id"))
{
processError(response, "No 'dictionary_id' in request URL");
return;
}
std::string dictionary_id = params.get("dictionary_id");
auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
String res;
if (library_handler)
res = "1";
else
res = "0";
setResponseDefaultHeaders(response, keep_alive_timeout);
const char * data = "Ok.\n";
response.sendBuffer(data, strlen(data));
LOG_TRACE(log, "Senging ping response: {} (dictionary id: {})", res, dictionary_id);
response.sendBuffer(res.data(), res.size());
}
catch (...)
{

View File

@ -22,8 +22,7 @@ class LibraryRequestHandler : public HTTPRequestHandler, WithContext
public:
LibraryRequestHandler(
size_t keep_alive_timeout_,
ContextPtr context_)
size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(&Poco::Logger::get("LibraryRequestHandler"))
, keep_alive_timeout(keep_alive_timeout_)
@ -35,18 +34,18 @@ public:
private:
static constexpr inline auto FORMAT = "RowBinary";
void processError(HTTPServerResponse & response, const std::string & message);
Poco::Logger * log;
size_t keep_alive_timeout;
};
class PingHandler : public HTTPRequestHandler
class LibraryExistsHandler : public HTTPRequestHandler, WithContext
{
public:
explicit PingHandler(size_t keep_alive_timeout_)
: keep_alive_timeout(keep_alive_timeout_)
explicit LibraryExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, keep_alive_timeout(keep_alive_timeout_)
, log(&Poco::Logger::get("LibraryRequestHandler"))
{
}
@ -54,6 +53,8 @@ public:
private:
const size_t keep_alive_timeout;
Poco::Logger * log;
};
}

View File

@ -4,12 +4,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}
SharedLibraryHandlerPtr SharedLibraryHandlerFactory::get(const std::string & dictionary_id)
{
std::lock_guard lock(mutex);
@ -18,7 +12,7 @@ SharedLibraryHandlerPtr SharedLibraryHandlerFactory::get(const std::string & dic
if (library_handler != library_handlers.end())
return library_handler->second;
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not found dictionary with id: {}", dictionary_id);
return nullptr;
}
@ -30,32 +24,32 @@ void SharedLibraryHandlerFactory::create(
const std::vector<std::string> & attributes_names)
{
std::lock_guard lock(mutex);
library_handlers[dictionary_id] = std::make_shared<SharedLibraryHandler>(library_path, library_settings, sample_block, attributes_names);
if (!library_handlers.count(dictionary_id))
library_handlers.emplace(std::make_pair(dictionary_id, std::make_shared<SharedLibraryHandler>(library_path, library_settings, sample_block, attributes_names)));
else
LOG_WARNING(&Poco::Logger::get("SharedLibraryHandlerFactory"), "Library handler with dictionary id {} already exists", dictionary_id);
}
void SharedLibraryHandlerFactory::clone(const std::string & from_dictionary_id, const std::string & to_dictionary_id)
bool SharedLibraryHandlerFactory::clone(const std::string & from_dictionary_id, const std::string & to_dictionary_id)
{
std::lock_guard lock(mutex);
auto from_library_handler = library_handlers.find(from_dictionary_id);
/// This is not supposed to happen as libClone is called from copy constructor of LibraryDictionarySource
/// object, and shared library handler of from_dictionary is removed only in its destructor.
/// And if there were no shared library handler for from_dictionary, it would have received an exception in
/// its constructor, so no libClone would be made from it.
if (from_library_handler == library_handlers.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "No shared library handler found");
return false;
/// libClone method will be called in copy constructor
library_handlers[to_dictionary_id] = std::make_shared<SharedLibraryHandler>(*from_library_handler->second);
return true;
}
void SharedLibraryHandlerFactory::remove(const std::string & dictionary_id)
bool SharedLibraryHandlerFactory::remove(const std::string & dictionary_id)
{
std::lock_guard lock(mutex);
/// libDelete is called in destructor.
library_handlers.erase(dictionary_id);
return library_handlers.erase(dictionary_id);
}

View File

@ -24,9 +24,9 @@ public:
const Block & sample_block,
const std::vector<std::string> & attributes_names);
void clone(const std::string & from_dictionary_id, const std::string & to_dictionary_id);
bool clone(const std::string & from_dictionary_id, const std::string & to_dictionary_id);
void remove(const std::string & dictionary_id);
bool remove(const std::string & dictionary_id);
private:
/// map: dict_id -> sharedLibraryHandler

View File

@ -361,23 +361,22 @@
function postImpl(posted_request_num, query)
{
/// TODO: Check if URL already contains query string (append parameters).
const user = document.getElementById('user').value;
const password = document.getElementById('password').value;
let user = document.getElementById('user').value;
let password = document.getElementById('password').value;
const server_address = document.getElementById('url').value;
let server_address = document.getElementById('url').value;
let url = server_address +
const url = server_address +
(server_address.indexOf('?') >= 0 ? '&' : '?') +
/// Ask server to allow cross-domain requests.
'?add_http_cors_header=1' +
'add_http_cors_header=1' +
'&user=' + encodeURIComponent(user) +
'&password=' + encodeURIComponent(password) +
'&default_format=JSONCompact' +
/// Safety settings to prevent results that browser cannot display.
'&max_result_rows=1000&max_result_bytes=10000000&result_overflow_mode=break';
let xhr = new XMLHttpRequest;
const xhr = new XMLHttpRequest;
xhr.open('POST', url, true);
@ -391,12 +390,12 @@
/// The query is saved in browser history (in state JSON object)
/// as well as in URL fragment identifier.
if (query != previous_query) {
let state = {
const state = {
query: query,
status: this.status,
response: this.response.length > 100000 ? null : this.response /// Lower than the browser's limit.
};
let title = "ClickHouse Query: " + query;
const title = "ClickHouse Query: " + query;
let history_url = window.location.pathname + '?user=' + encodeURIComponent(user);
if (server_address != location.origin) {

View File

@ -33,24 +33,9 @@ Poco::URI IBridgeHelper::getPingURI() const
}
bool IBridgeHelper::checkBridgeIsRunning() const
void IBridgeHelper::startBridgeSync()
{
try
{
ReadWriteBufferFromHTTP buf(
getPingURI(), Poco::Net::HTTPRequest::HTTP_GET, {}, ConnectionTimeouts::getHTTPTimeouts(getContext()));
return checkString(PING_OK_ANSWER, buf);
}
catch (...)
{
return false;
}
}
void IBridgeHelper::startBridgeSync() const
{
if (!checkBridgeIsRunning())
if (!bridgeHandShake())
{
LOG_TRACE(getLog(), "{} is not running, will try to start it", serviceAlias());
startBridge(startBridgeCommand());
@ -64,7 +49,7 @@ void IBridgeHelper::startBridgeSync() const
++counter;
LOG_TRACE(getLog(), "Checking {} is running, try {}", serviceAlias(), counter);
if (checkBridgeIsRunning())
if (bridgeHandShake())
{
started = true;
break;
@ -81,7 +66,7 @@ void IBridgeHelper::startBridgeSync() const
}
std::unique_ptr<ShellCommand> IBridgeHelper::startBridgeCommand() const
std::unique_ptr<ShellCommand> IBridgeHelper::startBridgeCommand()
{
if (startBridgeManually())
throw Exception(serviceAlias() + " is not running. Please, start it manually", ErrorCodes::EXTERNAL_SERVER_IS_NOT_RESPONDING);

View File

@ -28,16 +28,19 @@ public:
static const inline std::string MAIN_METHOD = Poco::Net::HTTPRequest::HTTP_POST;
explicit IBridgeHelper(ContextPtr context_) : WithContext(context_) {}
virtual ~IBridgeHelper() = default;
void startBridgeSync() const;
virtual ~IBridgeHelper() = default;
Poco::URI getMainURI() const;
Poco::URI getPingURI() const;
void startBridgeSync();
protected:
/// Check that the bridge is running. Can also check something else in the meantime.
virtual bool bridgeHandShake() = 0;
/// clickhouse-odbc-bridge, clickhouse-library-bridge
virtual String serviceAlias() const = 0;
@ -61,9 +64,7 @@ protected:
private:
bool checkBridgeIsRunning() const;
std::unique_ptr<ShellCommand> startBridgeCommand() const;
std::unique_ptr<ShellCommand> startBridgeCommand();
};
}

View File

@ -1,6 +1,5 @@
#include "LibraryBridgeHelper.h"
#include <IO/ReadHelpers.h>
#include <DataStreams/OneBlockInputStream.h>
#include <DataStreams/OwningBlockInputStream.h>
#include <DataStreams/formatBlock.h>
@ -8,6 +7,8 @@
#include <Processors/Formats/InputStreamFromInputFormat.h>
#include <IO/WriteBufferFromOStream.h>
#include <IO/WriteBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Formats/FormatFactory.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Common/ShellCommand.h>
@ -20,16 +21,25 @@
namespace DB
{
namespace ErrorCodes
{
extern const int EXTERNAL_LIBRARY_ERROR;
extern const int LOGICAL_ERROR;
}
LibraryBridgeHelper::LibraryBridgeHelper(
ContextPtr context_,
const Block & sample_block_,
const Field & dictionary_id_)
const Field & dictionary_id_,
const LibraryInitData & library_data_)
: IBridgeHelper(context_->getGlobalContext())
, log(&Poco::Logger::get("LibraryBridgeHelper"))
, sample_block(sample_block_)
, config(context_->getConfigRef())
, http_timeout(context_->getGlobalContext()->getSettingsRef().http_receive_timeout.value)
, library_data(library_data_)
, dictionary_id(dictionary_id_)
, http_timeouts(ConnectionTimeouts::getHTTPTimeouts(context_))
{
bridge_port = config.getUInt("library_bridge.port", DEFAULT_PORT);
bridge_host = config.getString("library_bridge.host", DEFAULT_HOST);
@ -61,26 +71,91 @@ void LibraryBridgeHelper::startBridge(std::unique_ptr<ShellCommand> cmd) const
}
bool LibraryBridgeHelper::initLibrary(const std::string & library_path, const std::string library_settings, const std::string attributes_names)
bool LibraryBridgeHelper::bridgeHandShake()
{
startBridgeSync();
auto uri = createRequestURI(LIB_NEW_METHOD);
String result;
try
{
ReadWriteBufferFromHTTP buf(createRequestURI(PING), Poco::Net::HTTPRequest::HTTP_GET, {}, http_timeouts);
readString(result, buf);
}
catch (...)
{
return false;
}
/*
* When pinging the bridge we also pass the current dictionary_id. The bridge will check if there is such
* dictionary. It is possible that such dictionary_id is not present only in two cases:
* 1. It is dictionary source creation and initialization of library handler on bridge side did not happen yet.
* 2. Bridge crashed or restarted for some reason while server did not.
**/
if (result.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected message from library bridge: {}. Check bridge and server have the same version.", result);
UInt8 dictionary_id_exists;
auto parsed = tryParse<UInt8>(dictionary_id_exists, result);
if (!parsed || (dictionary_id_exists != 0 && dictionary_id_exists != 1))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected message from library bridge: {} ({}). Check bridge and server have the same version.",
result, parsed ? toString(dictionary_id_exists) : "failed to parse");
LOG_TRACE(log, "dictionary_id: {}, dictionary_id_exists on bridge side: {}, library confirmed to be initialized on server side: {}",
toString(dictionary_id), toString(dictionary_id_exists), library_initialized);
if (dictionary_id_exists && !library_initialized)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Library was not initialized, but bridge responded to already have dictionary id: {}", dictionary_id);
/// Here we want to tell the bridge to recreate the library handler for the current dictionary,
/// because it reported that it has lost it, although we know that it has already been created. (This is a direct result of a bridge crash.)
if (!dictionary_id_exists && library_initialized)
{
LOG_WARNING(log, "Library bridge does not have library handler with dictionaty id: {}. It will be reinitialized.", dictionary_id);
bool reinitialized = false;
try
{
auto uri = createRequestURI(LIB_NEW_METHOD);
reinitialized = executeRequest(uri, getInitLibraryCallback());
}
catch (...)
{
tryLogCurrentException(log);
return false;
}
if (!reinitialized)
throw Exception(ErrorCodes::EXTERNAL_LIBRARY_ERROR,
"Failed to reinitialize library handler on bridge side for dictionary with id: {}", dictionary_id);
}
return true;
}
ReadWriteBufferFromHTTP::OutStreamCallback LibraryBridgeHelper::getInitLibraryCallback() const
{
/// Sample block must contain null values
WriteBufferFromOwnString out;
auto output_stream = getContext()->getOutputStream(LibraryBridgeHelper::DEFAULT_FORMAT, out, sample_block);
formatBlock(output_stream, sample_block);
auto block_string = out.str();
auto out_stream_callback = [library_path, library_settings, attributes_names, block_string, this](std::ostream & os)
return [block_string, this](std::ostream & os)
{
os << "library_path=" << escapeForFileName(library_path) << "&";
os << "library_settings=" << escapeForFileName(library_settings) << "&";
os << "attributes_names=" << escapeForFileName(attributes_names) << "&";
os << "library_path=" << escapeForFileName(library_data.library_path) << "&";
os << "library_settings=" << escapeForFileName(library_data.library_settings) << "&";
os << "attributes_names=" << escapeForFileName(library_data.dict_attributes) << "&";
os << "sample_block=" << escapeForFileName(sample_block.getNamesAndTypesList().toString()) << "&";
os << "null_values=" << escapeForFileName(block_string);
};
return executeRequest(uri, out_stream_callback);
}
/// Asks the bridge to create a library handler for this dictionary (the `libNew` request).
/// Returns the boolean answer read back from the bridge and stores it in `library_initialized`.
bool LibraryBridgeHelper::initLibrary()
{
    /// Run the start/handshake sequence first so that there is a bridge to talk to.
    startBridgeSync();

    /// The request body carries the library path, settings, attribute names and sample block.
    const auto init_uri = createRequestURI(LIB_NEW_METHOD);

    /// The outcome is remembered: bridgeHandShake() consults this flag to decide
    /// whether a handler missing on the bridge side has to be re-created.
    library_initialized = executeRequest(init_uri, getInitLibraryCallback());
    return library_initialized;
}
@ -89,15 +164,23 @@ bool LibraryBridgeHelper::cloneLibrary(const Field & other_dictionary_id)
startBridgeSync();
auto uri = createRequestURI(LIB_CLONE_METHOD);
uri.addQueryParameter("from_dictionary_id", toString(other_dictionary_id));
return executeRequest(uri);
/// We also pass initialization settings in order to create a library handler
/// in case from_dictionary_id does not exist in bridge side (possible in case of bridge crash).
library_initialized = executeRequest(uri, getInitLibraryCallback());
return library_initialized;
}
bool LibraryBridgeHelper::removeLibrary()
{
startBridgeSync();
auto uri = createRequestURI(LIB_DELETE_METHOD);
return executeRequest(uri);
/// Do not force a bridge restart if it is not running in case of removeLibrary,
/// because in this case after restart it will not have this dictionary id in memory anyway.
if (bridgeHandShake())
{
auto uri = createRequestURI(LIB_DELETE_METHOD);
return executeRequest(uri);
}
return true;
}
@ -125,10 +208,12 @@ BlockInputStreamPtr LibraryBridgeHelper::loadAll()
}
BlockInputStreamPtr LibraryBridgeHelper::loadIds(const std::string ids_string)
BlockInputStreamPtr LibraryBridgeHelper::loadIds(const std::vector<uint64_t> & ids)
{
startBridgeSync();
auto uri = createRequestURI(LOAD_IDS_METHOD);
uri.addQueryParameter("ids_num", toString(ids.size())); /// Not used parameter, but helpful
auto ids_string = getDictIdsString(ids);
return loadBase(uri, [ids_string](std::ostream & os) { os << ids_string; });
}
@ -149,13 +234,13 @@ BlockInputStreamPtr LibraryBridgeHelper::loadKeys(const Block & requested_block)
}
bool LibraryBridgeHelper::executeRequest(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback)
bool LibraryBridgeHelper::executeRequest(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback) const
{
ReadWriteBufferFromHTTP buf(
uri,
Poco::Net::HTTPRequest::HTTP_POST,
std::move(out_stream_callback),
ConnectionTimeouts::getHTTPTimeouts(getContext()));
http_timeouts);
bool res;
readBoolText(res, buf);
@ -169,7 +254,7 @@ BlockInputStreamPtr LibraryBridgeHelper::loadBase(const Poco::URI & uri, ReadWri
uri,
Poco::Net::HTTPRequest::HTTP_POST,
std::move(out_stream_callback),
ConnectionTimeouts::getHTTPTimeouts(getContext()),
http_timeouts,
0,
Poco::Net::HTTPBasicCredentials{},
DBMS_DEFAULT_BUFFER_SIZE,
@ -179,4 +264,13 @@ BlockInputStreamPtr LibraryBridgeHelper::loadBase(const Poco::URI & uri, ReadWri
return std::make_shared<OwningBlockInputStream<ReadWriteBufferFromHTTP>>(input_stream, std::move(read_buf_ptr));
}
/// Serializes the list of ids with writeVectorBinary() into an in-memory buffer
/// and returns the accumulated bytes as a string (used as the body of the loadIds request).
String LibraryBridgeHelper::getDictIdsString(const std::vector<UInt64> & ids)
{
    WriteBufferFromOwnString ids_buffer;
    writeVectorBinary(ids, ids_buffer);
    return ids_buffer.str();
}
}

View File

@ -15,11 +15,18 @@ class LibraryBridgeHelper : public IBridgeHelper
{
public:
/// Parameters required to initialize a library handler on the bridge side.
/// The helper keeps a copy so that the same initialization request can be sent again later.
struct LibraryInitData
{
/// Sent to the bridge as the `library_path` request parameter.
String library_path;
/// Sent to the bridge as the `library_settings` request parameter.
String library_settings;
/// Sent to the bridge as the `attributes_names` request parameter.
String dict_attributes;
};
static constexpr inline size_t DEFAULT_PORT = 9012;
LibraryBridgeHelper(ContextPtr context_, const Block & sample_block, const Field & dictionary_id_);
LibraryBridgeHelper(ContextPtr context_, const Block & sample_block, const Field & dictionary_id_, const LibraryInitData & library_data_);
bool initLibrary(const std::string & library_path, std::string library_settings, std::string attributes_names);
bool initLibrary();
bool cloneLibrary(const Field & other_dictionary_id);
@ -31,16 +38,19 @@ public:
BlockInputStreamPtr loadAll();
BlockInputStreamPtr loadIds(std::string ids_string);
BlockInputStreamPtr loadIds(const std::vector<uint64_t> & ids);
BlockInputStreamPtr loadKeys(const Block & requested_block);
BlockInputStreamPtr loadBase(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = {});
bool executeRequest(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = {});
bool executeRequest(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = {}) const;
LibraryInitData getLibraryData() const { return library_data; }
protected:
bool bridgeHandShake() override;
void startBridge(std::unique_ptr<ShellCommand> cmd) const override;
String serviceAlias() const override { return "clickhouse-library-bridge"; }
@ -61,6 +71,8 @@ protected:
Poco::URI createBaseURI() const override;
ReadWriteBufferFromHTTP::OutStreamCallback getInitLibraryCallback() const;
private:
static constexpr inline auto LIB_NEW_METHOD = "libNew";
static constexpr inline auto LIB_CLONE_METHOD = "libClone";
@ -69,18 +81,24 @@ private:
static constexpr inline auto LOAD_IDS_METHOD = "loadIds";
static constexpr inline auto LOAD_KEYS_METHOD = "loadKeys";
static constexpr inline auto IS_MODIFIED_METHOD = "isModified";
static constexpr inline auto PING = "ping";
static constexpr inline auto SUPPORTS_SELECTIVE_LOAD_METHOD = "supportsSelectiveLoad";
Poco::URI createRequestURI(const String & method) const;
static String getDictIdsString(const std::vector<UInt64> & ids);
Poco::Logger * log;
const Block sample_block;
const Poco::Util::AbstractConfiguration & config;
const Poco::Timespan http_timeout;
LibraryInitData library_data;
Field dictionary_id;
std::string bridge_host;
size_t bridge_port;
bool library_initialized = false;
ConnectionTimeouts http_timeouts;
};
}

View File

@ -60,20 +60,33 @@ public:
static constexpr inline auto SCHEMA_ALLOWED_HANDLER = "/schema_allowed";
XDBCBridgeHelper(
ContextPtr context_,
Poco::Timespan http_timeout_,
const std::string & connection_string_)
: IXDBCBridgeHelper(context_->getGlobalContext())
, log(&Poco::Logger::get(BridgeHelperMixin::getName() + "BridgeHelper"))
, connection_string(connection_string_)
, http_timeout(http_timeout_)
, config(context_->getGlobalContext()->getConfigRef())
{
bridge_host = config.getString(BridgeHelperMixin::configPrefix() + ".host", DEFAULT_HOST);
bridge_port = config.getUInt(BridgeHelperMixin::configPrefix() + ".port", DEFAULT_PORT);
}
ContextPtr context_,
Poco::Timespan http_timeout_,
const std::string & connection_string_)
: IXDBCBridgeHelper(context_->getGlobalContext())
, log(&Poco::Logger::get(BridgeHelperMixin::getName() + "BridgeHelper"))
, connection_string(connection_string_)
, http_timeout(http_timeout_)
, config(context_->getGlobalContext()->getConfigRef())
{
bridge_host = config.getString(BridgeHelperMixin::configPrefix() + ".host", DEFAULT_HOST);
bridge_port = config.getUInt(BridgeHelperMixin::configPrefix() + ".port", DEFAULT_PORT);
}
protected:
bool bridgeHandShake() override
{
try
{
ReadWriteBufferFromHTTP buf(getPingURI(), Poco::Net::HTTPRequest::HTTP_GET, {}, ConnectionTimeouts::getHTTPTimeouts(getContext()));
return checkString(PING_OK_ANSWER, buf);
}
catch (...)
{
return false;
}
}
auto getConnectionString() const { return connection_string; }
String getName() const override { return BridgeHelperMixin::getName(); }

View File

@ -132,6 +132,10 @@ void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_size = source[0];
if (bytes_size == 0)
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_to_skip = uncompressed_size % bytes_size;
if (UInt32(2 + bytes_to_skip) > source_size)

View File

@ -502,6 +502,10 @@ void CompressionCodecDoubleDelta::doDecompressData(const char * source, UInt32 s
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_size = source[0];
if (bytes_size == 0)
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_to_skip = uncompressed_size % bytes_size;
if (UInt32(2 + bytes_to_skip) > source_size)

View File

@ -410,6 +410,10 @@ void CompressionCodecGorilla::doDecompressData(const char * source, UInt32 sourc
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_size = source[0];
if (bytes_size == 0)
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 bytes_to_skip = uncompressed_size % bytes_size;
if (UInt32(2 + bytes_to_skip) > source_size)

View File

@ -62,6 +62,7 @@ private:
namespace ErrorCodes
{
extern const int CANNOT_COMPRESS;
extern const int CANNOT_DECOMPRESS;
extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
extern const int ILLEGAL_CODEC_PARAMETER;
}
@ -93,7 +94,10 @@ UInt32 CompressionCodecLZ4::doCompressData(const char * source, UInt32 source_si
void CompressionCodecLZ4::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
{
LZ4::decompress(source, dest, source_size, uncompressed_size, lz4_stat);
bool success = LZ4::decompress(source, dest, source_size, uncompressed_size, lz4_stat);
if (!success)
throw Exception("Cannot decompress", ErrorCodes::CANNOT_DECOMPRESS);
}
void registerCodecLZ4(CompressionCodecFactory & factory)

View File

@ -412,13 +412,16 @@ template <> void inline copyOverlap<32, false>(UInt8 * op, const UInt8 *& match,
/// See also https://stackoverflow.com/a/30669632
template <size_t copy_amount, bool use_shuffle>
void NO_INLINE decompressImpl(
bool NO_INLINE decompressImpl(
const char * const source,
char * const dest,
size_t source_size,
size_t dest_size)
{
const UInt8 * ip = reinterpret_cast<const UInt8 *>(source);
UInt8 * op = reinterpret_cast<UInt8 *>(dest);
const UInt8 * const input_end = ip + source_size;
UInt8 * const output_begin = op;
UInt8 * const output_end = op + dest_size;
/// Unrolling with clang is doing >10% performance degrade.
@ -461,13 +464,19 @@ void NO_INLINE decompressImpl(
/// output: xyzHello, w
/// ^-op (we will overwrite excessive bytes on next iteration)
wildCopy<copy_amount>(op, ip, copy_end); /// Here we can write up to copy_amount - 1 bytes after buffer.
{
auto * target = std::min(copy_end, output_end);
wildCopy<copy_amount>(op, ip, target); /// Here we can write up to copy_amount - 1 bytes after buffer.
if (target == output_end)
return true;
}
ip += length;
op = copy_end;
if (copy_end >= output_end)
return;
if (unlikely(ip > input_end))
return false;
/// Get match offset.
@ -475,6 +484,9 @@ void NO_INLINE decompressImpl(
ip += 2;
const UInt8 * match = op - offset;
if (unlikely(match < output_begin))
return false;
/// Get match length.
length = token & 0x0F;
@ -515,7 +527,10 @@ void NO_INLINE decompressImpl(
copy<copy_amount>(op, match); /// copy_amount + copy_amount - 1 - 4 * 2 bytes after buffer.
if (length > copy_amount * 2)
wildCopy<copy_amount>(op + copy_amount, match + copy_amount, copy_end);
{
auto * target = std::min(copy_end, output_end);
wildCopy<copy_amount>(op + copy_amount, match + copy_amount, target);
}
op = copy_end;
}
@ -524,7 +539,7 @@ void NO_INLINE decompressImpl(
}
void decompress(
bool decompress(
const char * const source,
char * const dest,
size_t source_size,
@ -532,7 +547,7 @@ void decompress(
PerformanceStatistics & statistics [[maybe_unused]])
{
if (source_size == 0 || dest_size == 0)
return;
return true;
/// Don't run timer if the block is too small.
if (dest_size >= 32768)
@ -542,24 +557,27 @@ void decompress(
/// Run the selected method and measure time.
Stopwatch watch;
bool success = true;
if (best_variant == 0)
decompressImpl<16, true>(source, dest, dest_size);
success = decompressImpl<16, true>(source, dest, source_size, dest_size);
if (best_variant == 1)
decompressImpl<16, false>(source, dest, dest_size);
success = decompressImpl<16, false>(source, dest, source_size, dest_size);
if (best_variant == 2)
decompressImpl<8, true>(source, dest, dest_size);
success = decompressImpl<8, true>(source, dest, source_size, dest_size);
if (best_variant == 3)
decompressImpl<32, false>(source, dest, dest_size);
success = decompressImpl<32, false>(source, dest, source_size, dest_size);
watch.stop();
/// Update performance statistics.
statistics.data[best_variant].update(watch.elapsedSeconds(), dest_size);
return success;
}
else
{
decompressImpl<8, false>(source, dest, dest_size);
return decompressImpl<8, false>(source, dest, source_size, dest_size);
}
}

View File

@ -122,14 +122,14 @@ struct PerformanceStatistics
return choose_method;
}
PerformanceStatistics() {}
PerformanceStatistics(ssize_t choose_method_) : choose_method(choose_method_) {}
PerformanceStatistics() = default;
explicit PerformanceStatistics(ssize_t choose_method_) : choose_method(choose_method_) {}
};
/** This method dispatch to one of different implementations depending on performance statistics.
*/
void decompress(
bool decompress(
const char * const source,
char * const dest,
size_t source_size,

View File

@ -449,7 +449,6 @@ class IColumn;
M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \
M(Bool, allow_experimental_map_type, true, "Obsolete setting, does nothing.", 0) \
M(Bool, allow_experimental_window_functions, false, "Allow experimental window functions", 0) \
M(Bool, allow_experimental_projection_optimization, false, "Enable projection optimization when processing SELECT queries", 0) \
M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \
M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \

View File

@ -3,6 +3,7 @@
#include <Common/ProfileEvents.h>
#include <Common/CurrentThread.h>
#include <IO/WriteHelpers.h>
#include <Common/Stopwatch.h>
#include <common/sleep.h>
namespace ProfileEvents
@ -104,14 +105,18 @@ static bool handleOverflowMode(OverflowMode mode, const String & message, int co
}
}
bool ExecutionSpeedLimits::checkTimeLimit(UInt64 elapsed_ns, OverflowMode overflow_mode) const
bool ExecutionSpeedLimits::checkTimeLimit(const Stopwatch & stopwatch, OverflowMode overflow_mode) const
{
if (max_execution_time != 0
&& elapsed_ns > static_cast<UInt64>(max_execution_time.totalMicroseconds()) * 1000)
return handleOverflowMode(overflow_mode,
if (max_execution_time != 0)
{
auto elapsed_ns = stopwatch.elapsed();
if (elapsed_ns > static_cast<UInt64>(max_execution_time.totalMicroseconds()) * 1000)
return handleOverflowMode(overflow_mode,
"Timeout exceeded: elapsed " + toString(static_cast<double>(elapsed_ns) / 1000000000ULL)
+ " seconds, maximum: " + toString(max_execution_time.totalMicroseconds() / 1000000.0),
ErrorCodes::TIMEOUT_EXCEEDED);
}
return true;
}

View File

@ -3,6 +3,7 @@
#include <Poco/Timespan.h>
#include <common/types.h>
#include <DataStreams/SizeLimits.h>
#include <Common/Stopwatch.h>
namespace DB
{
@ -25,7 +26,7 @@ public:
/// Pause execution in case if speed limits were exceeded.
void throttle(size_t read_rows, size_t read_bytes, size_t total_rows_to_read, UInt64 total_elapsed_microseconds) const;
bool checkTimeLimit(UInt64 elapsed_ns, OverflowMode overflow_mode) const;
bool checkTimeLimit(const Stopwatch & stopwatch, OverflowMode overflow_mode) const;
};
}

View File

@ -201,7 +201,7 @@ void IBlockInputStream::updateExtremes(Block & block)
bool IBlockInputStream::checkTimeLimit() const
{
return limits.speed_limits.checkTimeLimit(info.total_stopwatch.elapsed(), limits.timeout_overflow_mode);
return limits.speed_limits.checkTimeLimit(info.total_stopwatch, limits.timeout_overflow_mode);
}

View File

@ -83,7 +83,7 @@ TEST(MergingSortedTest, SimpleBlockSizeTest)
EXPECT_EQ(pipe.numOutputPorts(), 3);
auto transform = std::make_shared<MergingSortedTransform>(pipe.getHeader(), pipe.numOutputPorts(), sort_description,
DEFAULT_MERGE_BLOCK_SIZE, 0, nullptr, false, true);
DEFAULT_MERGE_BLOCK_SIZE, 0, false, nullptr, false, true);
pipe.addTransform(std::move(transform));
@ -128,7 +128,7 @@ TEST(MergingSortedTest, MoreInterestingBlockSizes)
EXPECT_EQ(pipe.numOutputPorts(), 3);
auto transform = std::make_shared<MergingSortedTransform>(pipe.getHeader(), pipe.numOutputPorts(), sort_description,
DEFAULT_MERGE_BLOCK_SIZE, 0, nullptr, false, true);
DEFAULT_MERGE_BLOCK_SIZE, 0, false, nullptr, false, true);
pipe.addTransform(std::move(transform));

View File

@ -103,9 +103,11 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
const String & engine_name = engine_define->engine->name;
const UUID & uuid = create.uuid;
bool engine_may_have_arguments = engine_name == "MySQL" || engine_name == "MaterializeMySQL" || engine_name == "MaterializedMySQL" ||
engine_name == "Lazy" || engine_name == "Replicated" || engine_name == "PostgreSQL" ||
engine_name == "MaterializedPostgreSQL" || engine_name == "SQLite";
static const std::unordered_set<std::string_view> engines_with_arguments{"MySQL", "MaterializeMySQL", "MaterializedMySQL",
"Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite"};
bool engine_may_have_arguments = engines_with_arguments.contains(engine_name);
if (engine_define->engine->arguments && !engine_may_have_arguments)
throw Exception("Database engine " + engine_name + " cannot have arguments", ErrorCodes::BAD_ARGUMENTS);
@ -113,6 +115,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
engine_define->primary_key || engine_define->order_by ||
engine_define->sample_by;
bool may_have_settings = endsWith(engine_name, "MySQL") || engine_name == "Replicated" || engine_name == "MaterializedPostgreSQL";
if (has_unexpected_element || (!may_have_settings && engine_define->settings))
throw Exception("Database engine " + engine_name + " cannot have parameters, primary_key, order_by, sample_by, settings",
ErrorCodes::UNKNOWN_ELEMENT_IN_AST);
@ -233,11 +236,10 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
{
const ASTFunction * engine = engine_define->engine;
if (!engine->arguments || engine->arguments->children.size() < 4 || engine->arguments->children.size() > 5)
throw Exception(fmt::format(
"{} Database require host:port, database_name, username, password arguments "
"[, use_table_cache = 0].", engine_name),
ErrorCodes::BAD_ARGUMENTS);
/// The engine takes 4 mandatory arguments and up to 2 optional ones (`schema`, `use_table_cache`).
if (!engine->arguments || engine->arguments->children.size() < 4 || engine->arguments->children.size() > 6)
    throw Exception(ErrorCodes::BAD_ARGUMENTS,
        /// The inner quotes must be escaped: an unescaped "" ends this literal and starts an adjacent one,
        /// so both quote characters would silently disappear from the message.
        "{} Database require `host:port`, `database_name`, `username`, `password` [, `schema` = \"\", `use_table_cache` = 0].",
        engine_name);
ASTs & engine_args = engine->arguments->children;
@ -249,9 +251,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
const auto & username = safeGetLiteralValue<String>(engine_args[2], engine_name);
const auto & password = safeGetLiteralValue<String>(engine_args[3], engine_name);
String schema;
if (engine->arguments->children.size() >= 5)
schema = safeGetLiteralValue<String>(engine_args[4], engine_name);
auto use_table_cache = 0;
if (engine->arguments->children.size() == 5)
use_table_cache = safeGetLiteralValue<UInt64>(engine_args[4], engine_name);
if (engine->arguments->children.size() >= 6)
use_table_cache = safeGetLiteralValue<UInt8>(engine_args[5], engine_name);
/// Split into replicas if needed.
size_t max_addresses = context->getSettingsRef().glob_expansion_max_elements;
@ -266,7 +272,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
context->getSettingsRef().postgresql_connection_pool_wait_timeout);
return std::make_shared<DatabasePostgreSQL>(
context, metadata_path, engine_define, database_name, postgres_database_name, connection_pool, use_table_cache);
context, metadata_path, engine_define, database_name, postgres_database_name, schema, connection_pool, use_table_cache);
}
else if (engine_name == "MaterializedPostgreSQL")
{
@ -274,9 +280,9 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
if (!engine->arguments || engine->arguments->children.size() != 4)
{
throw Exception(
fmt::format("{} Database require host:port, database_name, username, password arguments ", engine_name),
ErrorCodes::BAD_ARGUMENTS);
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"{} Database require `host:port`, `database_name`, `username`, `password`.",
engine_name);
}
ASTs & engine_args = engine->arguments->children;

View File

@ -525,7 +525,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
query_context->getClientInfo().is_replicated_database_internal = true;
query_context->setCurrentDatabase(database_name);
query_context->setCurrentQueryId("");
auto txn = std::make_shared<ZooKeeperMetadataTransaction>(current_zookeeper, zookeeper_path, false);
auto txn = std::make_shared<ZooKeeperMetadataTransaction>(current_zookeeper, zookeeper_path, false, "");
query_context->initZooKeeperMetadataTransaction(txn);
return query_context;
};

View File

@ -43,7 +43,7 @@ private:
mutable std::mutex mutex;
std::condition_variable wait_current_task_change;
String current_task;
UInt32 logs_to_keep = std::numeric_limits<UInt32>::max();
std::atomic<UInt32> logs_to_keep = std::numeric_limits<UInt32>::max();
};
}

View File

@ -39,14 +39,16 @@ DatabasePostgreSQL::DatabasePostgreSQL(
const String & metadata_path_,
const ASTStorage * database_engine_define_,
const String & dbname_,
const String & postgres_dbname,
const String & postgres_dbname_,
const String & postgres_schema_,
postgres::PoolWithFailoverPtr pool_,
bool cache_tables_)
: IDatabase(dbname_)
, WithContext(context_->getGlobalContext())
, metadata_path(metadata_path_)
, database_engine_define(database_engine_define_->clone())
, dbname(postgres_dbname)
, postgres_dbname(postgres_dbname_)
, postgres_schema(postgres_schema_)
, pool(std::move(pool_))
, cache_tables(cache_tables_)
{
@ -55,12 +57,28 @@ DatabasePostgreSQL::DatabasePostgreSQL(
}
/// Builds the table identifier used in messages:
/// `<postgres_dbname>.<table_name>`, or `<postgres_dbname>.<postgres_schema>.<table_name>`
/// when a schema is configured.
String DatabasePostgreSQL::getTableNameForLogs(const String & table_name) const
{
    const bool has_schema = !postgres_schema.empty();
    return has_schema
        ? fmt::format("{}.{}.{}", postgres_dbname, postgres_schema, table_name)
        : fmt::format("{}.{}", postgres_dbname, table_name);
}
/// Returns the double-quoted table name, prefixed with the double-quoted schema
/// (separated by a dot) when `postgres_schema` is not empty.
String DatabasePostgreSQL::formatTableName(const String & table_name) const
{
    String quoted_table = doubleQuoteString(table_name);
    if (!postgres_schema.empty())
        return fmt::format("{}.{}", doubleQuoteString(postgres_schema), quoted_table);
    return quoted_table;
}
bool DatabasePostgreSQL::empty() const
{
std::lock_guard<std::mutex> lock(mutex);
auto connection_holder = pool->get();
auto tables_list = fetchPostgreSQLTablesList(connection_holder->get());
auto tables_list = fetchPostgreSQLTablesList(connection_holder->get(), postgres_schema);
for (const auto & table_name : tables_list)
if (!detached_or_dropped.count(table_name))
@ -76,7 +94,7 @@ DatabaseTablesIteratorPtr DatabasePostgreSQL::getTablesIterator(ContextPtr local
Tables tables;
auto connection_holder = pool->get();
auto table_names = fetchPostgreSQLTablesList(connection_holder->get());
auto table_names = fetchPostgreSQLTablesList(connection_holder->get(), postgres_schema);
for (const auto & table_name : table_names)
if (!detached_or_dropped.count(table_name))
@ -104,8 +122,11 @@ bool DatabasePostgreSQL::checkPostgresTable(const String & table_name) const
pqxx::result result = tx.exec(fmt::format(
"SELECT '{}'::regclass, tablename "
"FROM pg_catalog.pg_tables "
"WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema' "
"AND tablename = '{}'", table_name, table_name));
"WHERE schemaname != 'pg_catalog' AND {} "
"AND tablename = '{}'",
formatTableName(table_name),
(postgres_schema.empty() ? "schemaname != 'information_schema'" : "schemaname = " + quoteString(postgres_schema)),
formatTableName(table_name)));
}
catch (pqxx::undefined_table const &)
{
@ -151,14 +172,14 @@ StoragePtr DatabasePostgreSQL::fetchTable(const String & table_name, ContextPtr
return StoragePtr{};
auto connection_holder = pool->get();
auto columns = fetchPostgreSQLTableStructure(connection_holder->get(), doubleQuoteString(table_name)).columns;
auto columns = fetchPostgreSQLTableStructure(connection_holder->get(), formatTableName(table_name)).columns;
if (!columns)
return StoragePtr{};
auto storage = StoragePostgreSQL::create(
StorageID(database_name, table_name), pool, table_name,
ColumnsDescription{*columns}, ConstraintsDescription{}, String{}, local_context);
ColumnsDescription{*columns}, ConstraintsDescription{}, String{}, local_context, postgres_schema);
if (cache_tables)
cached_tables[table_name] = storage;
@ -182,10 +203,14 @@ void DatabasePostgreSQL::attachTable(const String & table_name, const StoragePtr
std::lock_guard<std::mutex> lock{mutex};
if (!checkPostgresTable(table_name))
throw Exception(fmt::format("Cannot attach table {}.{} because it does not exist", database_name, table_name), ErrorCodes::UNKNOWN_TABLE);
throw Exception(ErrorCodes::UNKNOWN_TABLE,
"Cannot attach PostgreSQL table {} because it does not exist in PostgreSQL",
getTableNameForLogs(table_name), database_name);
if (!detached_or_dropped.count(table_name))
throw Exception(fmt::format("Cannot attach table {}.{}. It already exists", database_name, table_name), ErrorCodes::TABLE_ALREADY_EXISTS);
throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS,
"Cannot attach PostgreSQL table {} because it already exists",
getTableNameForLogs(table_name), database_name);
if (cache_tables)
cached_tables[table_name] = storage;
@ -203,10 +228,10 @@ StoragePtr DatabasePostgreSQL::detachTable(const String & table_name)
std::lock_guard<std::mutex> lock{mutex};
if (detached_or_dropped.count(table_name))
throw Exception(fmt::format("Cannot detach table {}.{}. It is already dropped/detached", database_name, table_name), ErrorCodes::TABLE_IS_DROPPED);
throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Cannot detach table {}. It is already dropped/detached", getTableNameForLogs(table_name));
if (!checkPostgresTable(table_name))
throw Exception(fmt::format("Cannot detach table {}.{} because it does not exist", database_name, table_name), ErrorCodes::UNKNOWN_TABLE);
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Cannot detach table {}, because it does not exist", getTableNameForLogs(table_name));
if (cache_tables)
cached_tables.erase(table_name);
@ -234,10 +259,10 @@ void DatabasePostgreSQL::dropTable(ContextPtr, const String & table_name, bool /
std::lock_guard<std::mutex> lock{mutex};
if (!checkPostgresTable(table_name))
throw Exception(fmt::format("Cannot drop table {}.{} because it does not exist", database_name, table_name), ErrorCodes::UNKNOWN_TABLE);
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Cannot drop table {} because it does not exist", getTableNameForLogs(table_name));
if (detached_or_dropped.count(table_name))
throw Exception(fmt::format("Table {}.{} is already dropped/detached", database_name, table_name), ErrorCodes::TABLE_IS_DROPPED);
throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {} is already dropped/detached", getTableNameForLogs(table_name));
fs::path mark_table_removed = fs::path(getMetadataPath()) / (escapeForFileName(table_name) + suffix);
FS::createFile(mark_table_removed);
@ -281,7 +306,7 @@ void DatabasePostgreSQL::removeOutdatedTables()
{
std::lock_guard<std::mutex> lock{mutex};
auto connection_holder = pool->get();
auto actual_tables = fetchPostgreSQLTablesList(connection_holder->get());
auto actual_tables = fetchPostgreSQLTablesList(connection_holder->get(), postgres_schema);
if (cache_tables)
{
@ -334,7 +359,7 @@ ASTPtr DatabasePostgreSQL::getCreateTableQueryImpl(const String & table_name, Co
if (!storage)
{
if (throw_on_error)
throw Exception(fmt::format("PostgreSQL table {}.{} does not exist", database_name, table_name), ErrorCodes::UNKNOWN_TABLE);
throw Exception(ErrorCodes::UNKNOWN_TABLE, "PostgreSQL table {} does not exist", getTableNameForLogs(table_name));
return nullptr;
}
@ -367,9 +392,9 @@ ASTPtr DatabasePostgreSQL::getCreateTableQueryImpl(const String & table_name, Co
ASTs storage_children = ast_storage->children;
auto storage_engine_arguments = ast_storage->engine->arguments;
/// Remove extra engine argument (`use_table_cache`)
if (storage_engine_arguments->children.size() > 4)
storage_engine_arguments->children.resize(storage_engine_arguments->children.size() - 1);
/// Remove extra engine argument (`schema` and `use_table_cache`)
if (storage_engine_arguments->children.size() >= 5)
storage_engine_arguments->children.resize(4);
/// Add table_name to engine arguments
assert(storage_engine_arguments->children.size() >= 2);

View File

@ -32,7 +32,8 @@ public:
const String & metadata_path_,
const ASTStorage * database_engine_define,
const String & dbname_,
const String & postgres_dbname,
const String & postgres_dbname_,
const String & postgres_schema_,
postgres::PoolWithFailoverPtr pool_,
bool cache_tables_);
@ -69,7 +70,8 @@ protected:
private:
String metadata_path;
ASTPtr database_engine_define;
String dbname;
String postgres_dbname;
String postgres_schema;
postgres::PoolWithFailoverPtr pool;
const bool cache_tables;
@ -77,6 +79,10 @@ private:
std::unordered_set<std::string> detached_or_dropped;
BackgroundSchedulePool::TaskHolder cleaner_task;
String getTableNameForLogs(const String & table_name) const;
String formatTableName(const String & table_name) const;
bool checkPostgresTable(const String & table_name) const;
StoragePtr fetchTable(const String & table_name, ContextPtr context, const bool table_checked) const;

View File

@ -27,11 +27,12 @@ namespace ErrorCodes
template<typename T>
std::unordered_set<std::string> fetchPostgreSQLTablesList(T & tx)
std::unordered_set<std::string> fetchPostgreSQLTablesList(T & tx, const String & postgres_schema)
{
std::unordered_set<std::string> tables;
std::string query = "SELECT tablename FROM pg_catalog.pg_tables "
"WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema'";
std::string query = fmt::format("SELECT tablename FROM pg_catalog.pg_tables "
"WHERE schemaname != 'pg_catalog' AND {}",
postgres_schema.empty() ? "schemaname != 'information_schema'" : "schemaname = " + quoteString(postgres_schema));
for (auto table_name : tx.template stream<std::string>(query))
tables.insert(std::get<0>(table_name));
@ -270,10 +271,10 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(pqxx::connection & connec
}
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::connection & connection)
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::connection & connection, const String & postgres_schema)
{
pqxx::ReadTransaction tx(connection);
auto result = fetchPostgreSQLTablesList(tx);
auto result = fetchPostgreSQLTablesList(tx, postgres_schema);
tx.commit();
return result;
}
@ -290,10 +291,10 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(
bool with_primary_key, bool with_replica_identity_index);
template
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::work & tx);
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::work & tx, const String & postgres_schema);
template
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::ReadTransaction & tx);
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::ReadTransaction & tx, const String & postgres_schema);
}

View File

@ -21,7 +21,7 @@ struct PostgreSQLTableStructure
using PostgreSQLTableStructurePtr = std::unique_ptr<PostgreSQLTableStructure>;
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::connection & connection);
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::connection & connection, const String & postgres_schema);
PostgreSQLTableStructure fetchPostgreSQLTableStructure(
pqxx::connection & connection, const String & postgres_table_name, bool use_nulls = true);
@ -32,7 +32,7 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(
bool with_primary_key = false, bool with_replica_identity_index = false);
template<typename T>
std::unordered_set<std::string> fetchPostgreSQLTablesList(T & tx);
std::unordered_set<std::string> fetchPostgreSQLTablesList(T & tx, const String & postgres_schema);
}

View File

@ -41,6 +41,9 @@ LibraryDictionarySource::LibraryDictionarySource(
, sample_block{sample_block_}
, context(Context::createCopy(context_))
{
if (fs::path(path).is_relative())
path = fs::canonical(path);
if (created_from_ddl && !pathStartsWith(path, context->getDictionariesLibPath()))
throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", path, context->getDictionariesLibPath());
@ -48,17 +51,32 @@ LibraryDictionarySource::LibraryDictionarySource(
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "LibraryDictionarySource: Can't load library {}: file doesn't exist", path);
description.init(sample_block);
bridge_helper = std::make_shared<LibraryBridgeHelper>(context, description.sample_block, dictionary_id);
auto res = bridge_helper->initLibrary(path, getLibrarySettingsString(config, config_prefix + ".settings"), getDictAttributesString());
if (!res)
LibraryBridgeHelper::LibraryInitData library_data
{
.library_path = path,
.library_settings = getLibrarySettingsString(config, config_prefix + ".settings"),
.dict_attributes = getDictAttributesString()
};
bridge_helper = std::make_shared<LibraryBridgeHelper>(context, description.sample_block, dictionary_id, library_data);
if (!bridge_helper->initLibrary())
throw Exception(ErrorCodes::EXTERNAL_LIBRARY_ERROR, "Failed to create shared library from path: {}", path);
}
LibraryDictionarySource::~LibraryDictionarySource()
{
bridge_helper->removeLibrary();
try
{
bridge_helper->removeLibrary();
}
catch (...)
{
tryLogCurrentException("LibraryDictionarySource");
}
}
@ -72,8 +90,9 @@ LibraryDictionarySource::LibraryDictionarySource(const LibraryDictionarySource &
, context(other.context)
, description{other.description}
{
bridge_helper = std::make_shared<LibraryBridgeHelper>(context, description.sample_block, dictionary_id);
bridge_helper->cloneLibrary(other.dictionary_id);
bridge_helper = std::make_shared<LibraryBridgeHelper>(context, description.sample_block, dictionary_id, other.bridge_helper->getLibraryData());
if (!bridge_helper->cloneLibrary(other.dictionary_id))
throw Exception(ErrorCodes::EXTERNAL_LIBRARY_ERROR, "Failed to clone library");
}
@ -99,7 +118,7 @@ BlockInputStreamPtr LibraryDictionarySource::loadAll()
BlockInputStreamPtr LibraryDictionarySource::loadIds(const std::vector<UInt64> & ids)
{
LOG_TRACE(log, "loadIds {} size = {}", toString(), ids.size());
return bridge_helper->loadIds(getDictIdsString(ids));
return bridge_helper->loadIds(ids);
}
@ -147,14 +166,6 @@ String LibraryDictionarySource::getLibrarySettingsString(const Poco::Util::Abstr
}
/// Writes `ids` with writeVectorBinary into an in-memory buffer and returns the buffer contents.
String LibraryDictionarySource::getDictIdsString(const std::vector<UInt64> & ids)
{
    WriteBufferFromOwnString serialized_ids;
    writeVectorBinary(ids, serialized_ids);
    return serialized_ids.str();
}
String LibraryDictionarySource::getDictAttributesString()
{
std::vector<String> attributes_names(dict_struct.attributes.size());

View File

@ -70,8 +70,6 @@ public:
std::string toString() const override;
private:
static String getDictIdsString(const std::vector<UInt64> & ids);
String getDictAttributesString();
static String getLibrarySettingsString(const Poco::Util::AbstractConfiguration & config, const std::string & config_root);
@ -82,7 +80,7 @@ private:
const DictionaryStructure dict_struct;
const std::string config_prefix;
const std::string path;
std::string path;
const Field dictionary_id;
Block sample_block;

View File

@ -363,7 +363,8 @@ int DiskS3::readSchemaVersion(const String & source_bucket, const String & sourc
settings->client,
source_bucket,
source_path + SCHEMA_VERSION_OBJECT,
settings->s3_max_single_read_retries);
settings->s3_max_single_read_retries,
DBMS_DEFAULT_BUFFER_SIZE);
readIntText(version, buffer);

View File

@ -19,6 +19,7 @@ public:
virtual ~ProxyConfiguration() = default;
/// Returns proxy configuration on each HTTP request.
virtual Aws::Client::ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) = 0;
virtual void errorReport(const Aws::Client::ClientConfigurationPerRequest & config) = 0;
};
}

View File

@ -20,6 +20,7 @@ class ProxyListConfiguration : public ProxyConfiguration
public:
explicit ProxyListConfiguration(std::vector<Poco::URI> proxies_);
Aws::Client::ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) override;
void errorReport(const Aws::Client::ClientConfigurationPerRequest &) override {}
private:
/// List of configured proxies.

View File

@ -16,8 +16,10 @@ namespace DB::ErrorCodes
namespace DB::S3
{
ProxyResolverConfiguration::ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_)
: endpoint(endpoint_), proxy_scheme(std::move(proxy_scheme_)), proxy_port(proxy_port_)
ProxyResolverConfiguration::ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_
, unsigned proxy_port_, unsigned cache_ttl_)
: endpoint(endpoint_), proxy_scheme(std::move(proxy_scheme_)), proxy_port(proxy_port_), cache_ttl(cache_ttl_)
{
}
@ -25,16 +27,25 @@ Aws::Client::ClientConfigurationPerRequest ProxyResolverConfiguration::getConfig
{
LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Obtain proxy using resolver: {}", endpoint.toString());
std::unique_lock lock(cache_mutex);
std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
if (cache_ttl.count() && cache_valid && now <= cache_timestamp + cache_ttl && now >= cache_timestamp)
{
LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Use cached proxy: {}://{}:{}", Aws::Http::SchemeMapper::ToString(cached_config.proxyScheme), cached_config.proxyHost, cached_config.proxyPort);
return cached_config;
}
/// 1 second is enough for now.
/// TODO: Make timeouts configurable.
ConnectionTimeouts timeouts(
Poco::Timespan(1000000), /// Connection timeout.
Poco::Timespan(1000000), /// Send timeout.
Poco::Timespan(1000000) /// Receive timeout.
Poco::Timespan(1000000) /// Receive timeout.
);
auto session = makeHTTPSession(endpoint, timeouts);
Aws::Client::ClientConfigurationPerRequest cfg;
try
{
/// It should be just empty GET request.
@ -53,20 +64,41 @@ Aws::Client::ClientConfigurationPerRequest ProxyResolverConfiguration::getConfig
LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Use proxy: {}://{}:{}", proxy_scheme, proxy_host, proxy_port);
cfg.proxyScheme = Aws::Http::SchemeMapper::FromString(proxy_scheme.c_str());
cfg.proxyHost = proxy_host;
cfg.proxyPort = proxy_port;
cached_config.proxyScheme = Aws::Http::SchemeMapper::FromString(proxy_scheme.c_str());
cached_config.proxyHost = proxy_host;
cached_config.proxyPort = proxy_port;
cache_timestamp = std::chrono::system_clock::now();
cache_valid = true;
return cfg;
return cached_config;
}
catch (...)
{
tryLogCurrentException("AWSClient", "Failed to obtain proxy");
/// Don't use proxy if it can't be obtained.
Aws::Client::ClientConfigurationPerRequest cfg;
return cfg;
}
}
/// Receives the per-request configuration that was in use when an error was reported.
/// If it names the same proxy (scheme, host and port) as the cached one, the cache is marked invalid.
void ProxyResolverConfiguration::errorReport(const Aws::Client::ClientConfigurationPerRequest & config)
{
    /// A configuration without a proxy host cannot match the cached proxy.
    if (config.proxyHost.empty())
        return;

    std::unique_lock lock(cache_mutex);

    /// Caching is disabled (zero TTL) or there is nothing cached right now.
    const bool cache_in_use = cache_ttl.count() && cache_valid;
    if (!cache_in_use)
        return;

    const bool same_proxy = cached_config.proxyScheme == config.proxyScheme
        && cached_config.proxyHost == config.proxyHost
        && cached_config.proxyPort == config.proxyPort;

    /// Invalidate cached proxy when got error with this proxy
    if (same_proxy)
        cache_valid = false;
}
}
#endif

View File

@ -8,6 +8,8 @@
#include "ProxyConfiguration.h"
#include <mutex>
namespace DB::S3
{
/**
@ -18,8 +20,9 @@ namespace DB::S3
class ProxyResolverConfiguration : public ProxyConfiguration
{
public:
ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_);
ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_, unsigned cache_ttl_);
Aws::Client::ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) override;
void errorReport(const Aws::Client::ClientConfigurationPerRequest & config) override;
private:
/// Endpoint to obtain a proxy host.
@ -28,6 +31,12 @@ private:
const String proxy_scheme;
/// Port for obtained proxy.
const unsigned proxy_port;
std::mutex cache_mutex;
bool cache_valid = false;
std::chrono::time_point<std::chrono::system_clock> cache_timestamp;
const std::chrono::seconds cache_ttl{0};
Aws::Client::ClientConfigurationPerRequest cached_config;
};
}

View File

@ -56,11 +56,12 @@ std::shared_ptr<S3::ProxyResolverConfiguration> getProxyResolverConfiguration(
if (proxy_scheme != "http" && proxy_scheme != "https")
throw Exception("Only HTTP/HTTPS schemas allowed in proxy resolver config: " + proxy_scheme, ErrorCodes::BAD_ARGUMENTS);
auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port");
auto cache_ttl = proxy_resolver_config.getUInt(prefix + ".proxy_cache_time", 10);
LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy resolver: {}, Scheme: {}, Port: {}",
endpoint.toString(), proxy_scheme, proxy_port);
return std::make_shared<S3::ProxyResolverConfiguration>(endpoint, proxy_scheme, proxy_port);
return std::make_shared<S3::ProxyResolverConfiguration>(endpoint, proxy_scheme, proxy_port, cache_ttl);
}
std::shared_ptr<S3::ProxyListConfiguration> getProxyListConfiguration(
@ -128,8 +129,12 @@ getClient(const Poco::Util::AbstractConfiguration & config, const String & confi
auto proxy_config = getProxyConfiguration(config_prefix, config);
if (proxy_config)
{
client_configuration.perRequestConfiguration
= [proxy_config](const auto & request) { return proxy_config->getConfiguration(request); };
client_configuration.error_report
= [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); };
}
client_configuration.retryStrategy
= std::make_shared<Aws::Client::DefaultRetryStrategy>(config.getUInt(config_prefix + ".retry_attempts", 10));

View File

@ -1218,17 +1218,36 @@ public:
{
return res;
}
else if ((isColumnedAsDecimal(left_type) || isColumnedAsDecimal(right_type))
// Comparing Date and DateTime64 requires implicit conversion,
// otherwise Date is treated as number.
&& !(date_and_datetime && (isDate(left_type) || isDate(right_type))))
else if ((isColumnedAsDecimal(left_type) || isColumnedAsDecimal(right_type)))
{
// compare
if (!allowDecimalComparison(left_type, right_type) && !date_and_datetime)
throw Exception("No operation " + getName() + " between " + left_type->getName() + " and " + right_type->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
// Comparing Date and DateTime64 requires implicit conversion,
if (date_and_datetime && (isDate(left_type) || isDate(right_type)))
{
DataTypePtr common_type = getLeastSupertype({left_type, right_type});
ColumnPtr c0_converted = castColumn(col_with_type_and_name_left, common_type);
ColumnPtr c1_converted = castColumn(col_with_type_and_name_right, common_type);
return executeDecimal({c0_converted, common_type, "left"}, {c1_converted, common_type, "right"});
}
else
{
// compare
if (!allowDecimalComparison(left_type, right_type) && !date_and_datetime)
throw Exception(
"No operation " + getName() + " between " + left_type->getName() + " and " + right_type->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return executeDecimal(col_with_type_and_name_left, col_with_type_and_name_right);
}
return executeDecimal(col_with_type_and_name_left, col_with_type_and_name_right);
}
else if (date_and_datetime)
{
DataTypePtr common_type = getLeastSupertype({left_type, right_type});
ColumnPtr c0_converted = castColumn(col_with_type_and_name_left, common_type);
ColumnPtr c1_converted = castColumn(col_with_type_and_name_right, common_type);
if (!((res = executeNumLeftType<UInt32>(c0_converted.get(), c1_converted.get()))
|| (res = executeNumLeftType<UInt64>(c0_converted.get(), c1_converted.get()))))
throw Exception("Date related common types can only be UInt32 or UInt64", ErrorCodes::LOGICAL_ERROR);
return res;
}
else if (left_type->equals(*right_type))
{

View File

@ -42,6 +42,8 @@ struct MultiSearchFirstIndexImpl
}
++iteration;
}
if (iteration == 0)
std::fill(res.begin(), res.end(), 0);
}
};

View File

@ -51,6 +51,8 @@ struct MultiSearchFirstPositionImpl
}
++iteration;
}
if (iteration == 0)
std::fill(res.begin(), res.end(), 0);
}
};

View File

@ -41,6 +41,8 @@ struct MultiSearchImpl
}
++iteration;
}
if (iteration == 0)
std::fill(res.begin(), res.end(), 0);
}
};

View File

@ -113,12 +113,34 @@ namespace MultiRegexps
ScratchPtr scratch;
};
/// Wraps a factory callable for Regexps and caches its result:
/// the factory is invoked from operator() only while no Regexps value is stored yet.
class RegexpsConstructor
{
public:
    RegexpsConstructor() = default;

    /// Stores the callable that operator() will use to build the Regexps.
    void setConstructor(std::function<Regexps()> constructor_) { constructor = std::move(constructor_); }

    /// Returns a pointer to the stored Regexps, building it first if it does not exist yet.
    /// The check and the construction both happen while `mutex` is held.
    Regexps * operator()()
    {
        std::unique_lock guard(mutex);
        if (!regexp)
            regexp = constructor();
        return &*regexp;
    }

private:
    std::function<Regexps()> constructor;
    std::optional<Regexps> regexp;
    std::mutex mutex;
};
struct Pool
{
/// Mutex for finding in map.
std::mutex mutex;
/// Patterns + possible edit_distance to database and scratch.
std::map<std::pair<std::vector<String>, std::optional<UInt32>>, Regexps> storage;
std::map<std::pair<std::vector<String>, std::optional<UInt32>>, RegexpsConstructor> storage;
};
template <bool save_indices, bool CompileForEditDistance>
@ -250,15 +272,19 @@ namespace MultiRegexps
/// If not found, compile and let other threads wait.
if (known_regexps.storage.end() == it)
{
it = known_regexps.storage
.emplace(
std::pair{str_patterns, edit_distance},
constructRegexps<save_indices, CompileForEditDistance>(str_patterns, edit_distance))
.emplace(std::piecewise_construct, std::make_tuple(std::move(str_patterns), edit_distance), std::make_tuple())
.first;
/// If found, unlock and return the database.
lock.unlock();
it->second.setConstructor([&str_patterns = it->first.first, edit_distance]()
{
return constructRegexps<save_indices, CompileForEditDistance>(str_patterns, edit_distance);
});
}
return &it->second;
/// Unlock before possible construction.
lock.unlock();
return it->second();
}
}

View File

@ -1,4 +1,5 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypeArray.h>
@ -7,6 +8,7 @@
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include "Core/ColumnWithTypeAndName.h"
#include "DataTypes/DataTypeMap.h"
#include "DataTypes/IDataType.h"
namespace DB
@ -32,85 +34,211 @@ private:
bool isVariadic() const override { return true; }
bool useDefaultImplementationForConstants() const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
void checkTypes(const DataTypePtr & key_type, const DataTypePtr max_key_type) const
{
WhichDataType which_key(key_type);
if (!(which_key.isInt() || which_key.isUInt()))
{
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Keys for {} function should be of integer type (signed or unsigned)", getName());
}
if (max_key_type)
{
WhichDataType which_max_key(max_key_type);
if (which_max_key.isNullable())
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Max key argument in arguments of function " + getName() + " can not be Nullable");
if (key_type->getTypeId() != max_key_type->getTypeId())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Max key type in {} should be same as keys type", getName());
}
}
DataTypePtr getReturnTypeForTuple(const DataTypes & arguments) const
{
if (arguments.size() < 2)
throw Exception{getName() + " accepts at least two arrays for key and value", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} accepts at least two arrays for key and value", getName());
if (arguments.size() > 3)
throw Exception{"too many arguments in " + getName() + " call", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Too many arguments in {} call", getName());
const DataTypeArray * key_array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get());
const DataTypeArray * val_array_type = checkAndGetDataType<DataTypeArray>(arguments[1].get());
if (!key_array_type || !val_array_type)
throw Exception{getName() + " accepts two arrays for key and value", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} accepts two arrays for key and value", getName());
DataTypePtr keys_type = key_array_type->getNestedType();
WhichDataType which_key(keys_type);
if (!(which_key.isNativeInt() || which_key.isNativeUInt()))
{
throw Exception(
"Keys for " + getName() + " should be of native integer type (signed or unsigned)", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
const auto & key_type = key_array_type->getNestedType();
if (arguments.size() == 3)
{
DataTypePtr max_key_type = arguments[2];
WhichDataType which_max_key(max_key_type);
if (which_max_key.isNullable())
throw Exception(
"Max key argument in arguments of function " + getName() + " can not be Nullable",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (keys_type->getTypeId() != max_key_type->getTypeId())
throw Exception("Max key type in " + getName() + " should be same as keys type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
this->checkTypes(key_type, arguments[2]);
else
this->checkTypes(key_type, nullptr);
return std::make_shared<DataTypeTuple>(DataTypes{arguments[0], arguments[1]});
}
template <typename KeyType, typename ValType>
ColumnPtr execute2(ColumnPtr key_column, ColumnPtr val_column, ColumnPtr max_key_column, const DataTypeTuple & res_type) const
DataTypePtr getReturnTypeForMap(const DataTypes & arguments) const
{
MutableColumnPtr res_tuple = res_type.createColumn();
const auto * map = assert_cast<const DataTypeMap *>(arguments[0].get());
if (arguments.size() == 1)
this->checkTypes(map->getKeyType(), nullptr);
else if (arguments.size() == 2)
this->checkTypes(map->getKeyType(), arguments[1]);
else
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Too many arguments in {} call", getName());
auto * to_tuple = assert_cast<ColumnTuple *>(res_tuple.get());
auto & to_keys_arr = assert_cast<ColumnArray &>(to_tuple->getColumn(0));
auto & to_keys_data = to_keys_arr.getData();
auto & to_keys_offsets = to_keys_arr.getOffsets();
return std::make_shared<DataTypeMap>(map->getKeyType(), map->getValueType());
}
auto & to_vals_arr = assert_cast<ColumnArray &>(to_tuple->getColumn(1));
auto & to_values_data = to_vals_arr.getData();
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, getName() + " accepts at least one map or two arrays");
bool max_key_is_const = false, key_is_const = false, val_is_const = false;
if (arguments[0]->getTypeId() == TypeIndex::Array)
return getReturnTypeForTuple(arguments);
else if (arguments[0]->getTypeId() == TypeIndex::Map)
return getReturnTypeForMap(arguments);
else
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Function {} only accepts one map or arrays, but got {}",
getName(),
arguments[0]->getName());
}
const auto * keys_array = checkAndGetColumn<ColumnArray>(key_column.get());
if (!keys_array)
// Bundles references to the input and output column buffers that execute2 works on.
// Both arrays and maps have similar columns to work with (flat keys, flat values, offsets),
// but they are extracted differently: see getInOutDataFromArrays and getInOutDataFromMap.
template <typename KeyType, typename ValType>
struct ColumnsInOut
{
// inputs
// Flattened key and value elements of all rows.
const PaddedPODArray<KeyType> & in_keys_data;
const PaddedPODArray<ValType> & in_vals_data;
// Cumulative per-row end offsets into the flattened data.
// For a map argument both members refer to the same offsets array.
const IColumn::Offsets & in_key_offsets;
const IColumn::Offsets & in_val_offsets;
// Number of rows execute2 iterates over.
size_t row_count;
// When set, execute2 reads the first entry of the corresponding offsets for every row
// instead of indexing the offsets by row (the array argument was a constant column).
bool key_is_const;
bool val_is_const;
// outputs
// Flattened result keys and values; execute2 appends to both in lockstep.
PaddedPODArray<KeyType> & out_keys_data;
PaddedPODArray<ValType> & out_vals_data;
// One cumulative end offset is pushed here per processed row.
IColumn::Offsets & out_keys_offsets;
// with map argument this field will not be used (it is passed as nullptr);
// otherwise execute2 copies out_keys_offsets into it once all rows are processed.
IColumn::Offsets * out_vals_offsets;
};
template <typename KeyType, typename ValType>
ColumnsInOut<KeyType, ValType> getInOutDataFromArrays(MutableColumnPtr & res_column, ColumnPtr * arg_columns) const
{
auto * out_tuple = assert_cast<ColumnTuple *>(res_column.get());
auto & out_keys_array = assert_cast<ColumnArray &>(out_tuple->getColumn(0));
auto & out_vals_array = assert_cast<ColumnArray &>(out_tuple->getColumn(1));
const auto * key_column = arg_columns[0].get();
const auto * in_keys_array = checkAndGetColumn<ColumnArray>(key_column);
bool key_is_const = false, val_is_const = false;
if (!in_keys_array)
{
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(key_column.get());
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(key_column);
if (!const_array)
throw Exception("Expected array column, found " + key_column->getName(), ErrorCodes::ILLEGAL_COLUMN);
throw Exception(
ErrorCodes::ILLEGAL_COLUMN, "Expected array column in function {}, found {}", getName(), key_column->getName());
keys_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
in_keys_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
key_is_const = true;
}
const auto * values_array = checkAndGetColumn<ColumnArray>(val_column.get());
if (!values_array)
const auto * val_column = arg_columns[1].get();
const auto * in_values_array = checkAndGetColumn<ColumnArray>(val_column);
if (!in_values_array)
{
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(val_column.get());
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(val_column);
if (!const_array)
throw Exception("Expected array column, found " + val_column->getName(), ErrorCodes::ILLEGAL_COLUMN);
throw Exception(
ErrorCodes::ILLEGAL_COLUMN, "Expected array column in function {}, found {}", getName(), val_column->getName());
values_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
in_values_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
val_is_const = true;
}
if (!keys_array || !values_array)
if (!in_keys_array || !in_values_array)
/* something went wrong */
throw Exception{"Illegal columns in arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName());
const auto & in_keys_data = assert_cast<const ColumnVector<KeyType> &>(in_keys_array->getData()).getData();
const auto & in_values_data = assert_cast<const ColumnVector<ValType> &>(in_values_array->getData()).getData();
const auto & in_keys_offsets = in_keys_array->getOffsets();
const auto & in_vals_offsets = in_values_array->getOffsets();
auto & out_keys_data = assert_cast<ColumnVector<KeyType> &>(out_keys_array.getData()).getData();
auto & out_vals_data = assert_cast<ColumnVector<ValType> &>(out_vals_array.getData()).getData();
auto & out_keys_offsets = out_keys_array.getOffsets();
size_t row_count = key_is_const ? in_values_array->size() : in_keys_array->size();
IColumn::Offsets * out_vals_offsets = &out_vals_array.getOffsets();
return {
in_keys_data,
in_values_data,
in_keys_offsets,
in_vals_offsets,
row_count,
key_is_const,
val_is_const,
out_keys_data,
out_vals_data,
out_keys_offsets,
out_vals_offsets};
}
// Collects the input/output buffers for the form of the function that takes a single Map argument.
// Keys and values come from the tuple nested inside the map column, offsets from its nested array.
// The same offsets are reported for keys and values, neither side is marked constant,
// and no separate output offsets for values are provided (nullptr).
template <typename KeyType, typename ValType>
ColumnsInOut<KeyType, ValType> getInOutDataFromMap(MutableColumnPtr & res_column, ColumnPtr * arg_columns) const
{
    using KeyColumn = ColumnVector<KeyType>;
    using ValColumn = ColumnVector<ValType>;

    // Source side: the map passed as the first argument.
    const auto * src_map = assert_cast<const ColumnMap *>(arg_columns[0].get());
    const auto & src_array = src_map->getNestedColumn();
    const auto & src_tuple = src_map->getNestedData();
    const auto & src_keys = assert_cast<const KeyColumn &>(src_tuple.getColumn(0)).getData();
    const auto & src_vals = assert_cast<const ValColumn &>(src_tuple.getColumn(1)).getData();
    const auto & src_offsets = src_array.getOffsets();

    // Destination side: the freshly created result map.
    auto * dst_map = assert_cast<ColumnMap *>(res_column.get());
    auto & dst_array = dst_map->getNestedColumn();
    auto & dst_tuple = dst_map->getNestedData();
    auto & dst_keys = assert_cast<KeyColumn &>(dst_tuple.getColumn(0)).getData();
    auto & dst_vals = assert_cast<ValColumn &>(dst_tuple.getColumn(1)).getData();
    auto & dst_offsets = dst_array.getOffsets();

    return ColumnsInOut<KeyType, ValType>{
        src_keys,
        src_vals,
        src_offsets, // key offsets
        src_offsets, // value offsets are the same array for a map
        src_array.size(),
        false, // key_is_const
        false, // val_is_const
        dst_keys,
        dst_vals,
        dst_offsets,
        nullptr}; // out_vals_offsets is unused for maps
}
template <typename KeyType, typename ValType>
ColumnPtr execute2(ColumnPtr * arg_columns, ColumnPtr max_key_column, const DataTypePtr & res_type) const
{
MutableColumnPtr res_column = res_type->createColumn();
bool max_key_is_const = false;
auto columns = res_column->getDataType() == TypeIndex::Tuple ? getInOutDataFromArrays<KeyType, ValType>(res_column, arg_columns)
: getInOutDataFromMap<KeyType, ValType>(res_column, arg_columns);
KeyType max_key_const{0};
@ -121,49 +249,43 @@ private:
max_key_is_const = true;
}
auto & keys_data = assert_cast<const ColumnVector<KeyType> &>(keys_array->getData()).getData();
auto & values_data = assert_cast<const ColumnVector<ValType> &>(values_array->getData()).getData();
// Original offsets
const IColumn::Offsets & key_offsets = keys_array->getOffsets();
const IColumn::Offsets & val_offsets = values_array->getOffsets();
IColumn::Offset offset{0};
size_t row_count = key_is_const ? values_array->size() : keys_array->size();
std::map<KeyType, ValType> res_map;
//Iterate through two arrays and fill result values.
for (size_t row = 0; row < row_count; ++row)
for (size_t row = 0; row < columns.row_count; ++row)
{
size_t key_offset = 0, val_offset = 0, array_size = key_offsets[0], val_array_size = val_offsets[0];
size_t key_offset = 0, val_offset = 0, items_count = columns.in_key_offsets[0], val_array_size = columns.in_val_offsets[0];
res_map.clear();
if (!key_is_const)
if (!columns.key_is_const)
{
key_offset = row > 0 ? key_offsets[row - 1] : 0;
array_size = key_offsets[row] - key_offset;
key_offset = row > 0 ? columns.in_key_offsets[row - 1] : 0;
items_count = columns.in_key_offsets[row] - key_offset;
}
if (!val_is_const)
if (!columns.val_is_const)
{
val_offset = row > 0 ? val_offsets[row - 1] : 0;
val_array_size = val_offsets[row] - val_offset;
val_offset = row > 0 ? columns.in_val_offsets[row - 1] : 0;
val_array_size = columns.in_val_offsets[row] - val_offset;
}
if (array_size != val_array_size)
throw Exception("Key and value array should have same amount of elements", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (items_count != val_array_size)
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Key and value array should have same amount of elements in function {}",
getName());
if (array_size == 0)
if (items_count == 0)
{
to_keys_offsets.push_back(offset);
columns.out_keys_offsets.push_back(offset);
continue;
}
for (size_t i = 0; i < array_size; ++i)
for (size_t i = 0; i < items_count; ++i)
{
res_map.insert({keys_data[key_offset + i], values_data[val_offset + i]});
res_map.insert({columns.in_keys_data[key_offset + i], columns.in_vals_data[val_offset + i]});
}
auto min_key = res_map.begin()->first;
@ -184,7 +306,7 @@ private:
/* no need to add anything, max key is less than first key */
if (max_key < min_key)
{
to_keys_offsets.push_back(offset);
columns.out_keys_offsets.push_back(offset);
continue;
}
}
@ -197,16 +319,16 @@ private:
KeyType key;
for (key = min_key;; ++key)
{
to_keys_data.insert(key);
columns.out_keys_data.push_back(key);
auto it = res_map.find(key);
if (it != res_map.end())
{
to_values_data.insert(it->second);
columns.out_vals_data.push_back(it->second);
}
else
{
to_values_data.insertDefault();
columns.out_vals_data.push_back(0);
}
++offset;
@ -214,80 +336,112 @@ private:
break;
}
to_keys_offsets.push_back(offset);
columns.out_keys_offsets.push_back(offset);
}
to_vals_arr.getOffsets().insert(to_keys_offsets.begin(), to_keys_offsets.end());
return res_tuple;
if (columns.out_vals_offsets)
columns.out_vals_offsets->insert(columns.out_keys_offsets.begin(), columns.out_keys_offsets.end());
return res_column;
}
template <typename KeyType>
ColumnPtr execute1(ColumnPtr key_column, ColumnPtr val_column, ColumnPtr max_key_column, const DataTypeTuple & res_type) const
ColumnPtr execute1(ColumnPtr * arg_columns, ColumnPtr max_key_column, const DataTypePtr & res_type, const DataTypePtr & val_type) const
{
const auto & val_type = (assert_cast<const DataTypeArray *>(res_type.getElements()[1].get()))->getNestedType();
switch (val_type->getTypeId())
{
case TypeIndex::Int8:
return execute2<KeyType, Int8>(key_column, val_column, max_key_column, res_type);
return execute2<KeyType, Int8>(arg_columns, max_key_column, res_type);
case TypeIndex::Int16:
return execute2<KeyType, Int16>(key_column, val_column, max_key_column, res_type);
return execute2<KeyType, Int16>(arg_columns, max_key_column, res_type);
case TypeIndex::Int32:
return execute2<KeyType, Int32>(key_column, val_column, max_key_column, res_type);
return execute2<KeyType, Int32>(arg_columns, max_key_column, res_type);
case TypeIndex::Int64:
return execute2<KeyType, Int64>(key_column, val_column, max_key_column, res_type);
return execute2<KeyType, Int64>(arg_columns, max_key_column, res_type);
case TypeIndex::Int128:
return execute2<KeyType, Int128>(arg_columns, max_key_column, res_type);
case TypeIndex::Int256:
return execute2<KeyType, Int256>(arg_columns, max_key_column, res_type);
case TypeIndex::UInt8:
return execute2<KeyType, UInt8>(key_column, val_column, max_key_column, res_type);
return execute2<KeyType, UInt8>(arg_columns, max_key_column, res_type);
case TypeIndex::UInt16:
return execute2<KeyType, UInt16>(key_column, val_column, max_key_column, res_type);
return execute2<KeyType, UInt16>(arg_columns, max_key_column, res_type);
case TypeIndex::UInt32:
return execute2<KeyType, UInt32>(key_column, val_column, max_key_column, res_type);
return execute2<KeyType, UInt32>(arg_columns, max_key_column, res_type);
case TypeIndex::UInt64:
return execute2<KeyType, UInt64>(key_column, val_column, max_key_column, res_type);
return execute2<KeyType, UInt64>(arg_columns, max_key_column, res_type);
case TypeIndex::UInt128:
return execute2<KeyType, UInt128>(arg_columns, max_key_column, res_type);
case TypeIndex::UInt256:
return execute2<KeyType, UInt256>(arg_columns, max_key_column, res_type);
default:
throw Exception{"Illegal columns in arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName());
}
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
{
auto col1 = arguments[0];
auto col2 = arguments[1];
const auto * k = assert_cast<const DataTypeArray *>(col1.type.get());
const auto * v = assert_cast<const DataTypeArray *>(col2.type.get());
/* determine output type */
const DataTypeTuple & res_type = DataTypeTuple(
DataTypes{std::make_shared<DataTypeArray>(k->getNestedType()), std::make_shared<DataTypeArray>(v->getNestedType())});
DataTypePtr res_type, key_type, val_type;
ColumnPtr max_key_column = nullptr;
ColumnPtr arg_columns[] = {arguments[0].column, nullptr};
if (arguments.size() == 3)
if (arguments[0].type->getTypeId() == TypeIndex::Array)
{
/* max key provided */
max_key_column = arguments[2].column;
key_type = assert_cast<const DataTypeArray *>(arguments[0].type.get())->getNestedType();
val_type = assert_cast<const DataTypeArray *>(arguments[1].type.get())->getNestedType();
res_type = getReturnTypeImpl(DataTypes{arguments[0].type, arguments[1].type});
arg_columns[1] = arguments[1].column;
if (arguments.size() == 3)
{
/* max key provided */
max_key_column = arguments[2].column;
}
}
else
{
assert(arguments[0].type->getTypeId() == TypeIndex::Map);
const auto * map_type = assert_cast<const DataTypeMap *>(arguments[0].type.get());
res_type = getReturnTypeImpl(DataTypes{arguments[0].type});
key_type = map_type->getKeyType();
val_type = map_type->getValueType();
if (arguments.size() == 2)
{
/* max key provided */
max_key_column = arguments[1].column;
}
}
switch (k->getNestedType()->getTypeId())
switch (key_type->getTypeId())
{
case TypeIndex::Int8:
return execute1<Int8>(col1.column, col2.column, max_key_column, res_type);
return execute1<Int8>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::Int16:
return execute1<Int16>(col1.column, col2.column, max_key_column, res_type);
return execute1<Int16>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::Int32:
return execute1<Int32>(col1.column, col2.column, max_key_column, res_type);
return execute1<Int32>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::Int64:
return execute1<Int64>(col1.column, col2.column, max_key_column, res_type);
return execute1<Int64>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::Int128:
return execute1<Int128>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::Int256:
return execute1<Int256>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::UInt8:
return execute1<UInt8>(col1.column, col2.column, max_key_column, res_type);
return execute1<UInt8>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::UInt16:
return execute1<UInt16>(col1.column, col2.column, max_key_column, res_type);
return execute1<UInt16>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::UInt32:
return execute1<UInt32>(col1.column, col2.column, max_key_column, res_type);
return execute1<UInt32>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::UInt64:
return execute1<UInt64>(col1.column, col2.column, max_key_column, res_type);
return execute1<UInt64>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::UInt128:
return execute1<UInt128>(arg_columns, max_key_column, res_type, val_type);
case TypeIndex::UInt256:
return execute1<UInt256>(arg_columns, max_key_column, res_type, val_type);
default:
throw Exception{"Illegal columns in arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName());
}
}
};
@ -296,5 +450,4 @@ void registerFunctionMapPopulateSeries(FunctionFactory & factory)
{
factory.registerFunction<FunctionMapPopulateSeries>();
}
}

View File

@ -43,7 +43,7 @@ public:
const String & bucket_,
const String & key_,
UInt64 max_single_read_retries_,
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE);
size_t buffer_size_);
bool nextImpl() override;

View File

@ -403,7 +403,6 @@ bool tryReadIntText(T & x, ReadBuffer & buf) // -V1071
* Differs in following:
* - for numbers starting with zero, parsed only zero;
* - symbol '+' before number is not supported;
* - symbols :;<=>? are parsed as some numbers.
*/
template <typename T, bool throw_on_error = true>
void readIntTextUnsafe(T & x, ReadBuffer & buf)
@ -437,15 +436,12 @@ void readIntTextUnsafe(T & x, ReadBuffer & buf)
while (!buf.eof())
{
/// This check is suddenly faster than
/// unsigned char c = *buf.position() - '0';
/// if (c < 10)
/// for unknown reason on Xeon E5645.
unsigned char value = *buf.position() - '0';
if ((*buf.position() & 0xF0) == 0x30) /// It makes sense to have this condition inside loop.
if (value < 10)
{
res *= 10;
res += *buf.position() & 0x0F;
res += value;
++buf.position();
}
else

View File

@ -89,6 +89,7 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion()
PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & clientConfiguration)
: per_request_configuration(clientConfiguration.perRequestConfiguration)
, error_report(clientConfiguration.error_report)
, timeouts(ConnectionTimeouts(
Poco::Timespan(clientConfiguration.connectTimeoutMs * 1000), /// connection timeout.
Poco::Timespan(clientConfiguration.requestTimeoutMs * 1000), /// send timeout.
@ -296,6 +297,8 @@ void PocoHTTPClient::makeRequestInternal(
else if (status_code >= 300)
{
ProfileEvents::increment(select_metric(S3MetricType::Errors));
if (status_code >= 500 && error_report)
error_report(request_configuration);
}
response->SetResponseBody(response_body_stream, session);

View File

@ -37,6 +37,8 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration
void updateSchemeAndRegion();
std::function<void(const Aws::Client::ClientConfigurationPerRequest &)> error_report;
private:
PocoHTTPClientConfiguration(const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_);
@ -95,6 +97,7 @@ private:
Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const;
std::function<Aws::Client::ClientConfigurationPerRequest(const Aws::Http::HttpRequest &)> per_request_configuration;
std::function<void(const Aws::Client::ClientConfigurationPerRequest &)> error_report;
ConnectionTimeouts timeouts;
const RemoteHostFilter & remote_host_filter;
unsigned int s3_max_redirects;

View File

@ -5,7 +5,7 @@ LIBRARY()
ADDINCL(
contrib/libs/zstd/include
contrib/restricted/fast_float
contrib/restricted/fast_float/include
)
PEERDIR(

View File

@ -4,7 +4,7 @@ LIBRARY()
ADDINCL(
contrib/libs/zstd/include
contrib/restricted/fast_float
contrib/restricted/fast_float/include
)
PEERDIR(

View File

@ -77,6 +77,7 @@ AsynchronousMetrics::AsynchronousMetrics(
, update_period(update_period_seconds)
, servers_to_start_before_tables(servers_to_start_before_tables_)
, servers(servers_)
, log(&Poco::Logger::get("AsynchronousMetrics"))
{
#if defined(OS_LINUX)
openFileIfExists("/proc/meminfo", meminfo);
@ -174,26 +175,39 @@ AsynchronousMetrics::AsynchronousMetrics(
edac.back().second = openFileIfExists(edac_uncorrectable_file);
}
if (std::filesystem::exists("/sys/block"))
{
for (const auto & device_dir : std::filesystem::directory_iterator("/sys/block"))
{
String device_name = device_dir.path().filename();
/// We are not interested in loopback devices.
if (device_name.starts_with("loop"))
continue;
std::unique_ptr<ReadBufferFromFilePRead> file = openFileIfExists(device_dir.path() / "stat");
if (!file)
continue;
block_devs[device_name] = std::move(file);
}
}
openBlockDevices();
#endif
}
#if defined(OS_LINUX)
void AsynchronousMetrics::openBlockDevices()
{
LOG_TRACE(log, "Scanning /sys/block");
if (!std::filesystem::exists("/sys/block"))
return;
block_devices_rescan_delay.restart();
block_devs.clear();
for (const auto & device_dir : std::filesystem::directory_iterator("/sys/block"))
{
String device_name = device_dir.path().filename();
/// We are not interested in loopback devices.
if (device_name.starts_with("loop"))
continue;
std::unique_ptr<ReadBufferFromFilePRead> file = openFileIfExists(device_dir.path() / "stat");
if (!file)
continue;
block_devs[device_name] = std::move(file);
}
}
#endif
void AsynchronousMetrics::start()
{
/// Update once right now, to make metrics available just after server start
@ -550,7 +564,7 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
/// Log only if difference is high. This is for convenience. The threshold is arbitrary.
if (difference >= 1048576 || difference <= -1048576)
LOG_TRACE(&Poco::Logger::get("AsynchronousMetrics"),
LOG_TRACE(log,
"MemoryTracking: was {}, peak {}, will set to {} (RSS), difference: {}",
ReadableSize(amount),
ReadableSize(peak),
@ -765,43 +779,60 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
uint64_t kb = 0;
readText(kb, *meminfo);
if (kb)
if (!kb)
{
skipWhitespaceIfAny(*meminfo, true);
assertString("kB", *meminfo);
skipToNextLineOrEOF(*meminfo);
continue;
}
uint64_t bytes = kb * 1024;
skipWhitespaceIfAny(*meminfo, true);
if (name == "MemTotal:")
{
new_values["OSMemoryTotal"] = bytes;
}
else if (name == "MemFree:")
{
/// We cannot simply name this metric "Free", because it confuses users.
/// See https://www.linuxatemyram.com/
/// For convenience we also provide OSMemoryFreePlusCached, that should be somewhat similar to OSMemoryAvailable.
/**
* Not all entries in /proc/meminfo contain the kB suffix, e.g.
* HugePages_Total: 0
* HugePages_Free: 0
* We simply skip such entries as they're not needed
*/
if (*meminfo->position() == '\n')
{
skipToNextLineOrEOF(*meminfo);
continue;
}
free_plus_cached_bytes += bytes;
new_values["OSMemoryFreeWithoutCached"] = bytes;
}
else if (name == "MemAvailable:")
{
new_values["OSMemoryAvailable"] = bytes;
}
else if (name == "Buffers:")
{
new_values["OSMemoryBuffers"] = bytes;
}
else if (name == "Cached:")
{
free_plus_cached_bytes += bytes;
new_values["OSMemoryCached"] = bytes;
}
else if (name == "SwapCached:")
{
new_values["OSMemorySwapCached"] = bytes;
}
assertString("kB", *meminfo);
uint64_t bytes = kb * 1024;
if (name == "MemTotal:")
{
new_values["OSMemoryTotal"] = bytes;
}
else if (name == "MemFree:")
{
/// We cannot simply name this metric "Free", because it confuses users.
/// See https://www.linuxatemyram.com/
/// For convenience we also provide OSMemoryFreePlusCached, that should be somewhat similar to OSMemoryAvailable.
free_plus_cached_bytes += bytes;
new_values["OSMemoryFreeWithoutCached"] = bytes;
}
else if (name == "MemAvailable:")
{
new_values["OSMemoryAvailable"] = bytes;
}
else if (name == "Buffers:")
{
new_values["OSMemoryBuffers"] = bytes;
}
else if (name == "Cached:")
{
free_plus_cached_bytes += bytes;
new_values["OSMemoryCached"] = bytes;
}
else if (name == "SwapCached:")
{
new_values["OSMemorySwapCached"] = bytes;
}
skipToNextLineOrEOF(*meminfo);
@ -877,9 +908,14 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
}
}
for (auto & [name, device] : block_devs)
/// Update the list of block devices periodically
/// (e.g. someone may add a new disk to a RAID array)
if (block_devices_rescan_delay.elapsedSeconds() >= 300)
openBlockDevices();
try
{
try
for (auto & [name, device] : block_devs)
{
device->rewind();
@ -928,10 +964,20 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
new_values["BlockQueueTimePerOp_" + name] = delta_values.time_in_queue * time_multiplier / delta_values.in_flight_ios;
}
}
}
catch (...)
{
/// Try to reopen block devices in case of error
/// (e.g. ENOENT means that some disk has been replaced, and it may appear with a new name)
try
{
openBlockDevices();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
tryLogCurrentException(__PRETTY_FUNCTION__);
}
if (net_dev)
@ -1303,9 +1349,9 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
new_values["AsynchronousMetricsCalculationTimeSpent"] = watch.elapsedSeconds();
/// Log the new metrics.
if (auto log = getContext()->getAsynchronousMetricLog())
if (auto asynchronous_metric_log = getContext()->getAsynchronousMetricLog())
{
log->addValues(new_values);
asynchronous_metric_log->addValues(new_values);
}
first_run = false;

View File

@ -3,6 +3,7 @@
#include <Interpreters/Context_fwd.h>
#include <Common/MemoryStatisticsOS.h>
#include <Common/ThreadPool.h>
#include <Common/Stopwatch.h>
#include <IO/ReadBufferFromFile.h>
#include <condition_variable>
@ -15,6 +16,11 @@
#include <unordered_map>
namespace Poco
{
class Logger;
}
namespace DB
{
@ -175,12 +181,17 @@ private:
std::unordered_map<String /* device name */, NetworkInterfaceStatValues> network_interface_stats;
Stopwatch block_devices_rescan_delay;
void openBlockDevices();
#endif
std::unique_ptr<ThreadFromGlobalPool> thread;
void run();
void update(std::chrono::system_clock::time_point update_time);
Poco::Logger * log;
};
}

View File

@ -2796,6 +2796,13 @@ ZooKeeperMetadataTransactionPtr Context::getZooKeeperMetadataTransaction() const
return metadata_transaction;
}
/// Drops the ZooKeeper metadata transaction attached to this context.
/// Debug builds assert that a transaction is attached and that a query context exists.
void Context::resetZooKeeperMetadataTransaction()
{
    assert(metadata_transaction);
    assert(hasQueryContext());
    metadata_transaction.reset();
}
PartUUIDsPtr Context::getPartUUIDs() const
{
auto lock = getLock();

View File

@ -819,6 +819,8 @@ public:
void initZooKeeperMetadataTransaction(ZooKeeperMetadataTransactionPtr txn, bool attach_existing = false);
/// Returns context of current distributed DDL query or nullptr.
ZooKeeperMetadataTransactionPtr getZooKeeperMetadataTransaction() const;
/// Removes context of current distributed DDL.
void resetZooKeeperMetadataTransaction();
PartUUIDsPtr getPartUUIDs() const;
PartUUIDsPtr getIgnoredPartUUIDs() const;

View File

@ -22,6 +22,7 @@ namespace ErrorCodes
extern const int UNKNOWN_FORMAT_VERSION;
extern const int UNKNOWN_TYPE_OF_QUERY;
extern const int INCONSISTENT_CLUSTER_DEFINITION;
extern const int LOGICAL_ERROR;
}
HostID HostID::fromString(const String & host_port_str)
@ -362,7 +363,7 @@ ContextMutablePtr DatabaseReplicatedTask::makeQueryContext(ContextPtr from_conte
query_context->getClientInfo().is_replicated_database_internal = true;
query_context->setCurrentDatabase(database->getDatabaseName());
auto txn = std::make_shared<ZooKeeperMetadataTransaction>(zookeeper, database->zookeeper_path, is_initial_query);
auto txn = std::make_shared<ZooKeeperMetadataTransaction>(zookeeper, database->zookeeper_path, is_initial_query, entry_path);
query_context->initZooKeeperMetadataTransaction(txn);
if (is_initial_query)
@ -402,7 +403,8 @@ UInt32 DDLTaskBase::getLogEntryNumber(const String & log_entry_name)
void ZooKeeperMetadataTransaction::commit()
{
assert(state == CREATED);
if (state != CREATED)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect state ({}), it's a bug", state);
state = FAILED;
current_zookeeper->multi(ops);
state = COMMITTED;

View File

@ -20,6 +20,11 @@ namespace fs = std::filesystem;
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
class ASTQueryWithOnCluster;
using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
using ClusterPtr = std::shared_ptr<Cluster>;
@ -164,13 +169,15 @@ class ZooKeeperMetadataTransaction
ZooKeeperPtr current_zookeeper;
String zookeeper_path;
bool is_initial_query;
String task_path;
Coordination::Requests ops;
public:
ZooKeeperMetadataTransaction(const ZooKeeperPtr & current_zookeeper_, const String & zookeeper_path_, bool is_initial_query_)
ZooKeeperMetadataTransaction(const ZooKeeperPtr & current_zookeeper_, const String & zookeeper_path_, bool is_initial_query_, const String & task_path_)
: current_zookeeper(current_zookeeper_)
, zookeeper_path(zookeeper_path_)
, is_initial_query(is_initial_query_)
, task_path(task_path_)
{
}
@ -180,15 +187,21 @@ public:
String getDatabaseZooKeeperPath() const { return zookeeper_path; }
String getTaskZooKeeperPath() const { return task_path; }
ZooKeeperPtr getZooKeeper() const { return current_zookeeper; }
void addOp(Coordination::RequestPtr && op)
{
assert(!isExecuted());
if (isExecuted())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add ZooKeeper operation because query is executed. It's a bug.");
ops.emplace_back(op);
}
void moveOpsTo(Coordination::Requests & other_ops)
{
assert(!isExecuted());
if (isExecuted())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add ZooKeeper operation because query is executed. It's a bug.");
std::move(ops.begin(), ops.end(), std::back_inserter(other_ops));
ops.clear();
state = COMMITTED;

View File

@ -613,18 +613,6 @@ void makeWindowDescriptionFromAST(const Context & context,
void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
{
// Convenient to check here because at least we have the Context.
if (!syntax->window_function_asts.empty() &&
!getContext()->getSettingsRef().allow_experimental_window_functions)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"The support for window functions is experimental and will change"
" in backwards-incompatible ways in the future releases. Set"
" allow_experimental_window_functions = 1 to enable it."
" While processing '{}'",
syntax->window_function_asts[0]->formatForErrorMessage());
}
// Window definitions from the WINDOW clause
const auto * select_query = query->as<ASTSelectQuery>();
if (select_query && select_query->window())

View File

@ -63,7 +63,7 @@ public:
return;
bool is_table = false;
ASTPtr subquery_or_table_name = ast; /// ASTTableIdentifier | ASTSubquery | ASTTableExpression
ASTPtr subquery_or_table_name; /// ASTTableIdentifier | ASTSubquery | ASTTableExpression
if (const auto * ast_table_expr = ast->as<ASTTableExpression>())
{
@ -76,7 +76,14 @@ public:
}
}
else if (ast->as<ASTTableIdentifier>())
{
subquery_or_table_name = ast;
is_table = true;
}
else if (ast->as<ASTSubquery>())
{
subquery_or_table_name = ast;
}
if (!subquery_or_table_name)
throw Exception("Global subquery requires subquery or table name", ErrorCodes::WRONG_GLOBAL_SUBQUERY);

View File

@ -37,7 +37,7 @@ public:
virtual size_t getTotalRowCount() const = 0;
virtual size_t getTotalByteCount() const = 0;
virtual bool alwaysReturnsEmptySet() const { return false; }
virtual bool alwaysReturnsEmptySet() const = 0;
/// StorageJoin/Dictionary is already filled. No need to call addJoinedBlock.
/// Different query plan is used for such joins.

View File

@ -8,6 +8,7 @@
#include <Common/Macros.h>
#include <Common/randomSeed.h>
#include <Common/renameat2.h>
#include <Common/hex.h>
#include <Core/Defines.h>
#include <Core/Settings.h>
@ -31,7 +32,9 @@
#include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
#include <Interpreters/executeQuery.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/DDLTask.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/InterpreterCreateQuery.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
@ -84,7 +87,6 @@ namespace ErrorCodes
extern const int UNKNOWN_DATABASE;
extern const int PATH_ACCESS_DENIED;
extern const int NOT_IMPLEMENTED;
extern const int UNKNOWN_TABLE;
}
namespace fs = std::filesystem;
@ -803,36 +805,6 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data
create.uuid = UUIDHelpers::Nil;
create.to_inner_uuid = UUIDHelpers::Nil;
}
if (create.replace_table)
{
if (database->getUUID() == UUIDHelpers::Nil)
throw Exception(ErrorCodes::INCORRECT_QUERY,
"{} query is supported only for Atomic databases",
create.create_or_replace ? "CREATE OR REPLACE TABLE" : "REPLACE TABLE");
UUID uuid_of_table_to_replace;
if (create.create_or_replace)
{
uuid_of_table_to_replace = getContext()->tryResolveStorageID(StorageID(create.database, create.table)).uuid;
if (uuid_of_table_to_replace == UUIDHelpers::Nil)
{
/// Convert to usual CREATE
create.replace_table = false;
assert(!database->isTableExist(create.table, getContext()));
}
else
create.table = "_tmp_replace_" + toString(uuid_of_table_to_replace);
}
else
{
uuid_of_table_to_replace = getContext()->resolveStorageID(StorageID(create.database, create.table)).uuid;
if (uuid_of_table_to_replace == UUIDHelpers::Nil)
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist",
backQuoteIfNeed(create.database), backQuoteIfNeed(create.table));
create.table = "_tmp_replace_" + toString(uuid_of_table_to_replace);
}
}
}
@ -1110,23 +1082,72 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
const InterpreterCreateQuery::TableProperties & properties)
{
/// Replicated database requires separate contexts for each DDL query
ContextPtr current_context = getContext();
ContextMutablePtr create_context = Context::createCopy(current_context);
create_context->setQueryContext(std::const_pointer_cast<Context>(current_context));
auto make_drop_context = [&](bool on_error) -> ContextMutablePtr
{
ContextMutablePtr drop_context = Context::createCopy(current_context);
drop_context->makeQueryContext();
if (on_error)
return drop_context;
if (auto txn = current_context->getZooKeeperMetadataTransaction())
{
/// Execute drop as separate query, because [CREATE OR] REPLACE query can be considered as
/// successfully executed after RENAME/EXCHANGE query.
drop_context->resetZooKeeperMetadataTransaction();
auto drop_txn = std::make_shared<ZooKeeperMetadataTransaction>(txn->getZooKeeper(), txn->getDatabaseZooKeeperPath(),
txn->isInitialQuery(), txn->getTaskZooKeeperPath());
drop_context->initZooKeeperMetadataTransaction(drop_txn);
}
return drop_context;
};
auto ast_drop = std::make_shared<ASTDropQuery>();
String table_to_replace_name = create.table;
bool created = false;
bool replaced = false;
try
{
[[maybe_unused]] bool done = doCreateTable(create, properties);
assert(done);
auto database = DatabaseCatalog::instance().getDatabase(create.database);
if (database->getUUID() == UUIDHelpers::Nil)
throw Exception(ErrorCodes::INCORRECT_QUERY,
"{} query is supported only for Atomic databases",
create.create_or_replace ? "CREATE OR REPLACE TABLE" : "REPLACE TABLE");
UInt64 name_hash = sipHash64(create.database + create.table);
UInt16 random_suffix = thread_local_rng();
if (auto txn = current_context->getZooKeeperMetadataTransaction())
{
/// Avoid different table name on database replicas
random_suffix = sipHash64(txn->getTaskZooKeeperPath());
}
create.table = fmt::format("_tmp_replace_{}_{}",
getHexUIntLowercase(name_hash),
getHexUIntLowercase(random_suffix));
ast_drop->table = create.table;
ast_drop->is_dictionary = create.is_dictionary;
ast_drop->database = create.database;
ast_drop->kind = ASTDropQuery::Drop;
created = true;
if (!create.replace_table)
return fillTableIfNeeded(create);
}
bool created = false;
bool renamed = false;
try
{
/// Create temporary table (random name will be generated)
[[maybe_unused]] bool done = InterpreterCreateQuery(query_ptr, create_context).doCreateTable(create, properties);
assert(done);
created = true;
/// Try fill temporary table
BlockIO fill_io = fillTableIfNeeded(create);
executeTrivialBlockIO(fill_io, getContext());
/// Replace target table with created one
auto ast_rename = std::make_shared<ASTRenameQuery>();
ASTRenameQuery::Element elem
{
@ -1135,22 +1156,44 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
};
ast_rename->elements.push_back(std::move(elem));
ast_rename->exchange = true;
ast_rename->dictionary = create.is_dictionary;
if (create.create_or_replace)
{
/// CREATE OR REPLACE TABLE
/// Will execute ordinary RENAME instead of EXCHANGE if the target table does not exist
ast_rename->rename_if_cannot_exchange = true;
ast_rename->exchange = false;
}
else
{
/// REPLACE TABLE
/// Will execute EXCHANGE query and fail if the target table does not exist
ast_rename->exchange = true;
}
InterpreterRenameQuery(ast_rename, getContext()).execute();
replaced = true;
InterpreterRenameQuery interpreter_rename{ast_rename, current_context};
interpreter_rename.execute();
renamed = true;
InterpreterDropQuery(ast_drop, getContext()).execute();
if (!interpreter_rename.renamedInsteadOfExchange())
{
/// Target table was replaced with new one, drop old table
auto drop_context = make_drop_context(false);
InterpreterDropQuery(ast_drop, drop_context).execute();
}
create.table = table_to_replace_name;
return fillTableIfNeeded(create);
return {};
}
catch (...)
{
if (created && create.replace_table && !replaced)
InterpreterDropQuery(ast_drop, getContext()).execute();
/// Drop temporary table if it was successfully created, but was not renamed to target name
if (created && !renamed)
{
auto drop_context = make_drop_context(true);
InterpreterDropQuery(ast_drop, drop_context).execute();
}
throw;
}
}

View File

@ -72,12 +72,27 @@ BlockIO InterpreterRenameQuery::execute()
BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, const RenameDescriptions & descriptions, TableGuards & ddl_guards)
{
assert(!rename.rename_if_cannot_exchange || descriptions.size() == 1);
assert(!(rename.rename_if_cannot_exchange && rename.exchange));
auto & database_catalog = DatabaseCatalog::instance();
for (const auto & elem : descriptions)
{
if (!rename.exchange)
bool exchange_tables;
if (rename.exchange)
{
exchange_tables = true;
}
else if (rename.rename_if_cannot_exchange)
{
exchange_tables = database_catalog.isTableExist(StorageID(elem.to_database_name, elem.to_table_name), getContext());
renamed_instead_of_exchange = !exchange_tables;
}
else
{
exchange_tables = false;
database_catalog.assertTableDoesntExist(StorageID(elem.to_database_name, elem.to_table_name), getContext());
}
DatabasePtr database = database_catalog.getDatabase(elem.from_database_name);
if (typeid_cast<DatabaseReplicated *>(database.get())
@ -100,7 +115,7 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c
elem.from_table_name,
*database_catalog.getDatabase(elem.to_database_name),
elem.to_table_name,
rename.exchange,
exchange_tables,
rename.dictionary);
}
}

View File

@ -55,6 +55,8 @@ public:
BlockIO execute() override;
void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr) const override;
/// Returns true if executeToTables() performed a plain RENAME instead of EXCHANGE because
/// the target table did not exist. The flag is only set for queries with
/// rename_if_cannot_exchange; otherwise it keeps its initial value (false).
bool renamedInsteadOfExchange() const { return renamed_instead_of_exchange; }
private:
BlockIO executeToTables(const ASTRenameQuery & rename, const RenameDescriptions & descriptions, TableGuards & ddl_guards);
static BlockIO executeToDatabase(const ASTRenameQuery & rename, const RenameDescriptions & descriptions);
@ -62,6 +64,7 @@ private:
AccessRightsElements getRequiredAccess() const;
ASTPtr query_ptr;
bool renamed_instead_of_exchange{false};
};
}

View File

@ -1928,11 +1928,13 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
}
}
/// If we don't have filtration, we can pushdown limit to reading stage for optimizations.
UInt64 limit = (query.hasFiltration() || query.groupBy()) ? 0 : getLimitForSorting(query, context);
if (query_info.projection)
query_info.projection->input_order_info
= query_info.projection->order_optimizer->getInputOrder(query_info.projection->desc->metadata, context);
= query_info.projection->order_optimizer->getInputOrder(query_info.projection->desc->metadata, context, limit);
else
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context);
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context, limit);
}
StreamLocalLimits limits;
@ -2290,8 +2292,14 @@ void InterpreterSelectQuery::executeOrderOptimized(QueryPlan & query_plan, Input
{
const Settings & settings = context->getSettingsRef();
const auto & query = getSelectQuery();
auto finish_sorting_step = std::make_unique<FinishSortingStep>(
query_plan.getCurrentDataStream(), input_sorting_info->order_key_prefix_descr, output_order_descr, settings.max_block_size, limit);
query_plan.getCurrentDataStream(),
input_sorting_info->order_key_prefix_descr,
output_order_descr,
settings.max_block_size,
limit,
query.hasFiltration());
query_plan.addStep(std::move(finish_sorting_step));
}

View File

@ -32,6 +32,8 @@ public:
/// Row count recorded in right_blocks.
size_t getTotalRowCount() const override { return right_blocks.row_count; }
/// Byte count recorded in right_blocks.
size_t getTotalByteCount() const override { return right_blocks.bytes; }
/// Has to be called only after setTotals()/mergeRightBlocks()
/// True when is_right or is_inner is set and min_max_right_blocks is empty.
/// NOTE(review): presumably is_right/is_inner denote the RIGHT/INNER join kinds - confirm against the class definition.
bool alwaysReturnsEmptySet() const override { return (is_right || is_inner) && min_max_right_blocks.empty(); }
BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & result_sample_block, UInt64 max_block_size) const override;

View File

@ -3,6 +3,7 @@
#include <Interpreters/QueryNormalizer.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/Context.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTSelectQuery.h>
@ -170,6 +171,24 @@ void QueryNormalizer::visitChildren(IAST * node, Data & data)
/// Don't go into query argument.
return;
}
/// For lambda functions we need to avoid replacing lambda parameters with external aliases, for example,
/// Select 1 as x, arrayMap(x -> x + 2, [1, 2, 3])
/// shouldn't be replaced with Select 1 as x, arrayMap(x -> **(1 as x)** + 2, [1, 2, 3])
Aliases extracted_aliases;
if (func_node->name == "lambda")
{
Names lambda_aliases = RequiredSourceColumnsMatcher::extractNamesFromLambda(*func_node);
for (const auto & name : lambda_aliases)
{
auto it = data.aliases.find(name);
if (it != data.aliases.end())
{
extracted_aliases.insert(data.aliases.extract(it));
}
}
}
/// We skip the first argument. We also assume that the lambda function can not have parameters.
size_t first_pos = 0;
if (func_node->name == "lambda")
@ -192,6 +211,11 @@ void QueryNormalizer::visitChildren(IAST * node, Data & data)
{
visitChildren(func_node->window_definition.get(), data);
}
for (auto & it : extracted_aliases)
{
data.aliases.insert(it);
}
}
else if (!node->as<ASTSelectQuery>())
{

View File

@ -39,7 +39,7 @@ public:
using SetOfASTs = std::set<const IAST *>;
using MapOfASTs = std::map<ASTPtr, ASTPtr>;
const Aliases & aliases;
Aliases & aliases;
const NameSet & source_columns_set;
ExtractedSettings settings;
@ -53,7 +53,7 @@ public:
/// It's Ok to have "c + 1 AS c" in queries, but not in table definition
const bool allow_self_aliases; /// for constructs like "SELECT column + 1 AS column"
Data(const Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_)
Data(Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_)
: aliases(aliases_)
, source_columns_set(source_columns_set_)
, settings(settings_)

View File

@ -74,6 +74,7 @@ namespace ErrorCodes
{
extern const int INTO_OUTFILE_NOT_ALLOWED;
extern const int QUERY_WAS_CANCELLED;
extern const int LOGICAL_ERROR;
}
@ -262,7 +263,11 @@ static void onExceptionBeforeStart(const String & query_for_logging, ContextPtr
elem.query = query_for_logging;
elem.normalized_query_hash = normalizedQueryHash<false>(query_for_logging);
// We don't calculate query_kind, databases, tables and columns when the query isn't able to start
// Try log query_kind if ast is valid
if (ast)
elem.query_kind = ast->getQueryKindString();
// We don't calculate databases, tables and columns when the query isn't able to start
elem.exception_code = getCurrentExceptionCode();
elem.exception = getCurrentExceptionMessage(false);
@ -671,7 +676,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
}
/// Common code for finish and exception callbacks
auto status_info_to_query_log = [](QueryLogElement &element, const QueryStatusInfo &info, const ASTPtr query_ast) mutable
auto status_info_to_query_log = [](QueryLogElement & element, const QueryStatusInfo & info, const ASTPtr query_ast, const ContextPtr context_ptr) mutable
{
DB::UInt64 query_time = info.elapsed_seconds * 1000000;
ProfileEvents::increment(ProfileEvents::QueryTimeMicroseconds, query_time);
@ -696,6 +701,17 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
element.thread_ids = std::move(info.thread_ids);
element.profile_counters = std::move(info.profile_counters);
const auto & factories_info = context_ptr->getQueryFactoriesInfo();
element.used_aggregate_functions = factories_info.aggregate_functions;
element.used_aggregate_function_combinators = factories_info.aggregate_function_combinators;
element.used_database_engines = factories_info.database_engines;
element.used_data_type_families = factories_info.data_type_families;
element.used_dictionaries = factories_info.dictionaries;
element.used_formats = factories_info.formats;
element.used_functions = factories_info.functions;
element.used_storages = factories_info.storages;
element.used_table_functions = factories_info.table_functions;
};
/// Also make possible for caller to log successful query finish and exception during execution.
@ -726,7 +742,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
const auto finish_time = std::chrono::system_clock::now();
elem.event_time = time_in_seconds(finish_time);
elem.event_time_microseconds = time_in_microseconds(finish_time);
status_info_to_query_log(elem, info, ast);
status_info_to_query_log(elem, info, ast, context);
auto progress_callback = context->getProgressCallback();
@ -767,20 +783,6 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
ReadableSize(elem.read_bytes / elapsed_seconds));
}
elem.thread_ids = std::move(info.thread_ids);
elem.profile_counters = std::move(info.profile_counters);
const auto & factories_info = context->getQueryFactoriesInfo();
elem.used_aggregate_functions = factories_info.aggregate_functions;
elem.used_aggregate_function_combinators = factories_info.aggregate_function_combinators;
elem.used_database_engines = factories_info.database_engines;
elem.used_data_type_families = factories_info.data_type_families;
elem.used_dictionaries = factories_info.dictionaries;
elem.used_formats = factories_info.formats;
elem.used_functions = factories_info.functions;
elem.used_storages = factories_info.storages;
elem.used_table_functions = factories_info.table_functions;
if (log_queries && elem.type >= log_queries_min_type && Int64(elem.query_duration_ms) >= log_queries_min_query_duration_ms)
{
if (auto query_log = context->getQueryLog())
@ -849,7 +851,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
if (process_list_elem)
{
QueryStatusInfo info = process_list_elem->getInfo(true, current_settings.log_profile_events, false);
status_info_to_query_log(elem, info, ast);
status_info_to_query_log(elem, info, ast, context);
}
if (current_settings.calculate_text_stack_trace)
@ -1010,22 +1012,31 @@ void executeQuery(
const auto * ast_query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get());
WriteBuffer * out_buf = &ostr;
std::optional<WriteBufferFromFile> out_file_buf;
std::unique_ptr<WriteBuffer> compressed_buffer;
if (ast_query_with_output && ast_query_with_output->out_file)
{
if (!allow_into_outfile)
throw Exception("INTO OUTFILE is not allowed", ErrorCodes::INTO_OUTFILE_NOT_ALLOWED);
const auto & out_file = ast_query_with_output->out_file->as<ASTLiteral &>().value.safeGet<std::string>();
out_file_buf.emplace(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT);
out_buf = &*out_file_buf;
compressed_buffer = wrapWriteBufferWithCompressionMethod(
std::make_unique<WriteBufferFromFile>(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT),
chooseCompressionMethod(out_file, ""),
/* compression level = */ 3
);
}
String format_name = ast_query_with_output && (ast_query_with_output->format != nullptr)
? getIdentifierName(ast_query_with_output->format)
: context->getDefaultFormat();
auto out = FormatFactory::instance().getOutputStreamParallelIfPossible(format_name, *out_buf, streams.in->getHeader(), context, {}, output_format_settings);
auto out = FormatFactory::instance().getOutputStreamParallelIfPossible(
format_name,
compressed_buffer ? *compressed_buffer : *out_buf,
streams.in->getHeader(),
context,
{},
output_format_settings);
/// Save previous progress callback if any. TODO Do it more conveniently.
auto previous_progress_callback = context->getProgressCallback();
@ -1049,15 +1060,18 @@ void executeQuery(
const ASTQueryWithOutput * ast_query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get());
WriteBuffer * out_buf = &ostr;
std::optional<WriteBufferFromFile> out_file_buf;
std::unique_ptr<WriteBuffer> compressed_buffer;
if (ast_query_with_output && ast_query_with_output->out_file)
{
if (!allow_into_outfile)
throw Exception("INTO OUTFILE is not allowed", ErrorCodes::INTO_OUTFILE_NOT_ALLOWED);
const auto & out_file = typeid_cast<const ASTLiteral &>(*ast_query_with_output->out_file).value.safeGet<std::string>();
out_file_buf.emplace(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT);
out_buf = &*out_file_buf;
compressed_buffer = wrapWriteBufferWithCompressionMethod(
std::make_unique<WriteBufferFromFile>(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT),
chooseCompressionMethod(out_file, ""),
/* compression level = */ 3
);
}
String format_name = ast_query_with_output && (ast_query_with_output->format != nullptr)
@ -1071,7 +1085,14 @@ void executeQuery(
return std::make_shared<MaterializingTransform>(header);
});
auto out = FormatFactory::instance().getOutputFormatParallelIfPossible(format_name, *out_buf, pipeline.getHeader(), context, {}, output_format_settings);
auto out = FormatFactory::instance().getOutputFormatParallelIfPossible(
format_name,
compressed_buffer ? *compressed_buffer : *out_buf,
pipeline.getHeader(),
context,
{},
output_format_settings);
out->setAutoFlush();
/// Save previous progress callback if any. TODO Do it more conveniently.
@ -1113,4 +1134,32 @@ void executeQuery(
streams.onFinish();
}
/// Runs a BlockIO that neither expects input data nor produces output data.
/// A BlockIO with `out` or `in` set, or with an initialized but not completed pipeline,
/// is rejected with LOGICAL_ERROR. If the pipeline is not initialized, the function
/// returns right away (onFinish() is not invoked in that case).
/// Any exception is reported through streams.onException() and then rethrown.
void executeTrivialBlockIO(BlockIO & streams, ContextPtr context)
{
    try
    {
        if (streams.out)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Query stream requires input, but no input buffer provided, it's a bug");

        if (streams.in)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Query stream requires output, but no output buffer provided, it's a bug");

        auto & pipeline = streams.pipeline;

        /// No pipeline was built for this query: nothing to execute.
        const bool nothing_to_run = !pipeline.initialized();
        if (nothing_to_run)
            return;

        if (!pipeline.isCompleted())
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Query pipeline requires output, but no output buffer provided, it's a bug");

        pipeline.setProgressCallback(context->getProgressCallback());

        /// The thread count is read after the executor is created, same as before.
        auto pipeline_executor = pipeline.execute();
        pipeline_executor->execute(pipeline.getNumThreads());
    }
    catch (...)
    {
        streams.onException();
        throw;
    }

    streams.onFinish();
}
}

View File

@ -55,4 +55,8 @@ BlockIO executeQuery(
bool allow_processors /// If can use processors pipeline
);
/// Executes BlockIO returned from executeQuery(...)
/// if built pipeline does not require any input and does not produce any output.
void executeTrivialBlockIO(BlockIO & streams, ContextPtr context);
}

View File

@ -225,6 +225,8 @@ public:
return removeOnCluster<ASTAlterQuery>(clone(), new_database);
}
/// Kind of this query as a string: "Alter".
const char * getQueryKindString() const override { return "Alter"; }
protected:
void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;

View File

@ -102,6 +102,8 @@ public:
bool isView() const { return is_ordinary_view || is_materialized_view || is_live_view; }
/// Kind of this query as a string: "Create".
const char * getQueryKindString() const override { return "Create"; }
protected:
void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
};

View File

@ -45,6 +45,8 @@ public:
return removeOnCluster<ASTDropQuery>(clone(), new_database);
}
/// Kind of this query as a string: "Drop".
const char * getQueryKindString() const override { return "Drop"; }
protected:
void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
};

View File

@ -34,5 +34,6 @@ public:
void replaceEmptyDatabase(const String & current_database);
void replaceCurrentUserTag(const String & current_user_name) const;
ASTPtr getRewrittenASTWithoutOnCluster(const std::string &) const override { return removeOnCluster<ASTGrantQuery>(clone()); }
/// Kind of this query as a string: "Revoke" when is_revoke is set, otherwise "Grant".
const char * getQueryKindString() const override { return is_revoke ? "Revoke" : "Grant"; }
};
}

View File

@ -47,6 +47,8 @@ public:
return res;
}
/// Kind of this query as a string: "Insert".
const char * getQueryKindString() const override { return "Insert"; }
protected:
void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
};

View File

@ -34,6 +34,9 @@ public:
bool database{false}; /// For RENAME DATABASE
bool dictionary{false}; /// For RENAME DICTIONARY
/// Special flag for CREATE OR REPLACE. Do not throw if the second table does not exist.
bool rename_if_cannot_exchange{false};
/** Get the text that identifies this element. */
String getID(char) const override { return "Rename"; }
@ -61,6 +64,8 @@ public:
return query_ptr;
}
/// Kind of this query as a string: "Rename".
const char * getQueryKindString() const override { return "Rename"; }
protected:
void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override
{

View File

@ -69,6 +69,8 @@ public:
const ASTPtr limitLength() const { return getExpression(Expression::LIMIT_LENGTH); }
const ASTPtr settings() const { return getExpression(Expression::SETTINGS); }
/// True if the query has at least one of the WHERE, PREWHERE or HAVING expressions.
bool hasFiltration() const { return where() || prewhere() || having(); }
/// Set/Reset/Remove expression.
void setExpression(Expression expr, ASTPtr && ast);
@ -95,6 +97,8 @@ public:
void setFinal();
/// Kind of this query as a string: "Select".
const char * getQueryKindString() const override { return "Select"; }
protected:
void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;

View File

@ -16,6 +16,8 @@ public:
ASTPtr clone() const override;
void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
/// Kind of this query as a string: "Select".
const char * getQueryKindString() const override { return "Select"; }
enum class Mode
{
Unspecified,

View File

@ -86,6 +86,8 @@ public:
return removeOnCluster<ASTSystemQuery>(clone(), new_database);
}
/// Kind of this query as a string: "System".
const char * getQueryKindString() const override { return "System"; }
protected:
void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;

View File

@ -231,6 +231,9 @@ public:
void cloneChildren();
/// Returns the query_kind string representation of this AST query.
/// The default implementation returns an empty string; derived classes may override it.
virtual const char * getQueryKindString() const { return ""; }
public:
/// For syntax highlighting.
static const char * hilite_keyword;

View File

@ -16,7 +16,7 @@ public:
const Block & header, size_t num_inputs,
SortDescription description_, size_t max_block_size)
: IMergingTransform(
num_inputs, header, header, true,
num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
header,
num_inputs,
std::move(description_),

View File

@ -20,7 +20,7 @@ public:
WriteBuffer * out_row_sources_buf_ = nullptr,
bool use_average_block_sizes = false)
: IMergingTransform(
num_inputs, header, header, true,
num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
header,
num_inputs,
std::move(description_),

View File

@ -19,7 +19,7 @@ public:
SortDescription description,
size_t max_block_size)
: IMergingTransform(
num_inputs, header, header, true,
num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
header,
num_inputs,
params,

View File

@ -15,7 +15,7 @@ public:
SortDescription description_, size_t max_block_size,
Graphite::Params params_, time_t time_of_merge_)
: IMergingTransform(
num_inputs, header, header, true,
num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
header,
num_inputs,
std::move(description_),

View File

@ -14,9 +14,11 @@ IMergingTransformBase::IMergingTransformBase(
size_t num_inputs,
const Block & input_header,
const Block & output_header,
bool have_all_inputs_)
bool have_all_inputs_,
bool has_limit_below_one_block_)
: IProcessor(InputPorts(num_inputs, input_header), {output_header})
, have_all_inputs(have_all_inputs_)
, has_limit_below_one_block(has_limit_below_one_block_)
{
}
@ -64,10 +66,7 @@ IProcessor::Status IMergingTransformBase::prepareInitializeInputs()
continue;
if (input_states[i].is_initialized)
{
// input.setNotNeeded();
continue;
}
input.setNeeded();
@ -77,12 +76,17 @@ IProcessor::Status IMergingTransformBase::prepareInitializeInputs()
continue;
}
auto chunk = input.pull();
/// setNotNeeded after reading the first chunk, because in the optimistic case
/// (e.g. with optimized 'ORDER BY primary_key LIMIT n' and a small 'n')
/// we won't have to read any more chunks.
auto chunk = input.pull(has_limit_below_one_block);
if (!chunk.hasRows())
{
if (!input.isFinished())
{
input.setNeeded();
all_inputs_has_data = false;
}
continue;
}

Some files were not shown because too many files have changed in this diff Show More