Merge branch 'master' into fix-number-of-threads-in-subqueries
commit 0b315fccc2
.gitmodules (vendored): 2 changes

@@ -220,4 +220,4 @@
url = https://github.com/ClickHouse-Extras/boringssl.git
[submodule "contrib/NuRaft"]
path = contrib/NuRaft
url = https://github.com/eBay/NuRaft.git
url = https://github.com/ClickHouse-Extras/NuRaft.git
@@ -13,6 +13,3 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation.
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.

## Upcoming Events

* [Chinese ClickHouse Meetup (online)](http://hdxu.cn/8KxZE) on 6 February 2021.
@@ -1,6 +1,8 @@
#pragma once

#include <common/extended_types.h>
#include <common/defines.h>

namespace common
{
@@ -156,4 +158,11 @@ namespace common
return false;
return (x * y) / y != x;
}

/// Multiply and ignore overflow.
template <typename T1, typename T2>
inline auto NO_SANITIZE_UNDEFINED mulIgnoreOverflow(T1 x, T2 y)
{
return x * y;
}
}
@@ -562,6 +562,7 @@ void debugIncreaseOOMScore()
{
DB::WriteBufferFromFile buf("/proc/self/oom_score_adj");
buf.write(new_score.c_str(), new_score.size());
buf.close();
}
catch (const Poco::Exception & e)
{
@@ -784,7 +785,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
/// Setup signal handlers.
/// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at anytime.

addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP}, signalHandler, &handled_signals);
addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP, SIGTRAP}, signalHandler, &handled_signals);
addSignalHandler({SIGHUP, SIGUSR1}, closeLogsSignalHandler, &handled_signals);
addSignalHandler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler, &handled_signals);
@@ -11,7 +11,7 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/NuRaft/CMakeLists.txt")
return()
endif ()

if (NOT OS_FREEBSD)
if (NOT OS_FREEBSD AND NOT OS_DARWIN)
set (USE_NURAFT 1)
set (NURAFT_LIBRARY nuraft)

@@ -20,5 +20,5 @@ if (NOT OS_FREEBSD)
message (STATUS "Using NuRaft=${USE_NURAFT}: ${NURAFT_INCLUDE_DIR} : ${NURAFT_LIBRARY}")
else()
set (USE_NURAFT 0)
message (STATUS "Using internal NuRaft library on FreeBSD is not supported")
message (STATUS "Using internal NuRaft library on FreeBSD and Darwin is not supported")
endif()
contrib/NuRaft (vendored): 2 changes

@@ -1 +1 @@
Subproject commit 410bd149da84cdde60b4436b02b738749f4e87e1
Subproject commit 7adf7ae33e7d5c307342431b577c8ab1025ee793
contrib/boost (vendored): 2 changes

@@ -1 +1 @@
Subproject commit 8e259cd2a6b60d75dd17e73432f11bb7b9351bb1
Subproject commit 48f40ebb539220d328958f8823b094c0b07a4e79
@@ -30,7 +30,12 @@ set(SRCS

add_library(nuraft ${SRCS})

target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1)

if (NOT OPENSSL_SSL_LIBRARY OR NOT OPENSSL_CRYPTO_LIBRARY)
target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1 SSL_LIBRARY_NOT_FOUND=1)
else()
target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1)
endif()

target_include_directories (nuraft SYSTEM PRIVATE ${LIBRARY_DIR}/include/libnuraft)
# for some reason include "asio.h" directly without "boost/" prefix.
@@ -56,7 +56,7 @@ $ echo 'SELECT version()' | curl 'http://localhost:8123/' --data-binary @-
20.12.3.3
```

### Volumes

Typically you may want to mount the following folders inside your container to achieve persistency:

@@ -76,7 +76,7 @@ You may also want to mount:
* `/etc/clickhouse-server/users.d/*.xml` - files with user settings adjustments
* `/docker-entrypoint-initdb.d/` - folder with database initialization scripts (see below).

### Linux capabilities

ClickHouse has some advanced functionality which requires enabling several [linux capabilities](https://man7.org/linux/man-pages/man7/capabilities.7.html).

@@ -113,10 +113,10 @@ $ docker run --rm -e CLICKHOUSE_UID=0 -e CLICKHOUSE_GID=0 --name clickhouse-serv

### How to create default database and user on starting

Sometimes you may want to create user (user named `default` is used by default) and database on image starting. You can do it using environment variables `CLICKHOUSE_DB`, `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD`:
Sometimes you may want to create user (user named `default` is used by default) and database on image starting. You can do it using environment variables `CLICKHOUSE_DB`, `CLICKHOUSE_USER`, `CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT` and `CLICKHOUSE_PASSWORD`:

```
$ docker run --rm -e CLICKHOUSE_DB=my_database -e CLICKHOUSE_USER=username -e CLICKHOUSE_PASSWORD=password -p 9000:9000/tcp yandex/clickhouse-server
$ docker run --rm -e CLICKHOUSE_DB=my_database -e CLICKHOUSE_USER=username -e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 -e CLICKHOUSE_PASSWORD=password -p 9000:9000/tcp yandex/clickhouse-server
```

## How to extend this image
@@ -54,6 +54,7 @@ FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_
CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
CLICKHOUSE_DB="${CLICKHOUSE_DB:-}"
CLICKHOUSE_ACCESS_MANAGEMENT="${CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT:-0}"

for dir in "$DATA_DIR" \
"$ERROR_LOG_DIR" \
@@ -97,6 +98,7 @@ if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CL
</networks>
<password>${CLICKHOUSE_PASSWORD}</password>
<quota>default</quota>
<access_management>${CLICKHOUSE_ACCESS_MANAGEMENT}</access_management>
</${CLICKHOUSE_USER}>
</users>
</yandex>
@@ -163,6 +163,7 @@ function clone_submodules
contrib/xz
contrib/dragonbox
contrib/fast_float
contrib/NuRaft
)

git submodule sync
@@ -182,6 +183,7 @@ function run_cmake
"-DENABLE_EMBEDDED_COMPILER=0"
"-DENABLE_THINLTO=0"
"-DUSE_UNWIND=1"
"-DENABLE_NURAFT=1"
)

# TODO remove this? we don't use ccache anyway. An option would be to download it
docs/_description_templates/template-data-type.md (new file): 29 lines

@@ -0,0 +1,29 @@
---
toc_priority:
toc_title:
---

# data_type_name {#data_type-name}

Description.

**Parameters** (Optional)

- `x` — Description. [Type name](relative/path/to/type/dscr.md#type).
- `y` — Description. [Type name](relative/path/to/type/dscr.md#type).

**Examples**

```sql

```

## Additional Info {#additional-info} (Optional)

The name of an additional section can be any, for example, **Usage**.

**See Also** (Optional)

- [link](#)

[Original article](https://clickhouse.tech/docs/en/data_types/<data-type-name>/) <!--hide-->
@@ -136,8 +136,7 @@ The following settings can be specified in configuration file for given endpoint
- `access_key_id` and `secret_access_key` — Optional. Specifies credentials to use with given endpoint.
- `use_environment_credentials` — Optional, default value is `false`. If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint.
- `header` — Optional, can be specified multiple times. Adds specified HTTP header to a request to given endpoint.

This configuration also applies to S3 disks in `MergeTree` table engine family.
- `server_side_encryption_customer_key_base64` — Optional. If specified, required headers for accessing S3 objects with SSE-C encryption will be set.

Example:

@@ -149,6 +148,7 @@ Example:
<!-- <secret_access_key>SECRET_ACCESS_KEY</secret_access_key> -->
<!-- <use_environment_credentials>false</use_environment_credentials> -->
<!-- <header>Authorization: Bearer SOME-TOKEN</header> -->
<!-- <server_side_encryption_customer_key_base64>BASE64-ENCODED-KEY</server_side_encryption_customer_key_base64> -->
</endpoint-name>
</s3>
```
@@ -715,6 +715,7 @@ Configuration markup:
<endpoint>https://storage.yandexcloud.net/my-bucket/root-path/</endpoint>
<access_key_id>your_access_key_id</access_key_id>
<secret_access_key>your_secret_access_key</secret_access_key>
<server_side_encryption_customer_key_base64>your_base64_encoded_customer_key</server_side_encryption_customer_key_base64>
<proxy>
<uri>http://proxy1</uri>
<uri>http://proxy2</uri>
@@ -750,7 +751,8 @@ Optional parameters:
- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
- `cache_enabled` — Allows to cache mark and index files on local FS. Default value is `true`.
- `cache_path` — Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks/<disk_name>/cache/`.
- `skip_access_check` — If true disk access checks will not be performed on disk start-up. Default value is `false`.
- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.

S3 disk can be configured as `main` or `cold` storage:
@@ -39,4 +39,4 @@ More details on [manipulating partitions](../../sql-reference/statements/alter/p

It’s rather radical to drop all data from a table, but in some cases it might be exactly what you need.

More details on [table truncation](../../sql-reference/statements/alter/partition.md#alter_drop-partition).
More details on [table truncation](../../sql-reference/statements/truncate.md).
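For illustration only (not part of this diff), a minimal truncation statement could look like this; the database and table names are hypothetical:

```sql
-- Removes all data from the table but keeps its definition.
TRUNCATE TABLE IF EXISTS my_database.my_table;
```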
@@ -31,8 +31,8 @@ The supported formats are:
| [JSONCompactString](#jsoncompactstring) | ✗ | ✔ |
| [JSONEachRow](#jsoneachrow) | ✔ | ✔ |
| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ |
| [JSONStringEachRow](#jsonstringeachrow) | ✔ | ✔ |
| [JSONStringEachRowWithProgress](#jsonstringeachrowwithprogress) | ✗ | ✔ |
| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ |
| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ |
| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ |
| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ |
| [JSONCompactStringEachRow](#jsoncompactstringeachrow) | ✔ | ✔ |
@@ -612,7 +612,7 @@ Example:
```

## JSONEachRow {#jsoneachrow}
## JSONStringEachRow {#jsonstringeachrow}
## JSONStringsEachRow {#jsonstringseachrow}
## JSONCompactEachRow {#jsoncompacteachrow}
## JSONCompactStringEachRow {#jsoncompactstringeachrow}

@@ -627,9 +627,9 @@ When using these formats, ClickHouse outputs rows as separated, newline-delimite
When inserting the data, you should provide a separate JSON value for each row.

## JSONEachRowWithProgress {#jsoneachrowwithprogress}
## JSONStringEachRowWithProgress {#jsonstringeachrowwithprogress}
## JSONStringsEachRowWithProgress {#jsonstringseachrowwithprogress}

Differs from `JSONEachRow`/`JSONStringEachRow` in that ClickHouse will also yield progress information as JSON values.
Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yield progress information as JSON values.

```json
{"row":{"'hello'":"hello","multiply(42, number)":"0","range(5)":[0,1,2,3,4]}}
@@ -312,7 +312,7 @@ Enables or disables parsing enum values as enum ids for TSV input format.
Possible values:

- 0 — Enum values are parsed as values.
- 1 — Enum values are parsed as enum IDs
- 1 — Enum values are parsed as enum IDs.

Default value: 0.
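An illustrative sketch (not part of this diff; the table `tsv_enum_demo` is hypothetical):

```sql
CREATE TABLE tsv_enum_demo (x Enum8('first' = 1, 'second' = 2)) ENGINE = Memory;
SET input_format_tsv_enum_as_number = 1;
-- The TSV field '1' is now interpreted as the enum ID of 'first' rather than as a literal value.
INSERT INTO tsv_enum_demo FORMAT TSV 1
```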
@@ -2592,6 +2592,18 @@ Possible values:

Default value: `16`.

## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability}

Sets the probability that ClickHouse can start a trace for executed queries (if no parent [trace context](https://www.w3.org/TR/trace-context/) is supplied).

Possible values:

- 0 — The trace for all executed queries is disabled (if no parent trace context is supplied).
- Positive floating-point number in the range [0..1]. For example, if the setting value is `0.5`, ClickHouse can start a trace on average for half of the queries.
- 1 — The trace for all executed queries is enabled.

Default value: `0`.
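A minimal usage sketch (illustrative only, not part of this diff):

```sql
-- Start a trace for roughly one query in ten when no parent trace context is supplied.
SET opentelemetry_start_trace_probability = 0.1;
SELECT count() FROM system.numbers LIMIT 1000;
```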
## optimize_on_insert {#optimize-on-insert}

Enables or disables data transformation before the insertion, as if merge was done on this block (according to table engine).
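An illustrative sketch of the effect (not part of this diff; the table name `opt_demo` is hypothetical):

```sql
SET optimize_on_insert = 1;
CREATE TABLE opt_demo (key UInt32, value UInt32) ENGINE = ReplacingMergeTree ORDER BY key;
-- With the setting enabled, duplicate keys in the inserted block are collapsed
-- already at insert time, as if a merge had been performed on that block.
INSERT INTO opt_demo VALUES (1, 1), (1, 2), (2, 3);
SELECT * FROM opt_demo;
```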
docs/en/operations/system-tables/opentelemetry_span_log.md (new file): 53 lines

@@ -0,0 +1,53 @@
# system.opentelemetry_span_log {#system_tables-opentelemetry_span_log}

Contains information about [trace spans](https://opentracing.io/docs/overview/spans/) for executed queries.

Columns:

- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md)) — ID of the trace for executed query.
- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the `trace span`.
- `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the parent `trace span`.
- `operation_name` ([String](../../sql-reference/data-types/string.md)) — The name of the operation.
- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The start time of the `trace span` (in microseconds).
- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The finish time of the `trace span` (in microseconds).
- `finish_date` ([Date](../../sql-reference/data-types/date.md)) — The finish date of the `trace span`.
- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — [Attribute](https://opentelemetry.io/docs/go/instrumentation/#attributes) names depending on the `trace span`. They are filled in according to the recommendations in the [OpenTelemetry](https://opentelemetry.io/) standard.
- `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Attribute values depending on the `trace span`. They are filled in according to the recommendations in the `OpenTelemetry` standard.

**Example**

Query:

``` sql
SELECT * FROM system.opentelemetry_span_log LIMIT 1 FORMAT Vertical;
```

Result:

``` text
Row 1:
──────
trace_id: cdab0847-0d62-61d5-4d38-dd65b19a1914
span_id: 701487461015578150
parent_span_id: 2991972114672045096
operation_name: DB::Block DB::InterpreterSelectQuery::getSampleBlockImpl()
start_time_us: 1612374594529090
finish_time_us: 1612374594529108
finish_date: 2021-02-03
attribute.names: []
attribute.values: []
```

**See Also**

- [OpenTelemetry](../../operations/opentelemetry.md)

[Original article](https://clickhouse.tech/docs/en/operations/system_tables/opentelemetry_span_log) <!--hide-->
@@ -1,9 +1,9 @@
---
toc_priority: 47
toc_title: ClickHouse Update
toc_title: ClickHouse Upgrade
---

# ClickHouse Update {#clickhouse-update}
# ClickHouse Upgrade {#clickhouse-upgrade}

If ClickHouse was installed from `deb` packages, execute the following commands on the server:

@@ -16,3 +16,19 @@ $ sudo service clickhouse-server restart
If you installed ClickHouse using something other than the recommended `deb` packages, use the appropriate update method.

ClickHouse does not support a distributed update. The operation should be performed consecutively on each separate server. Do not update all the servers on a cluster simultaneously, or the cluster will be unavailable for some time.

To upgrade an older version of ClickHouse to a specific version:

As an example:

`xx.yy.a.b` is the current stable version. The latest stable version can be found [here](https://github.com/ClickHouse/ClickHouse/releases).

```bash
$ sudo apt-get update
$ sudo apt-get install clickhouse-server=xx.yy.a.b clickhouse-client=xx.yy.a.b clickhouse-common-static=xx.yy.a.b
$ sudo service clickhouse-server restart
```
docs/en/sql-reference/data-types/map.md (new file): 83 lines

@@ -0,0 +1,83 @@
---
toc_priority: 65
toc_title: Map(key, value)
---

# Map(key, value) {#data_type-map}

`Map(key, value)` data type stores `key:value` pairs.

**Parameters**
- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
- `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md).

!!! warning "Warning"
    Currently `Map` data type is an experimental feature. To work with it you must set `allow_experimental_map_type = 1`.

To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax. This lookup works now with a linear complexity.

**Examples**

Consider the table:

``` sql
CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory;
INSERT INTO table_map VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30});
```

Select all `key2` values:

```sql
SELECT a['key2'] FROM table_map;
```
Result:

```text
┌─arrayElement(a, 'key2')─┐
│ 10 │
│ 20 │
│ 30 │
└─────────────────────────┘
```

If there's no such `key` in the `Map()` column, the query returns zeros for numerical values, empty strings or empty arrays.

```sql
INSERT INTO table_map VALUES ({'key3':100}), ({});
SELECT a['key3'] FROM table_map;
```

Result:

```text
┌─arrayElement(a, 'key3')─┐
│ 100 │
│ 0 │
└─────────────────────────┘
┌─arrayElement(a, 'key3')─┐
│ 0 │
│ 0 │
│ 0 │
└─────────────────────────┘
```

## Convert Tuple to Map Type {#map-and-tuple}

You can cast `Tuple()` as `Map()` using [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function:

``` sql
SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map;
```

``` text
┌─map───────────────────────────┐
│ {1:'Ready',2:'Steady',3:'Go'} │
└───────────────────────────────┘
```

**See Also**

- [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function
- [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function

[Original article](https://clickhouse.tech/docs/en/data-types/map/) <!--hide-->
@@ -265,32 +265,81 @@ SELECT toIPv6('127.0.0.1')
└─────────────────────┘
```

## isIPv4String
## isIPv4String {#isipv4string}

Determines if the input string is an IPv4 address or not. Returns `1` if true `0` otherwise.
Determines whether the input string is an IPv4 address or not. If `string` is IPv6 address returns `0`.

``` sql
SELECT isIPv4String('127.0.0.1')
**Syntax**

```sql
isIPv4String(string)
```

**Parameters**

- `string` — IP address. [String](../../sql-reference/data-types/string.md).

**Returned value**

- `1` if `string` is IPv4 address, `0` otherwise.

Type: [UInt8](../../sql-reference/data-types/int-uint.md).

**Examples**

Query:

```sql
SELECT addr, isIPv4String(addr) FROM ( SELECT ['0.0.0.0', '127.0.0.1', '::ffff:127.0.0.1'] AS addr ) ARRAY JOIN addr
```

Result:

``` text
┌─isIPv4String('127.0.0.1')─┐
│ 1 │
└───────────────────────────┘
┌─addr─────────────┬─isIPv4String(addr)─┐
│ 0.0.0.0 │ 1 │
│ 127.0.0.1 │ 1 │
│ ::ffff:127.0.0.1 │ 0 │
└──────────────────┴────────────────────┘
```

## isIPv6String
## isIPv6String {#isipv6string}

Determines if the input string is an IPv6 address or not. Returns `1` if true `0` otherwise.
Determines whether the input string is an IPv6 address or not. If `string` is IPv4 address returns `0`.

**Syntax**

```sql
isIPv6String(string)
```

**Parameters**

- `string` — IP address. [String](../../sql-reference/data-types/string.md).

**Returned value**

- `1` if `string` is IPv6 address, `0` otherwise.

Type: [UInt8](../../sql-reference/data-types/int-uint.md).

**Examples**

Query:

``` sql
SELECT isIPv6String('2001:438:ffff::407d:1bc1')
SELECT addr, isIPv6String(addr) FROM ( SELECT ['::', '1111::ffff', '::ffff:127.0.0.1', '127.0.0.1'] AS addr ) ARRAY JOIN addr
```

Result:

``` text
┌─isIPv6String('2001:438:ffff::407d:1bc1')─┐
│ 1 │
└──────────────────────────────────────────┘
┌─addr─────────────┬─isIPv6String(addr)─┐
│ :: │ 1 │
│ 1111::ffff │ 1 │
│ ::ffff:127.0.0.1 │ 1 │
│ 127.0.0.1 │ 0 │
└──────────────────┴────────────────────┘
```

[Original article](https://clickhouse.tech/docs/en/query_language/functions/ip_address_functions/) <!--hide-->
@@ -909,6 +909,66 @@ WHERE diff != 1

Same as [runningDifference](../../sql-reference/functions/other-functions.md#other_functions-runningdifference), but it returns the value of the first row itself, and each subsequent row returns the difference from the previous row.

## runningConcurrency {#runningconcurrency}

Given a series of event beginning and ending times, this function calculates the concurrency of the events at each data point, that is, at each beginning time.

!!! warning "Warning"
    Events spanning multiple data blocks will not be processed correctly. The function resets its state for each new data block.

The result of the function depends on the order of data in the block. It assumes the beginning time is sorted in ascending order.

**Syntax**

``` sql
runningConcurrency(begin, end)
```

**Parameters**

- `begin` — A column for the beginning time of events (inclusive). [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `end` — A column for the ending time of events (exclusive). [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md).

Note that two columns `begin` and `end` must have the same type.

**Returned values**

- The concurrency of events at the data point.

Type: [UInt32](../../sql-reference/data-types/int-uint.md)

**Example**

Input table:

``` text
┌───────────────begin─┬─────────────────end─┐
│ 2020-12-01 00:00:00 │ 2020-12-01 00:59:59 │
│ 2020-12-01 00:30:00 │ 2020-12-01 00:59:59 │
│ 2020-12-01 00:40:00 │ 2020-12-01 01:30:30 │
│ 2020-12-01 01:10:00 │ 2020-12-01 01:30:30 │
│ 2020-12-01 01:50:00 │ 2020-12-01 01:59:59 │
└─────────────────────┴─────────────────────┘
```

Query:

``` sql
SELECT runningConcurrency(begin, end) FROM example
```

Result:

``` text
┌─runningConcurrency(begin, end)─┐
│ 1 │
│ 2 │
│ 3 │
│ 2 │
│ 1 │
└────────────────────────────────┘
```

## MACNumToString(num) {#macnumtostringnum}

Accepts a UInt64 number. Interprets it as a MAC address in big endian. Returns a string containing the corresponding MAC address in the format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form).
@@ -5,6 +5,68 @@ toc_title: Working with maps

# Functions for maps {#functions-for-working-with-tuple-maps}

## map {#function-map}

Arranges `key:value` pairs into [Map(key, value)](../../sql-reference/data-types/map.md) data type.

**Syntax**

``` sql
map(key1, value1[, key2, value2, ...])
```

**Parameters**

- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
- `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md).

**Returned value**

- Data structure as `key:value` pairs.

Type: [Map(key, value)](../../sql-reference/data-types/map.md).

**Examples**

Query:

``` sql
SELECT map('key1', number, 'key2', number * 2) FROM numbers(3);
```

Result:

``` text
┌─map('key1', number, 'key2', multiply(number, 2))─┐
│ {'key1':0,'key2':0} │
│ {'key1':1,'key2':2} │
│ {'key1':2,'key2':4} │
└──────────────────────────────────────────────────┘
```

Query:

``` sql
CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = MergeTree() ORDER BY a;
INSERT INTO table_map SELECT map('key1', number, 'key2', number * 2) FROM numbers(3);
SELECT a['key2'] FROM table_map;
```

Result:

``` text
┌─arrayElement(a, 'key2')─┐
│ 0 │
│ 2 │
│ 4 │
└─────────────────────────┘
```

**See Also**

- [Map(key, value)](../../sql-reference/data-types/map.md) data type

## mapAdd {#function-mapadd}

Collect all the keys and sum corresponding values.
@@ -112,4 +174,4 @@ Result:
└──────────────────────────────┴───────────────────────────────────┘
```

[Original article](https://clickhouse.tech/docs/en/query_language/functions/tuple-map-functions/) <!--hide-->
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/tuple-map-functions/) <!--hide-->
@@ -133,10 +133,9 @@ For example:

### cutToFirstSignificantSubdomainCustom {#cuttofirstsignificantsubdomaincustom}

Same as `cutToFirstSignificantSubdomain` but accept custom TLD list name, useful if:
Returns the part of the domain that includes top-level subdomains up to the first significant subdomain. Accepts custom [TLD list](https://en.wikipedia.org/wiki/List_of_Internet_top-level_domains) name.

- you need fresh TLD list,
- or you have custom.
Can be useful if you need fresh TLD list or you have custom.

Configuration example:

@@ -149,21 +148,150 @@ Configuration example:
</top_level_domains_lists>
```

Example:
**Syntax**

- `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/', 'public_suffix_list') = 'yandex.com.tr'`.
``` sql
cutToFirstSignificantSubdomain(URL, TLD)
```

**Parameters**

- `URL` — URL. [String](../../sql-reference/data-types/string.md).
- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md).

**Returned value**

- Part of the domain that includes top-level subdomains up to the first significant subdomain.

Type: [String](../../sql-reference/data-types/string.md).

**Example**

Query:

```sql
SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list');
```

Result:

```text
┌─cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐
│ foo.there-is-no-such-domain │
└───────────────────────────────────────────────────────────────────────────────────────────────┘
```

**See Also**

- [firstSignificantSubdomain](#firstsignificantsubdomain).

### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww}

Same as `cutToFirstSignificantSubdomainWithWWW` but accept custom TLD list name.
Returns the part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. Accepts custom TLD list name.

Can be useful if you need fresh TLD list or you have custom.

Configuration example:

```xml
<!-- <top_level_domains_path>/var/lib/clickhouse/top_level_domains/</top_level_domains_path> -->
<top_level_domains_lists>
<!-- https://publicsuffix.org/list/public_suffix_list.dat -->
<public_suffix_list>public_suffix_list.dat</public_suffix_list>
<!-- NOTE: path is under top_level_domains_path -->
</top_level_domains_lists>
```

**Syntax**

```sql
cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD)
```

**Parameters**

- `URL` — URL. [String](../../sql-reference/data-types/string.md).
- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md).

**Returned value**

- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`.

Type: [String](../../sql-reference/data-types/string.md).

**Example**

Query:

```sql
SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list');
```

Result:

```text
┌─cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')─┐
│ www.foo │
└──────────────────────────────────────────────────────────────────────────────┘
```

**See Also**

- [firstSignificantSubdomain](#firstsignificantsubdomain).

### firstSignificantSubdomainCustom {#firstsignificantsubdomaincustom}

Same as `firstSignificantSubdomain` but accept custom TLD list name.
Returns the first significant subdomain. Accepts custom TLD list name.

### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww}
Can be useful if you need fresh TLD list or you have custom.

Same as `cutToFirstSignificantSubdomainWithWWW` but accept custom TLD list name.
Configuration example:

```xml
<!-- <top_level_domains_path>/var/lib/clickhouse/top_level_domains/</top_level_domains_path> -->
<top_level_domains_lists>
<!-- https://publicsuffix.org/list/public_suffix_list.dat -->
<public_suffix_list>public_suffix_list.dat</public_suffix_list>
<!-- NOTE: path is under top_level_domains_path -->
</top_level_domains_lists>
```

**Syntax**

```sql
firstSignificantSubdomainCustom(URL, TLD)
```

**Parameters**

- `URL` — URL. [String](../../sql-reference/data-types/string.md).
- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md).

**Returned value**

- First significant subdomain.

Type: [String](../../sql-reference/data-types/string.md).

**Example**

Query:

```sql
SELECT firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list');
```

Result:

```text
┌─firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐
│ foo │
└──────────────────────────────────────────────────────────────────────────────────────────┘
```

**See Also**

- [firstSignificantSubdomain](#firstsignificantsubdomain).

### port(URL\[, default_port = 0\]) {#port}
@@ -20,6 +20,7 @@ The following actions are supported:

- [ADD COLUMN](#alter_add-column) — Adds a new column to the table.
- [DROP COLUMN](#alter_drop-column) — Deletes the column.
- [RENAME COLUMN](#alter_rename-column) — Renames the column.
- [CLEAR COLUMN](#alter_clear-column) — Resets column values.
- [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column.
- [MODIFY COLUMN](#alter_modify-column) — Changes column’s type, default expression and TTL.
@@ -78,6 +79,22 @@ Example:
ALTER TABLE visits DROP COLUMN browser
```

## RENAME COLUMN {#alter_rename-column}

``` sql
RENAME COLUMN [IF EXISTS] name to new_name
```

Renames the column `name` to `new_name`. If the `IF EXISTS` clause is specified, the query won’t return an error if the column doesn’t exist. Since renaming does not involve the underlying data, the query is completed almost instantly.

**NOTE**: Columns specified in the key expression of the table (either with `ORDER BY` or `PRIMARY KEY`) cannot be renamed. Trying to change these columns will produce `SQL Error [524]`.

Example:

``` sql
ALTER TABLE visits RENAME COLUMN webBrowser TO browser
```

## CLEAR COLUMN {#alter_clear-column}

``` sql
@@ -14,14 +14,16 @@ ClickHouse supports the standard grammar for defining windows and window functi

| Feature | Support or workaround |
| --------| ----------|
| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | yes |
| `WINDOW` clause (`select ... from table window w as (partiton by id)`) | yes |
| `ROWS` frame | yes |
| `RANGE` frame | yes, it is the default |
| `GROUPS` frame | no |
| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | supported |
| expressions involving window functions, e.g. `(count(*) over ()) / 2)` | not supported, wrap in a subquery ([feature request](https://github.com/ClickHouse/ClickHouse/issues/19857)) |
| `WINDOW` clause (`select ... from table window w as (partiton by id)`) | supported |
| `ROWS` frame | supported |
| `RANGE` frame | supported, the default |
| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | not supported, specify the number of seconds instead |
| `GROUPS` frame | not supported |
| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported |
| `rank()`, `dense_rank()`, `row_number()` | yes |
| `lag/lead(value, offset)` | no, replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`|
| `rank()`, `dense_rank()`, `row_number()` | supported |
| `lag/lead(value, offset)` | not supported, replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`|
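The `lag/lead` workaround from the table above, written out as a sketch (illustrative only, not part of this diff; the `events` table and its columns are hypothetical):

```sql
-- lag(value, 1) / lead(value, 1) emulated via a one-row frame.
SET allow_experimental_window_functions = 1;
SELECT
    time,
    value,
    any(value) OVER (ORDER BY time ASC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS lag_value,
    any(value) OVER (ORDER BY time ASC ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING) AS lead_value
FROM events;
```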
## References
@@ -283,12 +283,10 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (

## input_format_tsv_empty_as_default {#settings-input-format-tsv-empty-as-default}

Если эта настройка включена, замените пустые поля ввода в TSV значениями по умолчанию. Для сложных выражений по умолчанию также должна быть включена настройка `input_format_defaults_for_omitted_fields`.
Если эта настройка включена, все пустые поля во входящем TSV заменяются значениями по умолчанию. Для сложных выражений по умолчанию также должна быть включена настройка `input_format_defaults_for_omitted_fields`.

По умолчанию отключена.

Disabled by default.

## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number}

Включает или отключает парсинг значений перечислений как идентификаторов перечислений для входного формата TSV.
@@ -708,7 +706,7 @@ ClickHouse использует этот параметр при чтении д

Установка логирования запроса.

Запросы, переданные в ClickHouse с этой установкой, логируются согласно правилам конфигурационного параметра сервера [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log).
Запросы, переданные в ClickHouse с этой настройкой, логируются согласно правилам конфигурационного параметра сервера [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log).

Пример:

@@ -1521,7 +1519,7 @@ ClickHouse генерирует исключение
- Тип: секунды
- Значение по умолчанию: 60 секунд

Управляет скоростью обнуления ошибок в распределенных таблицах. Если реплика недоступна в течение некоторого времени, накапливает 5 ошибок, а distributed_replica_error_half_life установлена на 1 секунду, то реплика считается нормальной через 3 секунды после последней ошибки.
Управляет скоростью обнуления счетчика ошибок в распределенных таблицах. Предположим, реплика остается недоступна в течение какого-то времени, и за этот период накопилось 5 ошибок. Если настройка `distributed_replica_error_half_life` установлена в значение 1 секунда, то реплика снова будет считаться доступной через 3 секунды после последней ошибки.

См. также:

@@ -1673,7 +1671,7 @@ ClickHouse генерирует исключение
- Тип: bool
- Значение по умолчанию: True

Обеспечивает параллельный анализ форматов данных с сохранением порядка. Поддерживается только для форматов TSV, TKSV, CSV и JSONEachRow.
Включает режим, при котором входящие данные парсятся параллельно, но с сохранением исходного порядка следования. Поддерживается только для форматов TSV, TKSV, CSV и JSONEachRow.

## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing}

@@ -1987,7 +1985,7 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1;

## output_format_pretty_grid_charset {#output-format-pretty-grid-charset}

Позволяет изменить кодировку, которая используется для печати грид-границ. Доступны следующие кодировки: UTF-8, ASCII.
Позволяет изменить кодировку, которая используется для отрисовки таблицы при выводе результатов запросов. Доступны следующие кодировки: UTF-8, ASCII.

**Пример**

@@ -2473,6 +2471,18 @@ SELECT SUM(-1), MAX(0) FROM system.one WHERE 0;

Значение по умолчанию: `16`.

## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability}

Задает вероятность того, что ClickHouse начнет трассировку для выполненных запросов (если не указан [входящий контекст](https://www.w3.org/TR/trace-context/) трассировки).

Возможные значения:

- 0 — трассировка для выполненных запросов отключена (если не указан входящий контекст трассировки).
- Положительное число с плавающей точкой в диапазоне [0..1]. Например, при значении настройки, равной `0,5`, ClickHouse начнет трассировку в среднем для половины запросов.
- 1 — трассировка для всех выполненных запросов включена.

Значение по умолчанию: `0`.

## optimize_on_insert {#optimize-on-insert}

Включает или выключает преобразование данных перед добавлением в таблицу, как будто над добавляемым блоком предварительно было произведено слияние (в соответствии с движком таблицы).
docs/ru/operations/system-tables/opentelemetry_span_log.md (new file): 49 lines

@@ -0,0 +1,49 @@
# system.opentelemetry_span_log {#system_tables-opentelemetry_span_log}

Содержит информацию о [trace spans](https://opentracing.io/docs/overview/spans/) для выполненных запросов.

Столбцы:

- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md)) — идентификатор трассировки для выполненного запроса.
- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор `trace span`.
- `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор родительского `trace span`.
- `operation_name` ([String](../../sql-reference/data-types/string.md)) — имя операции.
- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — время начала `trace span` (в микросекундах).
- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — время окончания `trace span` (в микросекундах).
- `finish_date` ([Date](../../sql-reference/data-types/date.md)) — дата окончания `trace span`.
- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — имена [атрибутов](https://opentelemetry.io/docs/go/instrumentation/#attributes) в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте [OpenTelemetry](https://opentelemetry.io/).
- `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — значения атрибутов в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте `OpenTelemetry`.

**Пример**

Запрос:

``` sql
SELECT * FROM system.opentelemetry_span_log LIMIT 1 FORMAT Vertical;
```

Результат:

``` text
Row 1:
──────
trace_id: cdab0847-0d62-61d5-4d38-dd65b19a1914
span_id: 701487461015578150
parent_span_id: 2991972114672045096
operation_name: DB::Block DB::InterpreterSelectQuery::getSampleBlockImpl()
start_time_us: 1612374594529090
finish_time_us: 1612374594529108
finish_date: 2021-02-03
attribute.names: []
attribute.values: []
```

[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/opentelemetry_span_log) <!--hide-->
docs/ru/sql-reference/data-types/map.md (new file): 69 lines

@@ -0,0 +1,69 @@
---
toc_priority: 65
toc_title: Map(key, value)
---

# Map(key, value) {#data_type-map}

Тип данных `Map(key, value)` хранит пары `ключ:значение`.

**Параметры**
- `key` — ключ. [String](../../sql-reference/data-types/string.md) или [Integer](../../sql-reference/data-types/int-uint.md).
- `value` — значение. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) или [Array](../../sql-reference/data-types/array.md).

!!! warning "Предупреждение"
    Сейчас использование типа данных `Map` является экспериментальной возможностью. Чтобы использовать этот тип данных, включите настройку `allow_experimental_map_type = 1`.

Чтобы получить значение из колонки `a Map('key', 'value')`, используйте синтаксис `a['key']`. В настоящее время такая подстановка работает по алгоритму с линейной сложностью.

**Примеры**

Рассмотрим таблицу:

``` sql
CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory;
INSERT INTO table_map VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30});
```

Выборка всех значений ключа `key2`:

```sql
SELECT a['key2'] FROM table_map;
```
Результат:

```text
┌─arrayElement(a, 'key2')─┐
│ 10 │
│ 20 │
│ 30 │
└─────────────────────────┘
```

Если для какого-то ключа `key` в колонке с типом `Map()` нет значения, запрос возвращает нули для числовых колонок, пустые строки или пустые массивы.

```sql
INSERT INTO table_map VALUES ({'key3':100}), ({});
SELECT a['key3'] FROM table_map;
```

Результат:

```text
┌─arrayElement(a, 'key3')─┐
│ 100 │
│ 0 │
└─────────────────────────┘
┌─arrayElement(a, 'key3')─┐
│ 0 │
│ 0 │
│ 0 │
└─────────────────────────┘
```

**См. также**

- функция [map()](../../sql-reference/functions/tuple-map-functions.md#function-map)
- функция [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast)

[Original article](https://clickhouse.tech/docs/ru/data-types/map/) <!--hide-->
@@ -243,4 +243,81 @@ SELECT
└───────────────────────────────────┴──────────────────────────────────┘
```

## isIPv4String {#isipv4string}

Определяет, является ли строка адресом IPv4 или нет. Также вернет `0`, если `string` — адрес IPv6.

**Синтаксис**

```sql
isIPv4String(string)
```

**Параметры**

- `string` — IP адрес. [String](../../sql-reference/data-types/string.md).

**Возвращаемое значение**

- `1` если `string` является адресом IPv4 , иначе — `0`.

Тип: [UInt8](../../sql-reference/data-types/int-uint.md).

**Примеры**

Запрос:

```sql
SELECT addr, isIPv4String(addr) FROM ( SELECT ['0.0.0.0', '127.0.0.1', '::ffff:127.0.0.1'] AS addr ) ARRAY JOIN addr
```

Результат:

``` text
┌─addr─────────────┬─isIPv4String(addr)─┐
│ 0.0.0.0 │ 1 │
│ 127.0.0.1 │ 1 │
│ ::ffff:127.0.0.1 │ 0 │
└──────────────────┴────────────────────┘
```

## isIPv6String {#isipv6string}

Определяет, является ли строка адресом IPv6 или нет. Также вернет `0`, если `string` — адрес IPv4.

**Синтаксис**

```sql
isIPv6String(string)
```

**Параметры**

- `string` — IP адрес. [String](../../sql-reference/data-types/string.md).

**Возвращаемое значение**

- `1` если `string` является адресом IPv6 , иначе — `0`.

Тип: [UInt8](../../sql-reference/data-types/int-uint.md).

**Примеры**

Запрос:

``` sql
SELECT addr, isIPv6String(addr) FROM ( SELECT ['::', '1111::ffff', '::ffff:127.0.0.1', '127.0.0.1'] AS addr ) ARRAY JOIN addr
```

Результат:

``` text
┌─addr─────────────┬─isIPv6String(addr)─┐
│ :: │ 1 │
│ 1111::ffff │ 1 │
│ ::ffff:127.0.0.1 │ 1 │
│ 127.0.0.1 │ 0 │
└──────────────────┴────────────────────┘
```

[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/ip_address_functions/) <!--hide-->
@@ -5,6 +5,66 @@ toc_title: Работа с контейнерами map

# Функции для работы с контейнерами map {#functions-for-working-with-tuple-maps}

## map {#function-map}

Преобразовывает пары `ключ:значение` в тип данных [Map(key, value)](../../sql-reference/data-types/map.md).

**Синтаксис**

``` sql
map(key1, value1[, key2, value2, ...])
```

**Параметры**

- `key` — ключ. [String](../../sql-reference/data-types/string.md) или [Integer](../../sql-reference/data-types/int-uint.md).
- `value` — значение. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) или [Array](../../sql-reference/data-types/array.md).

**Возвращаемое значение**

- Структура данных в виде пар `ключ:значение`.

Тип: [Map(key, value)](../../sql-reference/data-types/map.md).

**Примеры**

Запрос:

``` sql
SELECT map('key1', number, 'key2', number * 2) FROM numbers(3);
```

Результат:

``` text
┌─map('key1', number, 'key2', multiply(number, 2))─┐
│ {'key1':0,'key2':0} │
│ {'key1':1,'key2':2} │
│ {'key1':2,'key2':4} │
└──────────────────────────────────────────────────┘
```

Запрос:

``` sql
CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = MergeTree() ORDER BY a;
INSERT INTO table_map SELECT map('key1', number, 'key2', number * 2) FROM numbers(3);
SELECT a['key2'] FROM table_map;
```

Результат:

``` text
┌─arrayElement(a, 'key2')─┐
│ 0 │
│ 2 │
│ 4 │
└─────────────────────────┘
```

**См. также**

- тип данных [Map(key, value)](../../sql-reference/data-types/map.md)

## mapAdd {#function-mapadd}

Собирает все ключи и суммирует соответствующие значения.
@ -115,6 +115,168 @@ SELECT topLevelDomain('svn+ssh://www.some.svn-hosting.com:80/repo/trunk')
|
||||
|
||||
Например, `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
|
||||
|
||||
### cutToFirstSignificantSubdomainCustom {#cuttofirstsignificantsubdomaincustom}
|
||||
|
||||
Возвращает часть домена, включающую поддомены верхнего уровня до первого существенного поддомена. Принимает имя пользовательского [списка доменов верхнего уровня](https://ru.wikipedia.org/wiki/Список_доменов_верхнего_уровня).
|
||||
|
||||
Полезно, если требуется актуальный список доменов верхнего уровня или если есть пользовательский.
|
||||
|
||||
Пример конфигурации:
|
||||
|
||||
```xml
|
||||
<!-- <top_level_domains_path>/var/lib/clickhouse/top_level_domains/</top_level_domains_path> -->
|
||||
<top_level_domains_lists>
|
||||
<!-- https://publicsuffix.org/list/public_suffix_list.dat -->
|
||||
    <public_suffix_list>public_suffix_list.dat</public_suffix_list>
    <!-- NOTE: path is under top_level_domains_path -->
</top_level_domains_lists>
```

**Syntax**

```sql
cutToFirstSignificantSubdomainCustom(URL, TLD)
```

**Parameters**

- `URL` — URL. [String](../../sql-reference/data-types/string.md).
- `TLD` — the name of the custom list of top-level domains. [String](../../sql-reference/data-types/string.md).

**Returned value**

- The part of the domain that includes the top-level subdomains up to the first significant subdomain.

Type: [String](../../sql-reference/data-types/string.md).

**Example**

Query:

```sql
SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list');
```

Result:

```text
┌─cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐
│ foo.there-is-no-such-domain                                                                   │
└───────────────────────────────────────────────────────────────────────────────────────────────┘
```

**See Also**

- [firstSignificantSubdomain](#firstsignificantsubdomain).

### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww}

Returns the part of the domain that includes the top-level subdomains up to the first significant subdomain, without stripping `www`. Accepts the name of a custom list of top-level domains.

Useful when you need a fresh list of top-level domains or when you have a custom one.

Configuration example:

```xml
<!-- <top_level_domains_path>/var/lib/clickhouse/top_level_domains/</top_level_domains_path> -->
<top_level_domains_lists>
    <!-- https://publicsuffix.org/list/public_suffix_list.dat -->
    <public_suffix_list>public_suffix_list.dat</public_suffix_list>
    <!-- NOTE: path is under top_level_domains_path -->
</top_level_domains_lists>
```

**Syntax**

```sql
cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD)
```

**Parameters**

- `URL` — URL. [String](../../sql-reference/data-types/string.md).
- `TLD` — the name of the custom list of top-level domains. [String](../../sql-reference/data-types/string.md).

**Returned value**

- The part of the domain that includes the top-level subdomains up to the first significant subdomain, with `www` kept.

Type: [String](../../sql-reference/data-types/string.md).

**Example**

Query:

```sql
SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list');
```

Result:

```text
┌─cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')─┐
│ www.foo                                                                      │
└──────────────────────────────────────────────────────────────────────────────┘
```

**See Also**

- [firstSignificantSubdomain](#firstsignificantsubdomain).

### firstSignificantSubdomainCustom {#firstsignificantsubdomaincustom}

Returns the first significant subdomain. Accepts the name of a custom list of top-level domains.

Useful when you need a fresh list of top-level domains or when you have a custom one.

Configuration example:

```xml
<!-- <top_level_domains_path>/var/lib/clickhouse/top_level_domains/</top_level_domains_path> -->
<top_level_domains_lists>
    <!-- https://publicsuffix.org/list/public_suffix_list.dat -->
    <public_suffix_list>public_suffix_list.dat</public_suffix_list>
    <!-- NOTE: path is under top_level_domains_path -->
</top_level_domains_lists>
```

**Syntax**

```sql
firstSignificantSubdomainCustom(URL, TLD)
```

**Parameters**

- `URL` — URL. [String](../../sql-reference/data-types/string.md).
- `TLD` — the name of the custom list of top-level domains. [String](../../sql-reference/data-types/string.md).

**Returned value**

- The first significant subdomain.

Type: [String](../../sql-reference/data-types/string.md).

**Example**

Query:

```sql
SELECT firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list');
```

Result:

```text
┌─firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐
│ foo                                                                                      │
└──────────────────────────────────────────────────────────────────────────────────────────┘
```

**See Also**

- [firstSignificantSubdomain](#firstsignificantsubdomain).

### port(URL[, default_port = 0]) {#port}

Returns the port, or `default_port` if the URL contains no port (or if an invalid URL is passed).
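
The diff is truncated here, so the usual **Example** block is missing. As a minimal sketch of how `port` is typically called (the URL and the expected result `80` are illustrative assumptions, not taken from the original page):

```sql
SELECT port('http://paul@www.example.com:80/');
```

This query would be expected to return `80`; for a URL without a port, the second argument (default `0`) would be returned instead.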
|
||||
|
@ -59,7 +59,6 @@
|
||||
#include <Disks/registerDisks.h>
|
||||
#include <Common/Config/ConfigReloader.h>
|
||||
#include <Server/HTTPHandlerFactory.h>
|
||||
#include <Server/TestKeeperTCPHandlerFactory.h>
|
||||
#include "MetricsTransmitter.h"
|
||||
#include <Common/StatusFile.h>
|
||||
#include <Server/TCPHandlerFactory.h>
|
||||
@ -94,6 +93,9 @@
|
||||
# include <Server/GRPCServer.h>
|
||||
#endif
|
||||
|
||||
#if USE_NURAFT
|
||||
# include <Server/NuKeeperTCPHandlerFactory.h>
|
||||
#endif
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
@ -842,23 +844,33 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
listen_try = true;
|
||||
}
|
||||
|
||||
for (const auto & listen_host : listen_hosts)
|
||||
if (config().has("test_keeper_server"))
|
||||
{
|
||||
/// TCP TestKeeper
|
||||
const char * port_name = "test_keeper_server.tcp_port";
|
||||
createServer(listen_host, port_name, listen_try, [&](UInt16 port)
|
||||
#if USE_NURAFT
|
||||
/// Initialize test keeper RAFT. Do nothing if no nu_keeper_server in config.
|
||||
global_context->initializeNuKeeperStorageDispatcher();
|
||||
for (const auto & listen_host : listen_hosts)
|
||||
{
|
||||
Poco::Net::ServerSocket socket;
|
||||
auto address = socketBindListen(socket, listen_host, port);
|
||||
socket.setReceiveTimeout(settings.receive_timeout);
|
||||
socket.setSendTimeout(settings.send_timeout);
|
||||
servers_to_start_before_tables->emplace_back(
|
||||
port_name,
|
||||
std::make_unique<Poco::Net::TCPServer>(
|
||||
new TestKeeperTCPHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
|
||||
/// TCP NuKeeper
|
||||
const char * port_name = "test_keeper_server.tcp_port";
|
||||
createServer(listen_host, port_name, listen_try, [&](UInt16 port)
|
||||
{
|
||||
Poco::Net::ServerSocket socket;
|
||||
auto address = socketBindListen(socket, listen_host, port);
|
||||
socket.setReceiveTimeout(settings.receive_timeout);
|
||||
socket.setSendTimeout(settings.send_timeout);
|
||||
servers_to_start_before_tables->emplace_back(
|
||||
port_name,
|
||||
std::make_unique<Poco::Net::TCPServer>(
|
||||
new NuKeeperTCPHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
|
||||
|
||||
LOG_INFO(log, "Listening for connections to NuKeeper (tcp): {}", address.toString());
|
||||
});
|
||||
}
|
||||
#else
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination.");
|
||||
#endif
|
||||
|
||||
LOG_INFO(log, "Listening for connections to fake zookeeper (tcp): {}", address.toString());
|
||||
});
|
||||
}
|
||||
|
||||
for (auto & server : *servers_to_start_before_tables)
|
||||
@ -898,6 +910,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections);
|
||||
else
|
||||
LOG_INFO(log, "Closed connections to servers for tables.");
|
||||
|
||||
global_context->shutdownNuKeeperStorageDispatcher();
|
||||
}
|
||||
|
||||
/** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available.
|
||||
|
@ -421,9 +421,15 @@
|
||||
<!-- Comma-separated list of prefixes for user-defined settings. -->
|
||||
<custom_settings_prefixes></custom_settings_prefixes>
|
||||
|
||||
<!-- System profile of settings. This settings are used by internal processes (Buffer storage, Distributed DDL worker and so on). -->
|
||||
<!-- System profile of settings. These settings are used by internal processes (Distributed DDL worker and so on). -->
|
||||
<!-- <system_profile>default</system_profile> -->
|
||||
|
||||
<!-- Buffer profile of settings.
|
||||
These settings are used by Buffer storage to flush data to the underlying table.
|
||||
Default: used from system_profile directive.
|
||||
-->
|
||||
<!-- <buffer_profile>default</buffer_profile> -->
|
||||
|
||||
<!-- Default database. -->
|
||||
<default_database>default</default_database>
|
||||
|
||||
|
@ -217,6 +217,7 @@ namespace
|
||||
/// Write the file.
|
||||
WriteBufferFromFile out{tmp_file_path.string()};
|
||||
out.write(file_contents.data(), file_contents.size());
|
||||
out.close();
|
||||
|
||||
/// Rename.
|
||||
std::filesystem::rename(tmp_file_path, file_path);
|
||||
@ -274,6 +275,7 @@ namespace
|
||||
writeStringBinary(name, out);
|
||||
writeUUIDText(id, out);
|
||||
}
|
||||
out.close();
|
||||
}
|
||||
|
||||
|
||||
|
@ -34,6 +34,14 @@ void registerAggregateFunctionsAny(AggregateFunctionFactory & factory)
|
||||
factory.registerFunction("any", { createAggregateFunctionAny, properties });
|
||||
factory.registerFunction("anyLast", { createAggregateFunctionAnyLast, properties });
|
||||
factory.registerFunction("anyHeavy", { createAggregateFunctionAnyHeavy, properties });
|
||||
|
||||
// Synonyms for use as window functions.
|
||||
factory.registerFunction("first_value",
|
||||
{ createAggregateFunctionAny, properties },
|
||||
AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction("last_value",
|
||||
{ createAggregateFunctionAnyLast, properties },
|
||||
AggregateFunctionFactory::CaseInsensitive);
|
||||
}
|
||||
|
||||
}
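
For context on the registration above: `first_value` and `last_value` become case-insensitive aliases of `any` and `anyLast` so they can be used with window syntax. A hypothetical usage sketch (the `events` table and its columns are invented, and window-function support is assumed to be enabled in this build, e.g. via the experimental setting of this release):

```sql
-- Illustrative only: the table and columns below are hypothetical.
SELECT
    id,
    first_value(value) OVER (PARTITION BY id ORDER BY ts) AS first_seen,
    last_value(value)  OVER (PARTITION BY id ORDER BY ts) AS last_seen
FROM events;
```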
|
||||
|
@ -40,7 +40,7 @@ struct MovingData
|
||||
Array value; /// Prefix sums.
|
||||
T sum = 0;
|
||||
|
||||
void add(T val, Arena * arena)
|
||||
void NO_SANITIZE_UNDEFINED add(T val, Arena * arena)
|
||||
{
|
||||
sum += val;
|
||||
value.push_back(sum, arena);
|
||||
@ -120,7 +120,7 @@ public:
|
||||
this->data(place).add(static_cast<ResultT>(value), arena);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & cur_elems = this->data(place);
|
||||
auto & rhs_elems = this->data(rhs);
|
||||
|
@ -26,6 +26,7 @@ class ReadBuffer;
|
||||
class WriteBuffer;
|
||||
class IColumn;
|
||||
class IDataType;
|
||||
class IWindowFunction;
|
||||
|
||||
using DataTypePtr = std::shared_ptr<const IDataType>;
|
||||
using DataTypes = std::vector<DataTypePtr>;
|
||||
@ -215,6 +216,20 @@ public:
|
||||
const DataTypes & getArgumentTypes() const { return argument_types; }
|
||||
const Array & getParameters() const { return parameters; }
|
||||
|
||||
// Any aggregate function can be calculated over a window, but there are some
|
||||
// window functions such as rank() that require a different interface, e.g.
|
||||
// because they don't respect the window frame, or need to be notified when
|
||||
// a new peer group starts. They pretend to be normal aggregate functions,
|
||||
// but will fail if you actually try to use them in Aggregator. The
|
||||
// WindowTransform recognizes these functions and handles them differently.
|
||||
// We could have a separate factory for window functions, and make all
|
||||
// aggregate functions implement IWindowFunction interface and so on. This
|
||||
// would be more logically correct, but more complex. We only have a handful
|
||||
// of true window functions, so this hack-ish interface suffices.
|
||||
virtual IWindowFunction * asWindowFunction() { return nullptr; }
|
||||
virtual const IWindowFunction * asWindowFunction() const
|
||||
{ return const_cast<IAggregateFunction *>(this)->asWindowFunction(); }
|
||||
|
||||
protected:
|
||||
DataTypes argument_types;
|
||||
Array parameters;
|
||||
|
@ -58,6 +58,8 @@ void registerAggregateFunctionCombinatorOrFill(AggregateFunctionCombinatorFactor
|
||||
void registerAggregateFunctionCombinatorResample(AggregateFunctionCombinatorFactory &);
|
||||
void registerAggregateFunctionCombinatorDistinct(AggregateFunctionCombinatorFactory &);
|
||||
|
||||
void registerWindowFunctions(AggregateFunctionFactory & factory);
|
||||
|
||||
|
||||
void registerAggregateFunctions()
|
||||
{
|
||||
@ -103,6 +105,8 @@ void registerAggregateFunctions()
|
||||
registerAggregateFunctionMannWhitney(factory);
|
||||
registerAggregateFunctionWelchTTest(factory);
|
||||
registerAggregateFunctionStudentTTest(factory);
|
||||
|
||||
registerWindowFunctions(factory);
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -60,6 +60,7 @@ add_subdirectory (Processors)
|
||||
add_subdirectory (Formats)
|
||||
add_subdirectory (Compression)
|
||||
add_subdirectory (Server)
|
||||
add_subdirectory (Coordination)
|
||||
|
||||
|
||||
set(dbms_headers)
|
||||
@ -192,6 +193,10 @@ add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Alg
|
||||
add_object_library(clickhouse_processors_queryplan Processors/QueryPlan)
|
||||
add_object_library(clickhouse_processors_queryplan_optimizations Processors/QueryPlan/Optimizations)
|
||||
|
||||
if (USE_NURAFT)
|
||||
add_object_library(clickhouse_coordination Coordination)
|
||||
endif()
|
||||
|
||||
set (DBMS_COMMON_LIBRARIES)
|
||||
# libgcc_s does not provide an implementation of an atomics library. Instead,
|
||||
# GCC’s libatomic library can be used to supply these when using libgcc_s.
|
||||
@ -314,7 +319,7 @@ if (USE_KRB5)
|
||||
endif()
|
||||
|
||||
if (USE_NURAFT)
|
||||
dbms_target_link_libraries(PRIVATE ${NURAFT_LIBRARY})
|
||||
dbms_target_link_libraries(PUBLIC ${NURAFT_LIBRARY})
|
||||
endif()
|
||||
|
||||
if(RE2_INCLUDE_DIR)
|
||||
|
@ -756,7 +756,11 @@ std::optional<UInt64> Connection::checkPacket(size_t timeout_microseconds)
|
||||
Packet Connection::receivePacket(std::function<void(Poco::Net::Socket &)> async_callback)
|
||||
{
|
||||
in->setAsyncCallback(std::move(async_callback));
|
||||
SCOPE_EXIT(in->setAsyncCallback({}));
|
||||
SCOPE_EXIT({
|
||||
/// disconnect() will reset "in".
|
||||
if (in)
|
||||
in->setAsyncCallback({});
|
||||
});
|
||||
|
||||
try
|
||||
{
|
||||
|
@ -455,7 +455,14 @@ template <>
|
||||
struct LowCardinalityKeys<false> {};
|
||||
|
||||
/// For the case when all keys are of fixed length, and they fit in N (for example, 128) bits.
|
||||
template <typename Value, typename Key, typename Mapped, bool has_nullable_keys_ = false, bool has_low_cardinality_ = false, bool use_cache = true, bool need_offset = false>
|
||||
template <
|
||||
typename Value,
|
||||
typename Key,
|
||||
typename Mapped,
|
||||
bool has_nullable_keys_ = false,
|
||||
bool has_low_cardinality_ = false,
|
||||
bool use_cache = true,
|
||||
bool need_offset = false>
|
||||
struct HashMethodKeysFixed
|
||||
: private columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>
|
||||
, public columns_hashing_impl::HashMethodBase<HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache, need_offset>, Value, Mapped, use_cache, need_offset>
|
||||
@ -471,6 +478,12 @@ struct HashMethodKeysFixed
|
||||
Sizes key_sizes;
|
||||
size_t keys_size;
|
||||
|
||||
/// SSSE3 shuffle method can be used. Shuffle masks will be calculated and stored here.
|
||||
#if defined(__SSSE3__) && !defined(MEMORY_SANITIZER)
|
||||
std::unique_ptr<uint8_t[]> masks;
|
||||
std::unique_ptr<const char*[]> columns_data;
|
||||
#endif
|
||||
|
||||
HashMethodKeysFixed(const ColumnRawPtrs & key_columns, const Sizes & key_sizes_, const HashMethodContextPtr &)
|
||||
: Base(key_columns), key_sizes(std::move(key_sizes_)), keys_size(key_columns.size())
|
||||
{
|
||||
@ -491,6 +504,58 @@ struct HashMethodKeysFixed
|
||||
low_cardinality_keys.nested_columns[i] = key_columns[i];
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__SSSE3__) && !defined(MEMORY_SANITIZER)
|
||||
if constexpr (!has_low_cardinality && !has_nullable_keys && sizeof(Key) <= 16)
|
||||
{
|
||||
/** The task is to "pack" multiple fixed-size fields into single larger Key.
|
||||
* Example: pack UInt8, UInt32, UInt16, UInt64 into UInt128 key:
|
||||
* [- ---- -- -------- -] - the resulting uint128 key
|
||||
* ^ ^ ^ ^ ^
|
||||
* u8 u32 u16 u64 zero
|
||||
*
|
||||
* We can do it with the help of SSSE3 shuffle instruction.
|
||||
*
|
||||
* There will be a mask for every GROUP BY element (keys_size masks in total).
|
||||
* Every mask has 16 bytes but only sizeof(Key) bytes are used (other we don't care).
|
||||
*
|
||||
* Every byte in the mask has the following meaning:
|
||||
* - if it is 0..15, take the element at this index from source register and place here in the result;
|
||||
* - if it is 0xFF - set the elemend in the result to zero.
|
||||
*
|
||||
* Example:
|
||||
* We want to copy UInt32 to offset 1 in the destination and set other bytes in the destination as zero.
|
||||
* The corresponding mask will be: FF, 0, 1, 2, 3, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF
|
||||
*
|
||||
* The max size of destination is 16 bytes, because we cannot process more with SSSE3.
|
||||
*
|
||||
* The method is disabled under MSan, because it's allowed
|
||||
* to load into SSE register and process up to 15 bytes of uninitialized memory in columns padding.
|
||||
* We don't use this uninitialized memory but MSan cannot look "into" the shuffle instruction.
|
||||
*
|
||||
* 16-bytes masks can be placed overlapping, only first sizeof(Key) bytes are relevant in each mask.
|
||||
* We initialize them to 0xFF and then set the needed elements.
|
||||
*/
|
||||
size_t total_masks_size = sizeof(Key) * keys_size + (16 - sizeof(Key));
|
||||
masks.reset(new uint8_t[total_masks_size]);
|
||||
memset(masks.get(), 0xFF, total_masks_size);
|
||||
|
||||
size_t offset = 0;
|
||||
for (size_t i = 0; i < keys_size; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < key_sizes[i]; ++j)
|
||||
{
|
||||
masks[i * sizeof(Key) + offset] = j;
|
||||
++offset;
|
||||
}
|
||||
}
|
||||
|
||||
columns_data.reset(new const char*[keys_size]);
|
||||
|
||||
for (size_t i = 0; i < keys_size; ++i)
|
||||
columns_data[i] = Base::getActualColumns()[i]->getRawData().data;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const
|
||||
@ -506,6 +571,10 @@ struct HashMethodKeysFixed
|
||||
return packFixed<Key, true>(row, keys_size, low_cardinality_keys.nested_columns, key_sizes,
|
||||
&low_cardinality_keys.positions, &low_cardinality_keys.position_sizes);
|
||||
|
||||
#if defined(__SSSE3__) && !defined(MEMORY_SANITIZER)
|
||||
if constexpr (!has_low_cardinality && !has_nullable_keys && sizeof(Key) <= 16)
|
||||
return packFixedShuffle<Key>(columns_data.get(), keys_size, key_sizes.data(), row, masks.get());
|
||||
#endif
|
||||
return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes);
|
||||
}
|
||||
}
|
||||
|
@ -534,6 +534,7 @@
|
||||
M(565, TOO_MANY_PARTITIONS) \
|
||||
M(566, CANNOT_RMDIR) \
|
||||
M(567, DUPLICATED_PART_UUIDS) \
|
||||
M(568, RAFT_ERROR) \
|
||||
\
|
||||
M(999, KEEPER_EXCEPTION) \
|
||||
M(1000, POCO_EXCEPTION) \
|
||||
|
@ -1,139 +0,0 @@
|
||||
#include <Common/ZooKeeper/TestKeeperStorageDispatcher.h>
|
||||
#include <Common/setThreadName.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int TIMEOUT_EXCEEDED;
|
||||
}
|
||||
|
||||
}
|
||||
namespace zkutil
|
||||
{
|
||||
|
||||
void TestKeeperStorageDispatcher::processingThread()
|
||||
{
|
||||
setThreadName("TestKeeperSProc");
|
||||
|
||||
while (!shutdown)
|
||||
{
|
||||
RequestInfo info;
|
||||
|
||||
UInt64 max_wait = UInt64(operation_timeout.totalMilliseconds());
|
||||
|
||||
if (requests_queue.tryPop(info, max_wait))
|
||||
{
|
||||
if (shutdown)
|
||||
break;
|
||||
|
||||
try
|
||||
{
|
||||
auto responses = storage.processRequest(info.request, info.session_id);
|
||||
for (const auto & response_for_session : responses)
|
||||
setResponse(response_for_session.session_id, response_for_session.response);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TestKeeperStorageDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response)
|
||||
{
|
||||
std::lock_guard lock(session_to_response_callback_mutex);
|
||||
auto session_writer = session_to_response_callback.find(session_id);
|
||||
if (session_writer == session_to_response_callback.end())
|
||||
return;
|
||||
|
||||
session_writer->second(response);
|
||||
/// Session closed, no more writes
|
||||
if (response->xid != Coordination::WATCH_XID && response->getOpNum() == Coordination::OpNum::Close)
|
||||
session_to_response_callback.erase(session_writer);
|
||||
}
|
||||
|
||||
void TestKeeperStorageDispatcher::finalize()
|
||||
{
|
||||
{
|
||||
std::lock_guard lock(push_request_mutex);
|
||||
|
||||
if (shutdown)
|
||||
return;
|
||||
|
||||
shutdown = true;
|
||||
|
||||
if (processing_thread.joinable())
|
||||
processing_thread.join();
|
||||
}
|
||||
|
||||
RequestInfo info;
|
||||
TestKeeperStorage::RequestsForSessions expired_requests;
|
||||
while (requests_queue.tryPop(info))
|
||||
expired_requests.push_back(TestKeeperStorage::RequestForSession{info.session_id, info.request});
|
||||
|
||||
auto expired_responses = storage.finalize(expired_requests);
|
||||
|
||||
for (const auto & response_for_session : expired_responses)
|
||||
setResponse(response_for_session.session_id, response_for_session.response);
|
||||
}
|
||||
|
||||
void TestKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id)
|
||||
{
|
||||
|
||||
{
|
||||
std::lock_guard lock(session_to_response_callback_mutex);
|
||||
if (session_to_response_callback.count(session_id) == 0)
|
||||
throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown session id {}", session_id);
|
||||
}
|
||||
|
||||
RequestInfo request_info;
|
||||
request_info.time = clock::now();
|
||||
request_info.request = request;
|
||||
request_info.session_id = session_id;
|
||||
|
||||
std::lock_guard lock(push_request_mutex);
|
||||
/// Put close requests without timeouts
|
||||
if (request->getOpNum() == Coordination::OpNum::Close)
|
||||
requests_queue.push(std::move(request_info));
|
||||
else if (!requests_queue.tryPush(std::move(request_info), operation_timeout.totalMilliseconds()))
|
||||
throw Exception("Cannot push request to queue within operation timeout", ErrorCodes::TIMEOUT_EXCEEDED);
|
||||
}
|
||||
|
||||
TestKeeperStorageDispatcher::TestKeeperStorageDispatcher()
|
||||
{
|
||||
processing_thread = ThreadFromGlobalPool([this] { processingThread(); });
|
||||
}
|
||||
|
||||
TestKeeperStorageDispatcher::~TestKeeperStorageDispatcher()
|
||||
{
|
||||
try
|
||||
{
|
||||
finalize();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
|
||||
void TestKeeperStorageDispatcher::registerSession(int64_t session_id, ZooKeeperResponseCallback callback)
|
||||
{
|
||||
std::lock_guard lock(session_to_response_callback_mutex);
|
||||
if (!session_to_response_callback.try_emplace(session_id, callback).second)
|
||||
throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session with id {} already registered in dispatcher", session_id);
|
||||
}
|
||||
|
||||
void TestKeeperStorageDispatcher::finishSession(int64_t session_id)
|
||||
{
|
||||
std::lock_guard lock(session_to_response_callback_mutex);
|
||||
auto session_it = session_to_response_callback.find(session_id);
|
||||
if (session_it != session_to_response_callback.end())
|
||||
session_to_response_callback.erase(session_it);
|
||||
}
|
||||
|
||||
}
|
@ -1,60 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <Common/ConcurrentBoundedQueue.h>
|
||||
#include <Common/ZooKeeper/TestKeeperStorage.h>
|
||||
#include <functional>
|
||||
|
||||
namespace zkutil
|
||||
{
|
||||
|
||||
using ZooKeeperResponseCallback = std::function<void(const Coordination::ZooKeeperResponsePtr & response)>;
|
||||
|
||||
class TestKeeperStorageDispatcher
|
||||
{
|
||||
private:
|
||||
Poco::Timespan operation_timeout{0, Coordination::DEFAULT_OPERATION_TIMEOUT_MS * 1000};
|
||||
|
||||
using clock = std::chrono::steady_clock;
|
||||
|
||||
struct RequestInfo
|
||||
{
|
||||
Coordination::ZooKeeperRequestPtr request;
|
||||
clock::time_point time;
|
||||
int64_t session_id;
|
||||
};
|
||||
|
||||
std::mutex push_request_mutex;
|
||||
|
||||
using RequestsQueue = ConcurrentBoundedQueue<RequestInfo>;
|
||||
RequestsQueue requests_queue{1};
|
||||
std::atomic<bool> shutdown{false};
|
||||
using SessionToResponseCallback = std::unordered_map<int64_t, ZooKeeperResponseCallback>;
|
||||
|
||||
std::mutex session_to_response_callback_mutex;
|
||||
SessionToResponseCallback session_to_response_callback;
|
||||
|
||||
ThreadFromGlobalPool processing_thread;
|
||||
|
||||
TestKeeperStorage storage;
|
||||
|
||||
private:
|
||||
void processingThread();
|
||||
void finalize();
|
||||
void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response);
|
||||
|
||||
public:
|
||||
TestKeeperStorageDispatcher();
|
||||
~TestKeeperStorageDispatcher();
|
||||
|
||||
void putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id);
|
||||
int64_t getSessionID()
|
||||
{
|
||||
return storage.getSessionID();
|
||||
}
|
||||
void registerSession(int64_t session_id, ZooKeeperResponseCallback callback);
|
||||
/// Call if we don't need any responses for this session no more (session was expired)
|
||||
void finishSession(int64_t session_id);
|
||||
};
|
||||
|
||||
}
|
@ -37,6 +37,26 @@ void ZooKeeperRequest::write(WriteBuffer & out) const
|
||||
out.next();
|
||||
}
|
||||
|
||||
void ZooKeeperSyncRequest::writeImpl(WriteBuffer & out) const
|
||||
{
|
||||
Coordination::write(path, out);
|
||||
}
|
||||
|
||||
void ZooKeeperSyncRequest::readImpl(ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(path, in);
|
||||
}
|
||||
|
||||
void ZooKeeperSyncResponse::readImpl(ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(path, in);
|
||||
}
|
||||
|
||||
void ZooKeeperSyncResponse::writeImpl(WriteBuffer & out) const
|
||||
{
|
||||
Coordination::write(path, out);
|
||||
}
|
||||
|
||||
void ZooKeeperWatchResponse::readImpl(ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(type, in);
|
||||
@ -51,6 +71,13 @@ void ZooKeeperWatchResponse::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(path, out);
|
||||
}
|
||||
|
||||
void ZooKeeperWatchResponse::write(WriteBuffer & out) const
|
||||
{
|
||||
if (error == Error::ZOK)
|
||||
ZooKeeperResponse::write(out);
|
||||
/// skip bad responses for watches
|
||||
}
|
||||
|
||||
void ZooKeeperAuthRequest::writeImpl(WriteBuffer & out) const
|
||||
{
|
||||
Coordination::write(type, out);
|
||||
@ -326,6 +353,12 @@ void ZooKeeperMultiRequest::readImpl(ReadBuffer & in)
|
||||
}
|
||||
}
|
||||
|
||||
bool ZooKeeperMultiRequest::isReadRequest() const
|
||||
{
|
||||
/// Possibly we can do better
|
||||
return false;
|
||||
}
|
||||
|
||||
void ZooKeeperMultiResponse::readImpl(ReadBuffer & in)
|
||||
{
|
||||
for (auto & response : responses)
|
||||
@ -410,6 +443,7 @@ void ZooKeeperMultiResponse::writeImpl(WriteBuffer & out) const
|
||||
}
|
||||
|
||||
ZooKeeperResponsePtr ZooKeeperHeartbeatRequest::makeResponse() const { return std::make_shared<ZooKeeperHeartbeatResponse>(); }
|
||||
ZooKeeperResponsePtr ZooKeeperSyncRequest::makeResponse() const { return std::make_shared<ZooKeeperSyncResponse>(); }
|
||||
ZooKeeperResponsePtr ZooKeeperAuthRequest::makeResponse() const { return std::make_shared<ZooKeeperAuthResponse>(); }
|
||||
ZooKeeperResponsePtr ZooKeeperCreateRequest::makeResponse() const { return std::make_shared<ZooKeeperCreateResponse>(); }
|
||||
ZooKeeperResponsePtr ZooKeeperRemoveRequest::makeResponse() const { return std::make_shared<ZooKeeperRemoveResponse>(); }
|
||||
@ -465,6 +499,7 @@ void registerZooKeeperRequest(ZooKeeperRequestFactory & factory)
|
||||
ZooKeeperRequestFactory::ZooKeeperRequestFactory()
|
||||
{
|
||||
registerZooKeeperRequest<OpNum::Heartbeat, ZooKeeperHeartbeatRequest>(*this);
|
||||
registerZooKeeperRequest<OpNum::Sync, ZooKeeperSyncRequest>(*this);
|
||||
registerZooKeeperRequest<OpNum::Auth, ZooKeeperAuthRequest>(*this);
|
||||
registerZooKeeperRequest<OpNum::Close, ZooKeeperCloseRequest>(*this);
|
||||
registerZooKeeperRequest<OpNum::Create, ZooKeeperCreateRequest>(*this);
|
||||
|
@ -30,7 +30,7 @@ struct ZooKeeperResponse : virtual Response
|
||||
virtual ~ZooKeeperResponse() override = default;
|
||||
virtual void readImpl(ReadBuffer &) = 0;
|
||||
virtual void writeImpl(WriteBuffer &) const = 0;
|
||||
void write(WriteBuffer & out) const;
|
||||
virtual void write(WriteBuffer & out) const;
|
||||
virtual OpNum getOpNum() const = 0;
|
||||
};
|
||||
|
||||
@ -60,6 +60,7 @@ struct ZooKeeperRequest : virtual Request
|
||||
static std::shared_ptr<ZooKeeperRequest> read(ReadBuffer & in);
|
||||
|
||||
virtual ZooKeeperResponsePtr makeResponse() const = 0;
|
||||
virtual bool isReadRequest() const = 0;
|
||||
};
|
||||
|
||||
using ZooKeeperRequestPtr = std::shared_ptr<ZooKeeperRequest>;
|
||||
@ -71,6 +72,26 @@ struct ZooKeeperHeartbeatRequest final : ZooKeeperRequest
|
||||
void writeImpl(WriteBuffer &) const override {}
|
||||
void readImpl(ReadBuffer &) override {}
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
bool isReadRequest() const override { return false; }
|
||||
};
|
||||
|
||||
struct ZooKeeperSyncRequest final : ZooKeeperRequest
|
||||
{
|
||||
String path;
|
||||
String getPath() const override { return path; }
|
||||
OpNum getOpNum() const override { return OpNum::Sync; }
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
bool isReadRequest() const override { return false; }
|
||||
};
|
||||
|
||||
struct ZooKeeperSyncResponse final : ZooKeeperResponse
|
||||
{
|
||||
String path;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
OpNum getOpNum() const override { return OpNum::Sync; }
|
||||
};
|
||||
|
||||
struct ZooKeeperHeartbeatResponse final : ZooKeeperResponse
|
||||
@ -86,6 +107,8 @@ struct ZooKeeperWatchResponse final : WatchResponse, ZooKeeperResponse
|
||||
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
|
||||
void write(WriteBuffer & out) const override;
|
||||
|
||||
OpNum getOpNum() const override
|
||||
{
|
||||
throw Exception("OpNum for watch response doesn't exist", Error::ZRUNTIMEINCONSISTENCY);
|
||||
@ -104,6 +127,7 @@ struct ZooKeeperAuthRequest final : ZooKeeperRequest
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
bool isReadRequest() const override { return false; }
|
||||
};
|
||||
|
||||
struct ZooKeeperAuthResponse final : ZooKeeperResponse
|
||||
@ -122,6 +146,7 @@ struct ZooKeeperCloseRequest final : ZooKeeperRequest
|
||||
void readImpl(ReadBuffer &) override {}
|
||||
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
bool isReadRequest() const override { return false; }
|
||||
};
|
||||
|
||||
struct ZooKeeperCloseResponse final : ZooKeeperResponse
|
||||
@ -146,6 +171,7 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
bool isReadRequest() const override { return false; }
|
||||
};
|
||||
|
||||
struct ZooKeeperCreateResponse final : CreateResponse, ZooKeeperResponse
|
||||
@ -167,6 +193,7 @@ struct ZooKeeperRemoveRequest final : RemoveRequest, ZooKeeperRequest
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
bool isReadRequest() const override { return false; }
|
||||
};
|
||||
|
||||
struct ZooKeeperRemoveResponse final : RemoveResponse, ZooKeeperResponse
|
||||
@ -183,6 +210,7 @@ struct ZooKeeperExistsRequest final : ExistsRequest, ZooKeeperRequest
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
bool isReadRequest() const override { return !has_watch; }
|
||||
};
|
||||
|
||||
struct ZooKeeperExistsResponse final : ExistsResponse, ZooKeeperResponse
|
||||
@ -199,6 +227,7 @@ struct ZooKeeperGetRequest final : GetRequest, ZooKeeperRequest
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
bool isReadRequest() const override { return !has_watch; }
|
||||
};
|
||||
|
||||
struct ZooKeeperGetResponse final : GetResponse, ZooKeeperResponse
|
||||
@ -217,6 +246,7 @@ struct ZooKeeperSetRequest final : SetRequest, ZooKeeperRequest
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
bool isReadRequest() const override { return false; }
|
||||
};
|
||||
|
||||
struct ZooKeeperSetResponse final : SetResponse, ZooKeeperResponse
|
||||
@ -232,6 +262,7 @@ struct ZooKeeperListRequest : ListRequest, ZooKeeperRequest
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
bool isReadRequest() const override { return !has_watch; }
|
||||
};
|
||||
|
||||
struct ZooKeeperSimpleListRequest final : ZooKeeperListRequest
|
||||
@ -261,6 +292,7 @@ struct ZooKeeperCheckRequest final : CheckRequest, ZooKeeperRequest
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
bool isReadRequest() const override { return !has_watch; }
|
||||
};
|
||||
|
||||
struct ZooKeeperCheckResponse final : CheckResponse, ZooKeeperResponse
|
||||
@ -290,6 +322,7 @@ struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
bool isReadRequest() const override;
|
||||
};
|
||||
|
||||
struct ZooKeeperMultiResponse final : MultiResponse, ZooKeeperResponse
|
||||
|
@ -15,6 +15,7 @@ static const std::unordered_set<int32_t> VALID_OPERATIONS =
|
||||
static_cast<int32_t>(OpNum::Get),
|
||||
static_cast<int32_t>(OpNum::Set),
|
||||
static_cast<int32_t>(OpNum::SimpleList),
|
||||
static_cast<int32_t>(OpNum::Sync),
|
||||
static_cast<int32_t>(OpNum::Heartbeat),
|
||||
static_cast<int32_t>(OpNum::List),
|
||||
static_cast<int32_t>(OpNum::Check),
|
||||
@ -48,6 +49,8 @@ std::string toString(OpNum op_num)
|
||||
return "Check";
|
||||
case OpNum::Multi:
|
||||
return "Multi";
|
||||
case OpNum::Sync:
|
||||
return "Sync";
|
||||
case OpNum::Heartbeat:
|
||||
return "Heartbeat";
|
||||
case OpNum::Auth:
|
||||
|
@ -24,6 +24,7 @@ enum class OpNum : int32_t
|
||||
Get = 4,
|
||||
Set = 5,
|
||||
SimpleList = 8,
|
||||
Sync = 9,
|
||||
Heartbeat = 11,
|
||||
List = 12,
|
||||
Check = 13,
|
||||
|
@ -3,6 +3,13 @@
|
||||
namespace Coordination
|
||||
{
|
||||
|
||||
|
||||
void write(size_t x, WriteBuffer & out)
|
||||
{
|
||||
x = __builtin_bswap64(x);
|
||||
writeBinary(x, out);
|
||||
}
|
||||
|
||||
void write(int64_t x, WriteBuffer & out)
|
||||
{
|
||||
x = __builtin_bswap64(x);
|
||||
@ -57,6 +64,12 @@ void write(const Error & x, WriteBuffer & out)
|
||||
write(static_cast<int32_t>(x), out);
|
||||
}
|
||||
|
||||
void read(size_t & x, ReadBuffer & in)
|
||||
{
|
||||
readBinary(x, in);
|
||||
x = __builtin_bswap64(x);
|
||||
}
|
||||
|
||||
void read(int64_t & x, ReadBuffer & in)
|
||||
{
|
||||
readBinary(x, in);
|
||||
|
@ -13,6 +13,7 @@ namespace Coordination
|
||||
|
||||
using namespace DB;
|
||||
|
||||
void write(size_t x, WriteBuffer & out);
|
||||
void write(int64_t x, WriteBuffer & out);
|
||||
void write(int32_t x, WriteBuffer & out);
|
||||
void write(OpNum x, WriteBuffer & out);
|
||||
@ -37,6 +38,7 @@ void write(const std::vector<T> & arr, WriteBuffer & out)
|
||||
write(elem, out);
|
||||
}
|
||||
|
||||
void read(size_t & x, ReadBuffer & in);
|
||||
void read(int64_t & x, ReadBuffer & in);
|
||||
void read(int32_t & x, ReadBuffer & in);
|
||||
void read(OpNum & x, ReadBuffer & in);
|
||||
|
@ -50,6 +50,7 @@ struct Test
|
||||
{
|
||||
DB::WriteBufferFromFile wb(filename);
|
||||
wb.write(reinterpret_cast<const char *>(&store), sizeof(store));
|
||||
wb.close();
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -83,8 +83,6 @@ SRCS(
|
||||
WeakHash.cpp
|
||||
ZooKeeper/IKeeper.cpp
|
||||
ZooKeeper/TestKeeper.cpp
|
||||
ZooKeeper/TestKeeperStorage.cpp
|
||||
ZooKeeper/TestKeeperStorageDispatcher.cpp
|
||||
ZooKeeper/ZooKeeper.cpp
|
||||
ZooKeeper/ZooKeeperCommon.cpp
|
||||
ZooKeeper/ZooKeeperConstants.cpp
|
||||
|
0
src/Coordination/CMakeLists.txt
Normal file
0
src/Coordination/CMakeLists.txt
Normal file
35
src/Coordination/CoordinationSettings.cpp
Normal file
35
src/Coordination/CoordinationSettings.cpp
Normal file
@ -0,0 +1,35 @@
|
||||
#include <Coordination/CoordinationSettings.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int UNKNOWN_SETTING;
|
||||
}
|
||||
|
||||
IMPLEMENT_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)
|
||||
|
||||
void CoordinationSettings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config)
|
||||
{
|
||||
if (!config.has(config_elem))
|
||||
return;
|
||||
|
||||
Poco::Util::AbstractConfiguration::Keys config_keys;
|
||||
config.keys(config_elem, config_keys);
|
||||
|
||||
try
|
||||
{
|
||||
for (const String & key : config_keys)
|
||||
set(key, config.getString(config_elem + "." + key));
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
if (e.code() == ErrorCodes::UNKNOWN_SETTING)
|
||||
e.addMessage("in Coordination settings config");
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
43
src/Coordination/CoordinationSettings.h
Normal file
43
src/Coordination/CoordinationSettings.h
Normal file
@ -0,0 +1,43 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/Defines.h>
|
||||
#include <Core/BaseSettings.h>
|
||||
#include <Core/SettingsEnums.h>
|
||||
#include <Common/ZooKeeper/ZooKeeperConstants.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct Settings;
|
||||
|
||||
/** These settings represent fine tunes for internal details of Coordination storages
|
||||
* and should not be changed by the user without a reason.
|
||||
*/
|
||||
|
||||
#define LIST_OF_COORDINATION_SETTINGS(M) \
M(Milliseconds, session_timeout_ms, Coordination::DEFAULT_SESSION_TIMEOUT_MS, "Default client session timeout", 0) \
M(Milliseconds, operation_timeout_ms, Coordination::DEFAULT_OPERATION_TIMEOUT_MS, "Default client operation timeout", 0) \
M(Milliseconds, dead_session_check_period_ms, 500, "How often the leader will check sessions to consider them dead and remove them", 0) \
M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \
M(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too frequent leader elections)", 0) \
M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Upper bound of election timer (avoid too frequent leader elections)", 0) \
M(UInt64, reserved_log_items, 5000, "How many log items to store (don't remove during compaction)", 0) \
M(UInt64, snapshot_distance, 5000, "How many log items we have to collect to write new snapshot", 0) \
M(UInt64, max_stored_snapshots, 3, "How many snapshots we want to store", 0) \
M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \
M(Milliseconds, shutdown_timeout, 5000, "How long we will wait for RAFT shutdown", 0) \
M(Milliseconds, startup_timeout, 30000, "How long we will wait for RAFT to start", 0) \
M(LogsLevel, raft_logs_level, LogsLevel::information, "Log internal RAFT logs into main server log level. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0)
|
||||
|
||||
DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)
|
||||
|
||||
|
||||
struct CoordinationSettings : public BaseSettings<CoordinationSettingsTraits>
|
||||
{
|
||||
void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config);
|
||||
};
|
||||
|
||||
using CoordinationSettingsPtr = std::shared_ptr<CoordinationSettings>;
|
||||
|
||||
}
|
194
src/Coordination/InMemoryLogStore.cpp
Normal file
194
src/Coordination/InMemoryLogStore.cpp
Normal file
@ -0,0 +1,194 @@
|
||||
#include <Coordination/InMemoryLogStore.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
using namespace nuraft;
|
||||
ptr<log_entry> makeClone(const ptr<log_entry> & entry)
|
||||
{
|
||||
ptr<log_entry> clone = cs_new<log_entry>(entry->get_term(), buffer::clone(entry->get_buf()), entry->get_val_type());
|
||||
return clone;
|
||||
}
|
||||
}
|
||||
|
||||
InMemoryLogStore::InMemoryLogStore()
|
||||
: start_idx(1)
|
||||
{
|
||||
nuraft::ptr<nuraft::buffer> buf = nuraft::buffer::alloc(sizeof(size_t));
|
||||
logs[0] = nuraft::cs_new<nuraft::log_entry>(0, buf);
|
||||
}
|
||||
|
||||
size_t InMemoryLogStore::start_index() const
|
||||
{
|
||||
return start_idx;
|
||||
}
|
||||
|
||||
size_t InMemoryLogStore::next_slot() const
|
||||
{
|
||||
std::lock_guard<std::mutex> l(logs_lock);
|
||||
// Exclude the dummy entry.
|
||||
return start_idx + logs.size() - 1;
|
||||
}
|
||||
|
||||
nuraft::ptr<nuraft::log_entry> InMemoryLogStore::last_entry() const
|
||||
{
|
||||
size_t next_idx = next_slot();
|
||||
std::lock_guard<std::mutex> lock(logs_lock);
|
||||
auto entry = logs.find(next_idx - 1);
|
||||
if (entry == logs.end())
|
||||
entry = logs.find(0);
|
||||
|
||||
return makeClone(entry->second);
|
||||
}
|
||||
|
||||
size_t InMemoryLogStore::append(nuraft::ptr<nuraft::log_entry> & entry)
|
||||
{
|
||||
ptr<log_entry> clone = makeClone(entry);
|
||||
|
||||
std::lock_guard<std::mutex> l(logs_lock);
|
||||
size_t idx = start_idx + logs.size() - 1;
|
||||
logs[idx] = clone;
|
||||
return idx;
|
||||
}
|
||||
|
||||
void InMemoryLogStore::write_at(size_t index, nuraft::ptr<nuraft::log_entry> & entry)
|
||||
{
|
||||
nuraft::ptr<log_entry> clone = makeClone(entry);
|
||||
|
||||
// Discard all logs equal to or greater than `index`.
|
||||
std::lock_guard<std::mutex> l(logs_lock);
|
||||
auto itr = logs.lower_bound(index);
|
||||
while (itr != logs.end())
|
||||
itr = logs.erase(itr);
|
||||
logs[index] = clone;
|
||||
}
|
||||
|
||||
nuraft::ptr<std::vector<nuraft::ptr<nuraft::log_entry>>> InMemoryLogStore::log_entries(size_t start, size_t end)
|
||||
{
|
||||
nuraft::ptr<std::vector<nuraft::ptr<nuraft::log_entry>>> ret =
|
||||
nuraft::cs_new<std::vector<nuraft::ptr<nuraft::log_entry>>>();
|
||||
|
||||
ret->resize(end - start);
|
||||
size_t cc = 0;
|
||||
for (size_t ii = start; ii < end; ++ii)
|
||||
{
|
||||
nuraft::ptr<nuraft::log_entry> src = nullptr;
|
||||
{
|
||||
std::lock_guard<std::mutex> l(logs_lock);
|
||||
auto entry = logs.find(ii);
|
||||
if (entry == logs.end())
|
||||
{
|
||||
entry = logs.find(0);
|
||||
assert(0);
|
||||
}
|
||||
src = entry->second;
|
||||
}
|
||||
(*ret)[cc++] = makeClone(src);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
nuraft::ptr<nuraft::log_entry> InMemoryLogStore::entry_at(size_t index)
|
||||
{
|
||||
nuraft::ptr<nuraft::log_entry> src = nullptr;
|
||||
{
|
||||
std::lock_guard<std::mutex> l(logs_lock);
|
||||
auto entry = logs.find(index);
|
||||
if (entry == logs.end())
|
||||
entry = logs.find(0);
|
||||
src = entry->second;
|
||||
}
|
||||
return makeClone(src);
|
||||
}
|
||||
|
||||
size_t InMemoryLogStore::term_at(size_t index)
|
||||
{
|
||||
size_t term = 0;
|
||||
{
|
||||
std::lock_guard<std::mutex> l(logs_lock);
|
||||
auto entry = logs.find(index);
|
||||
if (entry == logs.end())
|
||||
entry = logs.find(0);
|
||||
term = entry->second->get_term();
|
||||
}
|
||||
return term;
|
||||
}
|
||||
|
||||
nuraft::ptr<nuraft::buffer> InMemoryLogStore::pack(size_t index, Int32 cnt)
|
||||
{
|
||||
std::vector<nuraft::ptr<nuraft::buffer>> returned_logs;
|
||||
|
||||
size_t size_total = 0;
|
||||
for (size_t ii = index; ii < index + cnt; ++ii)
|
||||
{
|
||||
ptr<log_entry> le = nullptr;
|
||||
{
|
||||
std::lock_guard<std::mutex> l(logs_lock);
|
||||
le = logs[ii];
|
||||
}
|
||||
assert(le.get());
|
||||
nuraft::ptr<nuraft::buffer> buf = le->serialize();
|
||||
size_total += buf->size();
|
||||
returned_logs.push_back(buf);
|
||||
}
|
||||
|
||||
nuraft::ptr<buffer> buf_out = nuraft::buffer::alloc(sizeof(int32) + cnt * sizeof(int32) + size_total);
|
||||
buf_out->pos(0);
|
||||
buf_out->put(static_cast<Int32>(cnt));
|
||||
|
||||
for (auto & entry : returned_logs)
|
||||
{
|
||||
nuraft::ptr<nuraft::buffer> & bb = entry;
|
||||
buf_out->put(static_cast<Int32>(bb->size()));
|
||||
buf_out->put(*bb);
|
||||
}
|
||||
return buf_out;
|
||||
}
|
||||
|
||||
void InMemoryLogStore::apply_pack(size_t index, nuraft::buffer & pack)
|
||||
{
|
||||
pack.pos(0);
|
||||
Int32 num_logs = pack.get_int();
|
||||
|
||||
for (Int32 ii = 0; ii < num_logs; ++ii)
|
||||
{
|
||||
size_t cur_idx = index + ii;
|
||||
Int32 buf_size = pack.get_int();
|
||||
|
||||
nuraft::ptr<nuraft::buffer> buf_local = nuraft::buffer::alloc(buf_size);
|
||||
pack.get(buf_local);
|
||||
|
||||
nuraft::ptr<nuraft::log_entry> le = nuraft::log_entry::deserialize(*buf_local);
|
||||
{
|
||||
std::lock_guard<std::mutex> l(logs_lock);
|
||||
logs[cur_idx] = le;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> l(logs_lock);
|
||||
auto entry = logs.upper_bound(0);
|
||||
if (entry != logs.end())
|
||||
start_idx = entry->first;
|
||||
else
|
||||
start_idx = 1;
|
||||
}
|
||||
}
|
||||
|
||||
bool InMemoryLogStore::compact(size_t last_log_index)
|
||||
{
|
||||
std::lock_guard<std::mutex> l(logs_lock);
|
||||
for (size_t ii = start_idx; ii <= last_log_index; ++ii)
|
||||
{
|
||||
auto entry = logs.find(ii);
|
||||
if (entry != logs.end())
|
||||
logs.erase(entry);
|
||||
}
|
||||
|
||||
start_idx = last_log_index + 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
47
src/Coordination/InMemoryLogStore.h
Normal file
47
src/Coordination/InMemoryLogStore.h
Normal file
@ -0,0 +1,47 @@
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <Core/Types.h>
|
||||
#include <libnuraft/log_store.hxx> // Y_IGNORE
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class InMemoryLogStore : public nuraft::log_store
|
||||
{
|
||||
public:
|
||||
InMemoryLogStore();
|
||||
|
||||
size_t start_index() const override;
|
||||
|
||||
size_t next_slot() const override;
|
||||
|
||||
nuraft::ptr<nuraft::log_entry> last_entry() const override;
|
||||
|
||||
size_t append(nuraft::ptr<nuraft::log_entry> & entry) override;
|
||||
|
||||
void write_at(size_t index, nuraft::ptr<nuraft::log_entry> & entry) override;
|
||||
|
||||
nuraft::ptr<std::vector<nuraft::ptr<nuraft::log_entry>>> log_entries(size_t start, size_t end) override;
|
||||
|
||||
nuraft::ptr<nuraft::log_entry> entry_at(size_t index) override;
|
||||
|
||||
size_t term_at(size_t index) override;
|
||||
|
||||
nuraft::ptr<nuraft::buffer> pack(size_t index, Int32 cnt) override;
|
||||
|
||||
void apply_pack(size_t index, nuraft::buffer & pack) override;
|
||||
|
||||
bool compact(size_t last_log_index) override;
|
||||
|
||||
bool flush() override { return true; }
|
||||
|
||||
private:
|
||||
std::map<size_t, nuraft::ptr<nuraft::log_entry>> logs;
|
||||
mutable std::mutex logs_lock;
|
||||
std::atomic<size_t> start_idx;
|
||||
};
|
||||
|
||||
}
|
78
src/Coordination/InMemoryStateManager.cpp
Normal file
78
src/Coordination/InMemoryStateManager.cpp
Normal file
@ -0,0 +1,78 @@
|
||||
#include <Coordination/InMemoryStateManager.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int RAFT_ERROR;
|
||||
}
|
||||
|
||||
InMemoryStateManager::InMemoryStateManager(int server_id_, const std::string & host, int port)
|
||||
: my_server_id(server_id_)
|
||||
, my_port(port)
|
||||
, log_store(nuraft::cs_new<InMemoryLogStore>())
|
||||
, cluster_config(nuraft::cs_new<nuraft::cluster_config>())
|
||||
{
|
||||
auto peer_config = nuraft::cs_new<nuraft::srv_config>(my_server_id, host + ":" + std::to_string(port));
|
||||
cluster_config->get_servers().push_back(peer_config);
|
||||
}
|
||||
|
||||
InMemoryStateManager::InMemoryStateManager(
|
||||
int my_server_id_,
|
||||
const std::string & config_prefix,
|
||||
const Poco::Util::AbstractConfiguration & config)
|
||||
: my_server_id(my_server_id_)
|
||||
, log_store(nuraft::cs_new<InMemoryLogStore>())
|
||||
, cluster_config(nuraft::cs_new<nuraft::cluster_config>())
|
||||
{
|
||||
Poco::Util::AbstractConfiguration::Keys keys;
|
||||
config.keys(config_prefix, keys);
|
||||
|
||||
for (const auto & server_key : keys)
|
||||
{
|
||||
std::string full_prefix = config_prefix + "." + server_key;
|
||||
int server_id = config.getInt(full_prefix + ".id");
|
||||
std::string hostname = config.getString(full_prefix + ".hostname");
|
||||
int port = config.getInt(full_prefix + ".port");
|
||||
bool can_become_leader = config.getBool(full_prefix + ".can_become_leader", true);
|
||||
int32_t priority = config.getInt(full_prefix + ".priority", 1);
|
||||
bool start_as_follower = config.getBool(full_prefix + ".start_as_follower", false);
|
||||
if (start_as_follower)
|
||||
start_as_follower_servers.insert(server_id);
|
||||
|
||||
auto endpoint = hostname + ":" + std::to_string(port);
|
||||
auto peer_config = nuraft::cs_new<nuraft::srv_config>(server_id, 0, endpoint, "", !can_become_leader, priority);
|
||||
if (server_id == my_server_id)
|
||||
{
|
||||
my_server_config = peer_config;
|
||||
my_port = port;
|
||||
}
|
||||
|
||||
cluster_config->get_servers().push_back(peer_config);
|
||||
}
|
||||
if (!my_server_config)
|
||||
throw Exception(ErrorCodes::RAFT_ERROR, "Our server id {} not found in raft_configuration section");
|
||||
|
||||
if (start_as_follower_servers.size() == cluster_config->get_servers().size())
|
||||
throw Exception(ErrorCodes::RAFT_ERROR, "At least one of servers should be able to start as leader (without <start_as_follower>)");
|
||||
}
|
||||
|
||||
void InMemoryStateManager::save_config(const nuraft::cluster_config & config)
|
||||
{
|
||||
// Just keep in memory in this example.
|
||||
// Need to write to disk here, if we want to make it durable.
|
||||
nuraft::ptr<nuraft::buffer> buf = config.serialize();
|
||||
cluster_config = nuraft::cluster_config::deserialize(*buf);
|
||||
}
|
||||
|
||||
void InMemoryStateManager::save_state(const nuraft::srv_state & state)
|
||||
{
|
||||
// Just keep in memory in this example.
|
||||
// Need to write to disk here, if we want to make it durable.
|
||||
nuraft::ptr<nuraft::buffer> buf = state.serialize();
|
||||
server_state = nuraft::srv_state::deserialize(*buf);
|
||||
}
|
||||
|
||||
}
|
58
src/Coordination/InMemoryStateManager.h
Normal file
58
src/Coordination/InMemoryStateManager.h
Normal file
@ -0,0 +1,58 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/Types.h>
|
||||
#include <string>
|
||||
#include <Coordination/InMemoryLogStore.h>
|
||||
#include <libnuraft/nuraft.hxx> // Y_IGNORE
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class InMemoryStateManager : public nuraft::state_mgr
|
||||
{
|
||||
public:
|
||||
InMemoryStateManager(
|
||||
int server_id_,
|
||||
const std::string & config_prefix,
|
||||
const Poco::Util::AbstractConfiguration & config);
|
||||
|
||||
InMemoryStateManager(
|
||||
int server_id_,
|
||||
const std::string & host,
|
||||
int port);
|
||||
|
||||
nuraft::ptr<nuraft::cluster_config> load_config() override { return cluster_config; }
|
||||
|
||||
void save_config(const nuraft::cluster_config & config) override;
|
||||
|
||||
void save_state(const nuraft::srv_state & state) override;
|
||||
|
||||
nuraft::ptr<nuraft::srv_state> read_state() override { return server_state; }
|
||||
|
||||
nuraft::ptr<nuraft::log_store> load_log_store() override { return log_store; }
|
||||
|
||||
Int32 server_id() override { return my_server_id; }
|
||||
|
||||
nuraft::ptr<nuraft::srv_config> get_srv_config() const { return my_server_config; }
|
||||
|
||||
void system_exit(const int /* exit_code */) override {}
|
||||
|
||||
int getPort() const { return my_port; }
|
||||
|
||||
bool shouldStartAsFollower() const
|
||||
{
|
||||
return start_as_follower_servers.count(my_server_id);
|
||||
}
|
||||
|
||||
private:
|
||||
int my_server_id;
|
||||
int my_port;
|
||||
std::unordered_set<int> start_as_follower_servers;
|
||||
nuraft::ptr<InMemoryLogStore> log_store;
|
||||
nuraft::ptr<nuraft::srv_config> my_server_config;
|
||||
nuraft::ptr<nuraft::cluster_config> cluster_config;
|
||||
nuraft::ptr<nuraft::srv_state> server_state;
|
||||
};
|
||||
|
||||
}
|
47
src/Coordination/LoggerWrapper.h
Normal file
47
src/Coordination/LoggerWrapper.h
Normal file
@ -0,0 +1,47 @@
|
||||
#pragma once
|
||||
|
||||
#include <libnuraft/nuraft.hxx> // Y_IGNORE
|
||||
#include <common/logger_useful.h>
|
||||
#include <Core/SettingsEnums.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class LoggerWrapper : public nuraft::logger
|
||||
{
|
||||
public:
|
||||
LoggerWrapper(const std::string & name, LogsLevel level_)
|
||||
: log(&Poco::Logger::get(name))
|
||||
, level(static_cast<int>(level_))
|
||||
{
|
||||
log->setLevel(level);
|
||||
}
|
||||
|
||||
void put_details(
|
||||
int level_,
|
||||
const char * /* source_file */,
|
||||
const char * /* func_name */,
|
||||
size_t /* line_number */,
|
||||
const std::string & msg) override
|
||||
{
|
||||
LOG_IMPL(log, static_cast<DB::LogsLevel>(level_), static_cast<Poco::Message::Priority>(level_), msg);
|
||||
}
|
||||
|
||||
void set_level(int level_) override
|
||||
{
|
||||
level_ = std::min(6, std::max(1, level_));
|
||||
log->setLevel(level_);
|
||||
level = level_;
|
||||
}
|
||||
|
||||
int get_level() override
|
||||
{
|
||||
return level;
|
||||
}
|
||||
|
||||
private:
|
||||
Poco::Logger * log;
|
||||
std::atomic<int> level;
|
||||
};
|
||||
|
||||
}
|
24
src/Coordination/NuKeeperCommon.h
Normal file
24
src/Coordination/NuKeeperCommon.h
Normal file
@ -0,0 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/ZooKeeper/ZooKeeperCommon.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct NuKeeperRequest
|
||||
{
|
||||
int64_t session_id;
|
||||
Coordination::ZooKeeperRequestPtr request;
|
||||
};
|
||||
|
||||
using NuKeeperRequests = std::vector<NuKeeperRequest>;
|
||||
|
||||
struct NuKeeperResponse
|
||||
{
|
||||
int64_t session_id;
|
||||
Coordination::ZooKeeperRequestPtr response;
|
||||
};
|
||||
|
||||
using NuKeeperResponses = std::vector<NuKeeperResponse>;
|
||||
|
||||
}
|
182
src/Coordination/NuKeeperServer.cpp
Normal file
182
src/Coordination/NuKeeperServer.cpp
Normal file
@ -0,0 +1,182 @@
|
||||
#include <Coordination/NuKeeperServer.h>
|
||||
#include <Coordination/LoggerWrapper.h>
|
||||
#include <Coordination/NuKeeperStateMachine.h>
|
||||
#include <Coordination/InMemoryStateManager.h>
|
||||
#include <Coordination/WriteBufferFromNuraftBuffer.h>
|
||||
#include <Coordination/ReadBufferFromNuraftBuffer.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <chrono>
|
||||
#include <Common/ZooKeeper/ZooKeeperIO.h>
|
||||
#include <string>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int RAFT_ERROR;
|
||||
}
|
||||
|
||||
NuKeeperServer::NuKeeperServer(
|
||||
int server_id_,
|
||||
const CoordinationSettingsPtr & coordination_settings_,
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
ResponsesQueue & responses_queue_)
|
||||
: server_id(server_id_)
|
||||
, coordination_settings(coordination_settings_)
|
||||
, state_machine(nuraft::cs_new<NuKeeperStateMachine>(responses_queue_, coordination_settings))
|
||||
, state_manager(nuraft::cs_new<InMemoryStateManager>(server_id, "test_keeper_server.raft_configuration", config))
|
||||
, responses_queue(responses_queue_)
|
||||
{
|
||||
}
|
||||
|
||||
void NuKeeperServer::startup()
|
||||
{
|
||||
nuraft::raft_params params;
|
||||
params.heart_beat_interval_ = coordination_settings->heart_beat_interval_ms.totalMilliseconds();
|
||||
params.election_timeout_lower_bound_ = coordination_settings->election_timeout_lower_bound_ms.totalMilliseconds();
|
||||
params.election_timeout_upper_bound_ = coordination_settings->election_timeout_upper_bound_ms.totalMilliseconds();
|
||||
params.reserved_log_items_ = coordination_settings->reserved_log_items;
|
||||
params.snapshot_distance_ = coordination_settings->snapshot_distance;
|
||||
params.client_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds();
|
||||
params.auto_forwarding_ = coordination_settings->auto_forwarding;
|
||||
params.auto_forwarding_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds() * 2;
|
||||
|
||||
params.return_method_ = nuraft::raft_params::blocking;
|
||||
|
||||
nuraft::asio_service::options asio_opts{};
|
||||
nuraft::raft_server::init_options init_options;
|
||||
init_options.skip_initial_election_timeout_ = state_manager->shouldStartAsFollower();
|
||||
init_options.raft_callback_ = [this] (nuraft::cb_func::Type type, nuraft::cb_func::Param * param)
|
||||
{
|
||||
return callbackFunc(type, param);
|
||||
};
|
||||
|
||||
raft_instance = launcher.init(
|
||||
state_machine, state_manager, nuraft::cs_new<LoggerWrapper>("RaftInstance", coordination_settings->raft_logs_level), state_manager->getPort(),
|
||||
asio_opts, params, init_options);
|
||||
|
||||
if (!raft_instance)
|
||||
throw Exception(ErrorCodes::RAFT_ERROR, "Cannot allocate RAFT instance");
|
||||
}
|
||||
|
||||
void NuKeeperServer::shutdown()
|
||||
{
|
||||
state_machine->shutdownStorage();
|
||||
if (!launcher.shutdown(coordination_settings->shutdown_timeout.totalSeconds()))
|
||||
LOG_WARNING(&Poco::Logger::get("NuKeeperServer"), "Failed to shutdown RAFT server in {} seconds", 5);
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(int64_t session_id, const Coordination::ZooKeeperRequestPtr & request)
|
||||
{
|
||||
DB::WriteBufferFromNuraftBuffer buf;
|
||||
DB::writeIntBinary(session_id, buf);
|
||||
request->write(buf);
|
||||
return buf.getBuffer();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void NuKeeperServer::putRequest(const NuKeeperStorage::RequestForSession & request_for_session)
|
||||
{
|
||||
auto [session_id, request] = request_for_session;
|
||||
if (isLeaderAlive() && request->isReadRequest())
|
||||
{
|
||||
state_machine->processReadRequest(request_for_session);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<nuraft::ptr<nuraft::buffer>> entries;
|
||||
entries.push_back(getZooKeeperLogEntry(session_id, request));
|
||||
|
||||
std::lock_guard lock(append_entries_mutex);
|
||||
|
||||
auto result = raft_instance->append_entries(entries);
|
||||
if (!result->get_accepted())
|
||||
{
|
||||
NuKeeperStorage::ResponsesForSessions responses;
|
||||
auto response = request->makeResponse();
|
||||
response->xid = request->xid;
|
||||
response->zxid = 0;
|
||||
response->error = Coordination::Error::ZOPERATIONTIMEOUT;
|
||||
responses_queue.push(DB::NuKeeperStorage::ResponseForSession{session_id, response});
|
||||
}
|
||||
|
||||
if (result->get_result_code() == nuraft::cmd_result_code::TIMEOUT)
|
||||
{
|
||||
NuKeeperStorage::ResponsesForSessions responses;
|
||||
auto response = request->makeResponse();
|
||||
response->xid = request->xid;
|
||||
response->zxid = 0;
|
||||
response->error = Coordination::Error::ZOPERATIONTIMEOUT;
|
||||
responses_queue.push(DB::NuKeeperStorage::ResponseForSession{session_id, response});
|
||||
}
|
||||
else if (result->get_result_code() != nuraft::cmd_result_code::OK)
|
||||
throw Exception(ErrorCodes::RAFT_ERROR, "Requests result failed with code {} and message: '{}'", result->get_result_code(), result->get_result_str());
|
||||
}
|
||||
}
|
||||
|
||||
int64_t NuKeeperServer::getSessionID(int64_t session_timeout_ms)
|
||||
{
|
||||
auto entry = nuraft::buffer::alloc(sizeof(int64_t));
|
||||
/// Just special session request
|
||||
nuraft::buffer_serializer bs(entry);
|
||||
bs.put_i64(session_timeout_ms);
|
||||
|
||||
std::lock_guard lock(append_entries_mutex);
|
||||
|
||||
auto result = raft_instance->append_entries({entry});
|
||||
|
||||
if (!result->get_accepted())
|
||||
throw Exception(ErrorCodes::RAFT_ERROR, "Cannot send session_id request to RAFT");
|
||||
|
||||
if (result->get_result_code() != nuraft::cmd_result_code::OK)
|
||||
throw Exception(ErrorCodes::RAFT_ERROR, "session_id request failed to RAFT");
|
||||
|
||||
auto resp = result->get();
|
||||
if (resp == nullptr)
|
||||
throw Exception(ErrorCodes::RAFT_ERROR, "Received nullptr as session_id");
|
||||
|
||||
nuraft::buffer_serializer bs_resp(resp);
|
||||
return bs_resp.get_i64();
|
||||
}
|
||||
|
||||
bool NuKeeperServer::isLeader() const
|
||||
{
|
||||
return raft_instance->is_leader();
|
||||
}
|
||||
|
||||
bool NuKeeperServer::isLeaderAlive() const
|
||||
{
|
||||
return raft_instance->is_leader_alive();
|
||||
}
|
||||
|
||||
nuraft::cb_func::ReturnCode NuKeeperServer::callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * /* param */)
|
||||
{
|
||||
if (type == nuraft::cb_func::Type::BecomeFresh || type == nuraft::cb_func::Type::BecomeLeader)
|
||||
{
|
||||
std::unique_lock lock(initialized_mutex);
|
||||
initialized_flag = true;
|
||||
initialized_cv.notify_all();
|
||||
}
|
||||
return nuraft::cb_func::ReturnCode::Ok;
|
||||
}
|
||||
|
||||
void NuKeeperServer::waitInit()
|
||||
{
|
||||
std::unique_lock lock(initialized_mutex);
|
||||
int64_t timeout = coordination_settings->startup_timeout.totalMilliseconds();
|
||||
if (!initialized_cv.wait_for(lock, std::chrono::milliseconds(timeout), [&] { return initialized_flag; }))
|
||||
throw Exception(ErrorCodes::RAFT_ERROR, "Failed to wait RAFT initialization");
|
||||
}
|
||||
|
||||
std::unordered_set<int64_t> NuKeeperServer::getDeadSessions()
|
||||
{
|
||||
return state_machine->getDeadSessions();
|
||||
}
|
||||
|
||||
}
|
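For orientation, a minimal caller-side sketch of the server above; the real caller is NuKeeperStorageDispatcher::initialize further down in this diff, and the `config` and `request` objects here are assumptions, not part of the patch:

#include <Coordination/NuKeeperServer.h>
#include <Poco/Util/AbstractConfiguration.h>

/// Hypothetical wiring sketch, not the actual integration code.
void exampleNuKeeperServerUsage(const Poco::Util::AbstractConfiguration & config, const Coordination::ZooKeeperRequestPtr & request)
{
    auto settings = std::make_shared<DB::CoordinationSettings>();
    DB::ResponsesQueue responses_queue;
    DB::NuKeeperServer server(/* server_id_ = */ 1, settings, config, responses_queue);

    server.startup();    /// launches the NuRaft instance
    server.waitInit();   /// blocks until BecomeFresh/BecomeLeader fires or startup_timeout expires

    int64_t session_id = server.getSessionID(/* session_timeout_ms = */ 30000);

    DB::NuKeeperStorage::RequestForSession request_info;
    request_info.request = request;
    request_info.session_id = session_id;
    server.putRequest(request_info);   /// read requests bypass Raft while the leader is alive

    server.shutdown();
}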
63 src/Coordination/NuKeeperServer.h Normal file
@ -0,0 +1,63 @@
#pragma once

#include <libnuraft/nuraft.hxx> // Y_IGNORE
#include <Coordination/InMemoryLogStore.h>
#include <Coordination/InMemoryStateManager.h>
#include <Coordination/NuKeeperStateMachine.h>
#include <Coordination/NuKeeperStorage.h>
#include <Coordination/CoordinationSettings.h>
#include <unordered_map>

namespace DB
{

class NuKeeperServer
{
private:
    int server_id;

    CoordinationSettingsPtr coordination_settings;

    nuraft::ptr<NuKeeperStateMachine> state_machine;

    nuraft::ptr<InMemoryStateManager> state_manager;

    nuraft::raft_launcher launcher;

    nuraft::ptr<nuraft::raft_server> raft_instance;

    std::mutex append_entries_mutex;

    ResponsesQueue & responses_queue;

    std::mutex initialized_mutex;
    bool initialized_flag = false;
    std::condition_variable initialized_cv;

    nuraft::cb_func::ReturnCode callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * param);

public:
    NuKeeperServer(
        int server_id_,
        const CoordinationSettingsPtr & coordination_settings_,
        const Poco::Util::AbstractConfiguration & config,
        ResponsesQueue & responses_queue_);

    void startup();

    void putRequest(const NuKeeperStorage::RequestForSession & request);

    int64_t getSessionID(int64_t session_timeout_ms);

    std::unordered_set<int64_t> getDeadSessions();

    bool isLeader() const;

    bool isLeaderAlive() const;

    void waitInit();

    void shutdown();
};

}
262 src/Coordination/NuKeeperStateMachine.cpp Normal file
@ -0,0 +1,262 @@
#include <Coordination/NuKeeperStateMachine.h>
#include <Coordination/ReadBufferFromNuraftBuffer.h>
#include <Coordination/WriteBufferFromNuraftBuffer.h>
#include <IO/ReadHelpers.h>
#include <Common/ZooKeeper/ZooKeeperIO.h>
#include <Coordination/NuKeeperStorageSerializer.h>

namespace DB
{

NuKeeperStorage::RequestForSession parseRequest(nuraft::buffer & data)
{
    ReadBufferFromNuraftBuffer buffer(data);
    NuKeeperStorage::RequestForSession request_for_session;
    readIntBinary(request_for_session.session_id, buffer);

    int32_t length;
    Coordination::read(length, buffer);

    int32_t xid;
    Coordination::read(xid, buffer);

    Coordination::OpNum opnum;
    Coordination::read(opnum, buffer);

    request_for_session.request = Coordination::ZooKeeperRequestFactory::instance().get(opnum);
    request_for_session.request->xid = xid;
    request_for_session.request->readImpl(buffer);
    return request_for_session;
}

nuraft::ptr<nuraft::buffer> writeResponses(NuKeeperStorage::ResponsesForSessions & responses)
{
    WriteBufferFromNuraftBuffer buffer;
    for (const auto & response_and_session : responses)
    {
        writeIntBinary(response_and_session.session_id, buffer);
        response_and_session.response->write(buffer);
    }
    return buffer.getBuffer();
}


NuKeeperStateMachine::NuKeeperStateMachine(ResponsesQueue & responses_queue_, const CoordinationSettingsPtr & coordination_settings_)
    : coordination_settings(coordination_settings_)
    , storage(coordination_settings->dead_session_check_period_ms.totalMilliseconds())
    , responses_queue(responses_queue_)
    , last_committed_idx(0)
    , log(&Poco::Logger::get("NuRaftStateMachine"))
{
    LOG_DEBUG(log, "Created nukeeper state machine");
}

nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, nuraft::buffer & data)
{
    if (data.size() == sizeof(int64_t))
    {
        nuraft::buffer_serializer timeout_data(data);
        int64_t session_timeout_ms = timeout_data.get_i64();
        auto response = nuraft::buffer::alloc(sizeof(int64_t));
        int64_t session_id;
        nuraft::buffer_serializer bs(response);
        {
            std::lock_guard lock(storage_lock);
            session_id = storage.getSessionID(session_timeout_ms);
            bs.put_i64(session_id);
        }
        LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_timeout_ms);
        last_committed_idx = log_idx;
        return response;
    }
    else
    {
        auto request_for_session = parseRequest(data);
        NuKeeperStorage::ResponsesForSessions responses_for_sessions;
        {
            std::lock_guard lock(storage_lock);
            responses_for_sessions = storage.processRequest(request_for_session.request, request_for_session.session_id);
            for (auto & response_for_session : responses_for_sessions)
                responses_queue.push(response_for_session);
        }

        last_committed_idx = log_idx;
        return nullptr;
    }
}

bool NuKeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
{
    LOG_DEBUG(log, "Applying snapshot {}", s.get_last_log_idx());
    StorageSnapshotPtr snapshot;
    {
        std::lock_guard<std::mutex> lock(snapshots_lock);
        auto entry = snapshots.find(s.get_last_log_idx());
        if (entry == snapshots.end())
            return false;
        snapshot = entry->second;
    }
    std::lock_guard lock(storage_lock);
    storage = snapshot->storage;
    last_committed_idx = s.get_last_log_idx();
    return true;
}

nuraft::ptr<nuraft::snapshot> NuKeeperStateMachine::last_snapshot()
{
    // Just return the latest snapshot.
    std::lock_guard<std::mutex> lock(snapshots_lock);
    auto entry = snapshots.rbegin();
    if (entry == snapshots.rend())
        return nullptr;

    return entry->second->snapshot;
}

NuKeeperStateMachine::StorageSnapshotPtr NuKeeperStateMachine::createSnapshotInternal(nuraft::snapshot & s)
{
    nuraft::ptr<nuraft::buffer> snp_buf = s.serialize();
    nuraft::ptr<nuraft::snapshot> ss = nuraft::snapshot::deserialize(*snp_buf);
    std::lock_guard lock(storage_lock);
    return std::make_shared<NuKeeperStateMachine::StorageSnapshot>(ss, storage);
}

NuKeeperStateMachine::StorageSnapshotPtr NuKeeperStateMachine::readSnapshot(nuraft::snapshot & s, nuraft::buffer & in)
{
    nuraft::ptr<nuraft::buffer> snp_buf = s.serialize();
    nuraft::ptr<nuraft::snapshot> ss = nuraft::snapshot::deserialize(*snp_buf);
    NuKeeperStorageSerializer serializer;

    ReadBufferFromNuraftBuffer reader(in);
    NuKeeperStorage new_storage(coordination_settings->dead_session_check_period_ms.totalMilliseconds());
    serializer.deserialize(new_storage, reader);
    return std::make_shared<StorageSnapshot>(ss, new_storage);
}


void NuKeeperStateMachine::writeSnapshot(const NuKeeperStateMachine::StorageSnapshotPtr & snapshot, nuraft::ptr<nuraft::buffer> & out)
{
    NuKeeperStorageSerializer serializer;

    WriteBufferFromNuraftBuffer writer;
    serializer.serialize(snapshot->storage, writer);
    out = writer.getBuffer();
}

void NuKeeperStateMachine::create_snapshot(
    nuraft::snapshot & s,
    nuraft::async_result<bool>::handler_type & when_done)
{
    LOG_DEBUG(log, "Creating snapshot {}", s.get_last_log_idx());
    auto snapshot = createSnapshotInternal(s);
    {
        std::lock_guard<std::mutex> lock(snapshots_lock);
        snapshots[s.get_last_log_idx()] = snapshot;
        size_t num = snapshots.size();
        if (num > coordination_settings->max_stored_snapshots)
        {
            auto entry = snapshots.begin();

            for (size_t i = 0; i < num - coordination_settings->max_stored_snapshots; ++i)
            {
                if (entry == snapshots.end())
                    break;
                entry = snapshots.erase(entry);
            }
        }
    }

    LOG_DEBUG(log, "Created snapshot {}", s.get_last_log_idx());
    nuraft::ptr<std::exception> except(nullptr);
    bool ret = true;
    when_done(ret, except);
}

void NuKeeperStateMachine::save_logical_snp_obj(
    nuraft::snapshot & s,
    size_t & obj_id,
    nuraft::buffer & data,
    bool /*is_first_obj*/,
    bool /*is_last_obj*/)
{
    LOG_DEBUG(log, "Saving snapshot {} obj_id {}", s.get_last_log_idx(), obj_id);

    if (obj_id == 0)
    {
        auto new_snapshot = createSnapshotInternal(s);
        std::lock_guard<std::mutex> lock(snapshots_lock);
        snapshots.try_emplace(s.get_last_log_idx(), std::move(new_snapshot));
    }
    else
    {
        auto received_snapshot = readSnapshot(s, data);

        std::lock_guard<std::mutex> lock(snapshots_lock);
        snapshots[s.get_last_log_idx()] = std::move(received_snapshot);
    }

    obj_id++;
}

int NuKeeperStateMachine::read_logical_snp_obj(
    nuraft::snapshot & s,
    void* & /*user_snp_ctx*/,
    ulong obj_id,
    nuraft::ptr<nuraft::buffer> & data_out,
    bool & is_last_obj)
{
    LOG_DEBUG(log, "Reading snapshot {} obj_id {}", s.get_last_log_idx(), obj_id);
    StorageSnapshotPtr required_snapshot;
    {
        std::lock_guard<std::mutex> lock(snapshots_lock);
        auto entry = snapshots.find(s.get_last_log_idx());
        if (entry == snapshots.end())
        {
            // Snapshot doesn't exist.
            data_out = nullptr;
            is_last_obj = true;
            return 0;
        }
        required_snapshot = entry->second;
    }

    if (obj_id == 0)
    {
        auto new_snapshot = createSnapshotInternal(s);
        writeSnapshot(new_snapshot, data_out);
        is_last_obj = false;
    }
    else
    {
        writeSnapshot(required_snapshot, data_out);
        is_last_obj = true;
    }
    return 0;
}

void NuKeeperStateMachine::processReadRequest(const NuKeeperStorage::RequestForSession & request_for_session)
{
    NuKeeperStorage::ResponsesForSessions responses;
    {
        std::lock_guard lock(storage_lock);
        responses = storage.processRequest(request_for_session.request, request_for_session.session_id);
    }
    for (const auto & response : responses)
        responses_queue.push(response);
}

std::unordered_set<int64_t> NuKeeperStateMachine::getDeadSessions()
{
    std::lock_guard lock(storage_lock);
    return storage.getDeadSessions();
}

void NuKeeperStateMachine::shutdownStorage()
{
    std::lock_guard lock(storage_lock);
    storage.finalize();
}

}
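The commit() dispatch above distinguishes the two kinds of log entries purely by size: an entry of exactly sizeof(int64_t) bytes is a session request, anything larger is assumed to be the [session_id][length][xid][opnum][request body] framing produced by getZooKeeperLogEntry in NuKeeperServer.cpp and read back by parseRequest. A minimal sketch of that convention, using only the NuRaft buffer calls that already appear in this diff:

/// Sketch of the size convention; not part of the patch itself.
nuraft::ptr<nuraft::buffer> makeSessionEntry(int64_t session_timeout_ms)
{
    auto entry = nuraft::buffer::alloc(sizeof(int64_t));   /// exactly 8 bytes -> treated as a session request
    nuraft::buffer_serializer bs(entry);
    bs.put_i64(session_timeout_ms);
    return entry;
}

bool isSessionEntry(nuraft::buffer & data)
{
    /// Larger entries are serialized ZooKeeper requests prefixed with the session id.
    return data.size() == sizeof(int64_t);
}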
99 src/Coordination/NuKeeperStateMachine.h Normal file
@ -0,0 +1,99 @@
#pragma once

#include <Coordination/NuKeeperStorage.h>
#include <libnuraft/nuraft.hxx> // Y_IGNORE
#include <common/logger_useful.h>
#include <Coordination/ThreadSafeQueue.h>
#include <Coordination/CoordinationSettings.h>

namespace DB
{

using ResponsesQueue = ThreadSafeQueue<NuKeeperStorage::ResponseForSession>;

class NuKeeperStateMachine : public nuraft::state_machine
{
public:
    NuKeeperStateMachine(ResponsesQueue & responses_queue_, const CoordinationSettingsPtr & coordination_settings_);

    nuraft::ptr<nuraft::buffer> pre_commit(const size_t /*log_idx*/, nuraft::buffer & /*data*/) override { return nullptr; }

    nuraft::ptr<nuraft::buffer> commit(const size_t log_idx, nuraft::buffer & data) override;

    void rollback(const size_t /*log_idx*/, nuraft::buffer & /*data*/) override {}

    size_t last_commit_index() override { return last_committed_idx; }

    bool apply_snapshot(nuraft::snapshot & s) override;

    nuraft::ptr<nuraft::snapshot> last_snapshot() override;

    void create_snapshot(
        nuraft::snapshot & s,
        nuraft::async_result<bool>::handler_type & when_done) override;

    void save_logical_snp_obj(
        nuraft::snapshot & s,
        size_t & obj_id,
        nuraft::buffer & data,
        bool is_first_obj,
        bool is_last_obj) override;

    int read_logical_snp_obj(
        nuraft::snapshot & s,
        void* & user_snp_ctx,
        ulong obj_id,
        nuraft::ptr<nuraft::buffer> & data_out,
        bool & is_last_obj) override;

    NuKeeperStorage & getStorage()
    {
        return storage;
    }

    void processReadRequest(const NuKeeperStorage::RequestForSession & request_for_session);

    std::unordered_set<int64_t> getDeadSessions();

    void shutdownStorage();

private:
    struct StorageSnapshot
    {
        StorageSnapshot(const nuraft::ptr<nuraft::snapshot> & s, const NuKeeperStorage & storage_)
            : snapshot(s)
            , storage(storage_)
        {}

        nuraft::ptr<nuraft::snapshot> snapshot;
        NuKeeperStorage storage;
    };

    using StorageSnapshotPtr = std::shared_ptr<StorageSnapshot>;

    StorageSnapshotPtr createSnapshotInternal(nuraft::snapshot & s);

    StorageSnapshotPtr readSnapshot(nuraft::snapshot & s, nuraft::buffer & in);

    static void writeSnapshot(const StorageSnapshotPtr & snapshot, nuraft::ptr<nuraft::buffer> & out);

    CoordinationSettingsPtr coordination_settings;

    NuKeeperStorage storage;

    ResponsesQueue & responses_queue;
    /// Mutex for snapshots
    std::mutex snapshots_lock;

    /// Lock for storage
    std::mutex storage_lock;

    /// Fake snapshot storage
    std::map<uint64_t, StorageSnapshotPtr> snapshots;

    /// Last committed Raft log number.
    std::atomic<size_t> last_committed_idx;
    Poco::Logger * log;
};

}
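The state machine never writes to client connections itself; it only pushes ResponseForSession items into the shared ResponsesQueue declared here, and a separate dispatcher thread drains them (see NuKeeperStorageDispatcher::responseThread further down). A minimal sketch of that hand-off, assuming only the push/tryPop calls already used elsewhere in this diff:

/// Producer side (state machine thread): hand a response to whoever owns the session.
void publish(DB::ResponsesQueue & queue, const DB::NuKeeperStorage::ResponseForSession & response)
{
    queue.push(response);                  /// same call commit()/processReadRequest() use
}

/// Consumer side (dispatcher response thread): wait up to the operation timeout for work.
bool drainOne(DB::ResponsesQueue & queue, DB::NuKeeperStorage::ResponseForSession & out, UInt64 timeout_ms)
{
    return queue.tryPop(out, timeout_ms);  /// false on timeout, true when a response was taken
}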
@ -1,4 +1,4 @@
#include <Common/ZooKeeper/TestKeeperStorage.h>
#include <Coordination/NuKeeperStorage.h>
#include <Common/ZooKeeper/IKeeper.h>
#include <Common/setThreadName.h>
#include <mutex>
@ -17,13 +17,6 @@ namespace ErrorCodes
    extern const int BAD_ARGUMENTS;
}

}

namespace zkutil
{

using namespace DB;

static String parentPath(const String & path)
{
    auto rslash_pos = path.rfind('/');
@ -38,20 +31,20 @@ static String baseName(const String & path)
    return path.substr(rslash_pos + 1);
}

static TestKeeperStorage::ResponsesForSessions processWatchesImpl(const String & path, TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches, Coordination::Event event_type)
static NuKeeperStorage::ResponsesForSessions processWatchesImpl(const String & path, NuKeeperStorage::Watches & watches, NuKeeperStorage::Watches & list_watches, Coordination::Event event_type)
{
    TestKeeperStorage::ResponsesForSessions result;
    NuKeeperStorage::ResponsesForSessions result;
    auto it = watches.find(path);
    if (it != watches.end())
    {
        std::shared_ptr<Coordination::ZooKeeperWatchResponse> watch_response = std::make_shared<Coordination::ZooKeeperWatchResponse>();
        watch_response->path = path;
        watch_response->xid = -1;
        watch_response->xid = Coordination::WATCH_XID;
        watch_response->zxid = -1;
        watch_response->type = event_type;
        watch_response->state = Coordination::State::CONNECTED;
        for (auto watcher_session : it->second)
            result.push_back(TestKeeperStorage::ResponseForSession{watcher_session, watch_response});
            result.push_back(NuKeeperStorage::ResponseForSession{watcher_session, watch_response});

        watches.erase(it);
    }
@ -62,58 +55,69 @@ static TestKeeperStorage::ResponsesForSessions processWatchesImpl(const String &
    {
        std::shared_ptr<Coordination::ZooKeeperWatchResponse> watch_list_response = std::make_shared<Coordination::ZooKeeperWatchResponse>();
        watch_list_response->path = parent_path;
        watch_list_response->xid = -1;
        watch_list_response->xid = Coordination::WATCH_XID;
        watch_list_response->zxid = -1;
        watch_list_response->type = Coordination::Event::CHILD;
        watch_list_response->state = Coordination::State::CONNECTED;
        for (auto watcher_session : it->second)
            result.push_back(TestKeeperStorage::ResponseForSession{watcher_session, watch_list_response});
            result.push_back(NuKeeperStorage::ResponseForSession{watcher_session, watch_list_response});

        list_watches.erase(it);
    }
    return result;
}

TestKeeperStorage::TestKeeperStorage()
NuKeeperStorage::NuKeeperStorage(int64_t tick_time_ms)
    : session_expiry_queue(tick_time_ms)
{
    container.emplace("/", Node());
}

using Undo = std::function<void()>;

struct TestKeeperStorageRequest
struct NuKeeperStorageRequest
{
    Coordination::ZooKeeperRequestPtr zk_request;

    explicit TestKeeperStorageRequest(const Coordination::ZooKeeperRequestPtr & zk_request_)
    explicit NuKeeperStorageRequest(const Coordination::ZooKeeperRequestPtr & zk_request_)
        : zk_request(zk_request_)
    {}
    virtual std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & ephemerals, int64_t zxid, int64_t session_id) const = 0;
    virtual TestKeeperStorage::ResponsesForSessions processWatches(TestKeeperStorage::Watches & /*watches*/, TestKeeperStorage::Watches & /*list_watches*/) const { return {}; }
    virtual std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & ephemerals, int64_t zxid, int64_t session_id) const = 0;
    virtual NuKeeperStorage::ResponsesForSessions processWatches(NuKeeperStorage::Watches & /*watches*/, NuKeeperStorage::Watches & /*list_watches*/) const { return {}; }

    virtual ~TestKeeperStorageRequest() = default;
    virtual ~NuKeeperStorageRequest() = default;
};

struct TestKeeperStorageHeartbeatRequest final : public TestKeeperStorageRequest
struct NuKeeperStorageHeartbeatRequest final : public NuKeeperStorageRequest
{
    using TestKeeperStorageRequest::TestKeeperStorageRequest;
    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & /* container */, TestKeeperStorage::Ephemerals & /* ephemerals */, int64_t /* zxid */, int64_t /* session_id */) const override
    using NuKeeperStorageRequest::NuKeeperStorageRequest;
    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & /* container */, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t /* zxid */, int64_t /* session_id */) const override
    {
        return {zk_request->makeResponse(), {}};
    }
};


struct TestKeeperStorageCreateRequest final : public TestKeeperStorageRequest
struct NuKeeperStorageSyncRequest final : public NuKeeperStorageRequest
{
    using TestKeeperStorageRequest::TestKeeperStorageRequest;
    using NuKeeperStorageRequest::NuKeeperStorageRequest;
    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & /* container */, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t /* zxid */, int64_t /* session_id */) const override
    {
        auto response = zk_request->makeResponse();
        dynamic_cast<Coordination::ZooKeeperSyncResponse *>(response.get())->path = dynamic_cast<Coordination::ZooKeeperSyncRequest *>(zk_request.get())->path;
        return {response, {}};
    }
};

    TestKeeperStorage::ResponsesForSessions processWatches(TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches) const override
struct NuKeeperStorageCreateRequest final : public NuKeeperStorageRequest
{
    using NuKeeperStorageRequest::NuKeeperStorageRequest;

    NuKeeperStorage::ResponsesForSessions processWatches(NuKeeperStorage::Watches & watches, NuKeeperStorage::Watches & list_watches) const override
    {
        return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CREATED);
    }

    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & ephemerals, int64_t zxid, int64_t session_id) const override
    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & ephemerals, int64_t zxid, int64_t session_id) const override
    {
        Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
        Undo undo;
@ -138,8 +142,7 @@ struct TestKeeperStorageCreateRequest final : public TestKeeperStorageRequest
        }
        else
        {
            TestKeeperStorage::Node created_node;
            created_node.seq_num = 0;
            NuKeeperStorage::Node created_node;
            created_node.stat.czxid = zxid;
            created_node.stat.mzxid = zxid;
            created_node.stat.ctime = std::chrono::system_clock::now().time_since_epoch() / std::chrono::milliseconds(1);
@ -193,10 +196,10 @@ struct TestKeeperStorageCreateRequest final : public TestKeeperStorageRequest
    }
};

struct TestKeeperStorageGetRequest final : public TestKeeperStorageRequest
struct NuKeeperStorageGetRequest final : public NuKeeperStorageRequest
{
    using TestKeeperStorageRequest::TestKeeperStorageRequest;
    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & /* ephemerals */, int64_t /* zxid */, int64_t /* session_id */) const override
    using NuKeeperStorageRequest::NuKeeperStorageRequest;
    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t /* zxid */, int64_t /* session_id */) const override
    {
        Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
        Coordination::ZooKeeperGetResponse & response = dynamic_cast<Coordination::ZooKeeperGetResponse &>(*response_ptr);
@ -218,10 +221,10 @@ struct TestKeeperStorageGetRequest final : public TestKeeperStorageRequest
    }
};

struct TestKeeperStorageRemoveRequest final : public TestKeeperStorageRequest
struct NuKeeperStorageRemoveRequest final : public NuKeeperStorageRequest
{
    using TestKeeperStorageRequest::TestKeeperStorageRequest;
    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & ephemerals, int64_t /*zxid*/, int64_t session_id) const override
    using NuKeeperStorageRequest::NuKeeperStorageRequest;
    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & ephemerals, int64_t /*zxid*/, int64_t session_id) const override
    {
        Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
        Coordination::ZooKeeperRemoveResponse & response = dynamic_cast<Coordination::ZooKeeperRemoveResponse &>(*response_ptr);
@ -268,16 +271,16 @@ struct TestKeeperStorageRemoveRequest final : public TestKeeperStorageRequest
        return { response_ptr, undo };
    }

    TestKeeperStorage::ResponsesForSessions processWatches(TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches) const override
    NuKeeperStorage::ResponsesForSessions processWatches(NuKeeperStorage::Watches & watches, NuKeeperStorage::Watches & list_watches) const override
    {
        return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::DELETED);
    }
};

struct TestKeeperStorageExistsRequest final : public TestKeeperStorageRequest
struct NuKeeperStorageExistsRequest final : public NuKeeperStorageRequest
{
    using TestKeeperStorageRequest::TestKeeperStorageRequest;
    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & /* ephemerals */, int64_t /*zxid*/, int64_t /* session_id */) const override
    using NuKeeperStorageRequest::NuKeeperStorageRequest;
    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t /*zxid*/, int64_t /* session_id */) const override
    {
        Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
        Coordination::ZooKeeperExistsResponse & response = dynamic_cast<Coordination::ZooKeeperExistsResponse &>(*response_ptr);
@ -298,10 +301,10 @@ struct TestKeeperStorageExistsRequest final : public TestKeeperStorageRequest
    }
};

struct TestKeeperStorageSetRequest final : public TestKeeperStorageRequest
struct NuKeeperStorageSetRequest final : public NuKeeperStorageRequest
{
    using TestKeeperStorageRequest::TestKeeperStorageRequest;
    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & /* ephemerals */, int64_t zxid, int64_t /* session_id */) const override
    using NuKeeperStorageRequest::NuKeeperStorageRequest;
    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t zxid, int64_t /* session_id */) const override
    {
        Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
        Coordination::ZooKeeperSetResponse & response = dynamic_cast<Coordination::ZooKeeperSetResponse &>(*response_ptr);
@ -341,17 +344,17 @@ struct TestKeeperStorageSetRequest final : public TestKeeperStorageRequest
        return { response_ptr, undo };
    }

    TestKeeperStorage::ResponsesForSessions processWatches(TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches) const override
    NuKeeperStorage::ResponsesForSessions processWatches(NuKeeperStorage::Watches & watches, NuKeeperStorage::Watches & list_watches) const override
    {
        return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CHANGED);
    }

};

struct TestKeeperStorageListRequest final : public TestKeeperStorageRequest
struct NuKeeperStorageListRequest final : public NuKeeperStorageRequest
{
    using TestKeeperStorageRequest::TestKeeperStorageRequest;
    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & /* ephemerals */, int64_t /*zxid*/, int64_t /*session_id*/) const override
    using NuKeeperStorageRequest::NuKeeperStorageRequest;
    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t /*zxid*/, int64_t /*session_id*/) const override
    {
        Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
        Coordination::ZooKeeperListResponse & response = dynamic_cast<Coordination::ZooKeeperListResponse &>(*response_ptr);
@ -387,10 +390,10 @@ struct TestKeeperStorageListRequest final : public TestKeeperStorageRequest
    }
};

struct TestKeeperStorageCheckRequest final : public TestKeeperStorageRequest
struct NuKeeperStorageCheckRequest final : public NuKeeperStorageRequest
{
    using TestKeeperStorageRequest::TestKeeperStorageRequest;
    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & /* ephemerals */, int64_t /*zxid*/, int64_t /*session_id*/) const override
    using NuKeeperStorageRequest::NuKeeperStorageRequest;
    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t /*zxid*/, int64_t /*session_id*/) const override
    {
        Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
        Coordination::ZooKeeperCheckResponse & response = dynamic_cast<Coordination::ZooKeeperCheckResponse &>(*response_ptr);
@ -413,11 +416,11 @@ struct TestKeeperStorageCheckRequest final : public TestKeeperStorageRequest
    }
};

struct TestKeeperStorageMultiRequest final : public TestKeeperStorageRequest
struct NuKeeperStorageMultiRequest final : public NuKeeperStorageRequest
{
    std::vector<TestKeeperStorageRequestPtr> concrete_requests;
    explicit TestKeeperStorageMultiRequest(const Coordination::ZooKeeperRequestPtr & zk_request_)
        : TestKeeperStorageRequest(zk_request_)
    std::vector<NuKeeperStorageRequestPtr> concrete_requests;
    explicit NuKeeperStorageMultiRequest(const Coordination::ZooKeeperRequestPtr & zk_request_)
        : NuKeeperStorageRequest(zk_request_)
    {
        Coordination::ZooKeeperMultiRequest & request = dynamic_cast<Coordination::ZooKeeperMultiRequest &>(*zk_request);
        concrete_requests.reserve(request.requests.size());
@ -427,26 +430,26 @@ struct TestKeeperStorageMultiRequest final : public TestKeeperStorageRequest
            auto sub_zk_request = std::dynamic_pointer_cast<Coordination::ZooKeeperRequest>(sub_request);
            if (sub_zk_request->getOpNum() == Coordination::OpNum::Create)
            {
                concrete_requests.push_back(std::make_shared<TestKeeperStorageCreateRequest>(sub_zk_request));
                concrete_requests.push_back(std::make_shared<NuKeeperStorageCreateRequest>(sub_zk_request));
            }
            else if (sub_zk_request->getOpNum() == Coordination::OpNum::Remove)
            {
                concrete_requests.push_back(std::make_shared<TestKeeperStorageRemoveRequest>(sub_zk_request));
                concrete_requests.push_back(std::make_shared<NuKeeperStorageRemoveRequest>(sub_zk_request));
            }
            else if (sub_zk_request->getOpNum() == Coordination::OpNum::Set)
            {
                concrete_requests.push_back(std::make_shared<TestKeeperStorageSetRequest>(sub_zk_request));
                concrete_requests.push_back(std::make_shared<NuKeeperStorageSetRequest>(sub_zk_request));
            }
            else if (sub_zk_request->getOpNum() == Coordination::OpNum::Check)
            {
                concrete_requests.push_back(std::make_shared<TestKeeperStorageCheckRequest>(sub_zk_request));
                concrete_requests.push_back(std::make_shared<NuKeeperStorageCheckRequest>(sub_zk_request));
            }
            else
                throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal command as part of multi ZooKeeper request {}", sub_zk_request->getOpNum());
        }
    }

    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container & container, TestKeeperStorage::Ephemerals & ephemerals, int64_t zxid, int64_t session_id) const override
    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & ephemerals, int64_t zxid, int64_t session_id) const override
    {
        Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
        Coordination::ZooKeeperMultiResponse & response = dynamic_cast<Coordination::ZooKeeperMultiResponse &>(*response_ptr);
@ -499,9 +502,9 @@ struct TestKeeperStorageMultiRequest final : public TestKeeperStorageRequest
        }
    }

    TestKeeperStorage::ResponsesForSessions processWatches(TestKeeperStorage::Watches & watches, TestKeeperStorage::Watches & list_watches) const override
    NuKeeperStorage::ResponsesForSessions processWatches(NuKeeperStorage::Watches & watches, NuKeeperStorage::Watches & list_watches) const override
    {
        TestKeeperStorage::ResponsesForSessions result;
        NuKeeperStorage::ResponsesForSessions result;
        for (const auto & generic_request : concrete_requests)
        {
            auto responses = generic_request->processWatches(watches, list_watches);
@ -511,75 +514,49 @@ struct TestKeeperStorageMultiRequest final : public TestKeeperStorageRequest
    }
};

struct TestKeeperStorageCloseRequest final : public TestKeeperStorageRequest
struct NuKeeperStorageCloseRequest final : public NuKeeperStorageRequest
{
    using TestKeeperStorageRequest::TestKeeperStorageRequest;
    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(TestKeeperStorage::Container &, TestKeeperStorage::Ephemerals &, int64_t, int64_t) const override
    using NuKeeperStorageRequest::NuKeeperStorageRequest;
    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container &, NuKeeperStorage::Ephemerals &, int64_t, int64_t) const override
    {
        throw DB::Exception("Called process on close request", ErrorCodes::LOGICAL_ERROR);
    }
};

TestKeeperStorage::ResponsesForSessions TestKeeperStorage::finalize(const RequestsForSessions & expired_requests)
void NuKeeperStorage::finalize()
{
    if (finalized)
        throw DB::Exception("Testkeeper storage already finalized", ErrorCodes::LOGICAL_ERROR);

    finalized = true;

    ResponsesForSessions finalize_results;
    auto finish_watch = [] (const auto & watch_pair) -> ResponsesForSessions
    {
        ResponsesForSessions results;
        std::shared_ptr<Coordination::ZooKeeperWatchResponse> response = std::make_shared<Coordination::ZooKeeperWatchResponse>();
        response->type = Coordination::SESSION;
        response->state = Coordination::EXPIRED_SESSION;
        response->error = Coordination::Error::ZSESSIONEXPIRED;
    for (const auto & [session_id, ephemerals_paths] : ephemerals)
        for (const String & ephemeral_path : ephemerals_paths)
            container.erase(ephemeral_path);

        for (auto & watcher_session : watch_pair.second)
            results.push_back(ResponseForSession{watcher_session, response});
        return results;
    };

    for (auto & path_watch : watches)
    {
        auto watch_responses = finish_watch(path_watch);
        finalize_results.insert(finalize_results.end(), watch_responses.begin(), watch_responses.end());
    }
    ephemerals.clear();

    watches.clear();
    for (auto & path_watch : list_watches)
    {
        auto list_watch_responses = finish_watch(path_watch);
        finalize_results.insert(finalize_results.end(), list_watch_responses.begin(), list_watch_responses.end());
    }
    list_watches.clear();
    sessions_and_watchers.clear();

    for (const auto & [session_id, zk_request] : expired_requests)
    {
        auto response = zk_request->makeResponse();
        response->error = Coordination::Error::ZSESSIONEXPIRED;
        finalize_results.push_back(ResponseForSession{session_id, response});
    }
    return finalize_results;
    session_expiry_queue.clear();
}


class TestKeeperWrapperFactory final : private boost::noncopyable
class NuKeeperWrapperFactory final : private boost::noncopyable
{

public:
    using Creator = std::function<TestKeeperStorageRequestPtr(const Coordination::ZooKeeperRequestPtr &)>;
    using Creator = std::function<NuKeeperStorageRequestPtr(const Coordination::ZooKeeperRequestPtr &)>;
    using OpNumToRequest = std::unordered_map<Coordination::OpNum, Creator>;

    static TestKeeperWrapperFactory & instance()
    static NuKeeperWrapperFactory & instance()
    {
        static TestKeeperWrapperFactory factory;
        static NuKeeperWrapperFactory factory;
        return factory;
    }

    TestKeeperStorageRequestPtr get(const Coordination::ZooKeeperRequestPtr & zk_request) const
    NuKeeperStorageRequestPtr get(const Coordination::ZooKeeperRequestPtr & zk_request) const
    {
        auto it = op_num_to_request.find(zk_request->getOpNum());
        if (it == op_num_to_request.end())
@ -596,36 +573,37 @@ public:

private:
    OpNumToRequest op_num_to_request;
    TestKeeperWrapperFactory();
    NuKeeperWrapperFactory();
};

template<Coordination::OpNum num, typename RequestT>
void registerTestKeeperRequestWrapper(TestKeeperWrapperFactory & factory)
void registerNuKeeperRequestWrapper(NuKeeperWrapperFactory & factory)
{
    factory.registerRequest(num, [] (const Coordination::ZooKeeperRequestPtr & zk_request) { return std::make_shared<RequestT>(zk_request); });
}


TestKeeperWrapperFactory::TestKeeperWrapperFactory()
NuKeeperWrapperFactory::NuKeeperWrapperFactory()
{
    registerTestKeeperRequestWrapper<Coordination::OpNum::Heartbeat, TestKeeperStorageHeartbeatRequest>(*this);
    //registerTestKeeperRequestWrapper<Coordination::OpNum::Auth, TestKeeperStorageAuthRequest>(*this);
    registerTestKeeperRequestWrapper<Coordination::OpNum::Close, TestKeeperStorageCloseRequest>(*this);
    registerTestKeeperRequestWrapper<Coordination::OpNum::Create, TestKeeperStorageCreateRequest>(*this);
    registerTestKeeperRequestWrapper<Coordination::OpNum::Remove, TestKeeperStorageRemoveRequest>(*this);
    registerTestKeeperRequestWrapper<Coordination::OpNum::Exists, TestKeeperStorageExistsRequest>(*this);
    registerTestKeeperRequestWrapper<Coordination::OpNum::Get, TestKeeperStorageGetRequest>(*this);
    registerTestKeeperRequestWrapper<Coordination::OpNum::Set, TestKeeperStorageSetRequest>(*this);
    registerTestKeeperRequestWrapper<Coordination::OpNum::List, TestKeeperStorageListRequest>(*this);
    registerTestKeeperRequestWrapper<Coordination::OpNum::SimpleList, TestKeeperStorageListRequest>(*this);
    registerTestKeeperRequestWrapper<Coordination::OpNum::Check, TestKeeperStorageCheckRequest>(*this);
    registerTestKeeperRequestWrapper<Coordination::OpNum::Multi, TestKeeperStorageMultiRequest>(*this);
    registerNuKeeperRequestWrapper<Coordination::OpNum::Heartbeat, NuKeeperStorageHeartbeatRequest>(*this);
    registerNuKeeperRequestWrapper<Coordination::OpNum::Sync, NuKeeperStorageSyncRequest>(*this);
    //registerNuKeeperRequestWrapper<Coordination::OpNum::Auth, NuKeeperStorageAuthRequest>(*this);
    registerNuKeeperRequestWrapper<Coordination::OpNum::Close, NuKeeperStorageCloseRequest>(*this);
    registerNuKeeperRequestWrapper<Coordination::OpNum::Create, NuKeeperStorageCreateRequest>(*this);
    registerNuKeeperRequestWrapper<Coordination::OpNum::Remove, NuKeeperStorageRemoveRequest>(*this);
    registerNuKeeperRequestWrapper<Coordination::OpNum::Exists, NuKeeperStorageExistsRequest>(*this);
    registerNuKeeperRequestWrapper<Coordination::OpNum::Get, NuKeeperStorageGetRequest>(*this);
    registerNuKeeperRequestWrapper<Coordination::OpNum::Set, NuKeeperStorageSetRequest>(*this);
    registerNuKeeperRequestWrapper<Coordination::OpNum::List, NuKeeperStorageListRequest>(*this);
    registerNuKeeperRequestWrapper<Coordination::OpNum::SimpleList, NuKeeperStorageListRequest>(*this);
    registerNuKeeperRequestWrapper<Coordination::OpNum::Check, NuKeeperStorageCheckRequest>(*this);
    registerNuKeeperRequestWrapper<Coordination::OpNum::Multi, NuKeeperStorageMultiRequest>(*this);
}


TestKeeperStorage::ResponsesForSessions TestKeeperStorage::processRequest(const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id)
NuKeeperStorage::ResponsesForSessions NuKeeperStorage::processRequest(const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id)
{
    TestKeeperStorage::ResponsesForSessions results;
    NuKeeperStorage::ResponsesForSessions results;
    if (zk_request->getOpNum() == Coordination::OpNum::Close)
    {
        auto it = ephemerals.find(session_id);
@ -645,12 +623,24 @@ TestKeeperStorage::ResponsesForSessions TestKeeperStorage::processRequest(const
        auto response = std::make_shared<Coordination::ZooKeeperCloseResponse>();
        response->xid = zk_request->xid;
        response->zxid = getZXID();
        session_expiry_queue.remove(session_id);
        session_and_timeout.erase(session_id);
        results.push_back(ResponseForSession{session_id, response});
    }
    else if (zk_request->getOpNum() == Coordination::OpNum::Heartbeat)
    {
        session_expiry_queue.update(session_id, session_and_timeout[session_id]);
        NuKeeperStorageRequestPtr storage_request = NuKeeperWrapperFactory::instance().get(zk_request);
        auto [response, _] = storage_request->process(container, ephemerals, zxid, session_id);
        response->xid = zk_request->xid;
        response->zxid = getZXID();

        results.push_back(ResponseForSession{session_id, response});
    }
    else
    {

        TestKeeperStorageRequestPtr storage_request = TestKeeperWrapperFactory::instance().get(zk_request);
        NuKeeperStorageRequestPtr storage_request = NuKeeperWrapperFactory::instance().get(zk_request);
        auto [response, _] = storage_request->process(container, ephemerals, zxid, session_id);

        if (zk_request->has_watch)
@ -669,15 +659,6 @@ TestKeeperStorage::ResponsesForSessions TestKeeperStorage::processRequest(const
                watches[zk_request->getPath()].emplace_back(session_id);
                sessions_and_watchers[session_id].emplace(zk_request->getPath());
            }
            else
            {
                std::shared_ptr<Coordination::ZooKeeperWatchResponse> watch_response = std::make_shared<Coordination::ZooKeeperWatchResponse>();
                watch_response->path = zk_request->getPath();
                watch_response->xid = -1;
                watch_response->error = response->error;
                watch_response->type = Coordination::Event::NOTWATCHING;
                results.push_back(ResponseForSession{session_id, watch_response});
            }
        }

        if (response->error == Coordination::Error::ZOK)
@ -696,7 +677,7 @@ TestKeeperStorage::ResponsesForSessions TestKeeperStorage::processRequest(const
}


void TestKeeperStorage::clearDeadWatches(int64_t session_id)
void NuKeeperStorage::clearDeadWatches(int64_t session_id)
{
    auto watches_it = sessions_and_watchers.find(session_id);
    if (watches_it != sessions_and_watchers.end())
@ -4,27 +4,28 @@
#include <Common/ZooKeeper/IKeeper.h>
#include <Common/ConcurrentBoundedQueue.h>
#include <Common/ZooKeeper/ZooKeeperCommon.h>
#include <Coordination/SessionExpiryQueue.h>
#include <unordered_map>
#include <unordered_set>
#include <vector>

namespace zkutil
namespace DB
{

using namespace DB;
struct TestKeeperStorageRequest;
using TestKeeperStorageRequestPtr = std::shared_ptr<TestKeeperStorageRequest>;
struct NuKeeperStorageRequest;
using NuKeeperStorageRequestPtr = std::shared_ptr<NuKeeperStorageRequest>;
using ResponseCallback = std::function<void(const Coordination::ZooKeeperResponsePtr &)>;

class TestKeeperStorage
class NuKeeperStorage
{
public:
    std::atomic<int64_t> session_id_counter{0};
    int64_t session_id_counter{0};

    struct Node
    {
        String data;
        Coordination::ACLs acls;
        Coordination::ACLs acls{};
        bool is_ephemeral = false;
        bool is_sequental = false;
        Coordination::Stat stat{};
@ -50,6 +51,7 @@ public:
    using Container = std::map<std::string, Node>;
    using Ephemerals = std::unordered_map<int64_t, std::unordered_set<String>>;
    using SessionAndWatcher = std::unordered_map<int64_t, std::unordered_set<String>>;
    using SessionAndTimeout = std::unordered_map<int64_t, long>;
    using SessionIDs = std::vector<int64_t>;

    using Watches = std::map<String /* path, relative of root_path */, SessionIDs>;
@ -57,9 +59,11 @@ public:
    Container container;
    Ephemerals ephemerals;
    SessionAndWatcher sessions_and_watchers;
    SessionExpiryQueue session_expiry_queue;
    SessionAndTimeout session_and_timeout;

    std::atomic<int64_t> zxid{0};
    std::atomic<bool> finalized{false};
    int64_t zxid{0};
    bool finalized{false};

    Watches watches;
    Watches list_watches; /// Watches for 'list' request (watches on children).
@ -68,18 +72,27 @@ public:

    int64_t getZXID()
    {
        return zxid.fetch_add(1);
        return zxid++;
    }

public:
    TestKeeperStorage();
    NuKeeperStorage(int64_t tick_time_ms);

    int64_t getSessionID(int64_t session_timeout_ms)
    {
        auto result = session_id_counter++;
        session_and_timeout.emplace(result, session_timeout_ms);
        session_expiry_queue.update(result, session_timeout_ms);
        return result;
    }

    ResponsesForSessions processRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id);
    ResponsesForSessions finalize(const RequestsForSessions & expired_requests);

    int64_t getSessionID()
    void finalize();

    std::unordered_set<int64_t> getDeadSessions()
    {
        return session_id_counter.fetch_add(1);
        return session_expiry_queue.getExpiredSessions();
    }
};

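Putting the storage API above together, a minimal sketch of the session lifecycle against NuKeeperStorage alone (single-threaded use, as in the state machine); the tick time and the request object are assumptions, not part of the patch:

/// Hypothetical standalone usage sketch of NuKeeperStorage.
void exampleStorageSessionLifecycle(const Coordination::ZooKeeperRequestPtr & create_request)
{
    DB::NuKeeperStorage storage(/* tick_time_ms = */ 500);

    int64_t session_id = storage.getSessionID(/* session_timeout_ms = */ 10000);  /// also registers the session in session_expiry_queue

    auto responses = storage.processRequest(create_request, session_id);          /// heartbeats refresh the expiry queue, Close removes the session

    std::unordered_set<int64_t> dead = storage.getDeadSessions();                 /// sessions whose timeout elapsed without a heartbeat

    storage.finalize();                                                           /// drops ephemerals, watches and the expiry queue
}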
237 src/Coordination/NuKeeperStorageDispatcher.cpp Normal file
@ -0,0 +1,237 @@
#include <Coordination/NuKeeperStorageDispatcher.h>
#include <Common/setThreadName.h>

namespace DB
{

namespace ErrorCodes
{

    extern const int LOGICAL_ERROR;
    extern const int TIMEOUT_EXCEEDED;
}

NuKeeperStorageDispatcher::NuKeeperStorageDispatcher()
    : coordination_settings(std::make_shared<CoordinationSettings>())
    , log(&Poco::Logger::get("NuKeeperDispatcher"))
{
}

void NuKeeperStorageDispatcher::requestThread()
{
    setThreadName("NuKeeperReqT");
    while (!shutdown_called)
    {
        NuKeeperStorage::RequestForSession request;

        UInt64 max_wait = UInt64(coordination_settings->operation_timeout_ms.totalMilliseconds());

        if (requests_queue.tryPop(request, max_wait))
        {
            if (shutdown_called)
                break;

            try
            {
                server->putRequest(request);
            }
            catch (...)
            {
                tryLogCurrentException(__PRETTY_FUNCTION__);
            }
        }
    }
}

void NuKeeperStorageDispatcher::responseThread()
{
    setThreadName("NuKeeperRspT");
    while (!shutdown_called)
    {
        NuKeeperStorage::ResponseForSession response_for_session;

        UInt64 max_wait = UInt64(coordination_settings->operation_timeout_ms.totalMilliseconds());

        if (responses_queue.tryPop(response_for_session, max_wait))
        {
            if (shutdown_called)
                break;

            try
            {
                setResponse(response_for_session.session_id, response_for_session.response);
            }
            catch (...)
            {
                tryLogCurrentException(__PRETTY_FUNCTION__);
            }
        }
    }
}

void NuKeeperStorageDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response)
{
    std::lock_guard lock(session_to_response_callback_mutex);
    auto session_writer = session_to_response_callback.find(session_id);
    if (session_writer == session_to_response_callback.end())
        return;

    session_writer->second(response);
    /// Session closed, no more writes
    if (response->xid != Coordination::WATCH_XID && response->getOpNum() == Coordination::OpNum::Close)
        session_to_response_callback.erase(session_writer);
}

bool NuKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id)
{
    {
        std::lock_guard lock(session_to_response_callback_mutex);
        if (session_to_response_callback.count(session_id) == 0)
            return false;
    }

    NuKeeperStorage::RequestForSession request_info;
    request_info.request = request;
    request_info.session_id = session_id;

    std::lock_guard lock(push_request_mutex);
    /// Put close requests without timeouts
    if (request->getOpNum() == Coordination::OpNum::Close)
        requests_queue.push(std::move(request_info));
    else if (!requests_queue.tryPush(std::move(request_info), coordination_settings->operation_timeout_ms.totalMilliseconds()))
        throw Exception("Cannot push request to queue within operation timeout", ErrorCodes::TIMEOUT_EXCEEDED);
    return true;
}

void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfiguration & config)
{
    LOG_DEBUG(log, "Initializing storage dispatcher");
    int myid = config.getInt("test_keeper_server.server_id");

    coordination_settings->loadFromConfig("test_keeper_server.coordination_settings", config);

    server = std::make_unique<NuKeeperServer>(myid, coordination_settings, config, responses_queue);
    try
    {
        LOG_DEBUG(log, "Waiting server to initialize");
        server->startup();
        LOG_DEBUG(log, "Server initialized, waiting for quorum");

        server->waitInit();
        LOG_DEBUG(log, "Quorum initialized");
    }
    catch (...)
    {
        tryLogCurrentException(__PRETTY_FUNCTION__);
        throw;
    }

    request_thread = ThreadFromGlobalPool([this] { requestThread(); });
    responses_thread = ThreadFromGlobalPool([this] { responseThread(); });
    session_cleaner_thread = ThreadFromGlobalPool([this] { sessionCleanerTask(); });

    LOG_DEBUG(log, "Dispatcher initialized");
}

void NuKeeperStorageDispatcher::shutdown()
{
    try
    {
        {
            std::lock_guard lock(push_request_mutex);

            if (shutdown_called)
                return;

            LOG_DEBUG(log, "Shutting down storage dispatcher");
            shutdown_called = true;

            if (session_cleaner_thread.joinable())
                session_cleaner_thread.join();

            if (request_thread.joinable())
                request_thread.join();

            if (responses_thread.joinable())
                responses_thread.join();
        }

        if (server)
            server->shutdown();

        NuKeeperStorage::RequestForSession request_for_session;
        while (requests_queue.tryPop(request_for_session))
        {
            auto response = request_for_session.request->makeResponse();
            response->error = Coordination::Error::ZSESSIONEXPIRED;
            setResponse(request_for_session.session_id, response);
        }
        session_to_response_callback.clear();
    }
    catch (...)
    {
        tryLogCurrentException(__PRETTY_FUNCTION__);
    }

    LOG_DEBUG(log, "Dispatcher shut down");
}

NuKeeperStorageDispatcher::~NuKeeperStorageDispatcher()
{
    shutdown();
}

void NuKeeperStorageDispatcher::registerSession(int64_t session_id, ZooKeeperResponseCallback callback)
{
    std::lock_guard lock(session_to_response_callback_mutex);
    if (!session_to_response_callback.try_emplace(session_id, callback).second)
        throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session with id {} already registered in dispatcher", session_id);
}

void NuKeeperStorageDispatcher::sessionCleanerTask()
{
    while (true)
    {
        if (shutdown_called)
            return;

        try
        {
            if (isLeader())
            {
                auto dead_sessions = server->getDeadSessions();
                for (int64_t dead_session : dead_sessions)
                {
                    LOG_INFO(log, "Found dead session {}, will try to close it", dead_session);
                    Coordination::ZooKeeperRequestPtr request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close);
                    request->xid = Coordination::CLOSE_XID;
                    NuKeeperStorage::RequestForSession request_info;
                    request_info.request = request;
                    request_info.session_id = dead_session;
                    {
                        std::lock_guard lock(push_request_mutex);
                        requests_queue.push(std::move(request_info));
                    }
                    finishSession(dead_session);
                    LOG_INFO(log, "Dead session close request pushed");
                }
            }
        }
        catch (...)
        {
            tryLogCurrentException(__PRETTY_FUNCTION__);
        }

        std::this_thread::sleep_for(std::chrono::milliseconds(coordination_settings->dead_session_check_period_ms.totalMilliseconds()));
    }
}

void NuKeeperStorageDispatcher::finishSession(int64_t session_id)
{
    std::lock_guard lock(session_to_response_callback_mutex);
    auto session_it = session_to_response_callback.find(session_id);
    if (session_it != session_to_response_callback.end())
        session_to_response_callback.erase(session_it);
}

}
89
src/Coordination/NuKeeperStorageDispatcher.h
Normal file
@ -0,0 +1,89 @@
|
||||
#pragma once
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include <Common/config.h>
|
||||
# include "config_core.h"
|
||||
#endif
|
||||
|
||||
#if USE_NURAFT
|
||||
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <Common/ConcurrentBoundedQueue.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <functional>
|
||||
#include <Coordination/NuKeeperServer.h>
|
||||
#include <Coordination/CoordinationSettings.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
using ZooKeeperResponseCallback = std::function<void(const Coordination::ZooKeeperResponsePtr & response)>;
|
||||
|
||||
class NuKeeperStorageDispatcher
|
||||
{
|
||||
|
||||
private:
|
||||
std::mutex push_request_mutex;
|
||||
|
||||
CoordinationSettingsPtr coordination_settings;
|
||||
using RequestsQueue = ConcurrentBoundedQueue<NuKeeperStorage::RequestForSession>;
|
||||
RequestsQueue requests_queue{1};
|
||||
ResponsesQueue responses_queue;
|
||||
std::atomic<bool> shutdown_called{false};
|
||||
using SessionToResponseCallback = std::unordered_map<int64_t, ZooKeeperResponseCallback>;
|
||||
|
||||
std::mutex session_to_response_callback_mutex;
|
||||
SessionToResponseCallback session_to_response_callback;
|
||||
|
||||
ThreadFromGlobalPool request_thread;
|
||||
ThreadFromGlobalPool responses_thread;
|
||||
|
||||
ThreadFromGlobalPool session_cleaner_thread;
|
||||
|
||||
std::unique_ptr<NuKeeperServer> server;
|
||||
|
||||
Poco::Logger * log;
|
||||
|
||||
private:
|
||||
void requestThread();
|
||||
void responseThread();
|
||||
void sessionCleanerTask();
|
||||
void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response);
|
||||
|
||||
public:
|
||||
NuKeeperStorageDispatcher();
|
||||
|
||||
void initialize(const Poco::Util::AbstractConfiguration & config);
|
||||
|
||||
void shutdown();
|
||||
|
||||
~NuKeeperStorageDispatcher();
|
||||
|
||||
bool putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id);
|
||||
|
||||
bool isLeader() const
|
||||
{
|
||||
return server->isLeader();
|
||||
}
|
||||
|
||||
bool hasLeader() const
|
||||
{
|
||||
return server->isLeaderAlive();
|
||||
}
|
||||
|
||||
int64_t getSessionID(long session_timeout_ms)
|
||||
{
|
||||
return server->getSessionID(session_timeout_ms);
|
||||
}
|
||||
|
||||
void registerSession(int64_t session_id, ZooKeeperResponseCallback callback);
|
||||
/// Call when we no longer need any responses for this session (the session has expired)
|
||||
void finishSession(int64_t session_id);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
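The header above is the whole public surface the connection layer needs from the dispatcher. Below is a minimal usage sketch (not part of the patch; the function name, the "/example" path and the 10000 ms session timeout are made up for illustration) showing the expected call order: obtain a session id through Raft, register a response callback, push requests, and finish the session on disconnect.

#include <Coordination/NuKeeperStorageDispatcher.h>
#include <Common/ZooKeeper/ZooKeeperCommon.h>

void exampleClientSession(DB::NuKeeperStorageDispatcher & dispatcher)
{
    /// Ask the Raft cluster for a fresh session id (10000 ms is an arbitrary session timeout here).
    int64_t session_id = dispatcher.getSessionID(10000);

    /// Responses for this session arrive asynchronously through the registered callback.
    dispatcher.registerSession(session_id, [](const Coordination::ZooKeeperResponsePtr & /*response*/)
    {
        /// Forward the response to the client connection here.
    });

    /// Build and submit an ordinary request.
    Coordination::ZooKeeperRequestPtr request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Get);
    request->xid = 1;
    dynamic_cast<Coordination::ZooKeeperGetRequest *>(request.get())->path = "/example";
    dispatcher.putRequest(request, session_id);

    /// On disconnect, stop routing responses to this session.
    dispatcher.finishSession(session_id);
}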
87
src/Coordination/NuKeeperStorageSerializer.cpp
Normal file
@ -0,0 +1,87 @@
|
||||
#include <Coordination/NuKeeperStorageSerializer.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Common/ZooKeeper/ZooKeeperIO.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
void writeNode(const NuKeeperStorage::Node & node, WriteBuffer & out)
|
||||
{
|
||||
Coordination::write(node.data, out);
|
||||
Coordination::write(node.acls, out);
|
||||
Coordination::write(node.is_ephemeral, out);
|
||||
Coordination::write(node.is_sequental, out);
|
||||
Coordination::write(node.stat, out);
|
||||
Coordination::write(node.seq_num, out);
|
||||
}
|
||||
|
||||
void readNode(NuKeeperStorage::Node & node, ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(node.data, in);
|
||||
Coordination::read(node.acls, in);
|
||||
Coordination::read(node.is_ephemeral, in);
|
||||
Coordination::read(node.is_sequental, in);
|
||||
Coordination::read(node.stat, in);
|
||||
Coordination::read(node.seq_num, in);
|
||||
}
|
||||
}
|
||||
|
||||
void NuKeeperStorageSerializer::serialize(const NuKeeperStorage & storage, WriteBuffer & out)
|
||||
{
|
||||
Coordination::write(storage.zxid, out);
|
||||
Coordination::write(storage.session_id_counter, out);
|
||||
Coordination::write(storage.container.size(), out);
|
||||
for (const auto & [path, node] : storage.container)
|
||||
{
|
||||
Coordination::write(path, out);
|
||||
writeNode(node, out);
|
||||
}
|
||||
Coordination::write(storage.ephemerals.size(), out);
|
||||
for (const auto & [session_id, paths] : storage.ephemerals)
|
||||
{
|
||||
Coordination::write(session_id, out);
|
||||
Coordination::write(paths.size(), out);
|
||||
for (const auto & path : paths)
|
||||
Coordination::write(path, out);
|
||||
}
|
||||
}
|
||||
|
||||
void NuKeeperStorageSerializer::deserialize(NuKeeperStorage & storage, ReadBuffer & in)
|
||||
{
|
||||
int64_t session_id_counter, zxid;
|
||||
Coordination::read(zxid, in);
|
||||
Coordination::read(session_id_counter, in);
|
||||
storage.zxid = zxid;
|
||||
storage.session_id_counter = session_id_counter;
|
||||
|
||||
size_t container_size;
|
||||
Coordination::read(container_size, in);
|
||||
while (storage.container.size() < container_size)
|
||||
{
|
||||
std::string path;
|
||||
Coordination::read(path, in);
|
||||
NuKeeperStorage::Node node;
|
||||
readNode(node, in);
|
||||
storage.container[path] = node;
|
||||
}
|
||||
size_t ephemerals_size;
|
||||
Coordination::read(ephemerals_size, in);
|
||||
while (storage.ephemerals.size() < ephemerals_size)
|
||||
{
|
||||
int64_t session_id;
|
||||
size_t ephemerals_for_session;
|
||||
Coordination::read(session_id, in);
|
||||
Coordination::read(ephemerals_for_session, in);
|
||||
while (storage.ephemerals[session_id].size() < ephemerals_for_session)
|
||||
{
|
||||
std::string ephemeral_path;
|
||||
Coordination::read(ephemeral_path, in);
|
||||
storage.ephemerals[session_id].emplace(ephemeral_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
17
src/Coordination/NuKeeperStorageSerializer.h
Normal file
@ -0,0 +1,17 @@
|
||||
#pragma once
|
||||
#include <Coordination/NuKeeperStorage.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class NuKeeperStorageSerializer
|
||||
{
|
||||
public:
|
||||
static void serialize(const NuKeeperStorage & storage, WriteBuffer & out);
|
||||
|
||||
static void deserialize(NuKeeperStorage & storage, ReadBuffer & in);
|
||||
};
|
||||
|
||||
}
|
20
src/Coordination/ReadBufferFromNuraftBuffer.h
Normal file
@ -0,0 +1,20 @@
|
||||
#pragma once
|
||||
#include <IO/ReadBufferFromMemory.h>
|
||||
|
||||
#include <libnuraft/nuraft.hxx> // Y_IGNORE
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ReadBufferFromNuraftBuffer : public ReadBufferFromMemory
|
||||
{
|
||||
public:
|
||||
explicit ReadBufferFromNuraftBuffer(nuraft::ptr<nuraft::buffer> buffer)
|
||||
: ReadBufferFromMemory(buffer->data_begin(), buffer->size())
|
||||
{}
|
||||
explicit ReadBufferFromNuraftBuffer(nuraft::buffer & buffer)
|
||||
: ReadBufferFromMemory(buffer.data_begin(), buffer.size())
|
||||
{}
|
||||
};
|
||||
|
||||
}
|
83
src/Coordination/SessionExpiryQueue.cpp
Normal file
@ -0,0 +1,83 @@
|
||||
#include <Coordination/SessionExpiryQueue.h>
|
||||
#include <common/logger_useful.h>
|
||||
namespace DB
|
||||
{
|
||||
|
||||
bool SessionExpiryQueue::remove(int64_t session_id)
|
||||
{
|
||||
auto session_it = session_to_timeout.find(session_id);
|
||||
if (session_it != session_to_timeout.end())
|
||||
{
|
||||
auto set_it = expiry_to_sessions.find(session_it->second);
|
||||
if (set_it != expiry_to_sessions.end())
|
||||
set_it->second.erase(session_id);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SessionExpiryQueue::update(int64_t session_id, int64_t timeout_ms)
|
||||
{
|
||||
auto session_it = session_to_timeout.find(session_id);
|
||||
int64_t now = getNowMilliseconds();
|
||||
int64_t new_expiry_time = roundToNextInterval(now + timeout_ms);
|
||||
|
||||
if (session_it != session_to_timeout.end())
|
||||
{
|
||||
if (new_expiry_time == session_it->second)
|
||||
return false;
|
||||
|
||||
auto set_it = expiry_to_sessions.find(new_expiry_time);
|
||||
if (set_it == expiry_to_sessions.end())
|
||||
std::tie(set_it, std::ignore) = expiry_to_sessions.emplace(new_expiry_time, std::unordered_set<int64_t>());
|
||||
|
||||
set_it->second.insert(session_id);
|
||||
int64_t prev_expiry_time = session_it->second;
|
||||
|
||||
if (prev_expiry_time != new_expiry_time)
|
||||
{
|
||||
auto prev_set_it = expiry_to_sessions.find(prev_expiry_time);
|
||||
if (prev_set_it != expiry_to_sessions.end())
|
||||
prev_set_it->second.erase(session_id);
|
||||
}
|
||||
session_it->second = new_expiry_time;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
session_to_timeout[session_id] = new_expiry_time;
|
||||
auto set_it = expiry_to_sessions.find(new_expiry_time);
|
||||
if (set_it == expiry_to_sessions.end())
|
||||
std::tie(set_it, std::ignore) = expiry_to_sessions.emplace(new_expiry_time, std::unordered_set<int64_t>());
|
||||
set_it->second.insert(session_id);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
std::unordered_set<int64_t> SessionExpiryQueue::getExpiredSessions()
|
||||
{
|
||||
int64_t now = getNowMilliseconds();
|
||||
if (now < next_expiration_time)
|
||||
return {};
|
||||
|
||||
auto set_it = expiry_to_sessions.find(next_expiration_time);
|
||||
int64_t new_expiration_time = next_expiration_time + expiration_interval;
|
||||
next_expiration_time = new_expiration_time;
|
||||
if (set_it != expiry_to_sessions.end())
|
||||
{
|
||||
auto result = set_it->second;
|
||||
expiry_to_sessions.erase(set_it);
|
||||
return result;
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
void SessionExpiryQueue::clear()
|
||||
{
|
||||
session_to_timeout.clear();
|
||||
expiry_to_sessions.clear();
|
||||
}
|
||||
|
||||
}
|
45
src/Coordination/SessionExpiryQueue.h
Normal file
@ -0,0 +1,45 @@
|
||||
#pragma once
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <chrono>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class SessionExpiryQueue
|
||||
{
|
||||
private:
|
||||
std::unordered_map<int64_t, int64_t> session_to_timeout;
|
||||
std::unordered_map<int64_t, std::unordered_set<int64_t>> expiry_to_sessions;
|
||||
|
||||
int64_t expiration_interval;
|
||||
int64_t next_expiration_time;
|
||||
|
||||
static int64_t getNowMilliseconds()
|
||||
{
|
||||
using namespace std::chrono;
|
||||
return duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
|
||||
}
|
||||
|
||||
int64_t roundToNextInterval(int64_t time) const
|
||||
{
|
||||
return (time / expiration_interval + 1) * expiration_interval;
|
||||
}
|
||||
|
||||
public:
|
||||
explicit SessionExpiryQueue(int64_t expiration_interval_)
|
||||
: expiration_interval(expiration_interval_)
|
||||
, next_expiration_time(roundToNextInterval(getNowMilliseconds()))
|
||||
{
|
||||
}
|
||||
|
||||
bool remove(int64_t session_id);
|
||||
|
||||
bool update(int64_t session_id, int64_t timeout_ms);
|
||||
|
||||
std::unordered_set<int64_t> getExpiredSessions();
|
||||
|
||||
void clear();
|
||||
};
|
||||
|
||||
}
|
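A short worked example for the expiry queue above (numbers chosen for illustration, not from the patch): with expiration_interval = 500 ms, a session touched at t = 0 with a 1200 ms timeout lands in the 1500 ms bucket, since roundToNextInterval(0 + 1200) = (1200 / 500 + 1) * 500 = 1500. Each further update() moves the session into a later bucket, and getExpiredSessions() drains one bucket per call once the wall clock has passed it.

#include <Coordination/SessionExpiryQueue.h>

void exampleExpiry()
{
    DB::SessionExpiryQueue queue(500);                        /// 500 ms wide expiration buckets

    queue.update(/* session_id */ 1, /* timeout_ms */ 1200);  /// lands in the 1500 ms bucket

    /// Called periodically (as sessionCleanerTask does); once more than 1500 ms have passed
    /// without another update(1, ...), the set eventually contains session 1.
    for (int64_t expired_session : queue.getExpiredSessions())
        queue.remove(expired_session);                        /// forget the dead session
}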
167
src/Coordination/SummingStateMachine.cpp
Normal file
@ -0,0 +1,167 @@
|
||||
#include <Coordination/SummingStateMachine.h>
|
||||
#include <iostream>
|
||||
#include <cstring>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
static constexpr int MAX_SNAPSHOTS = 3;
|
||||
|
||||
static int64_t deserializeValue(nuraft::buffer & buffer)
|
||||
{
|
||||
nuraft::buffer_serializer bs(buffer);
|
||||
int64_t result;
|
||||
memcpy(&result, bs.get_raw(buffer.size()), sizeof(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
SummingStateMachine::SummingStateMachine()
|
||||
: value(0)
|
||||
, last_committed_idx(0)
|
||||
{
|
||||
}
|
||||
|
||||
nuraft::ptr<nuraft::buffer> SummingStateMachine::commit(const size_t log_idx, nuraft::buffer & data)
|
||||
{
|
||||
int64_t value_to_add = deserializeValue(data);
|
||||
|
||||
value += value_to_add;
|
||||
last_committed_idx = log_idx;
|
||||
|
||||
// Return Raft log number as a return result.
|
||||
nuraft::ptr<nuraft::buffer> ret = nuraft::buffer::alloc(sizeof(log_idx));
|
||||
nuraft::buffer_serializer bs(ret);
|
||||
bs.put_u64(log_idx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool SummingStateMachine::apply_snapshot(nuraft::snapshot & s)
|
||||
{
|
||||
std::lock_guard<std::mutex> ll(snapshots_lock);
|
||||
auto entry = snapshots.find(s.get_last_log_idx());
|
||||
if (entry == snapshots.end())
|
||||
return false;
|
||||
|
||||
auto ctx = entry->second;
|
||||
value = ctx->value;
|
||||
return true;
|
||||
}
|
||||
|
||||
nuraft::ptr<nuraft::snapshot> SummingStateMachine::last_snapshot()
|
||||
{
|
||||
// Just return the latest snapshot.
|
||||
std::lock_guard<std::mutex> ll(snapshots_lock);
|
||||
auto entry = snapshots.rbegin();
|
||||
if (entry == snapshots.rend())
|
||||
return nullptr;
|
||||
|
||||
auto ctx = entry->second;
|
||||
return ctx->snapshot;
|
||||
}
|
||||
|
||||
|
||||
void SummingStateMachine::createSnapshotInternal(nuraft::snapshot & s)
|
||||
{
|
||||
// Clone snapshot from `s`.
|
||||
nuraft::ptr<nuraft::buffer> snp_buf = s.serialize();
|
||||
nuraft::ptr<nuraft::snapshot> ss = nuraft::snapshot::deserialize(*snp_buf);
|
||||
|
||||
// Put into snapshot map.
|
||||
auto ctx = cs_new<SingleValueSnapshotContext>(ss, value);
|
||||
snapshots[s.get_last_log_idx()] = ctx;
|
||||
|
||||
// Maintain last 3 snapshots only.
|
||||
int num = snapshots.size();
|
||||
auto entry = snapshots.begin();
|
||||
|
||||
for (int ii = 0; ii < num - MAX_SNAPSHOTS; ++ii)
|
||||
{
|
||||
if (entry == snapshots.end())
|
||||
break;
|
||||
entry = snapshots.erase(entry);
|
||||
}
|
||||
}
|
||||
|
||||
void SummingStateMachine::save_logical_snp_obj(
|
||||
nuraft::snapshot & s,
|
||||
size_t & obj_id,
|
||||
nuraft::buffer & data,
|
||||
bool /*is_first_obj*/,
|
||||
bool /*is_last_obj*/)
|
||||
{
|
||||
if (obj_id == 0)
|
||||
{
|
||||
// Object ID == 0: it contains dummy value, create snapshot context.
|
||||
createSnapshotInternal(s);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Object ID > 0: actual snapshot value.
|
||||
nuraft::buffer_serializer bs(data);
|
||||
int64_t local_value = static_cast<int64_t>(bs.get_u64());
|
||||
|
||||
std::lock_guard<std::mutex> ll(snapshots_lock);
|
||||
auto entry = snapshots.find(s.get_last_log_idx());
|
||||
assert(entry != snapshots.end());
|
||||
entry->second->value = local_value;
|
||||
}
|
||||
// Request next object.
|
||||
obj_id++;
|
||||
}
|
||||
|
||||
int SummingStateMachine::read_logical_snp_obj(
|
||||
nuraft::snapshot & s,
|
||||
void* & /*user_snp_ctx*/,
|
||||
size_t obj_id,
|
||||
nuraft::ptr<nuraft::buffer> & data_out,
|
||||
bool & is_last_obj)
|
||||
{
|
||||
nuraft::ptr<SingleValueSnapshotContext> ctx = nullptr;
|
||||
{
|
||||
std::lock_guard<std::mutex> ll(snapshots_lock);
|
||||
auto entry = snapshots.find(s.get_last_log_idx());
|
||||
if (entry == snapshots.end())
|
||||
{
|
||||
// Snapshot doesn't exist.
|
||||
data_out = nullptr;
|
||||
is_last_obj = true;
|
||||
return 0;
|
||||
}
|
||||
ctx = entry->second;
|
||||
}
|
||||
|
||||
if (obj_id == 0)
|
||||
{
|
||||
// Object ID == 0: first object, put dummy data.
|
||||
data_out = nuraft::buffer::alloc(sizeof(Int32));
|
||||
nuraft::buffer_serializer bs(data_out);
|
||||
bs.put_i32(0);
|
||||
is_last_obj = false;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
// Object ID > 0: second object, put actual value.
|
||||
data_out = nuraft::buffer::alloc(sizeof(size_t));
|
||||
nuraft::buffer_serializer bs(data_out);
|
||||
bs.put_u64(ctx->value);
|
||||
is_last_obj = true;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void SummingStateMachine::create_snapshot(
|
||||
nuraft::snapshot & s,
|
||||
nuraft::async_result<bool>::handler_type & when_done)
|
||||
{
|
||||
{
|
||||
std::lock_guard<std::mutex> ll(snapshots_lock);
|
||||
createSnapshotInternal(s);
|
||||
}
|
||||
nuraft::ptr<std::exception> except(nullptr);
|
||||
bool ret = true;
|
||||
when_done(ret, except);
|
||||
}
|
||||
|
||||
|
||||
}
|
78
src/Coordination/SummingStateMachine.h
Normal file
@ -0,0 +1,78 @@
|
||||
#pragma once
|
||||
|
||||
#include <libnuraft/nuraft.hxx> // Y_IGNORE
|
||||
#include <Core/Types.h>
|
||||
#include <atomic>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Example trivial state machine.
|
||||
class SummingStateMachine : public nuraft::state_machine
|
||||
{
|
||||
public:
|
||||
SummingStateMachine();
|
||||
|
||||
nuraft::ptr<nuraft::buffer> pre_commit(const size_t /*log_idx*/, nuraft::buffer & /*data*/) override { return nullptr; }
|
||||
|
||||
nuraft::ptr<nuraft::buffer> commit(const size_t log_idx, nuraft::buffer & data) override;
|
||||
|
||||
void rollback(const size_t /*log_idx*/, nuraft::buffer & /*data*/) override {}
|
||||
|
||||
size_t last_commit_index() override { return last_committed_idx; }
|
||||
|
||||
bool apply_snapshot(nuraft::snapshot & s) override;
|
||||
|
||||
nuraft::ptr<nuraft::snapshot> last_snapshot() override;
|
||||
|
||||
void create_snapshot(
|
||||
nuraft::snapshot & s,
|
||||
nuraft::async_result<bool>::handler_type & when_done) override;
|
||||
|
||||
void save_logical_snp_obj(
|
||||
nuraft::snapshot & s,
|
||||
size_t & obj_id,
|
||||
nuraft::buffer & data,
|
||||
bool is_first_obj,
|
||||
bool is_last_obj) override;
|
||||
|
||||
int read_logical_snp_obj(
|
||||
nuraft::snapshot & s,
|
||||
void* & user_snp_ctx,
|
||||
size_t obj_id,
|
||||
nuraft::ptr<nuraft::buffer> & data_out,
|
||||
bool & is_last_obj) override;
|
||||
|
||||
int64_t getValue() const { return value; }
|
||||
|
||||
private:
|
||||
struct SingleValueSnapshotContext
|
||||
{
|
||||
SingleValueSnapshotContext(nuraft::ptr<nuraft::snapshot> & s, int64_t v)
|
||||
: snapshot(s)
|
||||
, value(v)
|
||||
{}
|
||||
|
||||
nuraft::ptr<nuraft::snapshot> snapshot;
|
||||
int64_t value;
|
||||
};
|
||||
|
||||
void createSnapshotInternal(nuraft::snapshot & s);
|
||||
|
||||
// State machine's current value.
|
||||
std::atomic<int64_t> value;
|
||||
|
||||
// Last committed Raft log number.
|
||||
std::atomic<uint64_t> last_committed_idx;
|
||||
|
||||
// Keeps the last 3 snapshots, by their Raft log numbers.
|
||||
std::map<uint64_t, nuraft::ptr<SingleValueSnapshotContext>> snapshots;
|
||||
|
||||
// Mutex for `snapshots`.
|
||||
std::mutex snapshots_lock;
|
||||
|
||||
};
|
||||
|
||||
}
|
45
src/Coordination/ThreadSafeQueue.h
Normal file
@ -0,0 +1,45 @@
|
||||
#pragma once
|
||||
|
||||
#include <queue>
|
||||
#include <mutex>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Queue with mutex and condvar. As simple as possible.
|
||||
template <typename T>
|
||||
class ThreadSafeQueue
|
||||
{
|
||||
private:
|
||||
mutable std::mutex queue_mutex;
|
||||
std::condition_variable cv;
|
||||
std::queue<T> queue;
|
||||
public:
|
||||
|
||||
void push(const T & response)
|
||||
{
|
||||
std::lock_guard lock(queue_mutex);
|
||||
queue.push(response);
|
||||
cv.notify_one();
|
||||
}
|
||||
|
||||
bool tryPop(T & response, int64_t timeout_ms = 0)
|
||||
{
|
||||
std::unique_lock lock(queue_mutex);
|
||||
if (!cv.wait_for(lock,
|
||||
std::chrono::milliseconds(timeout_ms), [this] { return !queue.empty(); }))
|
||||
return false;
|
||||
|
||||
response = queue.front();
|
||||
queue.pop();
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t size() const
|
||||
{
|
||||
std::lock_guard lock(queue_mutex);
|
||||
return queue.size();
|
||||
}
|
||||
};
|
||||
|
||||
}
|
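A minimal usage sketch for the queue above (illustration only, not part of the patch): one thread pushes values while another polls with a timeout.

#include <Coordination/ThreadSafeQueue.h>
#include <condition_variable>
#include <iostream>
#include <thread>

int main()
{
    DB::ThreadSafeQueue<int> queue;

    std::thread producer([&queue]
    {
        for (int i = 0; i < 3; ++i)
            queue.push(i);
    });

    int value = 0;
    size_t received = 0;
    while (received < 3)
    {
        /// Wait up to 100 ms for the next element; tryPop returns false on timeout.
        if (queue.tryPop(value, 100))
        {
            std::cout << value << '\n';
            ++received;
        }
    }

    producer.join();
    return 0;
}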
71
src/Coordination/WriteBufferFromNuraftBuffer.cpp
Normal file
@ -0,0 +1,71 @@
|
||||
#include <Coordination/WriteBufferFromNuraftBuffer.h>
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_WRITE_AFTER_END_OF_BUFFER;
|
||||
}
|
||||
|
||||
void WriteBufferFromNuraftBuffer::nextImpl()
|
||||
{
|
||||
if (is_finished)
|
||||
throw Exception("WriteBufferFromNuraftBuffer is finished", ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER);
|
||||
|
||||
/// pos may not be equal to buffer->data_begin() + old_size, because WriteBuffer::next() can be used to flush data
|
||||
size_t pos_offset = pos - reinterpret_cast<Position>(buffer->data_begin());
|
||||
size_t old_size = buffer->size();
|
||||
if (pos_offset == old_size)
|
||||
{
|
||||
nuraft::ptr<nuraft::buffer> new_buffer = nuraft::buffer::alloc(old_size * size_multiplier);
|
||||
memcpy(new_buffer->data_begin(), buffer->data_begin(), buffer->size());
|
||||
buffer = new_buffer;
|
||||
}
|
||||
internal_buffer = Buffer(reinterpret_cast<Position>(buffer->data_begin() + pos_offset), reinterpret_cast<Position>(buffer->data_begin() + buffer->size()));
|
||||
working_buffer = internal_buffer;
|
||||
|
||||
}
|
||||
|
||||
WriteBufferFromNuraftBuffer::WriteBufferFromNuraftBuffer()
|
||||
: WriteBuffer(nullptr, 0)
|
||||
{
|
||||
buffer = nuraft::buffer::alloc(initial_size);
|
||||
set(reinterpret_cast<Position>(buffer->data_begin()), buffer->size());
|
||||
}
|
||||
|
||||
void WriteBufferFromNuraftBuffer::finalize()
|
||||
{
|
||||
if (is_finished)
|
||||
return;
|
||||
|
||||
is_finished = true;
|
||||
size_t real_size = pos - reinterpret_cast<Position>(buffer->data_begin());
|
||||
nuraft::ptr<nuraft::buffer> new_buffer = nuraft::buffer::alloc(real_size);
|
||||
memcpy(new_buffer->data_begin(), buffer->data_begin(), real_size);
|
||||
buffer = new_buffer;
|
||||
|
||||
/// Prevent further writes.
|
||||
set(nullptr, 0);
|
||||
}
|
||||
|
||||
nuraft::ptr<nuraft::buffer> WriteBufferFromNuraftBuffer::getBuffer()
|
||||
{
|
||||
finalize();
|
||||
return buffer;
|
||||
}
|
||||
|
||||
WriteBufferFromNuraftBuffer::~WriteBufferFromNuraftBuffer()
|
||||
{
|
||||
try
|
||||
{
|
||||
finalize();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
30
src/Coordination/WriteBufferFromNuraftBuffer.h
Normal file
@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <libnuraft/nuraft.hxx> // Y_IGNORE
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class WriteBufferFromNuraftBuffer : public WriteBuffer
|
||||
{
|
||||
private:
|
||||
nuraft::ptr<nuraft::buffer> buffer;
|
||||
bool is_finished = false;
|
||||
|
||||
static constexpr size_t initial_size = 32;
|
||||
static constexpr size_t size_multiplier = 2;
|
||||
|
||||
void nextImpl() override;
|
||||
|
||||
public:
|
||||
WriteBufferFromNuraftBuffer();
|
||||
|
||||
void finalize() override final;
|
||||
nuraft::ptr<nuraft::buffer> getBuffer();
|
||||
bool isFinished() const { return is_finished; }
|
||||
|
||||
~WriteBufferFromNuraftBuffer() override;
|
||||
};
|
||||
|
||||
}
|
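A short round-trip sketch for the two buffer adapters (illustration only; the BufferSerde test below does the same with a real ZooKeeper request). getBuffer() finalizes the writer, so the returned nuraft buffer is trimmed to exactly the bytes that were written and further writes are rejected.

#include <Coordination/WriteBufferFromNuraftBuffer.h>
#include <Coordination/ReadBufferFromNuraftBuffer.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>

int64_t writeAndReadBack()
{
    DB::WriteBufferFromNuraftBuffer out;
    int64_t answer = 42;
    DB::writeIntBinary(answer, out);

    nuraft::ptr<nuraft::buffer> raw = out.getBuffer();   /// finalized; trimmed to the bytes written

    DB::ReadBufferFromNuraftBuffer in(raw);
    int64_t restored = 0;
    DB::readIntBinary(restored, in);                     /// restored == 42
    return restored;
}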
336
src/Coordination/tests/gtest_for_build.cpp
Normal file
@ -0,0 +1,336 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include <Common/config.h>
|
||||
# include "config_core.h"
|
||||
#endif
|
||||
|
||||
#if USE_NURAFT
|
||||
|
||||
#include <Coordination/InMemoryLogStore.h>
|
||||
#include <Coordination/InMemoryStateManager.h>
|
||||
#include <Coordination/NuKeeperStorageSerializer.h>
|
||||
#include <Coordination/SummingStateMachine.h>
|
||||
#include <Coordination/NuKeeperStateMachine.h>
|
||||
#include <Coordination/LoggerWrapper.h>
|
||||
#include <Coordination/WriteBufferFromNuraftBuffer.h>
|
||||
#include <Coordination/ReadBufferFromNuraftBuffer.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <Common/ZooKeeper/ZooKeeperCommon.h>
|
||||
#include <Common/ZooKeeper/ZooKeeperIO.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <libnuraft/nuraft.hxx> // Y_IGNORE
|
||||
#include <thread>
|
||||
|
||||
|
||||
TEST(CoordinationTest, BuildTest)
|
||||
{
|
||||
DB::InMemoryLogStore store;
|
||||
DB::SummingStateMachine machine;
|
||||
EXPECT_EQ(1, 1);
|
||||
}
|
||||
|
||||
TEST(CoordinationTest, BufferSerde)
|
||||
{
|
||||
Coordination::ZooKeeperRequestPtr request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Get);
|
||||
request->xid = 3;
|
||||
dynamic_cast<Coordination::ZooKeeperGetRequest *>(request.get())->path = "/path/value";
|
||||
|
||||
DB::WriteBufferFromNuraftBuffer wbuf;
|
||||
request->write(wbuf);
|
||||
auto nuraft_buffer = wbuf.getBuffer();
|
||||
EXPECT_EQ(nuraft_buffer->size(), 28);
|
||||
|
||||
DB::ReadBufferFromNuraftBuffer rbuf(nuraft_buffer);
|
||||
|
||||
int32_t length;
|
||||
Coordination::read(length, rbuf);
|
||||
EXPECT_EQ(length + sizeof(length), nuraft_buffer->size());
|
||||
|
||||
int32_t xid;
|
||||
Coordination::read(xid, rbuf);
|
||||
EXPECT_EQ(xid, request->xid);
|
||||
|
||||
Coordination::OpNum opnum;
|
||||
Coordination::read(opnum, rbuf);
|
||||
|
||||
Coordination::ZooKeeperRequestPtr request_read = Coordination::ZooKeeperRequestFactory::instance().get(opnum);
|
||||
request_read->xid = xid;
|
||||
request_read->readImpl(rbuf);
|
||||
|
||||
EXPECT_EQ(request_read->getOpNum(), Coordination::OpNum::Get);
|
||||
EXPECT_EQ(request_read->xid, 3);
|
||||
EXPECT_EQ(dynamic_cast<Coordination::ZooKeeperGetRequest *>(request_read.get())->path, "/path/value");
|
||||
}
|
||||
|
||||
template <typename StateMachine>
|
||||
struct SimpliestRaftServer
|
||||
{
|
||||
SimpliestRaftServer(int server_id_, const std::string & hostname_, int port_)
|
||||
: server_id(server_id_)
|
||||
, hostname(hostname_)
|
||||
, port(port_)
|
||||
, endpoint(hostname + ":" + std::to_string(port))
|
||||
, state_machine(nuraft::cs_new<StateMachine>())
|
||||
, state_manager(nuraft::cs_new<DB::InMemoryStateManager>(server_id, hostname, port))
|
||||
{
|
||||
nuraft::raft_params params;
|
||||
params.heart_beat_interval_ = 100;
|
||||
params.election_timeout_lower_bound_ = 200;
|
||||
params.election_timeout_upper_bound_ = 400;
|
||||
params.reserved_log_items_ = 5;
|
||||
params.snapshot_distance_ = 1; /// forcefully send snapshots
|
||||
params.client_req_timeout_ = 3000;
|
||||
params.return_method_ = nuraft::raft_params::blocking;
|
||||
|
||||
raft_instance = launcher.init(
|
||||
state_machine, state_manager, nuraft::cs_new<DB::LoggerWrapper>("ToyRaftLogger", DB::LogsLevel::trace), port,
|
||||
nuraft::asio_service::options{}, params);
|
||||
|
||||
if (!raft_instance)
|
||||
{
|
||||
std::cerr << "Failed to initialize launcher (see the message "
|
||||
"in the log file)." << std::endl;
|
||||
exit(-1);
|
||||
}
|
||||
std::cout << "init Raft instance " << server_id;
|
||||
for (size_t ii = 0; ii < 20; ++ii)
|
||||
{
|
||||
if (raft_instance->is_initialized())
|
||||
{
|
||||
std::cout << " done" << std::endl;
|
||||
break;
|
||||
}
|
||||
std::cout << ".";
|
||||
fflush(stdout);
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
}
|
||||
}
|
||||
|
||||
// Server ID.
|
||||
int server_id;
|
||||
|
||||
// Server address.
|
||||
std::string hostname;
|
||||
|
||||
// Server port.
|
||||
int port;
|
||||
|
||||
std::string endpoint;
|
||||
|
||||
// State machine.
|
||||
nuraft::ptr<StateMachine> state_machine;
|
||||
|
||||
// State manager.
|
||||
nuraft::ptr<nuraft::state_mgr> state_manager;
|
||||
|
||||
// Raft launcher.
|
||||
nuraft::raft_launcher launcher;
|
||||
|
||||
// Raft server instance.
|
||||
nuraft::ptr<nuraft::raft_server> raft_instance;
|
||||
};
|
||||
|
||||
using SummingRaftServer = SimpliestRaftServer<DB::SummingStateMachine>;
|
||||
|
||||
nuraft::ptr<nuraft::buffer> getLogEntry(int64_t number)
|
||||
{
|
||||
nuraft::ptr<nuraft::buffer> ret = nuraft::buffer::alloc(sizeof(number));
|
||||
nuraft::buffer_serializer bs(ret);
|
||||
// WARNING: We don't consider endian-safety in this example.
|
||||
bs.put_raw(&number, sizeof(number));
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
TEST(CoordinationTest, TestSummingRaft1)
|
||||
{
|
||||
SummingRaftServer s1(1, "localhost", 44444);
|
||||
|
||||
/// Single node is leader
|
||||
EXPECT_EQ(s1.raft_instance->get_leader(), 1);
|
||||
|
||||
auto entry1 = getLogEntry(143);
|
||||
auto ret = s1.raft_instance->append_entries({entry1});
|
||||
EXPECT_TRUE(ret->get_accepted()) << "failed to replicate: entry 1" << ret->get_result_code();
|
||||
EXPECT_EQ(ret->get_result_code(), nuraft::cmd_result_code::OK) << "failed to replicate: entry 1" << ret->get_result_code();
|
||||
|
||||
while (s1.state_machine->getValue() != 143)
|
||||
{
|
||||
std::cout << "Waiting s1 to apply entry\n";
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
}
|
||||
|
||||
EXPECT_EQ(s1.state_machine->getValue(), 143);
|
||||
|
||||
s1.launcher.shutdown(5);
|
||||
}
|
||||
|
||||
TEST(CoordinationTest, TestSummingRaft3)
|
||||
{
|
||||
SummingRaftServer s1(1, "localhost", 44444);
|
||||
SummingRaftServer s2(2, "localhost", 44445);
|
||||
SummingRaftServer s3(3, "localhost", 44446);
|
||||
|
||||
nuraft::srv_config first_config(1, "localhost:44444");
|
||||
auto ret1 = s2.raft_instance->add_srv(first_config);
|
||||
if (!ret1->get_accepted())
|
||||
{
|
||||
std::cout << "failed to add server: "
|
||||
<< ret1->get_result_str() << std::endl;
|
||||
EXPECT_TRUE(false);
|
||||
}
|
||||
|
||||
while (s1.raft_instance->get_leader() != 2)
|
||||
{
|
||||
std::cout << "Waiting s1 to join to s2 quorum\n";
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
}
|
||||
|
||||
nuraft::srv_config third_config(3, "localhost:44446");
|
||||
auto ret3 = s2.raft_instance->add_srv(third_config);
|
||||
if (!ret3->get_accepted())
|
||||
{
|
||||
std::cout << "failed to add server: "
|
||||
<< ret3->get_result_str() << std::endl;
|
||||
EXPECT_TRUE(false);
|
||||
}
|
||||
|
||||
while (s3.raft_instance->get_leader() != 2)
|
||||
{
|
||||
std::cout << "Waiting s3 to join to s2 quorum\n";
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
}
|
||||
|
||||
/// S2 is leader
|
||||
EXPECT_EQ(s1.raft_instance->get_leader(), 2);
|
||||
EXPECT_EQ(s2.raft_instance->get_leader(), 2);
|
||||
EXPECT_EQ(s3.raft_instance->get_leader(), 2);
|
||||
|
||||
std::cerr << "Starting to add entries\n";
|
||||
auto entry = getLogEntry(1);
|
||||
auto ret = s2.raft_instance->append_entries({entry});
|
||||
EXPECT_TRUE(ret->get_accepted()) << "failed to replicate: entry 1" << ret->get_result_code();
|
||||
EXPECT_EQ(ret->get_result_code(), nuraft::cmd_result_code::OK) << "failed to replicate: entry 1" << ret->get_result_code();
|
||||
|
||||
while (s1.state_machine->getValue() != 1)
|
||||
{
|
||||
std::cout << "Waiting s1 to apply entry\n";
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
}
|
||||
|
||||
while (s2.state_machine->getValue() != 1)
|
||||
{
|
||||
std::cout << "Waiting s2 to apply entry\n";
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
}
|
||||
|
||||
while (s3.state_machine->getValue() != 1)
|
||||
{
|
||||
std::cout << "Waiting s3 to apply entry\n";
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
}
|
||||
|
||||
EXPECT_EQ(s1.state_machine->getValue(), 1);
|
||||
EXPECT_EQ(s2.state_machine->getValue(), 1);
|
||||
EXPECT_EQ(s3.state_machine->getValue(), 1);
|
||||
|
||||
auto non_leader_entry = getLogEntry(3);
|
||||
auto ret_non_leader1 = s1.raft_instance->append_entries({non_leader_entry});
|
||||
|
||||
EXPECT_FALSE(ret_non_leader1->get_accepted());
|
||||
|
||||
auto ret_non_leader3 = s3.raft_instance->append_entries({non_leader_entry});
|
||||
|
||||
EXPECT_FALSE(ret_non_leader3->get_accepted());
|
||||
|
||||
auto leader_entry = getLogEntry(77);
|
||||
auto ret_leader = s2.raft_instance->append_entries({leader_entry});
|
||||
EXPECT_TRUE(ret_leader->get_accepted()) << "failed to replicate: entry 78" << ret_leader->get_result_code();
|
||||
EXPECT_EQ(ret_leader->get_result_code(), nuraft::cmd_result_code::OK) << "failed to replicate: entry 78" << ret_leader->get_result_code();
|
||||
|
||||
while (s1.state_machine->getValue() != 78)
|
||||
{
|
||||
std::cout << "Waiting s1 to apply entry\n";
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
}
|
||||
|
||||
while (s3.state_machine->getValue() != 78)
|
||||
{
|
||||
std::cout << "Waiting s3 to apply entry\n";
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
}
|
||||
|
||||
EXPECT_EQ(s1.state_machine->getValue(), 78);
|
||||
EXPECT_EQ(s2.state_machine->getValue(), 78);
|
||||
EXPECT_EQ(s3.state_machine->getValue(), 78);
|
||||
|
||||
s1.launcher.shutdown(5);
|
||||
s2.launcher.shutdown(5);
|
||||
s3.launcher.shutdown(5);
|
||||
}
|
||||
|
||||
nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(int64_t session_id, const Coordination::ZooKeeperRequestPtr & request)
|
||||
{
|
||||
DB::WriteBufferFromNuraftBuffer buf;
|
||||
DB::writeIntBinary(session_id, buf);
|
||||
request->write(buf);
|
||||
return buf.getBuffer();
|
||||
}
|
||||
|
||||
DB::NuKeeperStorage::ResponsesForSessions getZooKeeperResponses(nuraft::ptr<nuraft::buffer> & buffer, const Coordination::ZooKeeperRequestPtr & request)
|
||||
{
|
||||
DB::NuKeeperStorage::ResponsesForSessions results;
|
||||
DB::ReadBufferFromNuraftBuffer buf(buffer);
|
||||
while (!buf.eof())
|
||||
{
|
||||
int64_t session_id;
|
||||
DB::readIntBinary(session_id, buf);
|
||||
|
||||
int32_t length;
|
||||
Coordination::XID xid;
|
||||
int64_t zxid;
|
||||
Coordination::Error err;
|
||||
|
||||
Coordination::read(length, buf);
|
||||
Coordination::read(xid, buf);
|
||||
Coordination::read(zxid, buf);
|
||||
Coordination::read(err, buf);
|
||||
auto response = request->makeResponse();
|
||||
response->readImpl(buf);
|
||||
results.push_back(DB::NuKeeperStorage::ResponseForSession{session_id, response});
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
TEST(CoordinationTest, TestStorageSerialization)
|
||||
{
|
||||
DB::NuKeeperStorage storage(500);
|
||||
storage.container["/hello"] = DB::NuKeeperStorage::Node{.data="world"};
|
||||
storage.container["/hello/somepath"] = DB::NuKeeperStorage::Node{.data="somedata"};
|
||||
storage.session_id_counter = 5;
|
||||
storage.zxid = 156;
|
||||
storage.ephemerals[3] = {"/hello", "/"};
|
||||
storage.ephemerals[1] = {"/hello/somepath"};
|
||||
|
||||
DB::WriteBufferFromOwnString buffer;
|
||||
DB::NuKeeperStorageSerializer serializer;
|
||||
serializer.serialize(storage, buffer);
|
||||
std::string serialized = buffer.str();
|
||||
EXPECT_NE(serialized.size(), 0);
|
||||
DB::ReadBufferFromString read(serialized);
|
||||
DB::NuKeeperStorage new_storage(500);
|
||||
serializer.deserialize(new_storage, read);
|
||||
|
||||
EXPECT_EQ(new_storage.container.size(), 3);
|
||||
EXPECT_EQ(new_storage.container["/hello"].data, "world");
|
||||
EXPECT_EQ(new_storage.container["/hello/somepath"].data, "somedata");
|
||||
EXPECT_EQ(new_storage.session_id_counter, 5);
|
||||
EXPECT_EQ(new_storage.zxid, 156);
|
||||
EXPECT_EQ(new_storage.ephemerals.size(), 2);
|
||||
EXPECT_EQ(new_storage.ephemerals[3].size(), 2);
|
||||
EXPECT_EQ(new_storage.ephemerals[1].size(), 1);
|
||||
}
|
||||
|
||||
#endif
|
13
src/Coordination/ya.make
Normal file
@ -0,0 +1,13 @@
|
||||
# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
PEERDIR(
|
||||
clickhouse/src/Common
|
||||
)
|
||||
|
||||
SRCS(
|
||||
)
|
||||
|
||||
END()
|
12
src/Coordination/ya.make.in
Normal file
@ -0,0 +1,12 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
PEERDIR(
|
||||
clickhouse/src/Common
|
||||
)
|
||||
|
||||
SRCS(
|
||||
)
|
||||
|
||||
END()
|
@ -21,7 +21,7 @@ namespace ErrorCodes
|
||||
extern const int DECIMAL_OVERFLOW;
|
||||
}
|
||||
|
||||
///
|
||||
|
||||
inline bool allowDecimalComparison(const DataTypePtr & left_type, const DataTypePtr & right_type)
|
||||
{
|
||||
if (isColumnedAsDecimal(left_type))
|
||||
@ -30,7 +30,9 @@ inline bool allowDecimalComparison(const DataTypePtr & left_type, const DataType
|
||||
return true;
|
||||
}
|
||||
else if (isNotDecimalButComparableToDecimal(left_type) && isColumnedAsDecimal(right_type))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -252,9 +254,9 @@ private:
|
||||
else
|
||||
{
|
||||
if constexpr (scale_left)
|
||||
x *= scale;
|
||||
x = common::mulIgnoreOverflow(x, scale);
|
||||
if constexpr (scale_right)
|
||||
y *= scale;
|
||||
y = common::mulIgnoreOverflow(y, scale);
|
||||
}
|
||||
|
||||
return Op::apply(x, y);
|
||||
|
@ -13,3 +13,4 @@
|
||||
#cmakedefine01 USE_LDAP
|
||||
#cmakedefine01 USE_ROCKSDB
|
||||
#cmakedefine01 USE_LIBPQXX
|
||||
#cmakedefine01 USE_NURAFT
|
||||
|
@ -120,14 +120,17 @@ public:
|
||||
return DecimalUtils::getFractionalPart(x, scale);
|
||||
}
|
||||
|
||||
T maxWholeValue() const { return getScaleMultiplier(maxPrecision() - scale) - T(1); }
|
||||
T maxWholeValue() const { return getScaleMultiplier(precision - scale) - T(1); }
|
||||
|
||||
bool canStoreWhole(T x) const
|
||||
template<typename U>
|
||||
bool canStoreWhole(U x) const
|
||||
{
|
||||
static_assert(std::is_signed_v<typename T::NativeType>);
|
||||
T max = maxWholeValue();
|
||||
if (x > max || x < -max)
|
||||
return false;
|
||||
return true;
|
||||
if constexpr (std::is_signed_v<U>)
|
||||
return -max <= x && x <= max;
|
||||
else
|
||||
return x <= static_cast<std::make_unsigned_t<typename T::NativeType>>(max.value);
|
||||
}
|
||||
|
||||
/// @returns multiplier for U to become T with correct scale
|
||||
|
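A worked example for the two methods changed above (precision and scale chosen for illustration):

maxWholeValue() = 10^(precision - scale) - 1, e.g. precision = 9, scale = 4 gives 10^5 - 1 = 99999
canStoreWhole(x) then means -99999 <= x <= 99999 for a signed x, and x <= 99999 for an unsigned x (only the upper bound is checked, against max.value).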
@ -7,6 +7,7 @@
|
||||
#include "DiskS3.h"
|
||||
#include "Disks/DiskCacheWrapper.h"
|
||||
#include "Disks/DiskFactory.h"
|
||||
#include "Storages/StorageS3Settings.h"
|
||||
#include "ProxyConfiguration.h"
|
||||
#include "ProxyListConfiguration.h"
|
||||
#include "ProxyResolverConfiguration.h"
|
||||
@ -137,6 +138,8 @@ void registerDiskS3(DiskFactory & factory)
|
||||
uri.is_virtual_hosted_style,
|
||||
config.getString(config_prefix + ".access_key_id", ""),
|
||||
config.getString(config_prefix + ".secret_access_key", ""),
|
||||
config.getString(config_prefix + ".server_side_encryption_customer_key_base64", ""),
|
||||
{},
|
||||
config.getBool(config_prefix + ".use_environment_credentials", config.getBool("s3.use_environment_credentials", false))
|
||||
);
|
||||
|
||||
|
@ -3,6 +3,11 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int INCORRECT_DATA;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size)
|
||||
{
|
||||
@ -15,10 +20,18 @@ std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D
|
||||
|
||||
while (loadAtPosition(in, memory, pos) && (balance || memory.size() + static_cast<size_t>(pos - in.position()) < min_chunk_size))
|
||||
{
|
||||
const auto current_object_size = memory.size() + static_cast<size_t>(pos - in.position());
|
||||
if (current_object_size > 10 * min_chunk_size)
|
||||
throw ParsingException("Size of JSON object is extremely large. Expected not greater than " +
|
||||
std::to_string(min_chunk_size) + " bytes, but current is " + std::to_string(current_object_size) +
|
||||
" bytes per row. Increase the value setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually, most likely JSON is malformed", ErrorCodes::INCORRECT_DATA);
|
||||
|
||||
if (quotes)
|
||||
{
|
||||
pos = find_first_symbols<'\\', '"'>(pos, in.buffer().end());
|
||||
if (pos == in.buffer().end())
|
||||
if (pos > in.buffer().end())
|
||||
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
|
||||
else if (pos == in.buffer().end())
|
||||
continue;
|
||||
if (*pos == '\\')
|
||||
{
|
||||
@ -35,9 +48,11 @@ std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D
|
||||
else
|
||||
{
|
||||
pos = find_first_symbols<'{', '}', '\\', '"'>(pos, in.buffer().end());
|
||||
if (pos == in.buffer().end())
|
||||
if (pos > in.buffer().end())
|
||||
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
|
||||
else if (pos == in.buffer().end())
|
||||
continue;
|
||||
if (*pos == '{')
|
||||
else if (*pos == '{')
|
||||
{
|
||||
++balance;
|
||||
++pos;
|
||||
|
@ -6,11 +6,11 @@
|
||||
#include <Common/NaNUtils.h>
|
||||
#include <DataTypes/NumberTraits.h>
|
||||
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include <Common/config.h>
|
||||
#endif
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -90,17 +90,26 @@ struct DivideIntegralImpl
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Comparisons are not strict to avoid rounding issues when operand is implicitly casted to float.
|
||||
|
||||
if constexpr (std::is_floating_point_v<A>)
|
||||
if (isNaN(a) || a > std::numeric_limits<CastA>::max() || a < std::numeric_limits<CastA>::lowest())
|
||||
if (isNaN(a) || a >= std::numeric_limits<CastA>::max() || a <= std::numeric_limits<CastA>::lowest())
|
||||
throw Exception("Cannot perform integer division on infinite or too large floating point numbers",
|
||||
ErrorCodes::ILLEGAL_DIVISION);
|
||||
|
||||
if constexpr (std::is_floating_point_v<B>)
|
||||
if (isNaN(b) || b > std::numeric_limits<CastB>::max() || b < std::numeric_limits<CastB>::lowest())
|
||||
if (isNaN(b) || b >= std::numeric_limits<CastB>::max() || b <= std::numeric_limits<CastB>::lowest())
|
||||
throw Exception("Cannot perform integer division on infinite or too large floating point numbers",
|
||||
ErrorCodes::ILLEGAL_DIVISION);
|
||||
|
||||
return static_cast<Result>(checkedDivision(CastA(a), CastB(b)));
|
||||
auto res = checkedDivision(CastA(a), CastB(b));
|
||||
|
||||
if constexpr (std::is_floating_point_v<decltype(res)>)
|
||||
if (isNaN(res) || res >= std::numeric_limits<Result>::max() || res <= std::numeric_limits<Result>::lowest())
|
||||
throw Exception("Cannot perform integer division, because it will produce infinite or too large number",
|
||||
ErrorCodes::ILLEGAL_DIVISION);
|
||||
|
||||
return static_cast<Result>(res);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -47,6 +47,29 @@ struct ArrayDifferenceImpl
|
||||
}
|
||||
|
||||
|
||||
template <typename Element, typename Result>
|
||||
static void NO_SANITIZE_UNDEFINED impl(const Element * __restrict src, Result * __restrict dst, size_t begin, size_t end)
|
||||
{
|
||||
/// First element is zero, then the differences of ith and i-1th elements.
|
||||
|
||||
Element prev{};
|
||||
for (size_t pos = begin; pos < end; ++pos)
|
||||
{
|
||||
if (pos == begin)
|
||||
{
|
||||
dst[pos] = 0;
|
||||
prev = src[pos];
|
||||
}
|
||||
else
|
||||
{
|
||||
Element curr = src[pos];
|
||||
dst[pos] = curr - prev;
|
||||
prev = curr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename Element, typename Result>
|
||||
static bool executeType(const ColumnPtr & mapped, const ColumnArray & array, ColumnPtr & res_ptr)
|
||||
{
|
||||
@ -73,14 +96,10 @@ struct ArrayDifferenceImpl
|
||||
size_t pos = 0;
|
||||
for (auto offset : offsets)
|
||||
{
|
||||
// skip empty arrays
|
||||
if (pos < offset)
|
||||
{
|
||||
res_values[pos] = 0;
|
||||
for (++pos; pos < offset; ++pos)
|
||||
res_values[pos] = static_cast<Result>(data[pos]) - static_cast<Result>(data[pos - 1]);
|
||||
}
|
||||
impl(data.data(), res_values.data(), pos, offset);
|
||||
pos = offset;
|
||||
}
|
||||
|
||||
res_ptr = ColumnArray::create(std::move(res_nested), array.getOffsetsPtr());
|
||||
return true;
|
||||
}
|
||||
|
@ -29,6 +29,7 @@ public:
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForNulls() const override { return false; }
|
||||
bool isSuitableForConstantFolding() const override { return false; }
|
||||
|
||||
/// We should never return LowCardinality result, cause we declare that result is always constant zero.
|
||||
/// (in getResultIfAlwaysReturnsConstantAndHasArguments)
|
||||
|
@ -45,6 +45,7 @@ void registerFunctionTimeZone(FunctionFactory &);
|
||||
void registerFunctionRunningAccumulate(FunctionFactory &);
|
||||
void registerFunctionRunningDifference(FunctionFactory &);
|
||||
void registerFunctionRunningDifferenceStartingWithFirstValue(FunctionFactory &);
|
||||
void registerFunctionRunningConcurrency(FunctionFactory &);
|
||||
void registerFunctionFinalizeAggregation(FunctionFactory &);
|
||||
void registerFunctionToLowCardinality(FunctionFactory &);
|
||||
void registerFunctionLowCardinalityIndices(FunctionFactory &);
|
||||
@ -112,6 +113,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
|
||||
registerFunctionRunningAccumulate(factory);
|
||||
registerFunctionRunningDifference(factory);
|
||||
registerFunctionRunningDifferenceStartingWithFirstValue(factory);
|
||||
registerFunctionRunningConcurrency(factory);
|
||||
registerFunctionFinalizeAggregation(factory);
|
||||
registerFunctionToLowCardinality(factory);
|
||||
registerFunctionLowCardinalityIndices(factory);
|
||||
|
223
src/Functions/runningConcurrency.cpp
Normal file
@ -0,0 +1,223 @@
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Core/callOnTypeIndex.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypeDateTime64.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/IFunctionImpl.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <common/defines.h>
|
||||
#include <set>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int INCORRECT_DATA;
|
||||
}
|
||||
|
||||
template <typename Name, typename ArgDataType, typename ConcurrencyDataType>
|
||||
class ExecutableFunctionRunningConcurrency : public IExecutableFunctionImpl
|
||||
{
|
||||
public:
|
||||
String getName() const override
|
||||
{
|
||||
return Name::name;
|
||||
}
|
||||
|
||||
ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
using ColVecArg = typename ArgDataType::ColumnType;
|
||||
const ColVecArg * col_begin = checkAndGetColumn<ColVecArg>(arguments[0].column.get());
|
||||
const ColVecArg * col_end = checkAndGetColumn<ColVecArg>(arguments[1].column.get());
|
||||
if (!col_begin || !col_end)
|
||||
throw Exception("Constant columns are not supported at the moment",
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
const typename ColVecArg::Container & vec_begin = col_begin->getData();
|
||||
const typename ColVecArg::Container & vec_end = col_end->getData();
|
||||
|
||||
using ColVecConc = typename ConcurrencyDataType::ColumnType;
|
||||
typename ColVecConc::MutablePtr col_concurrency = ColVecConc::create(input_rows_count);
|
||||
typename ColVecConc::Container & vec_concurrency = col_concurrency->getData();
|
||||
|
||||
std::multiset<typename ArgDataType::FieldType> ongoing_until;
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
{
|
||||
const auto begin = vec_begin[i];
|
||||
const auto end = vec_end[i];
|
||||
|
||||
if (unlikely(begin > end))
|
||||
{
|
||||
const FormatSettings default_format;
|
||||
WriteBufferFromOwnString buf_begin, buf_end;
|
||||
arguments[0].type->serializeAsTextQuoted(*(arguments[0].column), i, buf_begin, default_format);
|
||||
arguments[1].type->serializeAsTextQuoted(*(arguments[1].column), i, buf_end, default_format);
|
||||
throw Exception(
|
||||
"Incorrect order of events: " + buf_begin.str() + " > " + buf_end.str(),
|
||||
ErrorCodes::INCORRECT_DATA);
|
||||
}
|
||||
|
||||
ongoing_until.insert(end);
|
||||
|
||||
// Erase all the elements from "ongoing_until" which
|
||||
// are less than or equal to "begin", i.e. durations
|
||||
// that have already ended. We consider "begin" to be
|
||||
// inclusive, and "end" to be exclusive.
|
||||
ongoing_until.erase(
|
||||
ongoing_until.begin(), ongoing_until.upper_bound(begin));
|
||||
|
||||
vec_concurrency[i] = ongoing_until.size();
|
||||
}
|
||||
|
||||
return col_concurrency;
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForConstants() const override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Name, typename ArgDataType, typename ConcurrencyDataType>
|
||||
class FunctionBaseRunningConcurrency : public IFunctionBaseImpl
|
||||
{
|
||||
public:
|
||||
explicit FunctionBaseRunningConcurrency(DataTypes argument_types_, DataTypePtr return_type_)
|
||||
: argument_types(std::move(argument_types_))
|
||||
, return_type(std::move(return_type_)) {}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return Name::name;
|
||||
}
|
||||
|
||||
const DataTypes & getArgumentTypes() const override
|
||||
{
|
||||
return argument_types;
|
||||
}
|
||||
|
||||
const DataTypePtr & getResultType() const override
|
||||
{
|
||||
return return_type;
|
||||
}
|
||||
|
||||
ExecutableFunctionImplPtr prepare(const ColumnsWithTypeAndName &) const override
|
||||
{
|
||||
return std::make_unique<ExecutableFunctionRunningConcurrency<Name, ArgDataType, ConcurrencyDataType>>();
|
||||
}
|
||||
|
||||
bool isStateful() const override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
DataTypes argument_types;
|
||||
DataTypePtr return_type;
|
||||
};
|
||||
|
||||
template <typename Name, typename ConcurrencyDataType>
|
||||
class RunningConcurrencyOverloadResolver : public IFunctionOverloadResolverImpl
|
||||
{
|
||||
template <typename T>
|
||||
struct TypeTag
|
||||
{
|
||||
using Type = T;
|
||||
};
|
||||
|
||||
/// Call a polymorphic lambda with a type tag of src_type.
|
||||
template <typename F>
|
||||
void dispatchForSourceType(const IDataType & src_type, F && f) const
|
||||
{
|
||||
WhichDataType which(src_type);
|
||||
|
||||
switch (which.idx)
|
||||
{
|
||||
case TypeIndex::Date: f(TypeTag<DataTypeDate>()); break;
|
||||
case TypeIndex::DateTime: f(TypeTag<DataTypeDateTime>()); break;
|
||||
case TypeIndex::DateTime64: f(TypeTag<DataTypeDateTime64>()); break;
|
||||
default:
|
||||
throw Exception(
|
||||
"Arguments for function " + getName() + " must be Date, DateTime, or DateTime64.",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
static constexpr auto name = Name::name;
|
||||
|
||||
static FunctionOverloadResolverImplPtr create(const Context &)
|
||||
{
|
||||
return std::make_unique<RunningConcurrencyOverloadResolver<Name, ConcurrencyDataType>>();
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return Name::name;
|
||||
}
|
||||
|
||||
FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
|
||||
{
|
||||
// The type of the second argument must match with that of the first one.
|
||||
if (unlikely(!arguments[1].type->equals(*(arguments[0].type))))
|
||||
{
|
||||
throw Exception(
|
||||
"Function " + getName() + " must be called with two arguments having the same type.",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
DataTypes argument_types = { arguments[0].type, arguments[1].type };
|
||||
FunctionBaseImplPtr base;
|
||||
dispatchForSourceType(*(arguments[0].type), [&](auto arg_type_tag) // Throws when the type is inappropriate.
|
||||
{
|
||||
using Tag = decltype(arg_type_tag);
|
||||
using ArgDataType = typename Tag::Type;
|
||||
|
||||
base = std::make_unique<FunctionBaseRunningConcurrency<Name, ArgDataType, ConcurrencyDataType>>(argument_types, return_type);
|
||||
});
|
||||
|
||||
return base;
|
||||
}
|
||||
|
||||
DataTypePtr getReturnType(const DataTypes &) const override
|
||||
{
|
||||
return std::make_shared<ConcurrencyDataType>();
|
||||
}
|
||||
|
||||
size_t getNumberOfArguments() const override
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
|
||||
bool isInjective(const ColumnsWithTypeAndName &) const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isStateful() const override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForNulls() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
struct NameRunningConcurrency
|
||||
{
|
||||
static constexpr auto name = "runningConcurrency";
|
||||
};
|
||||
|
||||
void registerFunctionRunningConcurrency(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<RunningConcurrencyOverloadResolver<NameRunningConcurrency, DataTypeUInt32>>();
|
||||
}
|
||||
}
|
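The heart of the function above is a single left-to-right sweep that keeps a multiset of end times. A standalone sketch of that algorithm on plain integers (illustration only, not part of the patch; the real implementation works on Date/DateTime/DateTime64 columns and throws on unordered events):

#include <cstdint>
#include <set>
#include <vector>

/// For events sorted by begin time, returns how many events are ongoing at each begin.
/// "begin" is inclusive and "end" is exclusive, mirroring the column function above.
std::vector<uint32_t> runningConcurrencySketch(const std::vector<int64_t> & begin, const std::vector<int64_t> & end)
{
    std::multiset<int64_t> ongoing_until;
    std::vector<uint32_t> result;
    result.reserve(begin.size());

    for (size_t i = 0; i < begin.size(); ++i)
    {
        ongoing_until.insert(end[i]);
        /// Drop every event that has already ended by the time this one starts.
        ongoing_until.erase(ongoing_until.begin(), ongoing_until.upper_bound(begin[i]));
        result.push_back(static_cast<uint32_t>(ongoing_until.size()));
    }
    return result;
}

/// Example: begin = {1, 2, 3, 8}, end = {3, 5, 7, 9}  ->  result = {1, 2, 2, 1}.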
@ -423,6 +423,7 @@ SRCS(
|
||||
rowNumberInAllBlocks.cpp
|
||||
rowNumberInBlock.cpp
|
||||
runningAccumulate.cpp
|
||||
runningConcurrency.cpp
|
||||
runningDifference.cpp
|
||||
runningDifferenceStartingWithFirstValue.cpp
|
||||
sigmoid.cpp
|
||||
|
@ -35,10 +35,10 @@ struct Memory : boost::noncopyable, Allocator
|
||||
char * m_data = nullptr;
|
||||
size_t alignment = 0;
|
||||
|
||||
Memory() {}
|
||||
Memory() = default;
|
||||
|
||||
/// If alignment != 0, then allocate memory aligned to specified value.
|
||||
Memory(size_t size_, size_t alignment_ = 0) : m_capacity(size_), m_size(m_capacity), alignment(alignment_)
|
||||
explicit Memory(size_t size_, size_t alignment_ = 0) : m_capacity(size_), m_size(m_capacity), alignment(alignment_)
|
||||
{
|
||||
alloc();
|
||||
}
|
||||
@ -140,7 +140,7 @@ protected:
|
||||
Memory<> memory;
|
||||
public:
|
||||
/// If non-nullptr 'existing_memory' is passed, then buffer will not create its own memory and will use existing_memory without ownership.
|
||||
BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0)
|
||||
explicit BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0)
|
||||
: Base(nullptr, 0), memory(existing_memory ? 0 : size, alignment)
|
||||
{
|
||||
Base::set(existing_memory ? existing_memory : memory.data(), size);
|
||||
|
@ -1104,9 +1104,9 @@ void saveUpToPosition(ReadBuffer & in, DB::Memory<> & memory, char * current)
|
||||
assert(current >= in.position());
|
||||
assert(current <= in.buffer().end());
|
||||
|
||||
const int old_bytes = memory.size();
|
||||
const int additional_bytes = current - in.position();
|
||||
const int new_bytes = old_bytes + additional_bytes;
|
||||
const size_t old_bytes = memory.size();
|
||||
const size_t additional_bytes = current - in.position();
|
||||
const size_t new_bytes = old_bytes + additional_bytes;
|
||||
/// There are no new bytes to add to memory.
|
||||
/// No need to do extra stuff.
|
||||
if (new_bytes == 0)
|
||||
|
@ -13,6 +13,7 @@
#    include <aws/core/platform/Environment.h>
#    include <aws/core/utils/logging/LogMacros.h>
#    include <aws/core/utils/logging/LogSystemInterface.h>
#    include <aws/core/utils/HashingUtils.h>
#    include <aws/s3/S3Client.h>
#    include <aws/core/http/HttpClientFactory.h>
#    include <IO/S3/PocoHTTPClientFactory.h>

@ -273,56 +274,12 @@ namespace S3
        return ret;
    }

    /// This method is not static because it requires ClientFactory to be initialized.
    std::shared_ptr<Aws::S3::S3Client> ClientFactory::create( // NOLINT
        const String & endpoint,
        bool is_virtual_hosted_style,
        const String & access_key_id,
        const String & secret_access_key,
        bool use_environment_credentials,
        const RemoteHostFilter & remote_host_filter,
        unsigned int s3_max_redirects)
    {
        PocoHTTPClientConfiguration client_configuration(remote_host_filter, s3_max_redirects);

        if (!endpoint.empty())
            client_configuration.endpointOverride = endpoint;

        return create(client_configuration,
            is_virtual_hosted_style,
            access_key_id,
            secret_access_key,
            use_environment_credentials);
    }

    std::shared_ptr<Aws::S3::S3Client> ClientFactory::create( // NOLINT
        const PocoHTTPClientConfiguration & cfg_,
        bool is_virtual_hosted_style,
        const String & access_key_id,
        const String & secret_access_key,
        bool use_environment_credentials)
    {
        Aws::Auth::AWSCredentials credentials(access_key_id, secret_access_key);

        PocoHTTPClientConfiguration client_configuration = cfg_;
        client_configuration.updateSchemeAndRegion();

        return std::make_shared<Aws::S3::S3Client>(
            std::make_shared<S3CredentialsProviderChain>(
                client_configuration,
                credentials,
                use_environment_credentials), // AWS credentials provider.
            std::move(client_configuration), // Client configuration.
            Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, // Sign policy.
            is_virtual_hosted_style || client_configuration.endpointOverride.empty() // Use virtual addressing if endpoint is not specified.
        );
    }

    std::shared_ptr<Aws::S3::S3Client> ClientFactory::create( // NOLINT
        const PocoHTTPClientConfiguration & cfg_,
        bool is_virtual_hosted_style,
        const String & access_key_id,
        const String & secret_access_key,
        const String & server_side_encryption_customer_key_base64,
        HeaderCollection headers,
        bool use_environment_credentials)
    {

@ -331,7 +288,28 @@ namespace S3

        Aws::Auth::AWSCredentials credentials(access_key_id, secret_access_key);

        auto auth_signer = std::make_shared<S3AuthSigner>(client_configuration, std::move(credentials), std::move(headers), use_environment_credentials);
        if (!server_side_encryption_customer_key_base64.empty())
        {
            /// See S3Client::GeneratePresignedUrlWithSSEC().

            headers.push_back({Aws::S3::SSEHeaders::SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM,
                Aws::S3::Model::ServerSideEncryptionMapper::GetNameForServerSideEncryption(Aws::S3::Model::ServerSideEncryption::AES256)});

            headers.push_back({Aws::S3::SSEHeaders::SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY,
                server_side_encryption_customer_key_base64});

            Aws::Utils::ByteBuffer buffer = Aws::Utils::HashingUtils::Base64Decode(server_side_encryption_customer_key_base64);
            String str_buffer(reinterpret_cast<char *>(buffer.GetUnderlyingData()), buffer.GetLength());
            headers.push_back({Aws::S3::SSEHeaders::SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5,
                Aws::Utils::HashingUtils::Base64Encode(Aws::Utils::HashingUtils::CalculateMD5(str_buffer))});
        }

        auto auth_signer = std::make_shared<S3AuthSigner>(
            client_configuration,
            std::move(credentials),
            std::move(headers),
            use_environment_credentials);

        return std::make_shared<Aws::S3::S3Client>(
            std::move(auth_signer),
            std::move(client_configuration), // Client configuration.

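For reference, the SSE-C branch above ends up attaching three customer-key headers to every request. A hedged summary as plain strings (the wire-level header names below follow the public S3 SSE-C convention and are not taken from this diff; the comments describe what the code above puts into them):

#include <string>

// Hypothetical summary of the SSE-C request headers assembled above.
struct SSECustomerHeaders
{
    std::string algorithm = "x-amz-server-side-encryption-customer-algorithm";  // set to "AES256"
    std::string key       = "x-amz-server-side-encryption-customer-key";        // the base64-encoded key, passed through unchanged
    std::string key_md5   = "x-amz-server-side-encryption-customer-key-MD5";    // base64(MD5(base64-decoded key))
};
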
@ -31,27 +31,12 @@ public:

    static ClientFactory & instance();

    std::shared_ptr<Aws::S3::S3Client> create(
        const String & endpoint,
        bool is_virtual_hosted_style,
        const String & access_key_id,
        const String & secret_access_key,
        bool use_environment_credentials,
        const RemoteHostFilter & remote_host_filter,
        unsigned int s3_max_redirects);

    std::shared_ptr<Aws::S3::S3Client> create(
        const PocoHTTPClientConfiguration & cfg,
        bool is_virtual_hosted_style,
        const String & access_key_id,
        const String & secret_access_key,
        bool use_environment_credentials);

    std::shared_ptr<Aws::S3::S3Client> create(
        const PocoHTTPClientConfiguration & cfg,
        bool is_virtual_hosted_style,
        const String & access_key_id,
        const String & secret_access_key,
        const String & server_side_encryption_customer_key_base64,
        HeaderCollection headers,
        bool use_environment_credentials);

@ -95,8 +95,15 @@ public:
        ++pos;
    }

    virtual void sync() {}
    virtual void finalize() {}
    virtual void sync()
    {
        next();
    }

    virtual void finalize()
    {
        next();
    }

private:
    /** Write the data in the buffer (from the beginning of the buffer to the current position).

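The base-class change above turns `sync()` and `finalize()` from no-ops into calls to `next()`, so derived buffers that do not override them still flush pending data. A stripped-down, hypothetical analogue of that behaviour (not the real WriteBuffer hierarchy):

#include <iostream>
#include <string>

class TinyWriteBuffer
{
public:
    virtual ~TinyWriteBuffer() = default;

    void write(const std::string & s) { pending += s; }

    // Default sync/finalize now flush pending data instead of doing nothing.
    virtual void sync() { next(); }
    virtual void finalize() { next(); }

protected:
    void next()
    {
        if (pending.empty())
            return;
        nextImpl(pending);
        pending.clear();
    }

    virtual void nextImpl(const std::string & chunk) = 0;

private:
    std::string pending;
};

// A derived buffer that does not override finalize() still gets its tail flushed.
class StdoutWriteBuffer : public TinyWriteBuffer
{
protected:
    void nextImpl(const std::string & chunk) override { std::cout << chunk; }
};
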
@ -188,14 +188,14 @@ void WriteBufferFromHTTPServerResponse::onProgress(const Progress & progress)

void WriteBufferFromHTTPServerResponse::finalize()
{
    if (offset())
        next();
    if (out)
    {
        next();

        if (out)
        out.reset();
        out->next();
        out.reset();
    }
    else

    if (!offset())
    {
        /// If no remaining data, just send headers.
        std::lock_guard lock(mutex);

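The reworked `finalize()` above flushes the response buffer's own data, then flushes and releases the wrapped output stream. A hypothetical, self-contained sketch of that ordering (names are illustrative, not the real ClickHouse classes):

#include <iostream>
#include <memory>
#include <string>

struct PlainSink
{
    void write(const std::string & s) { std::cout << s; }
    void next() { std::cout.flush(); }
};

class WrappingBuffer
{
public:
    explicit WrappingBuffer(std::unique_ptr<PlainSink> out_) : out(std::move(out_)) {}

    void write(const std::string & s) { pending += s; }

    void finalize()
    {
        next();          // push our own buffered bytes into the wrapped sink
        if (out)
        {
            out->next(); // let the wrapped sink flush whatever it still holds
            out.reset(); // release it so nothing can be written after finalize()
        }
    }

private:
    void next()
    {
        if (!pending.empty() && out)
            out->write(pending);
        pending.clear();
    }

    std::string pending;
    std::unique_ptr<PlainSink> out;
};
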