Merge branch 'master' into fix-apple-clang-build

commit 4e4d33e93b
alexey-milovidov, 2021-03-30 13:15:07 +03:00, committed by GitHub
431 changed files with 10637 additions and 7795 deletions


@ -47,6 +47,10 @@ endif()
target_include_directories(common PUBLIC .. ${CMAKE_CURRENT_BINARY_DIR}/..)
if (OS_DARWIN AND NOT MAKE_STATIC_LIBRARIES)
target_link_libraries(common PUBLIC -Wl,-U,_inside_main)
endif()
# Allow explicit fallback to readline
if (NOT ENABLE_REPLXX AND ENABLE_READLINE)
message (STATUS "Attempt to fallback to readline explicitly")


@ -853,15 +853,43 @@ public:
{
if (hours == 1)
return toStartOfHour(t);
/** We will round the hour number counted since midnight.
* It may split the day into non-equal intervals.
* For example, if we round to an 11-hour interval,
* the day will be split into the intervals 00:00:00..10:59:59, 11:00:00..21:59:59, 22:00:00..23:59:59.
* In case of daylight saving time or other transitions,
* the intervals can be shortened or prolonged by the amount of the transition.
*/
UInt64 seconds = hours * 3600;
t = roundDown(t, seconds);
const LUTIndex index = findIndex(t);
const Values & values = lut[index];
if (t >= 0 && offset_is_whole_number_of_hours_during_epoch)
return t;
time_t time = t - values.date;
if (time >= values.time_at_offset_change())
{
/// Align to new hour numbers before rounding.
time += values.amount_of_offset_change();
time = time / seconds * seconds;
/// TODO check if it's correct.
return toStartOfHour(t);
/// Should subtract the shift back but only if rounded time is not before shift.
if (time >= values.time_at_offset_change())
{
time -= values.amount_of_offset_change();
/// With cutoff at the time of the shift. Otherwise we may end up with something like 23:00 previous day.
if (time < values.time_at_offset_change())
time = values.time_at_offset_change();
}
}
else
{
time = time / seconds * seconds;
}
return values.date + time;
}
inline time_t toStartOfMinuteInterval(time_t t, UInt64 minutes) const
@ -869,6 +897,14 @@ public:
if (minutes == 1)
return toStartOfMinute(t);
/** In contrast to the "toStartOfHourInterval" function above,
* the minute intervals are not aligned to midnight.
* You will get unexpected results if, for example, you round down to a 60-minute interval
* and there was a 30-minute time shift.
*
* But this is not specified in the docs and can be changed in the future.
*/
UInt64 seconds = 60 * minutes;
return roundDown(t, seconds);
}
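A plausible user-visible counterpart of these routines is the `toStartOfInterval` SQL function. A rough sketch of the 11-hour rounding described in the comment above, assuming hour intervals dispatch to this code:

```sql
SELECT toStartOfInterval(toDateTime('2021-03-30 13:15:07'), INTERVAL 11 HOUR);
-- 13:15:07 falls in the 11:00:00..21:59:59 bucket, so this rounds down to 11:00:00 of that day
```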


@ -1,45 +1,28 @@
// https://stackoverflow.com/questions/1413445/reading-a-password-from-stdcin
#include <common/setTerminalEcho.h>
#include <common/errnoToString.h>
#include <stdexcept>
#include <cstring>
#include <string>
#ifdef WIN32
#include <windows.h>
#else
#include <termios.h>
#include <unistd.h>
#include <errno.h>
#endif
void setTerminalEcho(bool enable)
{
#ifdef WIN32
auto handle = GetStdHandle(STD_INPUT_HANDLE);
DWORD mode;
if (!GetConsoleMode(handle, &mode))
throw std::runtime_error(std::string("setTerminalEcho failed get: ") + std::to_string(GetLastError()));
/// Obtain terminal attributes,
/// toggle the ECHO flag
/// and set them back.
if (!enable)
mode &= ~ENABLE_ECHO_INPUT;
else
mode |= ENABLE_ECHO_INPUT;
struct termios tty{};
if (!SetConsoleMode(handle, mode))
throw std::runtime_error(std::string("setTerminalEcho failed set: ") + std::to_string(GetLastError()));
#else
struct termios tty;
if (tcgetattr(STDIN_FILENO, &tty))
if (0 != tcgetattr(STDIN_FILENO, &tty))
throw std::runtime_error(std::string("setTerminalEcho failed get: ") + errnoToString(errno));
if (!enable)
tty.c_lflag &= ~ECHO;
else
tty.c_lflag |= ECHO;
auto ret = tcsetattr(STDIN_FILENO, TCSANOW, &tty);
if (ret)
if (enable)
tty.c_lflag |= ECHO;
else
tty.c_lflag &= ~ECHO;
if (0 != tcsetattr(STDIN_FILENO, TCSANOW, &tty))
throw std::runtime_error(std::string("setTerminalEcho failed set: ") + errnoToString(errno));
#endif
}


@ -5,6 +5,11 @@ add_library (daemon
)
target_include_directories (daemon PUBLIC ..)
if (OS_DARWIN AND NOT MAKE_STATIC_LIBRARIES)
target_link_libraries (daemon PUBLIC -Wl,-undefined,dynamic_lookup)
endif()
target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickhouse_common_config common ${EXECINFO_LIBRARIES})
if (USE_SENTRY)

File diff suppressed because it is too large.


@ -4,6 +4,21 @@ ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.4.1.*
ARG gosu_ver=1.10
# set a non-empty deb_location_url to create a docker image
# from debs created by CI build, for example:
# docker build . --network host --build-arg version="21.4.1.6282" --build-arg deb_location_url="https://clickhouse-builds.s3.yandex.net/21852/069cfbff388b3d478d1a16dc7060b48073f5d522/clickhouse_build_check/clang-11_relwithdebuginfo_none_bundled_unsplitted_disable_False_deb/" -t filimonovq/clickhouse-server:pr21852
ARG deb_location_url=""
# set non-empty single_binary_location_url to create docker image
# from a single binary url (useful for non-standard builds - with sanitizers, for arm64).
# for example (run on aarch64 server):
# docker build . --network host --build-arg single_binary_location_url="https://builds.clickhouse.tech/master/aarch64/clickhouse" -t altinity/clickhouse-server:master-testing-arm
# note: clickhouse-odbc-bridge is not supported there.
ARG single_binary_location_url=""
# see https://github.com/moby/moby/issues/4032#issuecomment-192327844
ARG DEBIAN_FRONTEND=noninteractive
# user/group precreated explicitly with fixed uid/gid on purpose.
# It is especially important for rootless containers: in that case entrypoint
# can't do chown and owners of mounted volumes should be configured externally.
@ -19,20 +34,37 @@ RUN groupadd -r clickhouse --gid=101 \
ca-certificates \
dirmngr \
gnupg \
locales \
wget \
tzdata \
&& mkdir -p /etc/apt/sources.list.d \
&& apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 \
&& echo $repository > /etc/apt/sources.list.d/clickhouse.list \
&& apt-get update \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get install --allow-unauthenticated --yes --no-install-recommends \
clickhouse-common-static=$version \
clickhouse-client=$version \
clickhouse-server=$version \
locales \
wget \
tzdata \
&& if [ -n "$deb_location_url" ]; then \
echo "installing from custom url with deb packages: $deb_location_url" \
rm -rf /tmp/clickhouse_debs \
&& mkdir -p /tmp/clickhouse_debs \
&& wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-common-static_${version}_amd64.deb" -P /tmp/clickhouse_debs \
&& wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-client_${version}_all.deb" -P /tmp/clickhouse_debs \
&& wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-server_${version}_all.deb" -P /tmp/clickhouse_debs \
&& dpkg -i /tmp/clickhouse_debs/*.deb ; \
elif [ -n "$single_binary_location_url" ]; then \
echo "installing from single binary url: $single_binary_location_url" \
&& rm -rf /tmp/clickhouse_binary \
&& mkdir -p /tmp/clickhouse_binary \
&& wget --progress=bar:force:noscroll "$single_binary_location_url" -O /tmp/clickhouse_binary/clickhouse \
&& chmod +x /tmp/clickhouse_binary/clickhouse \
&& /tmp/clickhouse_binary/clickhouse install --user "clickhouse" --group "clickhouse" ; \
else \
echo "installing from repository: $repository" \
&& apt-get update \
&& apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \
&& apt-get install --allow-unauthenticated --yes --no-install-recommends \
clickhouse-common-static=$version \
clickhouse-client=$version \
clickhouse-server=$version ; \
fi \
&& clickhouse-local -q 'SELECT * FROM system.build_options' \
&& rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \


@ -38,9 +38,6 @@ if ! $gosu test -f "$CLICKHOUSE_CONFIG" -a -r "$CLICKHOUSE_CONFIG"; then
exit 1
fi
# port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port)"
# get CH directories locations
DATA_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=path || true)"
TMP_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=tmp_path || true)"
@ -108,6 +105,9 @@ EOT
fi
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port)"
# Listen only on localhost until the initialization is done
$gosu /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 &
pid="$!"


@ -70,7 +70,7 @@ function start_server
--path "$FASTTEST_DATA"
--user_files_path "$FASTTEST_DATA/user_files"
--top_level_domains_path "$FASTTEST_DATA/top_level_domains"
--test_keeper_server.log_storage_path "$FASTTEST_DATA/coordination"
--keeper_server.log_storage_path "$FASTTEST_DATA/coordination"
)
clickhouse-server "${opts[@]}" &>> "$FASTTEST_OUTPUT/server.log" &
server_pid=$!


@ -69,7 +69,7 @@ MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([
- MySQL `INSERT` query is converted into `INSERT` with `_sign=1`.
- MySQl `DELETE` query is converted into `INSERT` with `_sign=-1`.
- MySQL `DELETE` query is converted into `INSERT` with `_sign=-1`.
- MySQL `UPDATE` query is converted into `INSERT` with `_sign=-1` and `INSERT` with `_sign=1`.
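As an illustration of this `_sign` bookkeeping, a minimal sketch of reading only the rows that represent the current state; the `mysql_db.some_table` name is hypothetical:

```sql
-- rows with _sign = -1 cancel previously inserted rows;
-- filtering on _sign = 1 keeps only the current state of each row
SELECT * FROM mysql_db.some_table WHERE _sign = 1;
```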


@ -1514,6 +1514,14 @@ FORMAT PrettyCompactMonoBlock
Default value: 0
## optimize_skip_unused_shards_limit {#optimize-skip-unused-shards-limit}
Limit on the number of sharding key values; turns off `optimize_skip_unused_shards` if the limit is reached.
Too many values may require a significant amount of processing, while the benefit is doubtful, since if you have a huge number of values in `IN (...)`, the query will most likely be sent to all shards anyway.
Default value: 1000
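For illustration, the limit can be tuned together with `optimize_skip_unused_shards` on a per-query basis; the `distributed_table` and `sharding_key` names below are hypothetical:

```sql
SELECT count()
FROM distributed_table
WHERE sharding_key IN (1, 2, 3)
SETTINGS optimize_skip_unused_shards = 1, optimize_skip_unused_shards_limit = 100;
```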
## optimize_skip_unused_shards {#optimize-skip-unused-shards}
Enables or disables skipping of unused shards for [SELECT](../../sql-reference/statements/select/index.md) queries that have a sharding key condition in `WHERE/PREWHERE` (assuming that the data is distributed by the sharding key; otherwise the setting does nothing).
@ -2728,11 +2736,11 @@ Default value: `0`.
## engine_file_truncate_on_insert {#engine-file-truncate-on-insert}
Enables or disables truncate before insert in file engine tables.
Enables or disables truncate before insert in [File](../../engines/table-engines/special/file.md) engine tables.
Possible values:
- 0 — Disabled.
- 1 — Enabled.
- 0 — `INSERT` query appends new data to the end of the file.
- 1 — `INSERT` replaces existing content of the file with the new data.
Default value: `0`.
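A minimal sketch of the difference; the `file_table` name is hypothetical:

```sql
CREATE TABLE file_table (x UInt32) ENGINE = File(TSV);
INSERT INTO file_table VALUES (1), (2);
SET engine_file_truncate_on_insert = 1;
INSERT INTO file_table VALUES (3); -- with the setting enabled, the file now contains only row 3
```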
@ -2747,4 +2755,39 @@ Possible values:
Default value: `0`.
## allow_experimental_live_view {#allow-experimental-live-view}
Allows creation of experimental [live views](../../sql-reference/statements/create/view.md#live-view).
Possible values:
- 0 — Working with live views is disabled.
- 1 — Working with live views is enabled.
Default value: `0`.
## live_view_heartbeat_interval {#live-view-heartbeat-interval}
Sets the heartbeat interval in seconds to indicate that a [live view](../../sql-reference/statements/create/view.md#live-view) is alive.
Default value: `15`.
## max_live_view_insert_blocks_before_refresh {#max-live-view-insert-blocks-before-refresh}
Sets the maximum number of inserted blocks after which mergeable blocks are dropped and the query for the [live view](../../sql-reference/statements/create/view.md#live-view) is re-executed.
Default value: `64`.
## temporary_live_view_timeout {#temporary-live-view-timeout}
Sets the interval in seconds after which a [live view](../../sql-reference/statements/create/view.md#live-view) with timeout is deleted.
Default value: `5`.
## periodic_live_view_refresh {#periodic-live-view-refresh}
Sets the interval in seconds after which a periodically refreshed [live view](../../sql-reference/statements/create/view.md#live-view) is forced to refresh.
Default value: `60`.
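A minimal sketch of how these settings combine in practice; the `lv` view and `some_table` names are hypothetical:

```sql
SET allow_experimental_live_view = 1;   -- live views are disabled by default
SET temporary_live_view_timeout = 30;   -- drop a WITH TIMEOUT view after 30 seconds without WATCH
CREATE LIVE VIEW lv WITH TIMEOUT AS SELECT count() FROM some_table;
```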
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->


@ -243,7 +243,7 @@ The function works according to the algorithm:
**Syntax**
``` sql
windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN)
windowFunnel(window, [mode, [mode, ... ]])(timestamp, cond1, cond2, ..., condN)
```
**Arguments**
@ -254,8 +254,10 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN)
**Parameters**
- `window` — Length of the sliding window. The unit of `window` depends on the `timestamp` itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`.
- `mode` — It is an optional argument.
- `'strict'` — When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values.
- `mode` — An optional argument. One or more modes can be set (see the sketch after this list).
    - `'strict'` — If the same condition holds for a sequence of events, such repeated events are skipped.
    - `'strict_order'` — Do not allow intervening events. E.g., in the case of `A->B->D->C`, the search for `A->B->C` stops at `D` and the maximum event level is 2.
    - `'strict_increase'` — Apply conditions only to events with strictly increasing timestamps.
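As a sketch of combining several modes in one call; the `events` table and its columns are hypothetical:

```sql
SELECT
    user_id,
    windowFunnel(3600, 'strict_order', 'strict_increase')(ts, event = 'login', event = 'search', event = 'buy') AS level
FROM events
GROUP BY user_id;
```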
**Returned value**


@ -12,7 +12,9 @@ The search is case-sensitive by default in all these functions. There are separa
## position(haystack, needle), locate(haystack, needle) {#position}
Returns the position (in bytes) of the found substring in the string, starting from 1.
Searches for the substring `needle` in the string `haystack`.
Returns the position (in bytes) of the found substring in the string, starting from 1.
For a case-insensitive search, use the function [positionCaseInsensitive](#positioncaseinsensitive).
@ -20,15 +22,22 @@ For a case-insensitive search, use the function [positionCaseInsensitive](#posit
``` sql
position(haystack, needle[, start_pos])
```

``` sql
position(needle IN haystack)
```
Alias: `locate(haystack, needle[, start_pos])`.
!!! note "Note"
    Syntax of `position(needle IN haystack)` provides SQL compatibility; the function works the same way as `position(haystack, needle)`.
**Arguments**
- `haystack` — The string in which the substring is searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — The substring to be searched for. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `start_pos` — Optional parameter, position of the first character in the string to start search. [UInt](../../sql-reference/data-types/int-uint.md).
- `start_pos` — Position of the first character in the string from which to start the search (see the example right after this list). [UInt](../../sql-reference/data-types/int-uint.md). Optional.
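For example, `start_pos` shifts where the search begins:

```sql
SELECT
    position('Hello, world!', 'o'),    -- returns 5, the first match
    position('Hello, world!', 'o', 6); -- returns 9, because the search starts at position 6
```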
**Returned values**
@ -83,6 +92,36 @@ Result:
└───────────────────────────────┘
```
**Examples for POSITION(needle IN haystack) syntax**
Query:
```sql
SELECT 3 = position('c' IN 'abc');
```
Result:
```text
┌─equals(3, position('abc', 'c'))─┐
│ 1 │
└─────────────────────────────────┘
```
Query:
```sql
SELECT 6 = position('/' IN s) FROM (SELECT 'Hello/World' AS s);
```
Result:
```text
┌─equals(6, position(s, '/'))─┐
│ 1 │
└─────────────────────────────┘
```
## positionCaseInsensitive {#positioncaseinsensitive}
The same as [position](#position); returns the position (in bytes) of the found substring in the string, starting from 1. Use this function for a case-insensitive search.
@ -772,4 +811,3 @@ Result:
│ 2 │
└───────────────────────────────┘
```


@ -68,7 +68,7 @@ To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop
!!! important "Important"
This is an experimental feature that may change in backwards-incompatible ways in the future releases.
Enable usage of live views and `WATCH` query using `set allow_experimental_live_view = 1`.
    Enable usage of live views and `WATCH` queries using the [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting: `SET allow_experimental_live_view = 1`.
```sql
@ -90,7 +90,9 @@ Live views work similarly to how a query in a distributed table works. But inste
See [WITH REFRESH](#live-view-with-refresh) to force periodic updates of a live view; in some cases this can be used as a workaround.
You can watch for changes in the live view query result using the [WATCH](../../../sql-reference/statements/watch.md) query
### Monitoring Changes {#live-view-monitoring}
You can monitor changes in the `LIVE VIEW` query result using [WATCH](../../../sql-reference/statements/watch.md) query.
```sql
WATCH [db.]live_view
@ -102,11 +104,10 @@ WATCH [db.]live_view
CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
CREATE LIVE VIEW lv AS SELECT sum(x) FROM mt;
```
Watch a live view while doing a parallel insert into the source table.
```sql
WATCH lv
WATCH lv;
```
```bash
@ -128,16 +129,16 @@ INSERT INTO mt VALUES (2);
INSERT INTO mt VALUES (3);
```
or add [EVENTS](../../../sql-reference/statements/watch.md#events-clause) clause to just get change events.
Or add [EVENTS](../../../sql-reference/statements/watch.md#events-clause) clause to just get change events.
```sql
WATCH [db.]live_view EVENTS
WATCH [db.]live_view EVENTS;
```
**Example:**
```sql
WATCH lv EVENTS
WATCH lv EVENTS;
```
```bash
@ -163,15 +164,15 @@ SELECT * FROM [db.]live_view WHERE ...
You can force live view refresh using the `ALTER LIVE VIEW [db.]table_name REFRESH` statement.
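For example, to refresh the `lv` view from the examples above:

```sql
ALTER LIVE VIEW lv REFRESH;
```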
### With Timeout {#live-view-with-timeout}
### WITH TIMEOUT Clause {#live-view-with-timeout}
When a live view is create with a `WITH TIMEOUT` clause then the live view will be dropped automatically after the specified number of seconds elapse since the end of the last [WATCH](../../../sql-reference/statements/watch.md) query that was watching the live view.
When a live view is created with a `WITH TIMEOUT` clause then the live view will be dropped automatically after the specified number of seconds elapse since the end of the last [WATCH](../../../sql-reference/statements/watch.md) query that was watching the live view.
```sql
CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ...
```
If the timeout value is not specified then the value specified by the `temporary_live_view_timeout` setting is used.
If the timeout value is not specified then the value specified by the [temporary_live_view_timeout](../../../operations/settings/settings.md#temporary-live-view-timeout) setting is used.
**Example:**
@ -180,7 +181,7 @@ CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt;
```
### With Refresh {#live-view-with-refresh}
### WITH REFRESH Clause {#live-view-with-refresh}
When a live view is created with a `WITH REFRESH` clause then it will be automatically refreshed after the specified number of seconds elapse since the last refresh or trigger.
@ -188,7 +189,7 @@ When a live view is created with a `WITH REFRESH` clause then it will be automat
CREATE LIVE VIEW [db.]table_name WITH REFRESH [value_in_sec] AS SELECT ...
```
If the refresh value is not specified then the value specified by the `periodic_live_view_refresh` setting is used.
If the refresh value is not specified then the value specified by the [periodic_live_view_refresh](../../../operations/settings/settings.md#periodic-live-view-refresh) setting is used.
**Example:**
@ -231,7 +232,7 @@ WATCH lv
Code: 60. DB::Exception: Received from localhost:9000. DB::Exception: Table default.lv doesn't exist..
```
### Usage
### Usage {#live-view-usage}
Most common uses of live view tables include:
@ -240,15 +241,4 @@ Most common uses of live view tables include:
- Watching for table changes and triggering follow-up select queries.
- Watching metrics from system tables using periodic refresh.
### Settings {#live-view-settings}
You can use the following settings to control the behaviour of live views.
- `allow_experimental_live_view` - enable live views. Default is `0`.
- `live_view_heartbeat_interval` - the heartbeat interval in seconds to indicate live query is alive. Default is `15` seconds.
- `max_live_view_insert_blocks_before_refresh` - maximum number of inserted blocks after which
mergeable blocks are dropped and query is re-executed. Default is `64` inserts.
- `temporary_live_view_timeout` - interval after which live view with timeout is deleted. Default is `5` seconds.
- `periodic_live_view_refresh` - interval after which periodically refreshed live view is forced to refresh. Default is `60` seconds.
[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/create/view/) <!--hide-->


@ -17,19 +17,21 @@ WATCH [db.]live_view
[FORMAT format]
```
The `WATCH` query performs continuous data retrieval from a [live view](./create/view.md#live-view) table. Unless the `LIMIT` clause is specified it provides an infinite stream of query results from a [live view](./create/view.md#live-view).
The `WATCH` query performs continuous data retrieval from a [LIVE VIEW](./create/view.md#live-view) table. Unless the `LIMIT` clause is specified it provides an infinite stream of query results from a [LIVE VIEW](./create/view.md#live-view).
```sql
WATCH [db.]live_view
WATCH [db.]live_view [EVENTS] [LIMIT n] [FORMAT format]
```
## Virtual columns {#watch-virtual-columns}
The virtual `_version` column in the query result indicates the current result version.
**Example:**
```sql
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
WATCH lv
WATCH lv;
```
```bash
@ -47,6 +49,8 @@ WATCH lv
By default, the requested data is returned to the client, while in conjunction with [INSERT INTO](../../sql-reference/statements/insert-into.md) it can be forwarded to a different table.
**Example:**
```sql
INSERT INTO [db.]table WATCH [db.]live_view ...
```
@ -56,14 +60,14 @@ INSERT INTO [db.]table WATCH [db.]live_view ...
The `EVENTS` clause can be used to obtain a short form of the `WATCH` query where instead of the query result you will just get the latest query result version.
```sql
WATCH [db.]live_view EVENTS
WATCH [db.]live_view EVENTS;
```
**Example:**
```sql
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
WATCH lv EVENTS
WATCH lv EVENTS;
```
```bash
@ -78,17 +82,17 @@ WATCH lv EVENTS
## LIMIT Clause {#limit-clause}
The `LIMIT n` clause species the number of updates the `WATCH` query should wait for before terminating. By default there is no limit on the number of updates and therefore the query will not terminate. The value of `0` indicates that the `WATCH` query should not wait for any new query results and therefore will return immediately once query is evaluated.
The `LIMIT n` clause specifies the number of updates the `WATCH` query should wait for before terminating. By default there is no limit on the number of updates and therefore the query will not terminate. The value of `0` indicates that the `WATCH` query should not wait for any new query results and therefore will return immediately once the query result is evaluated.
```sql
WATCH [db.]live_view LIMIT 1
WATCH [db.]live_view LIMIT 1;
```
**Example:**
```sql
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
WATCH lv EVENTS LIMIT 1
WATCH lv EVENTS LIMIT 1;
```
```bash
@ -102,5 +106,4 @@ WATCH lv EVENTS LIMIT 1
The `FORMAT` clause works the same way as for the [SELECT](../../sql-reference/statements/select/format.md#format-clause) query.
!!! info "Note"
The [JSONEachRowWithProgress](../../interfaces/formats/#jsoneachrowwithprogress) format should be used when watching [live view](./create/view.md#live-view) tables over the HTTP interface. The progress messages will be added to the output to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting.
The [JSONEachRowWithProgress](../../interfaces/formats.md#jsoneachrowwithprogress) format should be used when watching [LIVE VIEW](./create/view.md#live-view) tables over the HTTP interface. The progress messages will be added to the output to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting.
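For instance, a `WATCH` query suitable for the HTTP interface might look like this; the `lv` view from the earlier examples is assumed:

```sql
WATCH lv EVENTS LIMIT 1 FORMAT JSONEachRowWithProgress;
```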


@ -2615,14 +2615,69 @@ SELECT * FROM test2;
Note that this setting affects the behavior of [materialized views](../../sql-reference/statements/create/view.md#materialized) and the [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) database.
## engine_file_empty_if_not_exists {#engine-file-empty_if-not-exists}
Enables or disables the ability to run a `SELECT` query against a [File](../../engines/table-engines/special/file.md) engine table that has no underlying file.
Possible values:
- 0 — the `SELECT` query throws an exception.
- 1 — the `SELECT` query returns an empty result.
Default value: `0`.
## engine_file_truncate_on_insert {#engine-file-truncate-on-insert}
Enables or disables truncating the file before an insert into a [File](../../engines/table-engines/special/file.md) engine table.
Possible values:
- 0 — the `INSERT` query appends new data to the end of the file.
- 1 — `INSERT` replaces the existing contents of the file with the new data.
Default value: `0`.
## allow_experimental_geo_types {#allow-experimental-geo-types}
Allows the use of experimental data types for working with [geographical structures](../../sql-reference/data-types/geo.md).
Possible values:
- 0 — working with geo data types is not supported.
- 1 — working with geo data types is supported.
Default value: `0`.
## allow_experimental_live_view {#allow-experimental-live-view}
Enables the experimental ability to use [live views](../../sql-reference/statements/create/view.md#live-view).
Possible values:
- 0 — live views are not supported.
- 1 — live views are supported.
Default value: `0`.
## live_view_heartbeat_interval {#live-view-heartbeat-interval}
Sets the interval in seconds for the periodic check that a [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) is alive.
Default value: `15`.
## max_live_view_insert_blocks_before_refresh {#max-live-view-insert-blocks-before-refresh}
Sets the maximum number of inserts after which the query forming a [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) is executed again.
Default value: `64`.
## temporary_live_view_timeout {#temporary-live-view-timeout}
Sets the time in seconds after which a [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) is deleted.
Default value: `5`.
## periodic_live_view_refresh {#periodic-live-view-refresh}
Sets the time in seconds after which a [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) with auto-refresh enabled is refreshed.
Default value: `60`.
[Original article](https://clickhouse.tech/docs/ru/operations/settings/settings/) <!--hide-->


@ -243,7 +243,7 @@ SELECT sequenceCount('(?1).*(?2)')(time, number = 1, number = 2) FROM t
**Syntax**
``` sql
windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN)
windowFunnel(window, [mode, [mode, ... ]])(timestamp, cond1, cond2, ..., condN)
```
**Arguments**
@ -254,7 +254,10 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN)
**Parameters**
- `window` — width of the sliding time window. The unit depends on the `timestamp` itself and varies. The condition `timestamp of cond2 <= timestamp of cond1 + window` must hold.
- `mode` — optional parameter. If set to `'strict'`, the `windowFunnel()` function applies the conditions only to unique values.
- `mode` — optional parameter. Several values can be set at once.
    - `'strict'` — skip consecutive repeated events.
    - `'strict_order'` — disallow extraneous events in the searched sequence. For example, when searching for the chain `A->B->C` in `A->B->D->C`, the search stops at `D` and the function returns 2.
    - `'strict_increase'` — apply the conditions only to events with strictly increasing timestamps.
**Returned value**


@ -7,7 +7,7 @@ toc_title: "Функции поиска в строках"
In all these functions, the search is case-sensitive by default. There are separate variants for case-insensitive search.
## position(haystack, needle) {#position}
## position(haystack, needle), locate(haystack, needle) {#position}
Searches for the substring `needle` in the string `haystack`.
@ -21,8 +21,15 @@ toc_title: "Функции поиска в строках"
position(haystack, needle[, start_pos])
```
``` sql
position(needle IN haystack)
```
Alias: `locate(haystack, needle[, start_pos])`.
!!! note "Note"
    The `position(needle IN haystack)` syntax provides SQL compatibility; the function works the same way as `position(haystack, needle)`.
**Arguments**
- `haystack` — the string to search in. [String](../syntax.md#syntax-string-literal).
@ -70,6 +77,36 @@ SELECT position('Привет, мир!', '!');
└───────────────────────────────┘
```
**Examples of the POSITION(needle IN haystack) syntax**
Query:
```sql
SELECT 1 = position('абв' IN 'абв');
```
Result:
```text
┌─equals(1, position('абв', 'абв'))─┐
│ 1 │
└───────────────────────────────────┘
```
Query:
```sql
SELECT 0 = position('абв' IN '');
```
Result:
```text
┌─equals(0, position('', 'абв'))─┐
│ 1 │
└────────────────────────────────┘
```
## positionCaseInsensitive {#positioncaseinsensitive}
The same as [position](#position), but case-insensitive. Returns the position (in bytes) of the found substring in the string, starting from 1.
@ -758,4 +795,3 @@ SELECT countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв');
│ 3 │
└────────────────────────────────────────────────────────────┘
```


@ -13,7 +13,7 @@ toc_title: "Представление"
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] AS SELECT ...
```
Normal views do not store any data; they read data from another table on each access. In other words, a normal view is nothing more than a saved query. When reading from a view, this saved query is used as a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause.
For example, suppose you have created a view:
@ -43,12 +43,12 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
When creating a materialized view without `TO [db].[table]`, you must specify `ENGINE`, the table engine for storing the data.
When creating a materialized view with `TO [db].[table]`, you must not use `POPULATE`.
A materialized view works as follows: when data is inserted into the table specified in its SELECT, the inserted block of data is transformed by this SELECT query, and the result is inserted into the view.
!!! important "Important"
    Materialized views in ClickHouse are more like `after insert` triggers. If the materialized view query contains aggregation, it is applied only to the freshly inserted block of records. Any changes to existing data of the source table (such as update, delete, drop partition, etc.) do not change the materialized view.
If `POPULATE` is specified, the existing table data is inserted into the view at creation time, as if a `CREATE TABLE ... AS SELECT ...` query had been run. Otherwise, the view will contain only the data inserted into the table after the view is created. Using POPULATE is not recommended, since data inserted into the table while the view is being created will not end up in it.
@ -56,9 +56,177 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
`ALTER` queries on materialized views are not fully developed, so they may be inconvenient to use. If the materialized view uses the `TO [db.]name` construct, you can `DETACH` the view, run `ALTER` on the target table, and then `ATTACH` the previously detached (`DETACH`) view.
Note that materialized view operation is affected by the [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged before it is inserted into the view.
Views look the same as normal tables. For example, they are listed in the result of a `SHOW TABLES` query.
To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop-view), although `DROP TABLE` also works for views.
## Live Views {#live-view}
!!! important "Important"
    `LIVE VIEW` is an experimental feature. Using it may entail a loss of backward compatibility in future versions.
    To use `LIVE VIEW` and `WATCH` queries, enable the [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting.
```sql
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
```
`LIVE VIEW` stores the result of the [SELECT](../../../sql-reference/statements/select/index.md) query specified at creation time and is updated as soon as this result changes. The final query result and the intermediate data needed to build it are kept in RAM, which provides high processing speed for repeated queries. Live views can send push notifications when the result of the underlying `SELECT` query changes; to receive them, use the [WATCH](../../../sql-reference/statements/watch.md) query.
A `LIVE VIEW` update is triggered by inserting data into the table specified in the underlying `SELECT` query.
Live views work on the same principle as distributed tables. But instead of combining partial results from different servers, they combine the already existing result with new data. If the live view's underlying query contains a subquery, its results are not cached; only the result of the main query is kept in the cache.
!!! info "Limitations"
    - [Table functions](../../../sql-reference/table-functions/index.md) are not supported in the main query.
    - Tables that do not support `INSERT` queries, such as [dictionaries](../../../sql-reference/dictionaries/index.md) and [system tables](../../../operations/system-tables/index.md), as well as [normal views](#normal) or [materialized views](#materialized), do not trigger a live view update.
    - Only queries that can combine results over old and new data can be used in live views. Live views do not work with queries that require a full recomputation of the data or stateful aggregation.
    - `LIVE VIEW` does not work for replicated and distributed tables where data is inserted on different nodes.
    - `LIVE VIEW` is not updated if the underlying query uses several tables.
In cases when a `LIVE VIEW` is not updated automatically, use [WITH REFRESH](#live-view-with-refresh) to refresh it forcibly at a given periodicity.
### Monitoring Changes {#live-view-monitoring}
To monitor changes of a live view, use the [WATCH](../../../sql-reference/statements/watch.md) query.
**Example:**
```sql
CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
CREATE LIVE VIEW lv AS SELECT sum(x) FROM mt;
```
Watch the changes of the live view while inserting data into the source table.
```sql
WATCH lv;
```
```bash
┌─sum(x)─┬─_version─┐
│ 1 │ 1 │
└────────┴──────────┘
┌─sum(x)─┬─_version─┐
│ 2 │ 2 │
└────────┴──────────┘
┌─sum(x)─┬─_version─┐
│ 6 │ 3 │
└────────┴──────────┘
...
```
```sql
INSERT INTO mt VALUES (1);
INSERT INTO mt VALUES (2);
INSERT INTO mt VALUES (3);
```
To get just the change events, use the [EVENTS](../../../sql-reference/statements/watch.md#events-clause) clause.
```sql
WATCH lv EVENTS;
```
```bash
┌─version─┐
│ 1 │
└─────────┘
┌─version─┐
│ 2 │
└─────────┘
┌─version─┐
│ 3 │
└─────────┘
...
```
You can work with live views as with any other views, using [SELECT](../../../sql-reference/statements/select/index.md) queries. If the query result is cached, it is returned immediately without querying the view's source tables.
```sql
SELECT * FROM [db.]live_view WHERE ...
```
### Forced Refresh {#live-view-alter-refresh}
To refresh a live view forcibly, use the `ALTER LIVE VIEW [db.]table_name REFRESH` query.
### WITH TIMEOUT Clause {#live-view-with-timeout}
A live view created with a `WITH TIMEOUT` clause is dropped automatically after the specified number of seconds elapse since the last [WATCH](../../../sql-reference/statements/watch.md) query applied to this live view.
```sql
CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ...
```
If the timeout value is not specified, the value of the [temporary_live_view_timeout](../../../operations/settings/settings.md#temporary-live-view-timeout) setting is used.
**Example:**
```sql
CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt;
```
### WITH REFRESH Clause {#live-view-with-refresh}
A live view created with a `WITH REFRESH` clause is refreshed automatically at the specified intervals, counted from the moment of the last refresh.
```sql
CREATE LIVE VIEW [db.]table_name WITH REFRESH [value_in_sec] AS SELECT ...
```
If the refresh interval is not specified, the value of the [periodic_live_view_refresh](../../../operations/settings/settings.md#periodic-live-view-refresh) setting is used.
**Example:**
```sql
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
WATCH lv;
```
```bash
┌───────────────now()─┬─_version─┐
│ 2021-02-21 08:47:05 │ 1 │
└─────────────────────┴──────────┘
┌───────────────now()─┬─_version─┐
│ 2021-02-21 08:47:10 │ 2 │
└─────────────────────┴──────────┘
┌───────────────now()─┬─_version─┐
│ 2021-02-21 08:47:15 │ 3 │
└─────────────────────┴──────────┘
```
The `WITH TIMEOUT` and `WITH REFRESH` clauses can be combined using `AND`.
```sql
CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ...
```
**Example:**
```sql
CREATE LIVE VIEW lv WITH TIMEOUT 15 AND REFRESH 5 AS SELECT now();
```
After 15 seconds the view will be dropped automatically if there is no active `WATCH` query.
```sql
WATCH lv;
```
```
Code: 60. DB::Exception: Received from localhost:9000. DB::Exception: Table default.lv doesn't exist..
```
### Usage {#live-view-usage}
The most common uses of `LIVE VIEW` include:
- Getting push notifications about data changes without extra periodic queries.
- Caching the results of frequently used queries to get them without delay.
- Watching table changes to trigger follow-up `SELECT` queries.
- Watching metrics from system tables using periodic refresh.
[Original article](https://clickhouse.tech/docs/ru/sql-reference/statements/create/view) <!--hide-->


@ -0,0 +1,106 @@
---
toc_priority: 53
toc_title: WATCH
---
# WATCH Query {#watch}
!!! important "Important"
    This is an experimental feature. It may entail a loss of backward compatibility in future versions.
    To use `LIVE VIEW` and `WATCH` queries, enable the setting `set allow_experimental_live_view = 1`.
**Syntax**
``` sql
WATCH [db.]live_view [EVENTS] [LIMIT n] [FORMAT format]
```
The `WATCH` query continuously returns the contents of a [live view](./create/view.md#live-view). Unless the `LIMIT` clause is specified, the `WATCH` query keeps updating the contents of the [live view](./create/view.md#live-view) indefinitely.
```sql
WATCH [db.]live_view;
```
## Virtual Columns {#watch-virtual-columns}
The virtual `_version` column in the query result indicates the version of the result.
**Example:**
```sql
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
WATCH lv;
```
```bash
┌───────────────now()─┬─_version─┐
│ 2021-02-21 09:17:21 │ 1 │
└─────────────────────┴──────────┘
┌───────────────now()─┬─_version─┐
│ 2021-02-21 09:17:26 │ 2 │
└─────────────────────┴──────────┘
┌───────────────now()─┬─_version─┐
│ 2021-02-21 09:17:31 │ 3 │
└─────────────────────┴──────────┘
...
```
By default, the requested data is returned to the client, while in conjunction with [INSERT INTO](../../sql-reference/statements/insert-into.md) it can be forwarded for insertion into a different table.
**Example:**
```sql
INSERT INTO [db.]table WATCH [db.]live_view ...
```
## EVENTS Clause {#events-clause}
The `EVENTS` clause gives a compact form of the `WATCH` query result: instead of the full result you get the number of the latest result version.
```sql
WATCH [db.]live_view EVENTS;
```
**Example:**
```sql
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
WATCH lv EVENTS;
```
```bash
┌─version─┐
│ 1 │
└─────────┘
┌─version─┐
│ 2 │
└─────────┘
...
```
## LIMIT Clause {#limit-clause}
The `LIMIT n` clause sets the number of `WATCH` query updates after which watching stops. By default this number is not set, so the query runs indefinitely. `LIMIT 0` means that the `WATCH` query returns a single up-to-date query result and stops watching.
```sql
WATCH [db.]live_view LIMIT 1;
```
**Example:**
```sql
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
WATCH lv EVENTS LIMIT 1;
```
```bash
┌─version─┐
│ 1 │
└─────────┘
```
## FORMAT Clause {#format-clause}
The `FORMAT` clause works the same way as the clause of the same name in the [SELECT](../../sql-reference/statements/select/format.md#format-clause) query.
!!! info "Note"
    When watching a [LIVE VIEW](./create/view.md#live-view) over the HTTP interface, the [JSONEachRowWithProgress](../../interfaces/formats.md#jsoneachrowwithprogress) format should be used. Progress messages are added to the output stream to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled by the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting.


@ -23,7 +23,6 @@ nltk==3.5
nose==1.3.7
protobuf==3.14.0
numpy==1.19.2
Pygments==2.5.2
pymdown-extensions==8.0
python-slugify==4.0.1
PyYAML==5.4.1
@ -36,3 +35,4 @@ termcolor==1.1.0
tornado==6.1
Unidecode==1.1.1
urllib3==1.25.10
Pygments>=2.7.4


@ -1,7 +1,5 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_folder_title: "\u5BFC\u8A00"
toc_folder_title: 快速上手
toc_priority: 2
---
@ -9,7 +7,7 @@ toc_priority: 2
If you are new to ClickHouse and want to get a hands-on feel for its performance,
you first need to complete [installation and deployment](install.md).
After that, you can take your first steps with the tutorial and example datasets:


@ -1,5 +1,5 @@
---
toc_folder_title: 引言
toc_folder_title: 简介
toc_priority: 1
---


@ -1,24 +1,8 @@
# Usage Recommendations {#usage-recommendations}
## CPU {#cpu}
The SSE4.2 instruction set must be supported. Modern processors (since 2008) support it.
When choosing a processor, prefer a larger number of cores at a slightly lower clock rate over fewer cores at a higher clock rate.
For example, 16 cores at 2600 MHz are better than 8 cores at 3600 MHz.
## Hyper-threading {#hyper-threading}
Do not disable hyper-threading. It helps some queries but not others.
## Turbo Boost {#turbo-boost}
Turbo Boost is highly recommended. It significantly improves performance under typical load.
You can use `turbostat` to view the CPU's actual clock rate under load.
## CPU Scaling Governor {#cpu-scaling-governor}
Always use the `performance` scaling governor. The `on-demand` governor performs much worse under sustained high demand.
``` bash
echo 'performance' | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
```
@ -26,68 +10,70 @@ echo 'performance' | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_gover
## CPU Limitations {#cpu-limitations}
Processors can overheat. Use `dmesg` to see whether the CPU's clock rate was limited due to overheating.
The limit can also be set externally at the data center level. You can use `turbostat` to monitor it under load.
## RAM {#ram}
For small amounts of data (up to ~200 GB compressed), it is best to use as much memory as the volume of data.
For large amounts of data, and when processing interactive (online) queries, you should use a reasonable amount of RAM (128 GB or more) so that the hot data subset fits in the page cache.
Even for data volumes of ~50 TB per server, using 128 GB of RAM significantly improves query performance compared to 64 GB.
## Swap File {#swap-file}
Do not disable overcommit. The value of `cat /proc/sys/vm/overcommit_memory` should be 0 or 1. Run
``` bash
$ echo 0 | sudo tee /proc/sys/vm/overcommit_memory
```
## Huge Pages {#huge-pages}
Always disable transparent huge pages. They interfere with the memory allocator, which leads to significant performance degradation.
``` bash
echo 'never' | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
```
Use `perf top` to watch the time spent in the kernel on memory management.
Permanent huge pages also do not need to be allocated.
## Storage Subsystem {#storage-subsystem}
If your budget allows, use SSDs.
If not, use HDDs. SATA HDDs at 7200 RPM will do.
Give preference to many servers with local hard drives over fewer servers with attached disk shelves.
But for storing archives that are rarely queried, shelves will work.
## RAID {#raid}
When using HDDs, you can combine them into RAID-10, RAID-5, RAID-6, or RAID-50.
For Linux, software RAID is better (with `mdadm`). We do not recommend using LVM.
When creating RAID-10, select the `far` layout.
If your budget allows, choose RAID-10.
If you have more than 4 disks, use RAID-6 (preferred) or RAID-50 instead of RAID-5.
When using RAID-5, RAID-6, or RAID-50, always increase stripe_cache_size, since the default value is usually not the best choice.
``` bash
echo 4096 | sudo tee /sys/block/md2/md/stripe_cache_size
```
Calculate the exact number from the number of devices and the block size using the formula: `2 * num_devices * chunk_size_in_bytes / 4096`.
A block size of 1024 KB is sufficient for all RAID configurations.
Never set the block size too small or too large.
You can use RAID-0 on SSDs.
Regardless of the RAID used, always use replication for data safety.
Enable NCQ with a long queue. For HDDs, choose the CFQ scheduler; for SSDs, choose noop. Do not reduce the readahead setting.
For HDDs, enable the write cache.
## File System {#file-system}
Ext4 is the most reliable option. Set the mount options `noatime, nobarrier`.
XFS is also suitable, but it has not been as thoroughly tested with ClickHouse.
Most other file systems should also work fine. File systems with delayed allocation work better.
## Linux Kernel {#linux-kernel}
@ -95,26 +81,43 @@ XFS is also suitable, but it has not been thoroughly tested with ClickHouse.
## Network {#network}
If you are using IPv6, increase the size of the route cache.
Linux kernels before 3.2 had numerous problems with the IPv6 implementation.
Use at least a 10 GB network, if possible. 1 Gb will also work, but it will be much worse for patching replicas with tens of terabytes of data, or for processing distributed queries with a large amount of intermediate data.
## Hypervisor Configuration
If you are using OpenStack, set in nova.conf:
```
cpu_mode=host-passthrough
```
If you are using libvirt, set in the XML configuration:
```
<cpu mode='host-passthrough'/>
```
This is important for ClickHouse to be able to get correct information with the `cpuid` instruction.
Otherwise you may get `Illegal instruction` crashes when the hypervisor is run on old CPU models.
## ZooKeeper {#zookeeper}
You are probably already using ZooKeeper for other purposes. You can use the same installation of ZooKeeper if it is not already overloaded.
It is best to use a fresh version of ZooKeeper, 3.4.9 or later. The version in stable Linux distributions may be outdated.
You should never use manually written scripts to transfer data between different ZooKeeper clusters, because the result will be incorrect for sequential nodes. Never use the zkcopy tool for the same reason: https://github.com/ksprojects/zkcopy/issues/15
If you want to divide an existing ZooKeeper cluster into two, the correct way is to increase the number of its replicas and then reconfigure it as two independent clusters.
Do not run ZooKeeper on the same servers as ClickHouse, because ZooKeeper is very sensitive to latency and ClickHouse may consume all available system resources.
With the default settings, ZooKeeper is a time bomb:
The ZooKeeper server will not delete files from old snapshots and logs when using the default configuration (see autopurge); this is the responsibility of the operator.
This bomb must be defused.
@ -222,7 +225,7 @@ JAVA_OPTS="-Xms{{ '{{' }} cluster.get('xms','128M') {{ '}}' }} \
-XX:+CMSParallelRemarkEnabled"
```
Salt init:
description "zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }} centralized coordination service"


@ -106,7 +106,7 @@ void ClusterCopier::discoverShardPartitions(const ConnectionTimeouts & timeouts,
try
{
type->deserializeAsTextQuoted(*column_dummy, rb, FormatSettings());
type->getDefaultSerialization()->deserializeTextQuoted(*column_dummy, rb, FormatSettings());
}
catch (Exception & e)
{
@ -1719,7 +1719,7 @@ std::set<String> ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti
for (size_t i = 0; i < column.column->size(); ++i)
{
WriteBufferFromOwnString wb;
column.type->serializeAsTextQuoted(*column.column, i, wb, FormatSettings());
column.type->getDefaultSerialization()->serializeTextQuoted(*column.column, i, wb, FormatSettings());
res.emplace(wb.str());
}
}


@ -100,16 +100,16 @@ class IModel
{
public:
/// Call train iteratively for each block to train a model.
virtual void train(const IColumn & column);
virtual void train(const IColumn & column) = 0;
/// Call finalize one time after training before generating.
virtual void finalize();
virtual void finalize() = 0;
/// Call generate: pass source data column to obtain a column with anonymized data as a result.
virtual ColumnPtr generate(const IColumn & column);
virtual ColumnPtr generate(const IColumn & column) = 0;
/// Deterministically change seed to some other value. This can be used to generate more values than were in source.
virtual void updateSeed();
virtual void updateSeed() = 0;
virtual ~IModel() = default;
};


@ -97,7 +97,7 @@
#endif
#if USE_NURAFT
# include <Server/NuKeeperTCPHandlerFactory.h>
# include <Server/KeeperTCPHandlerFactory.h>
#endif
namespace CurrentMetrics
@ -867,15 +867,15 @@ int Server::main(const std::vector<std::string> & /*args*/)
listen_try = true;
}
if (config().has("test_keeper_server"))
if (config().has("keeper_server"))
{
#if USE_NURAFT
/// Initialize test keeper RAFT. Do nothing if no nu_keeper_server in config.
global_context->initializeNuKeeperStorageDispatcher();
global_context->initializeKeeperStorageDispatcher();
for (const auto & listen_host : listen_hosts)
{
/// TCP NuKeeper
const char * port_name = "test_keeper_server.tcp_port";
/// TCP Keeper
const char * port_name = "keeper_server.tcp_port";
createServer(listen_host, port_name, listen_try, [&](UInt16 port)
{
Poco::Net::ServerSocket socket;
@ -885,9 +885,9 @@ int Server::main(const std::vector<std::string> & /*args*/)
servers_to_start_before_tables->emplace_back(
port_name,
std::make_unique<Poco::Net::TCPServer>(
new NuKeeperTCPHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
new KeeperTCPHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
LOG_INFO(log, "Listening for connections to NuKeeper (tcp): {}", address.toString());
LOG_INFO(log, "Listening for connections to Keeper (tcp): {}", address.toString());
});
}
#else
@ -934,7 +934,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
else
LOG_INFO(log, "Closed connections to servers for tables.");
global_context->shutdownNuKeeperStorageDispatcher();
global_context->shutdownKeeperStorageDispatcher();
}
/** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available.


@ -0,0 +1 @@
../../../tests/config/config.d/keeper_port.xml


@ -1 +0,0 @@
../../../tests/config/config.d/test_keeper_port.xml


@ -39,6 +39,8 @@ class AggregateFunctionArgMinMax final : public IAggregateFunctionTupleArgHelper
private:
const DataTypePtr & type_res;
const DataTypePtr & type_val;
const SerializationPtr serialization_res;
const SerializationPtr serialization_val;
bool tuple_argument;
using Base = IAggregateFunctionTupleArgHelper<Data, AggregateFunctionArgMinMax<Data>, 2>;
@ -48,6 +50,8 @@ public:
: Base({type_res_, type_val_}, {}, tuple_argument_)
, type_res(this->argument_types[0])
, type_val(this->argument_types[1])
, serialization_res(type_res->getDefaultSerialization())
, serialization_val(type_val->getDefaultSerialization())
{
if (!type_val->isComparable())
throw Exception(
@ -84,14 +88,14 @@ public:
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
{
this->data(place).result.write(buf, *type_res);
this->data(place).value.write(buf, *type_val);
this->data(place).result.write(buf, *serialization_res);
this->data(place).value.write(buf, *serialization_val);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
{
this->data(place).result.read(buf, *type_res, arena);
this->data(place).value.read(buf, *type_val, arena);
this->data(place).result.read(buf, *serialization_res, arena);
this->data(place).value.read(buf, *serialization_val, arena);
}
bool allocatesMemoryInArena() const override { return Data::allocatesMemoryInArena(); }


@ -55,7 +55,8 @@ class AggregateFunctionGroupArrayInsertAtGeneric final
: public IAggregateFunctionDataHelper<AggregateFunctionGroupArrayInsertAtDataGeneric, AggregateFunctionGroupArrayInsertAtGeneric>
{
private:
DataTypePtr & type;
DataTypePtr type;
SerializationPtr serialization;
Field default_value;
UInt64 length_to_resize = 0; /// zero means - do not do resizing.
@ -63,6 +64,7 @@ public:
AggregateFunctionGroupArrayInsertAtGeneric(const DataTypes & arguments, const Array & params)
: IAggregateFunctionDataHelper<AggregateFunctionGroupArrayInsertAtDataGeneric, AggregateFunctionGroupArrayInsertAtGeneric>(arguments, params)
, type(argument_types[0])
, serialization(type->getDefaultSerialization())
{
if (!params.empty())
{
@ -154,7 +156,7 @@ public:
else
{
writeBinary(UInt8(0), buf);
type->serializeBinary(elem, buf);
serialization->serializeBinary(elem, buf);
}
}
}
@ -175,7 +177,7 @@ public:
UInt8 is_null = 0;
readBinary(is_null, buf);
if (!is_null)
type->deserializeBinary(arr[i], buf);
serialization->deserializeBinary(arr[i], buf);
}
}


@ -50,14 +50,14 @@ public:
assert_cast<ColVecType &>(to).insertDefault();
}
void write(WriteBuffer & buf, const IDataType & /*data_type*/) const
void write(WriteBuffer & buf, const ISerialization & /*serialization*/) const
{
writeBinary(has(), buf);
if (has())
writeBinary(value, buf);
}
void read(ReadBuffer & buf, const IDataType & /*data_type*/, Arena *)
void read(ReadBuffer & buf, const ISerialization & /*serialization*/, Arena *)
{
readBinary(has_value, buf);
if (has())
@ -221,14 +221,14 @@ public:
assert_cast<ColumnString &>(to).insertDefault();
}
void write(WriteBuffer & buf, const IDataType & /*data_type*/) const
void write(WriteBuffer & buf, const ISerialization & /*serialization*/) const
{
writeBinary(size, buf);
if (has())
buf.write(getData(), size);
}
void read(ReadBuffer & buf, const IDataType & /*data_type*/, Arena * arena)
void read(ReadBuffer & buf, const ISerialization & /*serialization*/, Arena * arena)
{
Int32 rhs_size;
readBinary(rhs_size, buf);
@ -427,24 +427,24 @@ public:
to.insertDefault();
}
void write(WriteBuffer & buf, const IDataType & data_type) const
void write(WriteBuffer & buf, const ISerialization & serialization) const
{
if (!value.isNull())
{
writeBinary(true, buf);
data_type.serializeBinary(value, buf);
serialization.serializeBinary(value, buf);
}
else
writeBinary(false, buf);
}
void read(ReadBuffer & buf, const IDataType & data_type, Arena *)
void read(ReadBuffer & buf, const ISerialization & serialization, Arena *)
{
bool is_not_null;
readBinary(is_not_null, buf);
if (is_not_null)
data_type.deserializeBinary(value, buf);
serialization.deserializeBinary(value, buf);
}
void change(const IColumn & column, size_t row_num, Arena *)
@ -678,15 +678,15 @@ struct AggregateFunctionAnyHeavyData : Data
return false;
}
void write(WriteBuffer & buf, const IDataType & data_type) const
void write(WriteBuffer & buf, const ISerialization & serialization) const
{
Data::write(buf, data_type);
Data::write(buf, serialization);
writeBinary(counter, buf);
}
void read(ReadBuffer & buf, const IDataType & data_type, Arena * arena)
void read(ReadBuffer & buf, const ISerialization & serialization, Arena * arena)
{
Data::read(buf, data_type, arena);
Data::read(buf, serialization, arena);
readBinary(counter, buf);
}
@ -698,12 +698,14 @@ template <typename Data>
class AggregateFunctionsSingleValue final : public IAggregateFunctionDataHelper<Data, AggregateFunctionsSingleValue<Data>>
{
private:
DataTypePtr & type;
DataTypePtr type;
SerializationPtr serialization;
public:
AggregateFunctionsSingleValue(const DataTypePtr & type_)
: IAggregateFunctionDataHelper<Data, AggregateFunctionsSingleValue<Data>>({type_}, {})
, type(this->argument_types[0])
, serialization(type->getDefaultSerialization())
{
if (StringRef(Data::name()) == StringRef("min")
|| StringRef(Data::name()) == StringRef("max"))
@ -733,12 +735,12 @@ public:
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
{
this->data(place).write(buf, *type.get());
this->data(place).write(buf, *serialization);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
{
this->data(place).read(buf, *type.get(), arena);
this->data(place).read(buf, *serialization, arena);
}
bool allocatesMemoryInArena() const override

View File

@ -64,7 +64,9 @@ class AggregateFunctionMapBase : public IAggregateFunctionDataHelper<
{
private:
DataTypePtr keys_type;
SerializationPtr keys_serialization;
DataTypes values_types;
Serializations values_serializations;
public:
using Base = IAggregateFunctionDataHelper<
@ -72,9 +74,14 @@ public:
AggregateFunctionMapBase(const DataTypePtr & keys_type_,
const DataTypes & values_types_, const DataTypes & argument_types_)
: Base(argument_types_, {} /* parameters */), keys_type(keys_type_),
values_types(values_types_)
: Base(argument_types_, {} /* parameters */)
, keys_type(keys_type_)
, keys_serialization(keys_type->getDefaultSerialization())
, values_types(values_types_)
{
values_serializations.reserve(values_types.size());
for (const auto & type : values_types)
values_serializations.emplace_back(type->getDefaultSerialization());
}
DataTypePtr getReturnType() const override
@ -248,9 +255,9 @@ public:
for (const auto & elem : merged_maps)
{
keys_type->serializeBinary(elem.first, buf);
keys_serialization->serializeBinary(elem.first, buf);
for (size_t col = 0; col < values_types.size(); ++col)
values_types[col]->serializeBinary(elem.second[col], buf);
values_serializations[col]->serializeBinary(elem.second[col], buf);
}
}
@ -263,12 +270,12 @@ public:
for (size_t i = 0; i < size; ++i)
{
Field key;
keys_type->deserializeBinary(key, buf);
keys_serialization->deserializeBinary(key, buf);
Array values;
values.resize(values_types.size());
for (size_t col = 0; col < values_types.size(); ++col)
values_types[col]->deserializeBinary(values[col], buf);
values_serializations[col]->deserializeBinary(values[col], buf);
if constexpr (IsDecimalNumber<T>)
merged_maps[key.get<DecimalField<T>>()] = values;

View File

@ -6,7 +6,6 @@
#include <DataTypes/DataTypeDateTime.h>
#include <ext/range.h>
#include "registerAggregateFunctions.h"
namespace DB

View File

@ -19,22 +19,13 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
struct ComparePair final
{
template <typename T1, typename T2>
bool operator()(const std::pair<T1, T2> & lhs, const std::pair<T1, T2> & rhs) const
{
return lhs.first == rhs.first ? lhs.second < rhs.second : lhs.first < rhs.first;
}
};
static constexpr auto max_events = 32;
template <typename T>
struct AggregateFunctionWindowFunnelData
{
using TimestampEvent = std::pair<T, UInt8>;
using TimestampEvents = PODArrayWithStackMemory<TimestampEvent, 64>;
using Comparator = ComparePair;
bool sorted = true;
TimestampEvents events_list;
@ -46,7 +37,7 @@ struct AggregateFunctionWindowFunnelData
void add(T timestamp, UInt8 event)
{
// Since most events should have already been sorted by timestamp.
/// Most events should have already been sorted by timestamp.
if (sorted && events_list.size() > 0)
{
if (events_list.back().first == timestamp)
@ -68,7 +59,7 @@ struct AggregateFunctionWindowFunnelData
/// either sort whole container or do so partially merging ranges afterwards
if (!sorted && !other.sorted)
std::stable_sort(std::begin(events_list), std::end(events_list), Comparator{});
std::stable_sort(std::begin(events_list), std::end(events_list));
else
{
const auto begin = std::begin(events_list);
@ -76,12 +67,12 @@ struct AggregateFunctionWindowFunnelData
const auto end = std::end(events_list);
if (!sorted)
std::stable_sort(begin, middle, Comparator{});
std::stable_sort(begin, middle);
if (!other.sorted)
std::stable_sort(middle, end, Comparator{});
std::stable_sort(middle, end);
std::inplace_merge(begin, middle, end, Comparator{});
std::inplace_merge(begin, middle, end);
}
sorted = true;
@ -91,7 +82,7 @@ struct AggregateFunctionWindowFunnelData
{
if (!sorted)
{
std::stable_sort(std::begin(events_list), std::end(events_list), Comparator{});
std::stable_sort(std::begin(events_list), std::end(events_list));
sorted = true;
}
}
@ -145,14 +136,20 @@ class AggregateFunctionWindowFunnel final
private:
UInt64 window;
UInt8 events_size;
UInt8 strict; // When the 'strict' is set, it applies conditions only for the not repeating values.
UInt8 strict_order; // When the 'strict_order' is set, it doesn't allow interventions of other events.
// In the case of 'A->B->D->C', it stops finding 'A->B->C' at the 'D' and the max event level is 2.
/// When 'strict' is set, conditions are applied only to non-repeating values.
bool strict;
// Loop through the entire events_list, update the event timestamp value
// The level path must be 1---2---3---...---check_events_size, find the max event level that satisfied the path in the sliding window.
// If found, returns the max event level, else return 0.
// The Algorithm complexity is O(n).
/// When 'strict_order' is set, intervening events are not allowed.
/// In the case of 'A->B->D->C', the search for 'A->B->C' stops at 'D' and the max event level is 2.
bool strict_order;
/// Applies conditions only to events with strictly increasing timestamps.
bool strict_increase;
/// Loop through the entire events_list, updating the event timestamp values.
/// The level path must be 1---2---3---...---check_events_size; find the max event level that satisfies the path within the sliding window.
/// If found, return the max event level, else return 0.
/// The algorithm complexity is O(n).
UInt8 getEventLevel(Data & data) const
{
if (data.size() == 0)
@ -162,16 +159,13 @@ private:
data.sort();
/// events_timestamp stores the timestamp that latest i-th level event happen within time window after previous level event.
/// timestamp defaults to -1, which unsigned timestamp value never meet
/// there may be some bugs when UInt64 type timstamp overflows Int64, but it works on most cases.
std::vector<Int64> events_timestamp(events_size, -1);
/// events_timestamp stores the timestamps of the first and the previous i-th level events that happened within the time window
std::vector<std::optional<std::pair<UInt64, UInt64>>> events_timestamp(events_size);
bool first_event = false;
for (const auto & pair : data.events_list)
{
const T & timestamp = pair.first;
const auto & event_idx = pair.second - 1;
if (strict_order && event_idx == -1)
{
if (first_event)
@ -181,31 +175,39 @@ private:
}
else if (event_idx == 0)
{
events_timestamp[0] = timestamp;
events_timestamp[0] = std::make_pair(timestamp, timestamp);
first_event = true;
}
else if (strict && events_timestamp[event_idx] >= 0)
else if (strict && events_timestamp[event_idx].has_value())
{
return event_idx + 1;
}
else if (strict_order && first_event && events_timestamp[event_idx - 1] == -1)
else if (strict_order && first_event && !events_timestamp[event_idx - 1].has_value())
{
for (size_t event = 0; event < events_timestamp.size(); ++event)
{
if (events_timestamp[event] == -1)
if (!events_timestamp[event].has_value())
return event;
}
}
else if (events_timestamp[event_idx - 1] >= 0 && timestamp <= events_timestamp[event_idx - 1] + window)
else if (events_timestamp[event_idx - 1].has_value())
{
events_timestamp[event_idx] = events_timestamp[event_idx - 1];
if (event_idx + 1 == events_size)
return events_size;
auto first_timestamp = events_timestamp[event_idx - 1]->first;
bool time_matched = timestamp <= first_timestamp + window;
if (strict_increase)
time_matched = time_matched && events_timestamp[event_idx - 1]->second < timestamp;
if (time_matched)
{
events_timestamp[event_idx] = std::make_pair(first_timestamp, timestamp);
if (event_idx + 1 == events_size)
return events_size;
}
}
}
for (size_t event = events_timestamp.size(); event > 0; --event)
{
if (events_timestamp[event - 1] >= 0)
if (events_timestamp[event - 1].has_value())
return event;
}
return 0;
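To make the rewritten loop above easier to follow, here is a standalone sketch of the same sliding-window logic, simplified (no 'strict'/'strict_order' handling) and with illustrative names rather than the real class members:

#include <cstdint>
#include <optional>
#include <utility>
#include <vector>

/// Events are (timestamp, level) pairs with 1-based levels, already sorted by timestamp.
uint8_t getMaxLevel(const std::vector<std::pair<uint64_t, uint8_t>> & sorted_events,
                    uint64_t window, size_t levels, bool strict_increase)
{
    /// For each level: (timestamp of the chain's first event, timestamp of the previous level's event).
    std::vector<std::optional<std::pair<uint64_t, uint64_t>>> chain(levels);
    for (const auto & [timestamp, level] : sorted_events)
    {
        const size_t idx = level - 1;
        if (idx == 0)
            chain[0] = std::make_pair(timestamp, timestamp);
        else if (chain[idx - 1].has_value())
        {
            const auto [first_ts, prev_ts] = *chain[idx - 1];
            bool matched = timestamp <= first_ts + window;
            if (strict_increase)
                matched = matched && prev_ts < timestamp;  /// previous level must be strictly earlier
            if (matched)
                chain[idx] = std::make_pair(first_ts, timestamp);
        }
    }
    for (size_t level = levels; level > 0; --level)
        if (chain[level - 1].has_value())
            return static_cast<uint8_t>(level);
    return 0;
}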
@ -223,15 +225,18 @@ public:
events_size = arguments.size() - 1;
window = params.at(0).safeGet<UInt64>();
strict = 0;
strict_order = 0;
strict = false;
strict_order = false;
strict_increase = false;
for (size_t i = 1; i < params.size(); ++i)
{
String option = params.at(i).safeGet<String>();
if (option.compare("strict") == 0)
strict = 1;
else if (option.compare("strict_order") == 0)
strict_order = 1;
if (option == "strict")
strict = true;
else if (option == "strict_order")
strict_order = true;
else if (option == "strict_increase")
strict_increase = true;
else
throw Exception{"Aggregate function " + getName() + " doesn't support a parameter: " + option, ErrorCodes::BAD_ARGUMENTS};
}
@ -253,7 +258,7 @@ public:
{
bool has_event = false;
const auto timestamp = assert_cast<const ColumnVector<T> *>(columns[0])->getData()[row_num];
// reverse iteration and stable sorting are needed for events that are qualified by more than one condition.
/// Reverse iteration and stable sorting are needed for events that are qualified by more than one condition.
for (auto i = events_size; i > 0; --i)
{
auto event = assert_cast<const ColumnVector<UInt8> *>(columns[i])->getData()[row_num];

View File

@ -13,6 +13,7 @@
#include <Common/NaNUtils.h>
#include <Poco/Exception.h>
namespace DB
{
namespace ErrorCodes
@ -162,6 +163,11 @@ public:
sorted = false;
}
#if !__clang__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wclass-memaccess"
#endif
void write(DB::WriteBuffer & buf) const
{
size_t size = samples.size();
@ -169,9 +175,26 @@ public:
DB::writeIntBinary<size_t>(total_values, buf);
for (size_t i = 0; i < size; ++i)
DB::writePODBinary(samples[i], buf);
{
/// There was a mistake in this function.
/// Instead of correctly serializing the elements,
/// it was writing them with uninitialized padding.
/// Here we ensure that padding is zero without changing the protocol.
/// TODO: After implementation of "versioning aggregate function state",
/// change the serialization format.
Element elem;
memset(&elem, 0, sizeof(elem));
elem = samples[i];
DB::writePODBinary(elem, buf);
}
}
#if !__clang__
#pragma GCC diagnostic pop
#endif
private:
/// We allocate some memory on the stack to avoid allocations when there are many objects with a small number of elements.
using Element = std::pair<T, UInt32>;
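The trick above is easy to miss in diff form, so here is a self-contained sketch of it (writePOD stands in for DB::writePODBinary; the types are illustrative): copying the element into a memset-to-zero local before writing makes the padding bytes deterministic without changing the wire format.

#include <cstring>
#include <utility>

template <typename T, typename Buf>
void writePOD(const T & x, Buf & buf)  /// stand-in for DB::writePODBinary
{
    buf.write(reinterpret_cast<const char *>(&x), sizeof(x));
}

template <typename Element, typename Buf>
void writeZeroPadded(const Element & sample, Buf & buf)
{
    Element elem;
    std::memset(&elem, 0, sizeof(elem));  /// zero everything, including padding bytes
    elem = sample;                        /// member-wise copy leaves the padding at zero
    writePOD(elem, buf);
}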

View File

@ -158,7 +158,11 @@ macro(add_object_library name common_path)
list (APPEND all_modules ${name})
add_headers_and_sources(${name} ${common_path})
add_library(${name} SHARED ${${name}_sources} ${${name}_headers})
target_link_libraries (${name} PRIVATE -Wl,--unresolved-symbols=ignore-all)
if (OS_DARWIN)
target_link_libraries (${name} PRIVATE -Wl,-undefined,dynamic_lookup)
else()
target_link_libraries (${name} PRIVATE -Wl,--unresolved-symbols=ignore-all)
endif()
endif ()
endmacro()
@ -168,6 +172,7 @@ add_object_library(clickhouse_core_mysql Core/MySQL)
add_object_library(clickhouse_compression Compression)
add_object_library(clickhouse_datastreams DataStreams)
add_object_library(clickhouse_datatypes DataTypes)
add_object_library(clickhouse_datatypes_serializations DataTypes/Serializations)
add_object_library(clickhouse_databases Databases)
add_object_library(clickhouse_databases_mysql Databases/MySQL)
add_object_library(clickhouse_disks Disks)
@ -215,7 +220,11 @@ else()
target_link_libraries (clickhouse_interpreters PRIVATE clickhouse_parsers_new jemalloc libdivide)
list (APPEND all_modules dbms)
# force all split libs to be linked
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed")
if (OS_DARWIN)
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-undefined,error")
else()
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed")
endif()
endif ()
macro (dbms_target_include_directories)

View File

@ -88,9 +88,9 @@ public:
const String & user_, const String & password_,
const String & cluster_,
const String & cluster_secret_,
const String & client_name_ = "client",
Protocol::Compression compression_ = Protocol::Compression::Enable,
Protocol::Secure secure_ = Protocol::Secure::Disable,
const String & client_name_,
Protocol::Compression compression_,
Protocol::Secure secure_,
Poco::Timespan sync_request_timeout_ = Poco::Timespan(DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC, 0))
:
host(host_), port(port_), default_database(default_database_),

View File

@ -56,9 +56,9 @@ public:
const String & password_,
const String & cluster_,
const String & cluster_secret_,
const String & client_name_ = "client",
Protocol::Compression compression_ = Protocol::Compression::Enable,
Protocol::Secure secure_ = Protocol::Secure::Disable,
const String & client_name_,
Protocol::Compression compression_,
Protocol::Secure secure_,
Int64 priority_ = 1)
: Base(max_connections_,
&Poco::Logger::get("ConnectionPool (" + host_ + ":" + toString(port_) + ")")),

View File

@ -13,6 +13,7 @@
#include <IO/ConnectionTimeouts.h>
namespace DB
{

View File

@ -362,9 +362,10 @@ int HedgedConnections::getReadyFileDescriptor(AsyncCallback async_callback)
epoll_event event;
event.data.fd = -1;
size_t events_count = 0;
bool blocking = !static_cast<bool>(async_callback);
while (events_count == 0)
{
events_count = epoll.getManyReady(1, &event, false);
events_count = epoll.getManyReady(1, &event, blocking);
if (!events_count && async_callback)
async_callback(epoll.getFileDescriptor(), 0, epoll.getDescription());
}
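A minimal sketch of the behavior change above (simplified signatures; raw epoll instead of the project's Epoll wrapper): without an async callback there is nothing useful to do between polls, so the wait may as well block instead of spinning.

#include <functional>
#include <sys/epoll.h>

int getReadyFd(int epoll_fd, const std::function<void()> & async_callback)
{
    epoll_event event{};
    event.data.fd = -1;
    /// Block in epoll_wait only when no callback must run between polls.
    const int timeout_ms = async_callback ? 0 : -1;
    int events_count = 0;
    while (events_count == 0)
    {
        events_count = epoll_wait(epoll_fd, &event, /* maxevents */ 1, timeout_ms);
        if (events_count == 0 && async_callback)
            async_callback();  /// let the caller make progress, then poll again
    }
    return event.data.fd;
}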

View File

@ -1211,7 +1211,6 @@ ColumnPtr ColumnArray::replicateTuple(const Offsets & replicate_offsets) const
assert_cast<const ColumnArray &>(*temporary_arrays.front()).getOffsetsPtr());
}
void ColumnArray::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);

View File

@ -536,7 +536,6 @@ void ColumnString::getExtremes(Field & min, Field & max) const
get(max_idx, max);
}
ColumnPtr ColumnString::compress() const
{
size_t source_chars_size = chars.size();

View File

@ -279,7 +279,6 @@ public:
return typeid(rhs) == typeid(ColumnString);
}
Chars & getChars() { return chars; }
const Chars & getChars() const { return chars; }

View File

@ -26,6 +26,9 @@ class ColumnGathererStream;
class Field;
class WeakHash32;
class ISerialization;
using SerializationPtr = std::shared_ptr<const ISerialization>;
/*
* Represents a set of equal ranges in previous column to perform sorting in current column.

View File

@ -4,6 +4,7 @@
#include <Common/PODArray.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnsCommon.h>
#include <Core/Field.h>
namespace DB

View File

@ -130,7 +130,6 @@ TEST(DateLUTTest, TimeValuesInMiddleOfRange)
EXPECT_EQ(lut.toRelativeQuarterNum(time), 8078 /*unsigned*/);
EXPECT_EQ(lut.toRelativeHourNum(time), 435736 /*time_t*/);
EXPECT_EQ(lut.toRelativeMinuteNum(time), 26144180 /*time_t*/);
EXPECT_EQ(lut.toStartOfHourInterval(time, 5), 1568646000 /*time_t*/);
EXPECT_EQ(lut.toStartOfMinuteInterval(time, 6), 1568650680 /*time_t*/);
EXPECT_EQ(lut.toStartOfSecondInterval(time, 7), 1568650811 /*time_t*/);
EXPECT_EQ(lut.toNumYYYYMM(time), 201909 /*UInt32*/);
@ -191,7 +190,6 @@ TEST(DateLUTTest, TimeValuesAtLeftBoderOfRange)
EXPECT_EQ(lut.toRelativeQuarterNum(time), 7880 /*unsigned*/); // ?
EXPECT_EQ(lut.toRelativeHourNum(time), 0 /*time_t*/);
EXPECT_EQ(lut.toRelativeMinuteNum(time), 0 /*time_t*/);
EXPECT_EQ(lut.toStartOfHourInterval(time, 5), 0 /*time_t*/);
EXPECT_EQ(lut.toStartOfMinuteInterval(time, 6), 0 /*time_t*/);
EXPECT_EQ(lut.toStartOfSecondInterval(time, 7), 0 /*time_t*/);
EXPECT_EQ(lut.toNumYYYYMM(time), 197001 /*UInt32*/);
@ -253,7 +251,6 @@ TEST(DateLUTTest, TimeValuesAtRightBoderOfRangeOfOldLUT)
EXPECT_EQ(lut.toRelativeQuarterNum(time), 8424 /*unsigned*/);
EXPECT_EQ(lut.toRelativeHourNum(time), 1192873 /*time_t*/);
EXPECT_EQ(lut.toRelativeMinuteNum(time), 71572397 /*time_t*/);
EXPECT_EQ(lut.toStartOfHourInterval(time, 5), 4294332000 /*time_t*/);
EXPECT_EQ(lut.toStartOfMinuteInterval(time, 6), 4294343520 /*time_t*/);
EXPECT_EQ(lut.toStartOfSecondInterval(time, 7), 4294343872 /*time_t*/);
EXPECT_EQ(lut.toNumYYYYMM(time), 210601 /*UInt32*/);

View File

@ -107,9 +107,9 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr
if (column_type)
{
CompressionCodecPtr prev_codec;
IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path, const IDataType & substream_type)
IDataType::StreamCallbackWithType callback = [&](const ISerialization::SubstreamPath & substream_path, const IDataType & substream_type)
{
if (IDataType::isSpecialCompressionAllowed(substream_path))
if (ISerialization::isSpecialCompressionAllowed(substream_path))
{
result_codec = getImpl(codec_family_name, codec_arguments, &substream_type);
@ -121,8 +121,8 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr
}
};
IDataType::SubstreamPath stream_path;
column_type->enumerateStreams(callback, stream_path);
ISerialization::SubstreamPath stream_path;
column_type->enumerateStreams(column_type->getDefaultSerialization(), callback, stream_path);
if (!result_codec)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find any substream with data type for type {}. It's a bug", column_type->getName());

View File

@ -1,40 +1,40 @@
#include <Coordination/NuKeeperLogStore.h>
#include <Coordination/KeeperLogStore.h>
namespace DB
{
NuKeeperLogStore::NuKeeperLogStore(const std::string & changelogs_path, size_t rotate_interval_, bool force_sync_)
: log(&Poco::Logger::get("NuKeeperLogStore"))
KeeperLogStore::KeeperLogStore(const std::string & changelogs_path, size_t rotate_interval_, bool force_sync_)
: log(&Poco::Logger::get("KeeperLogStore"))
, changelog(changelogs_path, rotate_interval_, log)
, force_sync(force_sync_)
{
}
size_t NuKeeperLogStore::start_index() const
size_t KeeperLogStore::start_index() const
{
std::lock_guard lock(changelog_lock);
return changelog.getStartIndex();
}
void NuKeeperLogStore::init(size_t last_commited_log_index, size_t logs_to_keep)
void KeeperLogStore::init(size_t last_commited_log_index, size_t logs_to_keep)
{
std::lock_guard lock(changelog_lock);
changelog.readChangelogAndInitWriter(last_commited_log_index, logs_to_keep);
}
size_t NuKeeperLogStore::next_slot() const
size_t KeeperLogStore::next_slot() const
{
std::lock_guard lock(changelog_lock);
return changelog.getNextEntryIndex();
}
nuraft::ptr<nuraft::log_entry> NuKeeperLogStore::last_entry() const
nuraft::ptr<nuraft::log_entry> KeeperLogStore::last_entry() const
{
std::lock_guard lock(changelog_lock);
return changelog.getLastEntry();
}
size_t NuKeeperLogStore::append(nuraft::ptr<nuraft::log_entry> & entry)
size_t KeeperLogStore::append(nuraft::ptr<nuraft::log_entry> & entry)
{
std::lock_guard lock(changelog_lock);
size_t idx = changelog.getNextEntryIndex();
@ -43,25 +43,25 @@ size_t NuKeeperLogStore::append(nuraft::ptr<nuraft::log_entry> & entry)
}
void NuKeeperLogStore::write_at(size_t index, nuraft::ptr<nuraft::log_entry> & entry)
void KeeperLogStore::write_at(size_t index, nuraft::ptr<nuraft::log_entry> & entry)
{
std::lock_guard lock(changelog_lock);
changelog.writeAt(index, entry, force_sync);
}
nuraft::ptr<std::vector<nuraft::ptr<nuraft::log_entry>>> NuKeeperLogStore::log_entries(size_t start, size_t end)
nuraft::ptr<std::vector<nuraft::ptr<nuraft::log_entry>>> KeeperLogStore::log_entries(size_t start, size_t end)
{
std::lock_guard lock(changelog_lock);
return changelog.getLogEntriesBetween(start, end);
}
nuraft::ptr<nuraft::log_entry> NuKeeperLogStore::entry_at(size_t index)
nuraft::ptr<nuraft::log_entry> KeeperLogStore::entry_at(size_t index)
{
std::lock_guard lock(changelog_lock);
return changelog.entryAt(index);
}
size_t NuKeeperLogStore::term_at(size_t index)
size_t KeeperLogStore::term_at(size_t index)
{
std::lock_guard lock(changelog_lock);
auto entry = changelog.entryAt(index);
@ -70,33 +70,33 @@ size_t NuKeeperLogStore::term_at(size_t index)
return 0;
}
nuraft::ptr<nuraft::buffer> NuKeeperLogStore::pack(size_t index, int32_t cnt)
nuraft::ptr<nuraft::buffer> KeeperLogStore::pack(size_t index, int32_t cnt)
{
std::lock_guard lock(changelog_lock);
return changelog.serializeEntriesToBuffer(index, cnt);
}
bool NuKeeperLogStore::compact(size_t last_log_index)
bool KeeperLogStore::compact(size_t last_log_index)
{
std::lock_guard lock(changelog_lock);
changelog.compact(last_log_index);
return true;
}
bool NuKeeperLogStore::flush()
bool KeeperLogStore::flush()
{
std::lock_guard lock(changelog_lock);
changelog.flush();
return true;
}
void NuKeeperLogStore::apply_pack(size_t index, nuraft::buffer & pack)
void KeeperLogStore::apply_pack(size_t index, nuraft::buffer & pack)
{
std::lock_guard lock(changelog_lock);
changelog.applyEntriesFromBuffer(index, pack, force_sync);
}
size_t NuKeeperLogStore::size() const
size_t KeeperLogStore::size() const
{
std::lock_guard lock(changelog_lock);
return changelog.size();
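Beyond the rename, every method in this file follows the same shape. A minimal sketch of the pattern (Changelog here is a stand-in, not the real class): a mutex-guarded facade gives the single-threaded changelog a thread-safe interface.

#include <cstddef>
#include <mutex>

struct Changelog  /// stand-in for the real Changelog
{
    size_t start = 1;
    size_t next = 1;
};

class LogStoreFacade
{
    mutable std::mutex changelog_lock;
    Changelog changelog;
public:
    size_t start_index() const
    {
        std::lock_guard lock(changelog_lock);  /// every public method takes the same lock
        return changelog.start;
    }
    size_t next_slot() const
    {
        std::lock_guard lock(changelog_lock);
        return changelog.next;
    }
};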

View File

@ -9,10 +9,10 @@
namespace DB
{
class NuKeeperLogStore : public nuraft::log_store
class KeeperLogStore : public nuraft::log_store
{
public:
NuKeeperLogStore(const std::string & changelogs_path, size_t rotate_interval_, bool force_sync_);
KeeperLogStore(const std::string & changelogs_path, size_t rotate_interval_, bool force_sync_);
void init(size_t last_commited_log_index, size_t logs_to_keep);

View File

@ -1,7 +1,7 @@
#include <Coordination/NuKeeperServer.h>
#include <Coordination/KeeperServer.h>
#include <Coordination/LoggerWrapper.h>
#include <Coordination/NuKeeperStateMachine.h>
#include <Coordination/NuKeeperStateManager.h>
#include <Coordination/KeeperStateMachine.h>
#include <Coordination/KeeperStateManager.h>
#include <Coordination/WriteBufferFromNuraftBuffer.h>
#include <Coordination/ReadBufferFromNuraftBuffer.h>
#include <IO/ReadHelpers.h>
@ -18,7 +18,7 @@ namespace ErrorCodes
extern const int RAFT_ERROR;
}
NuKeeperServer::NuKeeperServer(
KeeperServer::KeeperServer(
int server_id_,
const CoordinationSettingsPtr & coordination_settings_,
const Poco::Util::AbstractConfiguration & config,
@ -26,15 +26,18 @@ NuKeeperServer::NuKeeperServer(
SnapshotsQueue & snapshots_queue_)
: server_id(server_id_)
, coordination_settings(coordination_settings_)
, state_machine(nuraft::cs_new<NuKeeperStateMachine>(responses_queue_, snapshots_queue_, config.getString("test_keeper_server.snapshot_storage_path", config.getString("path", DBMS_DEFAULT_PATH) + "coordination/snapshots"), coordination_settings))
, state_manager(nuraft::cs_new<NuKeeperStateManager>(server_id, "test_keeper_server", config, coordination_settings))
, state_machine(nuraft::cs_new<KeeperStateMachine>(
responses_queue_, snapshots_queue_,
config.getString("keeper_server.snapshot_storage_path", config.getString("path", DBMS_DEFAULT_PATH) + "coordination/snapshots"),
coordination_settings))
, state_manager(nuraft::cs_new<KeeperStateManager>(server_id, "keeper_server", config, coordination_settings))
, responses_queue(responses_queue_)
{
if (coordination_settings->quorum_reads)
LOG_WARNING(&Poco::Logger::get("NuKeeperServer"), "Quorum reads enabled, NuKeeper will work slower.");
LOG_WARNING(&Poco::Logger::get("KeeperServer"), "Quorum reads enabled, Keeper will work slower.");
}
void NuKeeperServer::startup()
void KeeperServer::startup()
{
state_machine->init();
@ -84,13 +87,13 @@ void NuKeeperServer::startup()
throw Exception(ErrorCodes::RAFT_ERROR, "Cannot allocate RAFT instance");
}
void NuKeeperServer::shutdown()
void KeeperServer::shutdown()
{
state_machine->shutdownStorage();
state_manager->flushLogStore();
auto timeout = coordination_settings->shutdown_timeout.totalSeconds();
if (!launcher.shutdown(timeout))
LOG_WARNING(&Poco::Logger::get("NuKeeperServer"), "Failed to shutdown RAFT server in {} seconds", timeout);
LOG_WARNING(&Poco::Logger::get("KeeperServer"), "Failed to shutdown RAFT server in {} seconds", timeout);
}
namespace
@ -106,7 +109,7 @@ nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(int64_t session_id, const Coord
}
void NuKeeperServer::putRequest(const NuKeeperStorage::RequestForSession & request_for_session)
void KeeperServer::putRequest(const KeeperStorage::RequestForSession & request_for_session)
{
auto [session_id, request] = request_for_session;
if (!coordination_settings->quorum_reads && isLeaderAlive() && request->isReadRequest())
@ -123,29 +126,29 @@ void NuKeeperServer::putRequest(const NuKeeperStorage::RequestForSession & reque
auto result = raft_instance->append_entries(entries);
if (!result->get_accepted())
{
NuKeeperStorage::ResponsesForSessions responses;
KeeperStorage::ResponsesForSessions responses;
auto response = request->makeResponse();
response->xid = request->xid;
response->zxid = 0;
response->error = Coordination::Error::ZOPERATIONTIMEOUT;
responses_queue.push(DB::NuKeeperStorage::ResponseForSession{session_id, response});
responses_queue.push(DB::KeeperStorage::ResponseForSession{session_id, response});
}
if (result->get_result_code() == nuraft::cmd_result_code::TIMEOUT)
{
NuKeeperStorage::ResponsesForSessions responses;
KeeperStorage::ResponsesForSessions responses;
auto response = request->makeResponse();
response->xid = request->xid;
response->zxid = 0;
response->error = Coordination::Error::ZOPERATIONTIMEOUT;
responses_queue.push(DB::NuKeeperStorage::ResponseForSession{session_id, response});
responses_queue.push(DB::KeeperStorage::ResponseForSession{session_id, response});
}
else if (result->get_result_code() != nuraft::cmd_result_code::OK)
throw Exception(ErrorCodes::RAFT_ERROR, "Requests result failed with code {} and message: '{}'", result->get_result_code(), result->get_result_str());
}
}
int64_t NuKeeperServer::getSessionID(int64_t session_timeout_ms)
int64_t KeeperServer::getSessionID(int64_t session_timeout_ms)
{
auto entry = nuraft::buffer::alloc(sizeof(int64_t));
/// Just special session request
@ -170,17 +173,17 @@ int64_t NuKeeperServer::getSessionID(int64_t session_timeout_ms)
return bs_resp.get_i64();
}
bool NuKeeperServer::isLeader() const
bool KeeperServer::isLeader() const
{
return raft_instance->is_leader();
}
bool NuKeeperServer::isLeaderAlive() const
bool KeeperServer::isLeaderAlive() const
{
return raft_instance->is_leader_alive();
}
nuraft::cb_func::ReturnCode NuKeeperServer::callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * /* param */)
nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * /* param */)
{
size_t last_commited = state_machine->last_commit_index();
size_t next_index = state_manager->getLogStore()->next_slot();
@ -240,7 +243,7 @@ nuraft::cb_func::ReturnCode NuKeeperServer::callbackFunc(nuraft::cb_func::Type t
}
}
void NuKeeperServer::waitInit()
void KeeperServer::waitInit()
{
std::unique_lock lock(initialized_mutex);
int64_t timeout = coordination_settings->startup_timeout.totalMilliseconds();
@ -248,7 +251,7 @@ void NuKeeperServer::waitInit()
throw Exception(ErrorCodes::RAFT_ERROR, "Failed to wait RAFT initialization");
}
std::unordered_set<int64_t> NuKeeperServer::getDeadSessions()
std::unordered_set<int64_t> KeeperServer::getDeadSessions()
{
return state_machine->getDeadSessions();
}

View File

@ -2,25 +2,25 @@
#include <libnuraft/nuraft.hxx> // Y_IGNORE
#include <Coordination/InMemoryLogStore.h>
#include <Coordination/NuKeeperStateManager.h>
#include <Coordination/NuKeeperStateMachine.h>
#include <Coordination/NuKeeperStorage.h>
#include <Coordination/KeeperStateManager.h>
#include <Coordination/KeeperStateMachine.h>
#include <Coordination/KeeperStorage.h>
#include <Coordination/CoordinationSettings.h>
#include <unordered_map>
namespace DB
{
class NuKeeperServer
class KeeperServer
{
private:
int server_id;
CoordinationSettingsPtr coordination_settings;
nuraft::ptr<NuKeeperStateMachine> state_machine;
nuraft::ptr<KeeperStateMachine> state_machine;
nuraft::ptr<NuKeeperStateManager> state_manager;
nuraft::ptr<KeeperStateManager> state_manager;
nuraft::raft_launcher launcher;
@ -38,7 +38,7 @@ private:
nuraft::cb_func::ReturnCode callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * param);
public:
NuKeeperServer(
KeeperServer(
int server_id_,
const CoordinationSettingsPtr & coordination_settings_,
const Poco::Util::AbstractConfiguration & config,
@ -47,7 +47,7 @@ public:
void startup();
void putRequest(const NuKeeperStorage::RequestForSession & request);
void putRequest(const KeeperStorage::RequestForSession & request);
int64_t getSessionID(int64_t session_timeout_ms);

View File

@ -1,4 +1,4 @@
#include <Coordination/NuKeeperSnapshotManager.h>
#include <Coordination/KeeperSnapshotManager.h>
#include <IO/WriteHelpers.h>
#include <Compression/CompressedReadBuffer.h>
#include <Compression/CompressedWriteBuffer.h>
@ -51,7 +51,7 @@ namespace
return "/";
}
void writeNode(const NuKeeperStorage::Node & node, WriteBuffer & out)
void writeNode(const KeeperStorage::Node & node, WriteBuffer & out)
{
writeBinary(node.data, out);
@ -81,7 +81,7 @@ namespace
writeBinary(node.seq_num, out);
}
void readNode(NuKeeperStorage::Node & node, ReadBuffer & in)
void readNode(KeeperStorage::Node & node, ReadBuffer & in)
{
readBinary(node.data, in);
@ -132,7 +132,7 @@ namespace
}
void NuKeeperStorageSnapshot::serialize(const NuKeeperStorageSnapshot & snapshot, WriteBuffer & out)
void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, WriteBuffer & out)
{
writeBinary(static_cast<uint8_t>(snapshot.version), out);
serializeSnapshotMetadata(snapshot.snapshot_meta, out);
@ -159,7 +159,7 @@ void NuKeeperStorageSnapshot::serialize(const NuKeeperStorageSnapshot & snapshot
}
}
SnapshotMetadataPtr NuKeeperStorageSnapshot::deserialize(NuKeeperStorage & storage, ReadBuffer & in)
SnapshotMetadataPtr KeeperStorageSnapshot::deserialize(KeeperStorage & storage, ReadBuffer & in)
{
uint8_t version;
readBinary(version, in);
@ -180,7 +180,7 @@ SnapshotMetadataPtr NuKeeperStorageSnapshot::deserialize(NuKeeperStorage & stora
{
std::string path;
readBinary(path, in);
NuKeeperStorage::Node node;
KeeperStorage::Node node;
readNode(node, in);
storage.container.insertOrReplace(path, node);
if (node.stat.ephemeralOwner != 0)
@ -194,7 +194,7 @@ SnapshotMetadataPtr NuKeeperStorageSnapshot::deserialize(NuKeeperStorage & stora
if (itr.key != "/")
{
auto parent_path = parentPath(itr.key);
storage.container.updateValue(parent_path, [&path = itr.key] (NuKeeperStorage::Node & value) { value.children.insert(getBaseName(path)); });
storage.container.updateValue(parent_path, [&path = itr.key] (KeeperStorage::Node & value) { value.children.insert(getBaseName(path)); });
}
}
@ -214,7 +214,7 @@ SnapshotMetadataPtr NuKeeperStorageSnapshot::deserialize(NuKeeperStorage & stora
return result;
}
NuKeeperStorageSnapshot::NuKeeperStorageSnapshot(NuKeeperStorage * storage_, size_t up_to_log_idx_)
KeeperStorageSnapshot::KeeperStorageSnapshot(KeeperStorage * storage_, size_t up_to_log_idx_)
: storage(storage_)
, snapshot_meta(std::make_shared<SnapshotMetadata>(up_to_log_idx_, 0, std::make_shared<nuraft::cluster_config>()))
, session_id(storage->session_id_counter)
@ -225,7 +225,7 @@ NuKeeperStorageSnapshot::NuKeeperStorageSnapshot(NuKeeperStorage * storage_, siz
session_and_timeout = storage->getActiveSessions();
}
NuKeeperStorageSnapshot::NuKeeperStorageSnapshot(NuKeeperStorage * storage_, const SnapshotMetadataPtr & snapshot_meta_)
KeeperStorageSnapshot::KeeperStorageSnapshot(KeeperStorage * storage_, const SnapshotMetadataPtr & snapshot_meta_)
: storage(storage_)
, snapshot_meta(snapshot_meta_)
, session_id(storage->session_id_counter)
@ -236,12 +236,12 @@ NuKeeperStorageSnapshot::NuKeeperStorageSnapshot(NuKeeperStorage * storage_, con
session_and_timeout = storage->getActiveSessions();
}
NuKeeperStorageSnapshot::~NuKeeperStorageSnapshot()
KeeperStorageSnapshot::~KeeperStorageSnapshot()
{
storage->disableSnapshotMode();
}
NuKeeperSnapshotManager::NuKeeperSnapshotManager(const std::string & snapshots_path_, size_t snapshots_to_keep_, size_t storage_tick_time_)
KeeperSnapshotManager::KeeperSnapshotManager(const std::string & snapshots_path_, size_t snapshots_to_keep_, size_t storage_tick_time_)
: snapshots_path(snapshots_path_)
, snapshots_to_keep(snapshots_to_keep_)
, storage_tick_time(storage_tick_time_)
@ -266,7 +266,7 @@ NuKeeperSnapshotManager::NuKeeperSnapshotManager(const std::string & snapshots_p
}
std::string NuKeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer & buffer, size_t up_to_log_idx)
std::string KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer & buffer, size_t up_to_log_idx)
{
ReadBufferFromNuraftBuffer reader(buffer);
@ -287,7 +287,7 @@ std::string NuKeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffe
return new_snapshot_path;
}
nuraft::ptr<nuraft::buffer> NuKeeperSnapshotManager::deserializeLatestSnapshotBufferFromDisk()
nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::deserializeLatestSnapshotBufferFromDisk()
{
while (!existing_snapshots.empty())
{
@ -307,7 +307,7 @@ nuraft::ptr<nuraft::buffer> NuKeeperSnapshotManager::deserializeLatestSnapshotBu
return nullptr;
}
nuraft::ptr<nuraft::buffer> NuKeeperSnapshotManager::deserializeSnapshotBufferFromDisk(size_t up_to_log_idx) const
nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::deserializeSnapshotBufferFromDisk(size_t up_to_log_idx) const
{
const std::string & snapshot_path = existing_snapshots.at(up_to_log_idx);
WriteBufferFromNuraftBuffer writer;
@ -316,26 +316,26 @@ nuraft::ptr<nuraft::buffer> NuKeeperSnapshotManager::deserializeSnapshotBufferFr
return writer.getBuffer();
}
nuraft::ptr<nuraft::buffer> NuKeeperSnapshotManager::serializeSnapshotToBuffer(const NuKeeperStorageSnapshot & snapshot)
nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::serializeSnapshotToBuffer(const KeeperStorageSnapshot & snapshot)
{
WriteBufferFromNuraftBuffer writer;
CompressedWriteBuffer compressed_writer(writer);
NuKeeperStorageSnapshot::serialize(snapshot, compressed_writer);
KeeperStorageSnapshot::serialize(snapshot, compressed_writer);
compressed_writer.finalize();
return writer.getBuffer();
}
SnapshotMetaAndStorage NuKeeperSnapshotManager::deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const
SnapshotMetaAndStorage KeeperSnapshotManager::deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const
{
ReadBufferFromNuraftBuffer reader(buffer);
CompressedReadBuffer compressed_reader(reader);
auto storage = std::make_unique<NuKeeperStorage>(storage_tick_time);
auto snapshot_metadata = NuKeeperStorageSnapshot::deserialize(*storage, compressed_reader);
auto storage = std::make_unique<KeeperStorage>(storage_tick_time);
auto snapshot_metadata = KeeperStorageSnapshot::deserialize(*storage, compressed_reader);
return std::make_pair(snapshot_metadata, std::move(storage));
}
SnapshotMetaAndStorage NuKeeperSnapshotManager::restoreFromLatestSnapshot()
SnapshotMetaAndStorage KeeperSnapshotManager::restoreFromLatestSnapshot()
{
if (existing_snapshots.empty())
return {};
@ -346,13 +346,13 @@ SnapshotMetaAndStorage NuKeeperSnapshotManager::restoreFromLatestSnapshot()
return deserializeSnapshotFromBuffer(buffer);
}
void NuKeeperSnapshotManager::removeOutdatedSnapshotsIfNeeded()
void KeeperSnapshotManager::removeOutdatedSnapshotsIfNeeded()
{
while (existing_snapshots.size() > snapshots_to_keep)
removeSnapshot(existing_snapshots.begin()->first);
}
void NuKeeperSnapshotManager::removeSnapshot(size_t log_idx)
void KeeperSnapshotManager::removeSnapshot(size_t log_idx)
{
auto itr = existing_snapshots.find(log_idx);
if (itr == existing_snapshots.end())

View File

@ -1,6 +1,6 @@
#pragma once
#include <libnuraft/nuraft.hxx> // Y_IGNORE
#include <Coordination/NuKeeperStorage.h>
#include <Coordination/KeeperStorage.h>
#include <IO/WriteBuffer.h>
#include <IO/ReadBuffer.h>
@ -15,42 +15,42 @@ enum SnapshotVersion : uint8_t
V0 = 0,
};
struct NuKeeperStorageSnapshot
struct KeeperStorageSnapshot
{
public:
NuKeeperStorageSnapshot(NuKeeperStorage * storage_, size_t up_to_log_idx_);
KeeperStorageSnapshot(KeeperStorage * storage_, size_t up_to_log_idx_);
NuKeeperStorageSnapshot(NuKeeperStorage * storage_, const SnapshotMetadataPtr & snapshot_meta_);
~NuKeeperStorageSnapshot();
KeeperStorageSnapshot(KeeperStorage * storage_, const SnapshotMetadataPtr & snapshot_meta_);
~KeeperStorageSnapshot();
static void serialize(const NuKeeperStorageSnapshot & snapshot, WriteBuffer & out);
static void serialize(const KeeperStorageSnapshot & snapshot, WriteBuffer & out);
static SnapshotMetadataPtr deserialize(NuKeeperStorage & storage, ReadBuffer & in);
static SnapshotMetadataPtr deserialize(KeeperStorage & storage, ReadBuffer & in);
NuKeeperStorage * storage;
KeeperStorage * storage;
SnapshotVersion version = SnapshotVersion::V0;
SnapshotMetadataPtr snapshot_meta;
int64_t session_id;
size_t snapshot_container_size;
NuKeeperStorage::Container::const_iterator begin;
KeeperStorage::Container::const_iterator begin;
SessionAndTimeout session_and_timeout;
};
using NuKeeperStorageSnapshotPtr = std::shared_ptr<NuKeeperStorageSnapshot>;
using CreateSnapshotCallback = std::function<void(NuKeeperStorageSnapshotPtr &&)>;
using KeeperStorageSnapshotPtr = std::shared_ptr<KeeperStorageSnapshot>;
using CreateSnapshotCallback = std::function<void(KeeperStorageSnapshotPtr &&)>;
using SnapshotMetaAndStorage = std::pair<SnapshotMetadataPtr, NuKeeperStoragePtr>;
using SnapshotMetaAndStorage = std::pair<SnapshotMetadataPtr, KeeperStoragePtr>;
class NuKeeperSnapshotManager
class KeeperSnapshotManager
{
public:
NuKeeperSnapshotManager(const std::string & snapshots_path_, size_t snapshots_to_keep_, size_t storage_tick_time_ = 500);
KeeperSnapshotManager(const std::string & snapshots_path_, size_t snapshots_to_keep_, size_t storage_tick_time_ = 500);
SnapshotMetaAndStorage restoreFromLatestSnapshot();
static nuraft::ptr<nuraft::buffer> serializeSnapshotToBuffer(const NuKeeperStorageSnapshot & snapshot);
static nuraft::ptr<nuraft::buffer> serializeSnapshotToBuffer(const KeeperStorageSnapshot & snapshot);
std::string serializeSnapshotBufferToDisk(nuraft::buffer & buffer, size_t up_to_log_idx);
SnapshotMetaAndStorage deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const;
@ -82,7 +82,7 @@ private:
struct CreateSnapshotTask
{
NuKeeperStorageSnapshotPtr snapshot;
KeeperStorageSnapshotPtr snapshot;
CreateSnapshotCallback create_snapshot;
};

View File

@ -1,9 +1,9 @@
#include <Coordination/NuKeeperStateMachine.h>
#include <Coordination/KeeperStateMachine.h>
#include <Coordination/ReadBufferFromNuraftBuffer.h>
#include <Coordination/WriteBufferFromNuraftBuffer.h>
#include <IO/ReadHelpers.h>
#include <Common/ZooKeeper/ZooKeeperIO.h>
#include <Coordination/NuKeeperSnapshotManager.h>
#include <Coordination/KeeperSnapshotManager.h>
#include <future>
namespace DB
@ -14,10 +14,10 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
NuKeeperStorage::RequestForSession parseRequest(nuraft::buffer & data)
KeeperStorage::RequestForSession parseRequest(nuraft::buffer & data)
{
ReadBufferFromNuraftBuffer buffer(data);
NuKeeperStorage::RequestForSession request_for_session;
KeeperStorage::RequestForSession request_for_session;
readIntBinary(request_for_session.session_id, buffer);
int32_t length;
@ -36,17 +36,17 @@ NuKeeperStorage::RequestForSession parseRequest(nuraft::buffer & data)
return request_for_session;
}
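parseRequest above reads back what getZooKeeperLogEntry wrote. A simplified sketch of that framing, under the assumption that the entry is just the session id followed by the opaque serialized request (hypothetical helper; std::vector instead of nuraft::buffer):

#include <cstdint>
#include <cstring>
#include <vector>

std::vector<char> makeLogEntry(int64_t session_id, const std::vector<char> & serialized_request)
{
    std::vector<char> buf(sizeof(session_id) + serialized_request.size());
    std::memcpy(buf.data(), &session_id, sizeof(session_id));  /// session id comes first
    std::memcpy(buf.data() + sizeof(session_id), serialized_request.data(), serialized_request.size());
    return buf;
}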
NuKeeperStateMachine::NuKeeperStateMachine(ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, const std::string & snapshots_path_, const CoordinationSettingsPtr & coordination_settings_)
KeeperStateMachine::KeeperStateMachine(ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, const std::string & snapshots_path_, const CoordinationSettingsPtr & coordination_settings_)
: coordination_settings(coordination_settings_)
, snapshot_manager(snapshots_path_, coordination_settings->snapshots_to_keep, coordination_settings->dead_session_check_period_ms.totalMicroseconds())
, responses_queue(responses_queue_)
, snapshots_queue(snapshots_queue_)
, last_committed_idx(0)
, log(&Poco::Logger::get("NuKeeperStateMachine"))
, log(&Poco::Logger::get("KeeperStateMachine"))
{
}
void NuKeeperStateMachine::init()
void KeeperStateMachine::init()
{
/// Do everything without mutexes, no other threads exist.
LOG_DEBUG(log, "Totally have {} snapshots", snapshot_manager.totalSnapshots());
@ -85,10 +85,10 @@ void NuKeeperStateMachine::init()
}
if (!storage)
storage = std::make_unique<NuKeeperStorage>(coordination_settings->dead_session_check_period_ms.totalMilliseconds());
storage = std::make_unique<KeeperStorage>(coordination_settings->dead_session_check_period_ms.totalMilliseconds());
}
nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, nuraft::buffer & data)
nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const size_t log_idx, nuraft::buffer & data)
{
if (data.size() == sizeof(int64_t))
{
@ -109,7 +109,7 @@ nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, n
else
{
auto request_for_session = parseRequest(data);
NuKeeperStorage::ResponsesForSessions responses_for_sessions;
KeeperStorage::ResponsesForSessions responses_for_sessions;
{
std::lock_guard lock(storage_lock);
responses_for_sessions = storage->processRequest(request_for_session.request, request_for_session.session_id, log_idx);
@ -122,7 +122,7 @@ nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, n
}
}
bool NuKeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
{
LOG_DEBUG(log, "Applying snapshot {}", s.get_last_log_idx());
nuraft::ptr<nuraft::buffer> latest_snapshot_ptr;
@ -142,14 +142,14 @@ bool NuKeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
return true;
}
nuraft::ptr<nuraft::snapshot> NuKeeperStateMachine::last_snapshot()
nuraft::ptr<nuraft::snapshot> KeeperStateMachine::last_snapshot()
{
/// Just return the latest snapshot.
std::lock_guard<std::mutex> lock(snapshots_lock);
return latest_snapshot_meta;
}
void NuKeeperStateMachine::create_snapshot(
void KeeperStateMachine::create_snapshot(
nuraft::snapshot & s,
nuraft::async_result<bool>::handler_type & when_done)
{
@ -160,10 +160,10 @@ void NuKeeperStateMachine::create_snapshot(
CreateSnapshotTask snapshot_task;
{
std::lock_guard lock(storage_lock);
snapshot_task.snapshot = std::make_shared<NuKeeperStorageSnapshot>(storage.get(), snapshot_meta_copy);
snapshot_task.snapshot = std::make_shared<KeeperStorageSnapshot>(storage.get(), snapshot_meta_copy);
}
snapshot_task.create_snapshot = [this, when_done] (NuKeeperStorageSnapshotPtr && snapshot)
snapshot_task.create_snapshot = [this, when_done] (KeeperStorageSnapshotPtr && snapshot)
{
nuraft::ptr<std::exception> exception(nullptr);
bool ret = true;
@ -203,7 +203,7 @@ void NuKeeperStateMachine::create_snapshot(
snapshots_queue.push(std::move(snapshot_task));
}
void NuKeeperStateMachine::save_logical_snp_obj(
void KeeperStateMachine::save_logical_snp_obj(
nuraft::snapshot & s,
size_t & obj_id,
nuraft::buffer & data,
@ -217,7 +217,7 @@ void NuKeeperStateMachine::save_logical_snp_obj(
if (obj_id == 0)
{
std::lock_guard lock(storage_lock);
NuKeeperStorageSnapshot snapshot(storage.get(), s.get_last_log_idx());
KeeperStorageSnapshot snapshot(storage.get(), s.get_last_log_idx());
cloned_buffer = snapshot_manager.serializeSnapshotToBuffer(snapshot);
}
else
@ -235,7 +235,7 @@ void NuKeeperStateMachine::save_logical_snp_obj(
std::shared_ptr<std::promise<void>> waiter = std::make_shared<std::promise<void>>();
auto future = waiter->get_future();
snapshot_task.snapshot = nullptr;
snapshot_task.create_snapshot = [this, waiter, cloned_buffer, log_idx = s.get_last_log_idx()] (NuKeeperStorageSnapshotPtr &&)
snapshot_task.create_snapshot = [this, waiter, cloned_buffer, log_idx = s.get_last_log_idx()] (KeeperStorageSnapshotPtr &&)
{
try
{
@ -261,7 +261,7 @@ void NuKeeperStateMachine::save_logical_snp_obj(
obj_id++;
}
int NuKeeperStateMachine::read_logical_snp_obj(
int KeeperStateMachine::read_logical_snp_obj(
nuraft::snapshot & s,
void* & /*user_snp_ctx*/,
ulong obj_id,
@ -289,9 +289,9 @@ int NuKeeperStateMachine::read_logical_snp_obj(
return 0;
}
void NuKeeperStateMachine::processReadRequest(const NuKeeperStorage::RequestForSession & request_for_session)
void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSession & request_for_session)
{
NuKeeperStorage::ResponsesForSessions responses;
KeeperStorage::ResponsesForSessions responses;
{
std::lock_guard lock(storage_lock);
responses = storage->processRequest(request_for_session.request, request_for_session.session_id, std::nullopt);
@ -300,13 +300,13 @@ void NuKeeperStateMachine::processReadRequest(const NuKeeperStorage::RequestForS
responses_queue.push(response);
}
std::unordered_set<int64_t> NuKeeperStateMachine::getDeadSessions()
std::unordered_set<int64_t> KeeperStateMachine::getDeadSessions()
{
std::lock_guard lock(storage_lock);
return storage->getDeadSessions();
}
void NuKeeperStateMachine::shutdownStorage()
void KeeperStateMachine::shutdownStorage()
{
std::lock_guard lock(storage_lock);
storage->finalize();

View File

@ -1,22 +1,22 @@
#pragma once
#include <Coordination/NuKeeperStorage.h>
#include <Coordination/KeeperStorage.h>
#include <libnuraft/nuraft.hxx> // Y_IGNORE
#include <common/logger_useful.h>
#include <Coordination/ThreadSafeQueue.h>
#include <Coordination/CoordinationSettings.h>
#include <Coordination/NuKeeperSnapshotManager.h>
#include <Coordination/KeeperSnapshotManager.h>
namespace DB
{
using ResponsesQueue = ThreadSafeQueue<NuKeeperStorage::ResponseForSession>;
using ResponsesQueue = ThreadSafeQueue<KeeperStorage::ResponseForSession>;
using SnapshotsQueue = ConcurrentBoundedQueue<CreateSnapshotTask>;
class NuKeeperStateMachine : public nuraft::state_machine
class KeeperStateMachine : public nuraft::state_machine
{
public:
NuKeeperStateMachine(ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, const std::string & snapshots_path_, const CoordinationSettingsPtr & coordination_settings_);
KeeperStateMachine(ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, const std::string & snapshots_path_, const CoordinationSettingsPtr & coordination_settings_);
void init();
@ -50,12 +50,12 @@ public:
nuraft::ptr<nuraft::buffer> & data_out,
bool & is_last_obj) override;
NuKeeperStorage & getStorage()
KeeperStorage & getStorage()
{
return *storage;
}
void processReadRequest(const NuKeeperStorage::RequestForSession & request_for_session);
void processReadRequest(const KeeperStorage::RequestForSession & request_for_session);
std::unordered_set<int64_t> getDeadSessions();
@ -68,9 +68,9 @@ private:
CoordinationSettingsPtr coordination_settings;
NuKeeperStoragePtr storage;
KeeperStoragePtr storage;
NuKeeperSnapshotManager snapshot_manager;
KeeperSnapshotManager snapshot_manager;
ResponsesQueue & responses_queue;

View File

@ -1,4 +1,4 @@
#include <Coordination/NuKeeperStateManager.h>
#include <Coordination/KeeperStateManager.h>
#include <Common/Exception.h>
namespace DB
@ -9,23 +9,23 @@ namespace ErrorCodes
extern const int RAFT_ERROR;
}
NuKeeperStateManager::NuKeeperStateManager(int server_id_, const std::string & host, int port, const std::string & logs_path)
KeeperStateManager::KeeperStateManager(int server_id_, const std::string & host, int port, const std::string & logs_path)
: my_server_id(server_id_)
, my_port(port)
, log_store(nuraft::cs_new<NuKeeperLogStore>(logs_path, 5000, false))
, log_store(nuraft::cs_new<KeeperLogStore>(logs_path, 5000, false))
, cluster_config(nuraft::cs_new<nuraft::cluster_config>())
{
auto peer_config = nuraft::cs_new<nuraft::srv_config>(my_server_id, host + ":" + std::to_string(port));
cluster_config->get_servers().push_back(peer_config);
}
NuKeeperStateManager::NuKeeperStateManager(
KeeperStateManager::KeeperStateManager(
int my_server_id_,
const std::string & config_prefix,
const Poco::Util::AbstractConfiguration & config,
const CoordinationSettingsPtr & coordination_settings)
: my_server_id(my_server_id_)
, log_store(nuraft::cs_new<NuKeeperLogStore>(
, log_store(nuraft::cs_new<KeeperLogStore>(
config.getString(config_prefix + ".log_storage_path", config.getString("path", DBMS_DEFAULT_PATH) + "coordination/logs"),
coordination_settings->rotate_log_storage_interval, coordination_settings->force_sync))
, cluster_config(nuraft::cs_new<nuraft::cluster_config>())
@ -64,17 +64,17 @@ NuKeeperStateManager::NuKeeperStateManager(
throw Exception(ErrorCodes::RAFT_ERROR, "At least one of servers should be able to start as leader (without <start_as_follower>)");
}
void NuKeeperStateManager::loadLogStore(size_t last_commited_index, size_t logs_to_keep)
void KeeperStateManager::loadLogStore(size_t last_commited_index, size_t logs_to_keep)
{
log_store->init(last_commited_index, logs_to_keep);
}
void NuKeeperStateManager::flushLogStore()
void KeeperStateManager::flushLogStore()
{
log_store->flush();
}
void NuKeeperStateManager::save_config(const nuraft::cluster_config & config)
void KeeperStateManager::save_config(const nuraft::cluster_config & config)
{
// Just keep in memory in this example.
// Need to write to disk here, if you want to make it durable.
@ -82,7 +82,7 @@ void NuKeeperStateManager::save_config(const nuraft::cluster_config & config)
cluster_config = nuraft::cluster_config::deserialize(*buf);
}
void NuKeeperStateManager::save_state(const nuraft::srv_state & state)
void KeeperStateManager::save_state(const nuraft::srv_state & state)
{
// Just keep in memory in this example.
// Need to write to disk here, if you want to make it durable.

View File

@ -2,7 +2,7 @@
#include <Core/Types.h>
#include <string>
#include <Coordination/NuKeeperLogStore.h>
#include <Coordination/KeeperLogStore.h>
#include <Coordination/CoordinationSettings.h>
#include <libnuraft/nuraft.hxx> // Y_IGNORE
#include <Poco/Util/AbstractConfiguration.h>
@ -10,16 +10,16 @@
namespace DB
{
class NuKeeperStateManager : public nuraft::state_mgr
class KeeperStateManager : public nuraft::state_mgr
{
public:
NuKeeperStateManager(
KeeperStateManager(
int server_id_,
const std::string & config_prefix,
const Poco::Util::AbstractConfiguration & config,
const CoordinationSettingsPtr & coordination_settings);
NuKeeperStateManager(
KeeperStateManager(
int server_id_,
const std::string & host,
int port,
@ -52,7 +52,7 @@ public:
return start_as_follower_servers.count(my_server_id);
}
nuraft::ptr<NuKeeperLogStore> getLogStore() const { return log_store; }
nuraft::ptr<KeeperLogStore> getLogStore() const { return log_store; }
size_t getTotalServers() const { return total_servers; }
@ -61,7 +61,7 @@ private:
int my_port;
size_t total_servers{0};
std::unordered_set<int> start_as_follower_servers;
nuraft::ptr<NuKeeperLogStore> log_store;
nuraft::ptr<KeeperLogStore> log_store;
nuraft::ptr<nuraft::srv_config> my_server_config;
nuraft::ptr<nuraft::cluster_config> cluster_config;
nuraft::ptr<nuraft::srv_state> server_state;

View File

@ -1,4 +1,4 @@
#include <Coordination/NuKeeperStorage.h>
#include <Coordination/KeeperStorage.h>
#include <Common/ZooKeeper/IKeeper.h>
#include <Common/setThreadName.h>
#include <mutex>
@ -31,9 +31,9 @@ static std::string getBaseName(const String & path)
return std::string{&path[basename_start + 1], path.length() - basename_start - 1};
}
static NuKeeperStorage::ResponsesForSessions processWatchesImpl(const String & path, NuKeeperStorage::Watches & watches, NuKeeperStorage::Watches & list_watches, Coordination::Event event_type)
static KeeperStorage::ResponsesForSessions processWatchesImpl(const String & path, KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches, Coordination::Event event_type)
{
NuKeeperStorage::ResponsesForSessions result;
KeeperStorage::ResponsesForSessions result;
auto it = watches.find(path);
if (it != watches.end())
{
@ -44,7 +44,7 @@ static NuKeeperStorage::ResponsesForSessions processWatchesImpl(const String & p
watch_response->type = event_type;
watch_response->state = Coordination::State::CONNECTED;
for (auto watcher_session : it->second)
result.push_back(NuKeeperStorage::ResponseForSession{watcher_session, watch_response});
result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_response});
watches.erase(it);
}
@ -60,14 +60,14 @@ static NuKeeperStorage::ResponsesForSessions processWatchesImpl(const String & p
watch_list_response->type = Coordination::Event::CHILD;
watch_list_response->state = Coordination::State::CONNECTED;
for (auto watcher_session : it->second)
result.push_back(NuKeeperStorage::ResponseForSession{watcher_session, watch_list_response});
result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_list_response});
list_watches.erase(it);
}
return result;
}
NuKeeperStorage::NuKeeperStorage(int64_t tick_time_ms)
KeeperStorage::KeeperStorage(int64_t tick_time_ms)
: session_expiry_queue(tick_time_ms)
{
container.insert("/", Node());
@ -75,32 +75,32 @@ NuKeeperStorage::NuKeeperStorage(int64_t tick_time_ms)
using Undo = std::function<void()>;
struct NuKeeperStorageRequest
struct KeeperStorageRequest
{
Coordination::ZooKeeperRequestPtr zk_request;
explicit NuKeeperStorageRequest(const Coordination::ZooKeeperRequestPtr & zk_request_)
explicit KeeperStorageRequest(const Coordination::ZooKeeperRequestPtr & zk_request_)
: zk_request(zk_request_)
{}
virtual std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & ephemerals, int64_t zxid, int64_t session_id) const = 0;
virtual NuKeeperStorage::ResponsesForSessions processWatches(NuKeeperStorage::Watches & /*watches*/, NuKeeperStorage::Watches & /*list_watches*/) const { return {}; }
virtual std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage::Container & container, KeeperStorage::Ephemerals & ephemerals, int64_t zxid, int64_t session_id) const = 0;
virtual KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & /*watches*/, KeeperStorage::Watches & /*list_watches*/) const { return {}; }
virtual ~NuKeeperStorageRequest() = default;
virtual ~KeeperStorageRequest() = default;
};
struct NuKeeperStorageHeartbeatRequest final : public NuKeeperStorageRequest
struct KeeperStorageHeartbeatRequest final : public KeeperStorageRequest
{
using NuKeeperStorageRequest::NuKeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & /* container */, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t /* zxid */, int64_t /* session_id */) const override
using KeeperStorageRequest::KeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage::Container & /* container */, KeeperStorage::Ephemerals & /* ephemerals */, int64_t /* zxid */, int64_t /* session_id */) const override
{
return {zk_request->makeResponse(), {}};
}
};
struct NuKeeperStorageSyncRequest final : public NuKeeperStorageRequest
struct KeeperStorageSyncRequest final : public KeeperStorageRequest
{
using NuKeeperStorageRequest::NuKeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & /* container */, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t /* zxid */, int64_t /* session_id */) const override
using KeeperStorageRequest::KeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage::Container & /* container */, KeeperStorage::Ephemerals & /* ephemerals */, int64_t /* zxid */, int64_t /* session_id */) const override
{
auto response = zk_request->makeResponse();
dynamic_cast<Coordination::ZooKeeperSyncResponse *>(response.get())->path = dynamic_cast<Coordination::ZooKeeperSyncRequest *>(zk_request.get())->path;
@ -108,16 +108,16 @@ struct NuKeeperStorageSyncRequest final : public NuKeeperStorageRequest
}
};
struct NuKeeperStorageCreateRequest final : public NuKeeperStorageRequest
struct KeeperStorageCreateRequest final : public KeeperStorageRequest
{
using NuKeeperStorageRequest::NuKeeperStorageRequest;
using KeeperStorageRequest::KeeperStorageRequest;
NuKeeperStorage::ResponsesForSessions processWatches(NuKeeperStorage::Watches & watches, NuKeeperStorage::Watches & list_watches) const override
KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override
{
return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CREATED);
}
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & ephemerals, int64_t zxid, int64_t session_id) const override
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage::Container & container, KeeperStorage::Ephemerals & ephemerals, int64_t zxid, int64_t session_id) const override
{
Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
Undo undo;
@ -143,7 +143,7 @@ struct NuKeeperStorageCreateRequest final : public NuKeeperStorageRequest
}
else
{
NuKeeperStorage::Node created_node;
KeeperStorage::Node created_node;
created_node.stat.czxid = zxid;
created_node.stat.mzxid = zxid;
created_node.stat.ctime = std::chrono::system_clock::now().time_since_epoch() / std::chrono::milliseconds(1);
@ -167,7 +167,7 @@ struct NuKeeperStorageCreateRequest final : public NuKeeperStorageRequest
}
auto child_path = getBaseName(path_created);
container.updateValue(parent_path, [child_path] (NuKeeperStorage::Node & parent)
container.updateValue(parent_path, [child_path] (KeeperStorage::Node & parent)
{
/// Increment sequential number even if node is not sequential
/// (The number is incremented even for non-sequential nodes so that it stays monotonic per parent.)
++parent.seq_num;
@ -188,7 +188,7 @@ struct NuKeeperStorageCreateRequest final : public NuKeeperStorageRequest
if (is_ephemeral)
ephemerals[session_id].erase(path_created);
container.updateValue(parent_path, [child_path] (NuKeeperStorage::Node & undo_parent)
container.updateValue(parent_path, [child_path] (KeeperStorage::Node & undo_parent)
{
--undo_parent.stat.cversion;
--undo_parent.stat.numChildren;
@ -205,10 +205,10 @@ struct NuKeeperStorageCreateRequest final : public NuKeeperStorageRequest
}
};
struct NuKeeperStorageGetRequest final : public NuKeeperStorageRequest
struct KeeperStorageGetRequest final : public KeeperStorageRequest
{
using NuKeeperStorageRequest::NuKeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t /* zxid */, int64_t /* session_id */) const override
using KeeperStorageRequest::KeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage::Container & container, KeeperStorage::Ephemerals & /* ephemerals */, int64_t /* zxid */, int64_t /* session_id */) const override
{
Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
Coordination::ZooKeeperGetResponse & response = dynamic_cast<Coordination::ZooKeeperGetResponse &>(*response_ptr);
@ -230,10 +230,10 @@ struct NuKeeperStorageGetRequest final : public NuKeeperStorageRequest
}
};
struct NuKeeperStorageRemoveRequest final : public NuKeeperStorageRequest
struct KeeperStorageRemoveRequest final : public KeeperStorageRequest
{
using NuKeeperStorageRequest::NuKeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & ephemerals, int64_t /*zxid*/, int64_t /*session_id*/) const override
using KeeperStorageRequest::KeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage::Container & container, KeeperStorage::Ephemerals & ephemerals, int64_t /*zxid*/, int64_t /*session_id*/) const override
{
Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
Coordination::ZooKeeperRemoveResponse & response = dynamic_cast<Coordination::ZooKeeperRemoveResponse &>(*response_ptr);
@ -265,7 +265,7 @@ struct NuKeeperStorageRemoveRequest final : public NuKeeperStorageRequest
}
auto child_basename = getBaseName(it->key);
container.updateValue(parentPath(request.path), [&child_basename] (NuKeeperStorage::Node & parent)
container.updateValue(parentPath(request.path), [&child_basename] (KeeperStorage::Node & parent)
{
--parent.stat.numChildren;
++parent.stat.cversion;
@ -282,7 +282,7 @@ struct NuKeeperStorageRemoveRequest final : public NuKeeperStorageRequest
ephemerals[prev_node.stat.ephemeralOwner].emplace(path);
container.insert(path, prev_node);
container.updateValue(parentPath(path), [&child_basename] (NuKeeperStorage::Node & parent)
container.updateValue(parentPath(path), [&child_basename] (KeeperStorage::Node & parent)
{
++parent.stat.numChildren;
--parent.stat.cversion;
@ -294,16 +294,16 @@ struct NuKeeperStorageRemoveRequest final : public NuKeeperStorageRequest
return { response_ptr, undo };
}
NuKeeperStorage::ResponsesForSessions processWatches(NuKeeperStorage::Watches & watches, NuKeeperStorage::Watches & list_watches) const override
KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override
{
return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::DELETED);
}
};
struct NuKeeperStorageExistsRequest final : public NuKeeperStorageRequest
struct KeeperStorageExistsRequest final : public KeeperStorageRequest
{
using NuKeeperStorageRequest::NuKeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t /*zxid*/, int64_t /* session_id */) const override
using KeeperStorageRequest::KeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage::Container & container, KeeperStorage::Ephemerals & /* ephemerals */, int64_t /*zxid*/, int64_t /* session_id */) const override
{
Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
Coordination::ZooKeeperExistsResponse & response = dynamic_cast<Coordination::ZooKeeperExistsResponse &>(*response_ptr);
@ -324,10 +324,10 @@ struct NuKeeperStorageExistsRequest final : public NuKeeperStorageRequest
}
};
struct NuKeeperStorageSetRequest final : public NuKeeperStorageRequest
struct KeeperStorageSetRequest final : public KeeperStorageRequest
{
using NuKeeperStorageRequest::NuKeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t zxid, int64_t /* session_id */) const override
using KeeperStorageRequest::KeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage::Container & container, KeeperStorage::Ephemerals & /* ephemerals */, int64_t zxid, int64_t /* session_id */) const override
{
Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
Coordination::ZooKeeperSetResponse & response = dynamic_cast<Coordination::ZooKeeperSetResponse &>(*response_ptr);
@ -343,7 +343,7 @@ struct NuKeeperStorageSetRequest final : public NuKeeperStorageRequest
{
auto prev_node = it->value;
auto itr = container.updateValue(request.path, [zxid, request] (NuKeeperStorage::Node & value)
auto itr = container.updateValue(request.path, [zxid, request] (KeeperStorage::Node & value)
{
value.data = request.data;
value.stat.version++;
@ -353,7 +353,7 @@ struct NuKeeperStorageSetRequest final : public NuKeeperStorageRequest
value.data = request.data;
});
container.updateValue(parentPath(request.path), [] (NuKeeperStorage::Node & parent)
container.updateValue(parentPath(request.path), [] (KeeperStorage::Node & parent)
{
parent.stat.cversion++;
});
@ -363,8 +363,8 @@ struct NuKeeperStorageSetRequest final : public NuKeeperStorageRequest
undo = [prev_node, &container, path = request.path]
{
container.updateValue(path, [&prev_node] (NuKeeperStorage::Node & value) { value = prev_node; });
container.updateValue(parentPath(path), [] (NuKeeperStorage::Node & parent)
container.updateValue(path, [&prev_node] (KeeperStorage::Node & value) { value = prev_node; });
container.updateValue(parentPath(path), [] (KeeperStorage::Node & parent)
{
parent.stat.cversion--;
});
@ -378,16 +378,16 @@ struct NuKeeperStorageSetRequest final : public NuKeeperStorageRequest
return { response_ptr, undo };
}
NuKeeperStorage::ResponsesForSessions processWatches(NuKeeperStorage::Watches & watches, NuKeeperStorage::Watches & list_watches) const override
KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override
{
return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CHANGED);
}
};
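The process()/Undo pair used by these request structs can be sketched on a toy key-value store (assumed names, not the commit's code): process applies a mutation and returns a closure that reverts it, which is what lets a failed sub-request in a multi-request roll back everything applied before it.
#include <functional>
#include <map>
#include <string>
using Undo = std::function<void()>;
/// Assumes the key already exists; error handling omitted for brevity.
Undo setValue(std::map<std::string, std::string> & container,
              const std::string & key, const std::string & new_value)
{
    std::string prev_value = container.at(key);
    container[key] = new_value;
    /// Capture the previous state by value so the rollback is self-contained.
    return [&container, key, prev_value] { container[key] = prev_value; };
}
On failure, the collected undo closures are invoked in reverse order of application.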
struct NuKeeperStorageListRequest final : public NuKeeperStorageRequest
struct KeeperStorageListRequest final : public KeeperStorageRequest
{
using NuKeeperStorageRequest::NuKeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t /*zxid*/, int64_t /*session_id*/) const override
using KeeperStorageRequest::KeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage::Container & container, KeeperStorage::Ephemerals & /* ephemerals */, int64_t /*zxid*/, int64_t /*session_id*/) const override
{
Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
Coordination::ZooKeeperListResponse & response = dynamic_cast<Coordination::ZooKeeperListResponse &>(*response_ptr);
@ -415,10 +415,10 @@ struct NuKeeperStorageListRequest final : public NuKeeperStorageRequest
}
};
struct NuKeeperStorageCheckRequest final : public NuKeeperStorageRequest
struct KeeperStorageCheckRequest final : public KeeperStorageRequest
{
using NuKeeperStorageRequest::NuKeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & /* ephemerals */, int64_t /*zxid*/, int64_t /*session_id*/) const override
using KeeperStorageRequest::KeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage::Container & container, KeeperStorage::Ephemerals & /* ephemerals */, int64_t /*zxid*/, int64_t /*session_id*/) const override
{
Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
Coordination::ZooKeeperCheckResponse & response = dynamic_cast<Coordination::ZooKeeperCheckResponse &>(*response_ptr);
@ -441,11 +441,11 @@ struct NuKeeperStorageCheckRequest final : public NuKeeperStorageRequest
}
};
struct NuKeeperStorageMultiRequest final : public NuKeeperStorageRequest
struct KeeperStorageMultiRequest final : public KeeperStorageRequest
{
std::vector<NuKeeperStorageRequestPtr> concrete_requests;
explicit NuKeeperStorageMultiRequest(const Coordination::ZooKeeperRequestPtr & zk_request_)
: NuKeeperStorageRequest(zk_request_)
std::vector<KeeperStorageRequestPtr> concrete_requests;
explicit KeeperStorageMultiRequest(const Coordination::ZooKeeperRequestPtr & zk_request_)
: KeeperStorageRequest(zk_request_)
{
Coordination::ZooKeeperMultiRequest & request = dynamic_cast<Coordination::ZooKeeperMultiRequest &>(*zk_request);
concrete_requests.reserve(request.requests.size());
@ -455,26 +455,26 @@ struct NuKeeperStorageMultiRequest final : public NuKeeperStorageRequest
auto sub_zk_request = std::dynamic_pointer_cast<Coordination::ZooKeeperRequest>(sub_request);
if (sub_zk_request->getOpNum() == Coordination::OpNum::Create)
{
concrete_requests.push_back(std::make_shared<NuKeeperStorageCreateRequest>(sub_zk_request));
concrete_requests.push_back(std::make_shared<KeeperStorageCreateRequest>(sub_zk_request));
}
else if (sub_zk_request->getOpNum() == Coordination::OpNum::Remove)
{
concrete_requests.push_back(std::make_shared<NuKeeperStorageRemoveRequest>(sub_zk_request));
concrete_requests.push_back(std::make_shared<KeeperStorageRemoveRequest>(sub_zk_request));
}
else if (sub_zk_request->getOpNum() == Coordination::OpNum::Set)
{
concrete_requests.push_back(std::make_shared<NuKeeperStorageSetRequest>(sub_zk_request));
concrete_requests.push_back(std::make_shared<KeeperStorageSetRequest>(sub_zk_request));
}
else if (sub_zk_request->getOpNum() == Coordination::OpNum::Check)
{
concrete_requests.push_back(std::make_shared<NuKeeperStorageCheckRequest>(sub_zk_request));
concrete_requests.push_back(std::make_shared<KeeperStorageCheckRequest>(sub_zk_request));
}
else
throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal command as part of multi ZooKeeper request {}", sub_zk_request->getOpNum());
}
}
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & ephemerals, int64_t zxid, int64_t session_id) const override
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage::Container & container, KeeperStorage::Ephemerals & ephemerals, int64_t zxid, int64_t session_id) const override
{
Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
Coordination::ZooKeeperMultiResponse & response = dynamic_cast<Coordination::ZooKeeperMultiResponse &>(*response_ptr);
@ -527,9 +527,9 @@ struct NuKeeperStorageMultiRequest final : public NuKeeperStorageRequest
}
}
NuKeeperStorage::ResponsesForSessions processWatches(NuKeeperStorage::Watches & watches, NuKeeperStorage::Watches & list_watches) const override
KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override
{
NuKeeperStorage::ResponsesForSessions result;
KeeperStorage::ResponsesForSessions result;
for (const auto & generic_request : concrete_requests)
{
auto responses = generic_request->processWatches(watches, list_watches);
@ -539,16 +539,16 @@ struct NuKeeperStorageMultiRequest final : public NuKeeperStorageRequest
}
};
struct NuKeeperStorageCloseRequest final : public NuKeeperStorageRequest
struct KeeperStorageCloseRequest final : public KeeperStorageRequest
{
using NuKeeperStorageRequest::NuKeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container &, NuKeeperStorage::Ephemerals &, int64_t, int64_t) const override
using KeeperStorageRequest::KeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage::Container &, KeeperStorage::Ephemerals &, int64_t, int64_t) const override
{
throw DB::Exception("Called process on close request", ErrorCodes::LOGICAL_ERROR);
}
};
void NuKeeperStorage::finalize()
void KeeperStorage::finalize()
{
if (finalized)
throw DB::Exception("Testkeeper storage already finalized", ErrorCodes::LOGICAL_ERROR);
@ -568,20 +568,20 @@ void NuKeeperStorage::finalize()
}
class NuKeeperWrapperFactory final : private boost::noncopyable
class KeeperWrapperFactory final : private boost::noncopyable
{
public:
using Creator = std::function<NuKeeperStorageRequestPtr(const Coordination::ZooKeeperRequestPtr &)>;
using Creator = std::function<KeeperStorageRequestPtr(const Coordination::ZooKeeperRequestPtr &)>;
using OpNumToRequest = std::unordered_map<Coordination::OpNum, Creator>;
static NuKeeperWrapperFactory & instance()
static KeeperWrapperFactory & instance()
{
static NuKeeperWrapperFactory factory;
static KeeperWrapperFactory factory;
return factory;
}
NuKeeperStorageRequestPtr get(const Coordination::ZooKeeperRequestPtr & zk_request) const
KeeperStorageRequestPtr get(const Coordination::ZooKeeperRequestPtr & zk_request) const
{
auto it = op_num_to_request.find(zk_request->getOpNum());
if (it == op_num_to_request.end())
@ -598,37 +598,37 @@ public:
private:
OpNumToRequest op_num_to_request;
NuKeeperWrapperFactory();
KeeperWrapperFactory();
};
template<Coordination::OpNum num, typename RequestT>
void registerNuKeeperRequestWrapper(NuKeeperWrapperFactory & factory)
void registerKeeperRequestWrapper(KeeperWrapperFactory & factory)
{
factory.registerRequest(num, [] (const Coordination::ZooKeeperRequestPtr & zk_request) { return std::make_shared<RequestT>(zk_request); });
}
NuKeeperWrapperFactory::NuKeeperWrapperFactory()
KeeperWrapperFactory::KeeperWrapperFactory()
{
registerNuKeeperRequestWrapper<Coordination::OpNum::Heartbeat, NuKeeperStorageHeartbeatRequest>(*this);
registerNuKeeperRequestWrapper<Coordination::OpNum::Sync, NuKeeperStorageSyncRequest>(*this);
//registerNuKeeperRequestWrapper<Coordination::OpNum::Auth, NuKeeperStorageAuthRequest>(*this);
registerNuKeeperRequestWrapper<Coordination::OpNum::Close, NuKeeperStorageCloseRequest>(*this);
registerNuKeeperRequestWrapper<Coordination::OpNum::Create, NuKeeperStorageCreateRequest>(*this);
registerNuKeeperRequestWrapper<Coordination::OpNum::Remove, NuKeeperStorageRemoveRequest>(*this);
registerNuKeeperRequestWrapper<Coordination::OpNum::Exists, NuKeeperStorageExistsRequest>(*this);
registerNuKeeperRequestWrapper<Coordination::OpNum::Get, NuKeeperStorageGetRequest>(*this);
registerNuKeeperRequestWrapper<Coordination::OpNum::Set, NuKeeperStorageSetRequest>(*this);
registerNuKeeperRequestWrapper<Coordination::OpNum::List, NuKeeperStorageListRequest>(*this);
registerNuKeeperRequestWrapper<Coordination::OpNum::SimpleList, NuKeeperStorageListRequest>(*this);
registerNuKeeperRequestWrapper<Coordination::OpNum::Check, NuKeeperStorageCheckRequest>(*this);
registerNuKeeperRequestWrapper<Coordination::OpNum::Multi, NuKeeperStorageMultiRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Heartbeat, KeeperStorageHeartbeatRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Sync, KeeperStorageSyncRequest>(*this);
//registerKeeperRequestWrapper<Coordination::OpNum::Auth, KeeperStorageAuthRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Close, KeeperStorageCloseRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Create, KeeperStorageCreateRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Remove, KeeperStorageRemoveRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Exists, KeeperStorageExistsRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Get, KeeperStorageGetRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Set, KeeperStorageSetRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::List, KeeperStorageListRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::SimpleList, KeeperStorageListRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Check, KeeperStorageCheckRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Multi, KeeperStorageMultiRequest>(*this);
}
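A minimal sketch of the registration pattern above (names here are illustrative assumptions, not the ClickHouse API): a Meyers-singleton factory keeps a map from OpNum to a creator callable, and get() dispatches on the opnum of an incoming request.
#include <functional>
#include <memory>
#include <unordered_map>
enum class OpNum { Heartbeat, Create, Remove };
struct Request { virtual ~Request() = default; };
struct CreateRequest : Request {};
class RequestFactory
{
public:
    using Creator = std::function<std::shared_ptr<Request>()>;
    static RequestFactory & instance()
    {
        static RequestFactory factory;  /// Meyers singleton, as in KeeperWrapperFactory.
        return factory;
    }
    void registerRequest(OpNum num, Creator creator) { creators[num] = std::move(creator); }
    /// Throws std::out_of_range for an unregistered opnum.
    std::shared_ptr<Request> get(OpNum num) const { return creators.at(num)(); }
private:
    std::unordered_map<OpNum, Creator> creators;
};
/// Registration mirrors registerKeeperRequestWrapper:
/// RequestFactory::instance().registerRequest(OpNum::Create, [] { return std::make_shared<CreateRequest>(); });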
NuKeeperStorage::ResponsesForSessions NuKeeperStorage::processRequest(const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id, std::optional<int64_t> new_last_zxid)
KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id, std::optional<int64_t> new_last_zxid)
{
NuKeeperStorage::ResponsesForSessions results;
KeeperStorage::ResponsesForSessions results;
if (new_last_zxid)
{
if (zxid >= *new_last_zxid)
@ -645,7 +645,7 @@ NuKeeperStorage::ResponsesForSessions NuKeeperStorage::processRequest(const Coor
for (const auto & ephemeral_path : it->second)
{
container.erase(ephemeral_path);
container.updateValue(parentPath(ephemeral_path), [&ephemeral_path] (NuKeeperStorage::Node & parent)
container.updateValue(parentPath(ephemeral_path), [&ephemeral_path] (KeeperStorage::Node & parent)
{
--parent.stat.numChildren;
++parent.stat.cversion;
@ -669,7 +669,7 @@ NuKeeperStorage::ResponsesForSessions NuKeeperStorage::processRequest(const Coor
}
else if (zk_request->getOpNum() == Coordination::OpNum::Heartbeat)
{
NuKeeperStorageRequestPtr storage_request = NuKeeperWrapperFactory::instance().get(zk_request);
KeeperStorageRequestPtr storage_request = KeeperWrapperFactory::instance().get(zk_request);
auto [response, _] = storage_request->process(container, ephemerals, zxid, session_id);
response->xid = zk_request->xid;
response->zxid = getZXID();
@ -678,7 +678,7 @@ NuKeeperStorage::ResponsesForSessions NuKeeperStorage::processRequest(const Coor
}
else
{
NuKeeperStorageRequestPtr storage_request = NuKeeperWrapperFactory::instance().get(zk_request);
KeeperStorageRequestPtr storage_request = KeeperWrapperFactory::instance().get(zk_request);
auto [response, _] = storage_request->process(container, ephemerals, zxid, session_id);
if (zk_request->has_watch)
@ -715,7 +715,7 @@ NuKeeperStorage::ResponsesForSessions NuKeeperStorage::processRequest(const Coor
}
void NuKeeperStorage::clearDeadWatches(int64_t session_id)
void KeeperStorage::clearDeadWatches(int64_t session_id)
{
auto watches_it = sessions_and_watchers.find(session_id);
if (watches_it != sessions_and_watchers.end())

View File

@ -14,15 +14,15 @@ namespace DB
{
using namespace DB;
struct NuKeeperStorageRequest;
using NuKeeperStorageRequestPtr = std::shared_ptr<NuKeeperStorageRequest>;
struct KeeperStorageRequest;
using KeeperStorageRequestPtr = std::shared_ptr<KeeperStorageRequest>;
using ResponseCallback = std::function<void(const Coordination::ZooKeeperResponsePtr &)>;
using ChildrenSet = std::unordered_set<std::string>;
using SessionAndTimeout = std::unordered_map<int64_t, int64_t>;
struct NuKeeperStorageSnapshot;
struct KeeperStorageSnapshot;
class NuKeeperStorage
class KeeperStorage
{
public:
int64_t session_id_counter{1};
@ -80,7 +80,7 @@ public:
}
public:
NuKeeperStorage(int64_t tick_time_ms);
KeeperStorage(int64_t tick_time_ms);
int64_t getSessionID(int64_t session_timeout_ms)
{
@ -131,6 +131,6 @@ public:
}
};
using NuKeeperStoragePtr = std::unique_ptr<NuKeeperStorage>;
using KeeperStoragePtr = std::unique_ptr<KeeperStorage>;
}

View File

@ -1,4 +1,4 @@
#include <Coordination/NuKeeperStorageDispatcher.h>
#include <Coordination/KeeperStorageDispatcher.h>
#include <Common/setThreadName.h>
namespace DB
@ -11,18 +11,18 @@ namespace ErrorCodes
extern const int TIMEOUT_EXCEEDED;
}
NuKeeperStorageDispatcher::NuKeeperStorageDispatcher()
KeeperStorageDispatcher::KeeperStorageDispatcher()
: coordination_settings(std::make_shared<CoordinationSettings>())
, log(&Poco::Logger::get("NuKeeperDispatcher"))
, log(&Poco::Logger::get("KeeperDispatcher"))
{
}
void NuKeeperStorageDispatcher::requestThread()
void KeeperStorageDispatcher::requestThread()
{
setThreadName("NuKeeperReqT");
setThreadName("KeeperReqT");
while (!shutdown_called)
{
NuKeeperStorage::RequestForSession request;
KeeperStorage::RequestForSession request;
UInt64 max_wait = UInt64(coordination_settings->operation_timeout_ms.totalMilliseconds());
@ -43,12 +43,12 @@ void NuKeeperStorageDispatcher::requestThread()
}
}
void NuKeeperStorageDispatcher::responseThread()
void KeeperStorageDispatcher::responseThread()
{
setThreadName("NuKeeperRspT");
setThreadName("KeeperRspT");
while (!shutdown_called)
{
NuKeeperStorage::ResponseForSession response_for_session;
KeeperStorage::ResponseForSession response_for_session;
UInt64 max_wait = UInt64(coordination_settings->operation_timeout_ms.totalMilliseconds());
@ -69,9 +69,9 @@ void NuKeeperStorageDispatcher::responseThread()
}
}
void NuKeeperStorageDispatcher::snapshotThread()
void KeeperStorageDispatcher::snapshotThread()
{
setThreadName("NuKeeperSnpT");
setThreadName("KeeperSnpT");
while (!shutdown_called)
{
CreateSnapshotTask task;
@ -91,7 +91,7 @@ void NuKeeperStorageDispatcher::snapshotThread()
}
}
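The request, response and snapshot threads above share one shape: pop from a bounded queue with a timeout, so the loop can periodically re-check shutdown_called. A hedged sketch with standard-library primitives (ClickHouse's ConcurrentBoundedQueue is not shown here; capacity enforcement is omitted):
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <queue>
template <typename T>
class ConcurrentQueue
{
public:
    /// Returns false if nothing arrived within the timeout.
    bool tryPop(T & out, std::chrono::milliseconds timeout)
    {
        std::unique_lock<std::mutex> lock(mutex);
        if (!cv.wait_for(lock, timeout, [this] { return !items.empty(); }))
            return false;
        out = std::move(items.front());
        items.pop();
        return true;
    }
    void push(T item)
    {
        {
            std::lock_guard<std::mutex> lock(mutex);
            items.push(std::move(item));
        }
        cv.notify_one();
    }
private:
    std::mutex mutex;
    std::condition_variable cv;
    std::queue<T> items;
};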
void NuKeeperStorageDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response)
void KeeperStorageDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response)
{
std::lock_guard lock(session_to_response_callback_mutex);
auto session_writer = session_to_response_callback.find(session_id);
@ -104,7 +104,7 @@ void NuKeeperStorageDispatcher::setResponse(int64_t session_id, const Coordinati
session_to_response_callback.erase(session_writer);
}
bool NuKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id)
bool KeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id)
{
{
std::lock_guard lock(session_to_response_callback_mutex);
@ -112,7 +112,7 @@ bool NuKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestP
return false;
}
NuKeeperStorage::RequestForSession request_info;
KeeperStorage::RequestForSession request_info;
request_info.request = request;
request_info.session_id = session_id;
@ -125,18 +125,18 @@ bool NuKeeperStorageDispatcher::putRequest(const Coordination::ZooKeeperRequestP
return true;
}
void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfiguration & config)
void KeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfiguration & config)
{
LOG_DEBUG(log, "Initializing storage dispatcher");
int myid = config.getInt("test_keeper_server.server_id");
int myid = config.getInt("keeper_server.server_id");
coordination_settings->loadFromConfig("test_keeper_server.coordination_settings", config);
coordination_settings->loadFromConfig("keeper_server.coordination_settings", config);
request_thread = ThreadFromGlobalPool([this] { requestThread(); });
responses_thread = ThreadFromGlobalPool([this] { responseThread(); });
snapshot_thread = ThreadFromGlobalPool([this] { snapshotThread(); });
server = std::make_unique<NuKeeperServer>(myid, coordination_settings, config, responses_queue, snapshots_queue);
server = std::make_unique<KeeperServer>(myid, coordination_settings, config, responses_queue, snapshots_queue);
try
{
LOG_DEBUG(log, "Waiting server to initialize");
@ -158,7 +158,7 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati
LOG_DEBUG(log, "Dispatcher initialized");
}
void NuKeeperStorageDispatcher::shutdown()
void KeeperStorageDispatcher::shutdown()
{
try
{
@ -191,7 +191,7 @@ void NuKeeperStorageDispatcher::shutdown()
if (server)
server->shutdown();
NuKeeperStorage::RequestForSession request_for_session;
KeeperStorage::RequestForSession request_for_session;
while (requests_queue.tryPop(request_for_session))
{
if (request_for_session.request)
@ -215,19 +215,19 @@ void NuKeeperStorageDispatcher::shutdown()
LOG_DEBUG(log, "Dispatcher shut down");
}
NuKeeperStorageDispatcher::~NuKeeperStorageDispatcher()
KeeperStorageDispatcher::~KeeperStorageDispatcher()
{
shutdown();
}
void NuKeeperStorageDispatcher::registerSession(int64_t session_id, ZooKeeperResponseCallback callback)
void KeeperStorageDispatcher::registerSession(int64_t session_id, ZooKeeperResponseCallback callback)
{
std::lock_guard lock(session_to_response_callback_mutex);
if (!session_to_response_callback.try_emplace(session_id, callback).second)
throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session with id {} already registered in dispatcher", session_id);
}
void NuKeeperStorageDispatcher::sessionCleanerTask()
void KeeperStorageDispatcher::sessionCleanerTask()
{
while (true)
{
@ -244,7 +244,7 @@ void NuKeeperStorageDispatcher::sessionCleanerTask()
LOG_INFO(log, "Found dead session {}, will try to close it", dead_session);
Coordination::ZooKeeperRequestPtr request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close);
request->xid = Coordination::CLOSE_XID;
NuKeeperStorage::RequestForSession request_info;
KeeperStorage::RequestForSession request_info;
request_info.request = request;
request_info.session_id = dead_session;
{
@ -265,7 +265,7 @@ void NuKeeperStorageDispatcher::sessionCleanerTask()
}
}
void NuKeeperStorageDispatcher::finishSession(int64_t session_id)
void KeeperStorageDispatcher::finishSession(int64_t session_id)
{
std::lock_guard lock(session_to_response_callback_mutex);
auto session_it = session_to_response_callback.find(session_id);

View File

@ -13,7 +13,7 @@
#include <Common/Exception.h>
#include <common/logger_useful.h>
#include <functional>
#include <Coordination/NuKeeperServer.h>
#include <Coordination/KeeperServer.h>
#include <Coordination/CoordinationSettings.h>
@ -22,14 +22,14 @@ namespace DB
using ZooKeeperResponseCallback = std::function<void(const Coordination::ZooKeeperResponsePtr & response)>;
class NuKeeperStorageDispatcher
class KeeperStorageDispatcher
{
private:
std::mutex push_request_mutex;
CoordinationSettingsPtr coordination_settings;
using RequestsQueue = ConcurrentBoundedQueue<NuKeeperStorage::RequestForSession>;
using RequestsQueue = ConcurrentBoundedQueue<KeeperStorage::RequestForSession>;
using SessionToResponseCallback = std::unordered_map<int64_t, ZooKeeperResponseCallback>;
RequestsQueue requests_queue{1};
@ -46,7 +46,7 @@ private:
ThreadFromGlobalPool session_cleaner_thread;
ThreadFromGlobalPool snapshot_thread;
std::unique_ptr<NuKeeperServer> server;
std::unique_ptr<KeeperServer> server;
Poco::Logger * log;
@ -58,13 +58,13 @@ private:
void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response);
public:
NuKeeperStorageDispatcher();
KeeperStorageDispatcher();
void initialize(const Poco::Util::AbstractConfiguration & config);
void shutdown();
~NuKeeperStorageDispatcher();
~KeeperStorageDispatcher();
bool putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id);

View File

@ -1,24 +0,0 @@
#pragma once
#include <Common/ZooKeeper/ZooKeeperCommon.h>
namespace DB
{
struct NuKeeperRequest
{
int64_t session_id;
Coordination::ZooKeeperRequestPtr request;
};
using NuKeeperRequests = std::vector<NuKeeperRequest>;
struct NuKeeperResponse
{
int64_t session_id;
Coordination::ZooKeeperRequestPtr response;
};
using NuKeeperResponses = std::vector<NuKeeperResponse>;
}

View File

@ -9,10 +9,10 @@
#include <Poco/ConsoleChannel.h>
#include <Poco/Logger.h>
#include <Coordination/InMemoryLogStore.h>
#include <Coordination/NuKeeperStateManager.h>
#include <Coordination/NuKeeperSnapshotManager.h>
#include <Coordination/KeeperStateManager.h>
#include <Coordination/KeeperSnapshotManager.h>
#include <Coordination/SummingStateMachine.h>
#include <Coordination/NuKeeperStateMachine.h>
#include <Coordination/KeeperStateMachine.h>
#include <Coordination/LoggerWrapper.h>
#include <Coordination/WriteBufferFromNuraftBuffer.h>
#include <Coordination/ReadBufferFromNuraftBuffer.h>
@ -24,7 +24,7 @@
#include <common/logger_useful.h>
#include <libnuraft/nuraft.hxx> // Y_IGNORE
#include <thread>
#include <Coordination/NuKeeperLogStore.h>
#include <Coordination/KeeperLogStore.h>
#include <Coordination/Changelog.h>
#include <filesystem>
@ -102,7 +102,7 @@ struct SimpliestRaftServer
, port(port_)
, endpoint(hostname + ":" + std::to_string(port))
, state_machine(nuraft::cs_new<StateMachine>())
, state_manager(nuraft::cs_new<DB::NuKeeperStateManager>(server_id, hostname, port, logs_path))
, state_manager(nuraft::cs_new<DB::KeeperStateManager>(server_id, hostname, port, logs_path))
{
state_manager->loadLogStore(1, 0);
nuraft::raft_params params;
@ -153,7 +153,7 @@ struct SimpliestRaftServer
nuraft::ptr<StateMachine> state_machine;
// State manager.
nuraft::ptr<DB::NuKeeperStateManager> state_manager;
nuraft::ptr<DB::KeeperStateManager> state_manager;
// Raft launcher.
nuraft::raft_launcher launcher;
@ -207,7 +207,7 @@ DB::LogEntryPtr getLogEntry(const std::string & s, size_t term)
TEST(CoordinationTest, ChangelogTestSimple)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 5, true);
DB::KeeperLogStore changelog("./logs", 5, true);
changelog.init(1, 0);
auto entry = getLogEntry("hello world", 77);
changelog.append(entry);
@ -221,7 +221,7 @@ TEST(CoordinationTest, ChangelogTestSimple)
TEST(CoordinationTest, ChangelogTestFile)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 5, true);
DB::KeeperLogStore changelog("./logs", 5, true);
changelog.init(1, 0);
auto entry = getLogEntry("hello world", 77);
changelog.append(entry);
@ -242,7 +242,7 @@ TEST(CoordinationTest, ChangelogTestFile)
TEST(CoordinationTest, ChangelogReadWrite)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 1000, true);
DB::KeeperLogStore changelog("./logs", 1000, true);
changelog.init(1, 0);
for (size_t i = 0; i < 10; ++i)
{
@ -250,7 +250,7 @@ TEST(CoordinationTest, ChangelogReadWrite)
changelog.append(entry);
}
EXPECT_EQ(changelog.size(), 10);
DB::NuKeeperLogStore changelog_reader("./logs", 1000, true);
DB::KeeperLogStore changelog_reader("./logs", 1000, true);
changelog_reader.init(1, 0);
EXPECT_EQ(changelog_reader.size(), 10);
EXPECT_EQ(changelog_reader.last_entry()->get_term(), changelog.last_entry()->get_term());
@ -269,7 +269,7 @@ TEST(CoordinationTest, ChangelogReadWrite)
TEST(CoordinationTest, ChangelogWriteAt)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 1000, true);
DB::KeeperLogStore changelog("./logs", 1000, true);
changelog.init(1, 0);
for (size_t i = 0; i < 10; ++i)
{
@ -285,7 +285,7 @@ TEST(CoordinationTest, ChangelogWriteAt)
EXPECT_EQ(changelog.entry_at(7)->get_term(), 77);
EXPECT_EQ(changelog.next_slot(), 8);
DB::NuKeeperLogStore changelog_reader("./logs", 1000, true);
DB::KeeperLogStore changelog_reader("./logs", 1000, true);
changelog_reader.init(1, 0);
EXPECT_EQ(changelog_reader.size(), changelog.size());
@ -298,7 +298,7 @@ TEST(CoordinationTest, ChangelogWriteAt)
TEST(CoordinationTest, ChangelogTestAppendAfterRead)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 5, true);
DB::KeeperLogStore changelog("./logs", 5, true);
changelog.init(1, 0);
for (size_t i = 0; i < 7; ++i)
{
@ -310,7 +310,7 @@ TEST(CoordinationTest, ChangelogTestAppendAfterRead)
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
DB::NuKeeperLogStore changelog_reader("./logs", 5, true);
DB::KeeperLogStore changelog_reader("./logs", 5, true);
changelog_reader.init(1, 0);
EXPECT_EQ(changelog_reader.size(), 7);
@ -346,7 +346,7 @@ TEST(CoordinationTest, ChangelogTestAppendAfterRead)
TEST(CoordinationTest, ChangelogTestCompaction)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 5, true);
DB::KeeperLogStore changelog("./logs", 5, true);
changelog.init(1, 0);
for (size_t i = 0; i < 3; ++i)
@ -387,7 +387,7 @@ TEST(CoordinationTest, ChangelogTestCompaction)
EXPECT_EQ(changelog.next_slot(), 8);
EXPECT_EQ(changelog.last_entry()->get_term(), 60);
/// And we are able to read it
DB::NuKeeperLogStore changelog_reader("./logs", 5, true);
DB::KeeperLogStore changelog_reader("./logs", 5, true);
changelog_reader.init(7, 0);
EXPECT_EQ(changelog_reader.size(), 1);
EXPECT_EQ(changelog_reader.start_index(), 7);
@ -398,7 +398,7 @@ TEST(CoordinationTest, ChangelogTestCompaction)
TEST(CoordinationTest, ChangelogTestBatchOperations)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 100, true);
DB::KeeperLogStore changelog("./logs", 100, true);
changelog.init(1, 0);
for (size_t i = 0; i < 10; ++i)
{
@ -410,7 +410,7 @@ TEST(CoordinationTest, ChangelogTestBatchOperations)
auto entries = changelog.pack(1, 5);
DB::NuKeeperLogStore apply_changelog("./logs", 100, true);
DB::KeeperLogStore apply_changelog("./logs", 100, true);
apply_changelog.init(1, 0);
for (size_t i = 0; i < 10; ++i)
@ -440,7 +440,7 @@ TEST(CoordinationTest, ChangelogTestBatchOperations)
TEST(CoordinationTest, ChangelogTestBatchOperationsEmpty)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 100, true);
DB::KeeperLogStore changelog("./logs", 100, true);
changelog.init(1, 0);
for (size_t i = 0; i < 10; ++i)
{
@ -453,7 +453,7 @@ TEST(CoordinationTest, ChangelogTestBatchOperationsEmpty)
auto entries = changelog.pack(5, 5);
ChangelogDirTest test1("./logs1");
DB::NuKeeperLogStore changelog_new("./logs1", 100, true);
DB::KeeperLogStore changelog_new("./logs1", 100, true);
changelog_new.init(1, 0);
EXPECT_EQ(changelog_new.size(), 0);
@ -472,7 +472,7 @@ TEST(CoordinationTest, ChangelogTestBatchOperationsEmpty)
EXPECT_EQ(changelog_new.start_index(), 5);
EXPECT_EQ(changelog_new.next_slot(), 11);
DB::NuKeeperLogStore changelog_reader("./logs1", 100, true);
DB::KeeperLogStore changelog_reader("./logs1", 100, true);
changelog_reader.init(5, 0);
}
@ -480,7 +480,7 @@ TEST(CoordinationTest, ChangelogTestBatchOperationsEmpty)
TEST(CoordinationTest, ChangelogTestWriteAtPreviousFile)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 5, true);
DB::KeeperLogStore changelog("./logs", 5, true);
changelog.init(1, 0);
for (size_t i = 0; i < 33; ++i)
@ -515,7 +515,7 @@ TEST(CoordinationTest, ChangelogTestWriteAtPreviousFile)
EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin"));
DB::NuKeeperLogStore changelog_read("./logs", 5, true);
DB::KeeperLogStore changelog_read("./logs", 5, true);
changelog_read.init(1, 0);
EXPECT_EQ(changelog_read.size(), 7);
EXPECT_EQ(changelog_read.start_index(), 1);
@ -526,7 +526,7 @@ TEST(CoordinationTest, ChangelogTestWriteAtPreviousFile)
TEST(CoordinationTest, ChangelogTestWriteAtFileBorder)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 5, true);
DB::KeeperLogStore changelog("./logs", 5, true);
changelog.init(1, 0);
for (size_t i = 0; i < 33; ++i)
@ -561,7 +561,7 @@ TEST(CoordinationTest, ChangelogTestWriteAtFileBorder)
EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin"));
DB::NuKeeperLogStore changelog_read("./logs", 5, true);
DB::KeeperLogStore changelog_read("./logs", 5, true);
changelog_read.init(1, 0);
EXPECT_EQ(changelog_read.size(), 11);
EXPECT_EQ(changelog_read.start_index(), 1);
@ -572,7 +572,7 @@ TEST(CoordinationTest, ChangelogTestWriteAtFileBorder)
TEST(CoordinationTest, ChangelogTestWriteAtAllFiles)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 5, true);
DB::KeeperLogStore changelog("./logs", 5, true);
changelog.init(1, 0);
for (size_t i = 0; i < 33; ++i)
@ -611,7 +611,7 @@ TEST(CoordinationTest, ChangelogTestWriteAtAllFiles)
TEST(CoordinationTest, ChangelogTestStartNewLogAfterRead)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 5, true);
DB::KeeperLogStore changelog("./logs", 5, true);
changelog.init(1, 0);
for (size_t i = 0; i < 35; ++i)
@ -630,7 +630,7 @@ TEST(CoordinationTest, ChangelogTestStartNewLogAfterRead)
EXPECT_FALSE(fs::exists("./logs/changelog_36_40.bin"));
DB::NuKeeperLogStore changelog_reader("./logs", 5, true);
DB::KeeperLogStore changelog_reader("./logs", 5, true);
changelog_reader.init(1, 0);
auto entry = getLogEntry("36_hello_world", 360);
@ -652,7 +652,7 @@ TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 5, true);
DB::KeeperLogStore changelog("./logs", 5, true);
changelog.init(1, 0);
for (size_t i = 0; i < 35; ++i)
@ -672,7 +672,7 @@ TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate)
DB::WriteBufferFromFile plain_buf("./logs/changelog_11_15.bin", DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
plain_buf.truncate(0);
DB::NuKeeperLogStore changelog_reader("./logs", 5, true);
DB::KeeperLogStore changelog_reader("./logs", 5, true);
changelog_reader.init(1, 0);
EXPECT_EQ(changelog_reader.size(), 10);
@ -701,7 +701,7 @@ TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate)
EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin"));
DB::NuKeeperLogStore changelog_reader2("./logs", 5, true);
DB::KeeperLogStore changelog_reader2("./logs", 5, true);
changelog_reader2.init(1, 0);
EXPECT_EQ(changelog_reader2.size(), 11);
EXPECT_EQ(changelog_reader2.last_entry()->get_term(), 7777);
@ -711,7 +711,7 @@ TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 20, true);
DB::KeeperLogStore changelog("./logs", 20, true);
changelog.init(1, 0);
for (size_t i = 0; i < 35; ++i)
@ -726,7 +726,7 @@ TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2)
DB::WriteBufferFromFile plain_buf("./logs/changelog_1_20.bin", DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
plain_buf.truncate(140);
DB::NuKeeperLogStore changelog_reader("./logs", 20, true);
DB::KeeperLogStore changelog_reader("./logs", 20, true);
changelog_reader.init(1, 0);
EXPECT_EQ(changelog_reader.size(), 2);
@ -739,7 +739,7 @@ TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2)
EXPECT_EQ(changelog_reader.last_entry()->get_term(), 7777);
DB::NuKeeperLogStore changelog_reader2("./logs", 20, true);
DB::KeeperLogStore changelog_reader2("./logs", 20, true);
changelog_reader2.init(1, 0);
EXPECT_EQ(changelog_reader2.size(), 3);
EXPECT_EQ(changelog_reader2.last_entry()->get_term(), 7777);
@ -749,7 +749,7 @@ TEST(CoordinationTest, ChangelogTestLostFiles)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 20, true);
DB::KeeperLogStore changelog("./logs", 20, true);
changelog.init(1, 0);
for (size_t i = 0; i < 35; ++i)
@ -763,7 +763,7 @@ TEST(CoordinationTest, ChangelogTestLostFiles)
fs::remove("./logs/changelog_1_20.bin");
DB::NuKeeperLogStore changelog_reader("./logs", 20, true);
DB::KeeperLogStore changelog_reader("./logs", 20, true);
/// It should print an error message, but still be able to start
changelog_reader.init(5, 0);
EXPECT_FALSE(fs::exists("./logs/changelog_1_20.bin"));
@ -862,9 +862,9 @@ TEST(CoordinationTest, SnapshotableHashMapTrySnapshot)
map_snp.disableSnapshotMode();
}
void addNode(DB::NuKeeperStorage & storage, const std::string & path, const std::string & data, int64_t ephemeral_owner=0)
void addNode(DB::KeeperStorage & storage, const std::string & path, const std::string & data, int64_t ephemeral_owner=0)
{
using Node = DB::NuKeeperStorage::Node;
using Node = DB::KeeperStorage::Node;
Node node{};
node.data = data;
node.stat.ephemeralOwner = ephemeral_owner;
@ -874,9 +874,9 @@ void addNode(DB::NuKeeperStorage & storage, const std::string & path, const std:
TEST(CoordinationTest, TestStorageSnapshotSimple)
{
ChangelogDirTest test("./snapshots");
DB::NuKeeperSnapshotManager manager("./snapshots", 3);
DB::KeeperSnapshotManager manager("./snapshots", 3);
DB::NuKeeperStorage storage(500);
DB::KeeperStorage storage(500);
addNode(storage, "/hello", "world", 1);
addNode(storage, "/hello/somepath", "somedata", 3);
storage.session_id_counter = 5;
@ -886,7 +886,7 @@ TEST(CoordinationTest, TestStorageSnapshotSimple)
storage.getSessionID(130);
storage.getSessionID(130);
DB::NuKeeperStorageSnapshot snapshot(&storage, 2);
DB::KeeperStorageSnapshot snapshot(&storage, 2);
EXPECT_EQ(snapshot.snapshot_meta->get_last_log_idx(), 2);
EXPECT_EQ(snapshot.session_id, 7);
@ -921,9 +921,9 @@ TEST(CoordinationTest, TestStorageSnapshotSimple)
TEST(CoordinationTest, TestStorageSnapshotMoreWrites)
{
ChangelogDirTest test("./snapshots");
DB::NuKeeperSnapshotManager manager("./snapshots", 3);
DB::KeeperSnapshotManager manager("./snapshots", 3);
DB::NuKeeperStorage storage(500);
DB::KeeperStorage storage(500);
storage.getSessionID(130);
for (size_t i = 0; i < 50; ++i)
@ -931,7 +931,7 @@ TEST(CoordinationTest, TestStorageSnapshotMoreWrites)
addNode(storage, "/hello_" + std::to_string(i), "world_" + std::to_string(i));
}
DB::NuKeeperStorageSnapshot snapshot(&storage, 50);
DB::KeeperStorageSnapshot snapshot(&storage, 50);
EXPECT_EQ(snapshot.snapshot_meta->get_last_log_idx(), 50);
EXPECT_EQ(snapshot.snapshot_container_size, 51);
@ -961,9 +961,9 @@ TEST(CoordinationTest, TestStorageSnapshotMoreWrites)
TEST(CoordinationTest, TestStorageSnapshotManySnapshots)
{
ChangelogDirTest test("./snapshots");
DB::NuKeeperSnapshotManager manager("./snapshots", 3);
DB::KeeperSnapshotManager manager("./snapshots", 3);
DB::NuKeeperStorage storage(500);
DB::KeeperStorage storage(500);
storage.getSessionID(130);
for (size_t j = 1; j <= 5; ++j)
@ -973,7 +973,7 @@ TEST(CoordinationTest, TestStorageSnapshotManySnapshots)
addNode(storage, "/hello_" + std::to_string(i), "world_" + std::to_string(i));
}
DB::NuKeeperStorageSnapshot snapshot(&storage, j * 50);
DB::KeeperStorageSnapshot snapshot(&storage, j * 50);
auto buf = manager.serializeSnapshotToBuffer(snapshot);
manager.serializeSnapshotBufferToDisk(*buf, j * 50);
EXPECT_TRUE(fs::exists(std::string{"./snapshots/snapshot_"} + std::to_string(j * 50) + ".bin"));
@ -999,15 +999,15 @@ TEST(CoordinationTest, TestStorageSnapshotManySnapshots)
TEST(CoordinationTest, TestStorageSnapshotMode)
{
ChangelogDirTest test("./snapshots");
DB::NuKeeperSnapshotManager manager("./snapshots", 3);
DB::NuKeeperStorage storage(500);
DB::KeeperSnapshotManager manager("./snapshots", 3);
DB::KeeperStorage storage(500);
for (size_t i = 0; i < 50; ++i)
{
addNode(storage, "/hello_" + std::to_string(i), "world_" + std::to_string(i));
}
{
DB::NuKeeperStorageSnapshot snapshot(&storage, 50);
DB::KeeperStorageSnapshot snapshot(&storage, 50);
for (size_t i = 0; i < 50; ++i)
{
addNode(storage, "/hello_" + std::to_string(i), "wlrd_" + std::to_string(i));
@ -1050,14 +1050,14 @@ TEST(CoordinationTest, TestStorageSnapshotMode)
TEST(CoordinationTest, TestStorageSnapshotBroken)
{
ChangelogDirTest test("./snapshots");
DB::NuKeeperSnapshotManager manager("./snapshots", 3);
DB::NuKeeperStorage storage(500);
DB::KeeperSnapshotManager manager("./snapshots", 3);
DB::KeeperStorage storage(500);
for (size_t i = 0; i < 50; ++i)
{
addNode(storage, "/hello_" + std::to_string(i), "world_" + std::to_string(i));
}
{
DB::NuKeeperStorageSnapshot snapshot(&storage, 50);
DB::KeeperStorageSnapshot snapshot(&storage, 50);
auto buf = manager.serializeSnapshotToBuffer(snapshot);
manager.serializeSnapshotBufferToDisk(*buf, 50);
}
@ -1095,9 +1095,9 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, size
ResponsesQueue queue;
SnapshotsQueue snapshots_queue{1};
auto state_machine = std::make_shared<NuKeeperStateMachine>(queue, snapshots_queue, "./snapshots", settings);
auto state_machine = std::make_shared<KeeperStateMachine>(queue, snapshots_queue, "./snapshots", settings);
state_machine->init();
DB::NuKeeperLogStore changelog("./logs", settings->rotate_log_storage_interval, true);
DB::KeeperLogStore changelog("./logs", settings->rotate_log_storage_interval, true);
changelog.init(state_machine->last_commit_index() + 1, settings->reserved_log_items);
for (size_t i = 1; i < total_logs + 1; ++i)
{
@ -1132,11 +1132,11 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, size
}
SnapshotsQueue snapshots_queue1{1};
auto restore_machine = std::make_shared<NuKeeperStateMachine>(queue, snapshots_queue1, "./snapshots", settings);
auto restore_machine = std::make_shared<KeeperStateMachine>(queue, snapshots_queue1, "./snapshots", settings);
restore_machine->init();
EXPECT_EQ(restore_machine->last_commit_index(), total_logs - total_logs % settings->snapshot_distance);
DB::NuKeeperLogStore restore_changelog("./logs", settings->rotate_log_storage_interval, true);
DB::KeeperLogStore restore_changelog("./logs", settings->rotate_log_storage_interval, true);
restore_changelog.init(restore_machine->last_commit_index() + 1, settings->reserved_log_items);
EXPECT_EQ(restore_changelog.size(), std::min(settings->reserved_log_items + total_logs % settings->snapshot_distance, total_logs));
@ -1242,7 +1242,7 @@ TEST(CoordinationTest, TestEphemeralNodeRemove)
ResponsesQueue queue;
SnapshotsQueue snapshots_queue{1};
auto state_machine = std::make_shared<NuKeeperStateMachine>(queue, snapshots_queue, "./snapshots", settings);
auto state_machine = std::make_shared<KeeperStateMachine>(queue, snapshots_queue, "./snapshots", settings);
state_machine->init();
std::shared_ptr<ZooKeeperCreateRequest> request_c = std::make_shared<ZooKeeperCreateRequest>();

View File

@ -116,7 +116,7 @@ public:
/** Get a list of column names separated by commas. */
std::string dumpNames() const;
/** List of names, types and lengths of columns. Designed for debugging. */
/** List of names, types and lengths of columns. Designed for debugging. */
std::string dumpStructure() const;
/** List of column names and positions from index */

View File

@ -12,7 +12,7 @@ namespace MySQLProtocol
namespace ProtocolText
{
ResultSetRow::ResultSetRow(const DataTypes & data_types, const Columns & columns_, int row_num_)
ResultSetRow::ResultSetRow(const Serializations & serializations, const Columns & columns_, int row_num_)
: columns(columns_), row_num(row_num_)
{
for (size_t i = 0; i < columns.size(); i++)
@ -25,7 +25,7 @@ ResultSetRow::ResultSetRow(const DataTypes & data_types, const Columns & columns
else
{
WriteBufferFromOwnString ostr;
data_types[i]->serializeAsText(*columns[i], row_num, ostr, FormatSettings());
serializations[i]->serializeText(*columns[i], row_num, ostr, FormatSettings());
payload_size += getLengthEncodedStringSize(ostr.str());
serialized.push_back(std::move(ostr.str()));
}
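For context, a hedged sketch of MySQL length-encoded string sizing as used by getLengthEncodedStringSize above (this illustrates the wire rule of the MySQL protocol, not ClickHouse's actual helper): the length prefix itself takes 1, 3, 4 or 9 bytes depending on magnitude.
#include <cstddef>
#include <cstdint>
#include <string>
size_t lengthEncodedIntegerSize(uint64_t value)
{
    if (value < 251) return 1;             /// single byte
    if (value < (1ULL << 16)) return 3;    /// 0xFC prefix + 2 bytes
    if (value < (1ULL << 24)) return 4;    /// 0xFD prefix + 3 bytes
    return 9;                              /// 0xFE prefix + 8 bytes
}
size_t lengthEncodedStringSize(const std::string & s)
{
    return lengthEncodedIntegerSize(s.size()) + s.size();
}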

View File

@ -76,7 +76,7 @@ protected:
void writePayloadImpl(WriteBuffer & buffer) const override;
public:
ResultSetRow(const DataTypes & data_types, const Columns & columns_, int row_num_);
ResultSetRow(const Serializations & serializations, const Columns & columns_, int row_num_);
};
class ComFieldList : public LimitedReadPacket

View File

@ -22,7 +22,9 @@ NameAndTypePair::NameAndTypePair(
: name(name_in_storage_ + (subcolumn_name_.empty() ? "" : "." + subcolumn_name_))
, type(subcolumn_type_)
, type_in_storage(type_in_storage_)
, subcolumn_delimiter_position(name_in_storage_.size()) {}
, subcolumn_delimiter_position(subcolumn_name_.empty() ? std::nullopt : std::make_optional(name_in_storage_.size()))
{
}
String NameAndTypePair::getNameInStorage() const
{

View File

@ -116,6 +116,7 @@ class IColumn;
M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard, if 1 SELECT is executed on each shard, if 2 SELECT and INSERT is executed on each shard", 0) \
M(UInt64, distributed_group_by_no_merge, 0, "If 1, do not merge aggregation states from different servers for distributed query processing - in case it is certain that there are different keys on different shards. If 2 - same as 1 but also apply ORDER BY and LIMIT stages", 0) \
M(Bool, optimize_distributed_group_by_sharding_key, false, "Optimize GROUP BY sharding_key queries (by avoiding costly aggregation on the initiator server).", 0) \
M(UInt64, optimize_skip_unused_shards_limit, 1000, "Limit for number of sharding key values, turns off optimize_skip_unused_shards if the limit is reached", 0) \
M(Bool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \
M(Bool, allow_nondeterministic_optimize_skip_unused_shards, false, "Allow non-deterministic functions (includes dictGet) in sharding_key for optimize_skip_unused_shards", 0) \
M(UInt64, force_optimize_skip_unused_shards, 0, "Throw an exception if unused shards cannot be skipped (1 - throw only if the table has the sharding key, 2 - always throw).", 0) \
@ -215,7 +216,7 @@ class IColumn;
\
M(Bool, insert_distributed_sync, false, "If setting is enabled, insert query into distributed waits until data will be sent to all nodes in cluster.", 0) \
M(UInt64, insert_distributed_timeout, 0, "Timeout for insert query into distributed. Setting is used only with insert_distributed_sync enabled. Zero value means no timeout.", 0) \
M(Int64, distributed_ddl_task_timeout, 180, "Timeout for DDL query responses from all hosts in cluster. If a ddl request has not been performed on all hosts, a response will contain a timeout error and a request will be executed in an async mode. Negative value means infinite.", 0) \
M(Int64, distributed_ddl_task_timeout, 180, "Timeout for DDL query responses from all hosts in cluster. If a ddl request has not been performed on all hosts, a response will contain a timeout error and a request will be executed in an async mode. Negative value means infinite. Zero means async mode.", 0) \
M(Milliseconds, stream_flush_interval_ms, 7500, "Timeout for flushing data from streaming storages.", 0) \
M(Milliseconds, stream_poll_timeout_ms, 500, "Timeout for polling data from/to streaming storages.", 0) \
\
@ -437,7 +438,9 @@ class IColumn;
M(Bool, engine_file_truncate_on_insert, false, "Enables or disables truncate before insert in file engine tables", 0) \
M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \
M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long the initial DDL query should wait for the Replicated database to process previous DDL queue entries", 0) \
M(Bool, database_replicated_ddl_output, true, "Return table with query execution status as a result of DDL query", 0) \
M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \
M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result", 0) \
M(UInt64, distributed_ddl_entry_format_version, 1, "Version of DDL entry to write into ZooKeeper", 0) \
\
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
\
@ -449,6 +452,7 @@ class IColumn;
M(Bool, optimize_aggregators_of_group_by_keys, true, "Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section", 0) \
M(Bool, optimize_group_by_function_keys, true, "Eliminates functions of other keys in GROUP BY section", 0) \
M(UInt64, query_plan_max_optimizations_to_apply, 10000, "Limit the total number of optimizations applied to query plan. If zero, ignored. If limit reached, throw exception", 0) \
M(Bool, database_replicated_ddl_output, true, "Obsolete setting, does nothing. Will be removed after 2021-09-08", 0) \
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS below.

View File

@ -102,4 +102,10 @@ IMPLEMENT_SETTING_ENUM(UnionMode, ErrorCodes::UNKNOWN_UNION,
{"ALL", UnionMode::ALL},
{"DISTINCT", UnionMode::DISTINCT}})
IMPLEMENT_SETTING_ENUM(DistributedDDLOutputMode, ErrorCodes::BAD_ARGUMENTS,
{{"none", DistributedDDLOutputMode::NONE},
{"throw", DistributedDDLOutputMode::THROW},
{"null_status_on_timeout", DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT},
{"never_throw", DistributedDDLOutputMode::NEVER_THROW}})
}

View File

@ -138,4 +138,15 @@ enum class UnionMode
DECLARE_SETTING_ENUM(UnionMode)
enum class DistributedDDLOutputMode
{
NONE,
THROW,
NULL_STATUS_ON_TIMEOUT,
NEVER_THROW,
};
DECLARE_SETTING_ENUM(DistributedDDLOutputMode)
}
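A hedged illustration of what the DECLARE_SETTING_ENUM/IMPLEMENT_SETTING_ENUM pair provides: a mapping between setting strings and enum values, with unknown strings rejected. Names below are assumptions for the sketch, not the ClickHouse macros' generated code.
#include <stdexcept>
#include <string>
#include <unordered_map>
enum class DistributedDDLOutputMode { NONE, THROW, NULL_STATUS_ON_TIMEOUT, NEVER_THROW };
DistributedDDLOutputMode outputModeFromString(const std::string & s)
{
    static const std::unordered_map<std::string, DistributedDDLOutputMode> map =
    {
        {"none", DistributedDDLOutputMode::NONE},
        {"throw", DistributedDDLOutputMode::THROW},
        {"null_status_on_timeout", DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT},
        {"never_throw", DistributedDDLOutputMode::NEVER_THROW},
    };
    auto it = map.find(s);
    if (it == map.end())
        throw std::invalid_argument("Unknown distributed_ddl_output_mode: " + s);
    return it->second;
}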

View File

@ -73,14 +73,16 @@ void NativeBlockInputStream::resetParser()
void NativeBlockInputStream::readData(const IDataType & type, ColumnPtr & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint)
{
IDataType::DeserializeBinaryBulkSettings settings;
settings.getter = [&](IDataType::SubstreamPath) -> ReadBuffer * { return &istr; };
ISerialization::DeserializeBinaryBulkSettings settings;
settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; };
settings.avg_value_size_hint = avg_value_size_hint;
settings.position_independent_encoding = false;
IDataType::DeserializeBinaryBulkStatePtr state;
type.deserializeBinaryBulkStatePrefix(settings, state);
type.deserializeBinaryBulkWithMultipleStreams(column, rows, settings, state);
ISerialization::DeserializeBinaryBulkStatePtr state;
auto serialization = type.getDefaultSerialization();
serialization->deserializeBinaryBulkStatePrefix(settings, state);
serialization->deserializeBinaryBulkWithMultipleStreams(column, rows, settings, state, nullptr);
if (column->size() != rows)
throw Exception("Cannot read all data in NativeBlockInputStream. Rows read: " + toString(column->size()) + ". Rows expected: " + toString(rows) + ".",

View File

@ -48,15 +48,17 @@ void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr
*/
ColumnPtr full_column = column->convertToFullColumnIfConst();
IDataType::SerializeBinaryBulkSettings settings;
settings.getter = [&ostr](IDataType::SubstreamPath) -> WriteBuffer * { return &ostr; };
ISerialization::SerializeBinaryBulkSettings settings;
settings.getter = [&ostr](ISerialization::SubstreamPath) -> WriteBuffer * { return &ostr; };
settings.position_independent_encoding = false;
settings.low_cardinality_max_dictionary_size = 0;
IDataType::SerializeBinaryBulkStatePtr state;
type.serializeBinaryBulkStatePrefix(settings, state);
type.serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state);
type.serializeBinaryBulkStateSuffix(settings, state);
auto serialization = type.getDefaultSerialization();
ISerialization::SerializeBinaryBulkStatePtr state;
serialization->serializeBinaryBulkStatePrefix(settings, state);
serialization->serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state);
serialization->serializeBinaryBulkStateSuffix(settings, state);
}
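Taken together, the two hunks above replace direct IDataType::(de)serializeBinaryBulk* calls with an explicit ISerialization object obtained from the type. A condensed sketch of the new call sequence, with identifiers as in the hunks (`type`, `column`, `read_column`, `ostr`, `istr` and `rows` assumed in scope, DB namespace omitted); not a standalone program:

/// Writing side: configure settings, fetch the default serialization, then
/// run the prefix / bulk / suffix sequence.
ISerialization::SerializeBinaryBulkSettings out_settings;
out_settings.getter = [&ostr](ISerialization::SubstreamPath) -> WriteBuffer * { return &ostr; };
out_settings.position_independent_encoding = false;
out_settings.low_cardinality_max_dictionary_size = 0;

auto serialization = type->getDefaultSerialization();
ISerialization::SerializeBinaryBulkStatePtr out_state;
serialization->serializeBinaryBulkStatePrefix(out_settings, out_state);
serialization->serializeBinaryBulkWithMultipleStreams(*column, /* offset */ 0, /* limit */ 0, out_settings, out_state);
serialization->serializeBinaryBulkStateSuffix(out_settings, out_state);

/// Reading side mirrors it with the Deserialize* counterparts and an optional
/// substreams cache (nullptr here, as in NativeBlockInputStream).
ISerialization::DeserializeBinaryBulkSettings in_settings;
in_settings.getter = [&istr](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; };
in_settings.position_independent_encoding = false;

ISerialization::DeserializeBinaryBulkStatePtr in_state;
serialization->deserializeBinaryBulkStatePrefix(in_settings, in_state);
serialization->deserializeBinaryBulkWithMultipleStreams(read_column, rows, in_settings, in_state, /* cache */ nullptr);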

View File

@ -176,7 +176,7 @@ void PostgreSQLBlockInputStream::insertValue(IColumn & column, std::string_view
case ValueType::vtDecimal256:
{
ReadBufferFromString istr(value);
data_type->deserializeAsWholeText(column, istr, FormatSettings{});
data_type->getDefaultSerialization()->deserializeWholeText(column, istr, FormatSettings{});
break;
}
case ValueType::vtArray:

View File

@ -104,7 +104,7 @@ void RemoteQueryExecutorReadContext::setConnectionFD(int fd, const Poco::Timespa
connection_fd_description = fd_description;
}
bool RemoteQueryExecutorReadContext::checkTimeout(bool blocking) const
bool RemoteQueryExecutorReadContext::checkTimeout(bool blocking)
{
try
{
@ -118,7 +118,7 @@ bool RemoteQueryExecutorReadContext::checkTimeout(bool blocking) const
}
}
bool RemoteQueryExecutorReadContext::checkTimeoutImpl(bool blocking) const
bool RemoteQueryExecutorReadContext::checkTimeoutImpl(bool blocking)
{
/// Waiting on epoll will not block if it was already polled externally.
epoll_event events[3];
@ -128,14 +128,13 @@ bool RemoteQueryExecutorReadContext::checkTimeoutImpl(bool blocking) const
bool is_socket_ready = false;
bool is_pipe_alarmed = false;
bool has_timer_alarm = false;
for (int i = 0; i < num_events; ++i)
{
if (events[i].data.fd == connection_fd)
is_socket_ready = true;
if (events[i].data.fd == timer.getDescriptor())
has_timer_alarm = true;
is_timer_alarmed = true;
if (events[i].data.fd == pipe_fd[0])
is_pipe_alarmed = true;
}
@ -143,7 +142,7 @@ bool RemoteQueryExecutorReadContext::checkTimeoutImpl(bool blocking) const
if (is_pipe_alarmed)
return false;
if (has_timer_alarm && !is_socket_ready)
if (is_timer_alarmed && !is_socket_ready)
{
/// Socket receive timeout. Drain it in case of error, or it may be hidden by the timeout exception.
timer.drain();
@ -188,10 +187,18 @@ void RemoteQueryExecutorReadContext::cancel()
/// It is safe to just destroy fiber - we are not in the process of reading from socket.
boost::context::fiber to_destroy = std::move(fiber);
while (is_read_in_progress.load(std::memory_order_relaxed))
/// One should not try to wait for the current packet here in case of a
/// timeout, because that would exceed the timeout.
/// Anyway, if the timeout is exceeded, the connection will be shut down
/// (disconnected), so it will not be left in an unsynchronised state.
if (!is_timer_alarmed)
{
checkTimeout(/* blocking= */ true);
to_destroy = std::move(to_destroy).resume();
/// Wait for the current pending packet, to avoid leaving the connection in an unsynchronised state.
while (is_read_in_progress.load(std::memory_order_relaxed))
{
checkTimeout(/* blocking= */ true);
to_destroy = std::move(to_destroy).resume();
}
}
/// Send something to pipe to cancel executor waiting.
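The context above registers three descriptors (connection socket, timer, cancellation pipe) in one epoll and classifies which of them fired. A self-contained sketch of that classification pattern follows; it is not the actual class, which additionally drains the timer and resumes a fiber:

#include <sys/epoll.h>

struct FiredEvents
{
    bool socket_ready = false;
    bool timer_alarmed = false;
    bool pipe_alarmed = false;
};

/// Poll the shared epoll once and report which registered descriptor fired.
/// With blocking == false the call returns immediately (timeout 0).
FiredEvents waitOnce(int epoll_fd, int connection_fd, int timer_fd, int pipe_read_fd, bool blocking)
{
    epoll_event events[3];
    int num_events = epoll_wait(epoll_fd, events, 3, blocking ? -1 : 0);

    FiredEvents result;
    for (int i = 0; i < num_events; ++i)
    {
        if (events[i].data.fd == connection_fd)
            result.socket_ready = true;
        if (events[i].data.fd == timer_fd)
            result.timer_alarmed = true;
        if (events[i].data.fd == pipe_read_fd)
            result.pipe_alarmed = true;
    }
    return result;
}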

View File

@ -44,6 +44,7 @@ public:
/// * pipe_fd is a pipe we use to cancel query and socket polling by executor.
/// We put those descriptors into our own epoll which is used by external executor.
TimerDescriptor timer{CLOCK_MONOTONIC, 0};
bool is_timer_alarmed = false;
int connection_fd = -1;
int pipe_fd[2] = { -1, -1 };
@ -54,8 +55,8 @@ public:
explicit RemoteQueryExecutorReadContext(IConnections & connections_);
~RemoteQueryExecutorReadContext();
bool checkTimeout(bool blocking = false) const;
bool checkTimeoutImpl(bool blocking) const;
bool checkTimeout(bool blocking = false);
bool checkTimeoutImpl(bool blocking);
void setConnectionFD(int fd, const Poco::Timespan & timeout = 0, const std::string & fd_description = "");
void setTimer() const;

View File

@ -11,6 +11,7 @@
#include <Formats/FormatSettings.h>
#include <DataTypes/DataTypeAggregateFunction.h>
#include <DataTypes/Serializations/SerializationAggregateFunction.h>
#include <DataTypes/DataTypeFactory.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
@ -58,207 +59,6 @@ std::string DataTypeAggregateFunction::doGetName() const
return stream.str();
}
void DataTypeAggregateFunction::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
const String & s = get<const String &>(field);
writeVarUInt(s.size(), ostr);
writeString(s, ostr);
}
void DataTypeAggregateFunction::deserializeBinary(Field & field, ReadBuffer & istr) const
{
UInt64 size;
readVarUInt(size, istr);
field = String();
String & s = get<String &>(field);
s.resize(size);
istr.readStrict(s.data(), size);
}
void DataTypeAggregateFunction::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
{
function->serialize(assert_cast<const ColumnAggregateFunction &>(column).getData()[row_num], ostr);
}
void DataTypeAggregateFunction::deserializeBinary(IColumn & column, ReadBuffer & istr) const
{
ColumnAggregateFunction & column_concrete = assert_cast<ColumnAggregateFunction &>(column);
Arena & arena = column_concrete.createOrGetArena();
size_t size_of_state = function->sizeOfData();
AggregateDataPtr place = arena.alignedAlloc(size_of_state, function->alignOfData());
function->create(place);
try
{
function->deserialize(place, istr, &arena);
}
catch (...)
{
function->destroy(place);
throw;
}
column_concrete.getData().push_back(place);
}
void DataTypeAggregateFunction::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
{
const ColumnAggregateFunction & real_column = typeid_cast<const ColumnAggregateFunction &>(column);
const ColumnAggregateFunction::Container & vec = real_column.getData();
ColumnAggregateFunction::Container::const_iterator it = vec.begin() + offset;
ColumnAggregateFunction::Container::const_iterator end = limit ? it + limit : vec.end();
if (end > vec.end())
end = vec.end();
for (; it != end; ++it)
function->serialize(*it, ostr);
}
void DataTypeAggregateFunction::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const
{
ColumnAggregateFunction & real_column = typeid_cast<ColumnAggregateFunction &>(column);
ColumnAggregateFunction::Container & vec = real_column.getData();
Arena & arena = real_column.createOrGetArena();
real_column.set(function);
vec.reserve(vec.size() + limit);
size_t size_of_state = function->sizeOfData();
size_t align_of_state = function->alignOfData();
for (size_t i = 0; i < limit; ++i)
{
if (istr.eof())
break;
AggregateDataPtr place = arena.alignedAlloc(size_of_state, align_of_state);
function->create(place);
try
{
function->deserialize(place, istr, &arena);
}
catch (...)
{
function->destroy(place);
throw;
}
vec.push_back(place);
}
}
static String serializeToString(const AggregateFunctionPtr & function, const IColumn & column, size_t row_num)
{
WriteBufferFromOwnString buffer;
function->serialize(assert_cast<const ColumnAggregateFunction &>(column).getData()[row_num], buffer);
return buffer.str();
}
static void deserializeFromString(const AggregateFunctionPtr & function, IColumn & column, const String & s)
{
ColumnAggregateFunction & column_concrete = assert_cast<ColumnAggregateFunction &>(column);
Arena & arena = column_concrete.createOrGetArena();
size_t size_of_state = function->sizeOfData();
AggregateDataPtr place = arena.alignedAlloc(size_of_state, function->alignOfData());
function->create(place);
try
{
ReadBufferFromString istr(s);
function->deserialize(place, istr, &arena);
}
catch (...)
{
function->destroy(place);
throw;
}
column_concrete.getData().push_back(place);
}
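Both helpers reduce every text format to one canonical round trip through a string, and each format-specific method then only re-escapes or quotes that string. A self-contained sketch of the pattern, with a hypothetical stand-in state and escaper (not the real ClickHouse helpers):

#include <string>

struct State { long sum = 0; };  /// Hypothetical stand-in for an aggregate function state.

/// The single canonical text form; everything else derives from it.
std::string serializeToString(const State & s) { return std::to_string(s.sum); }

/// Hypothetical escaper standing in for writeEscapedString.
std::string escapeForTSV(const std::string & s)
{
    std::string out;
    for (char c : s)
    {
        if (c == '\t') out += "\\t";
        else if (c == '\n') out += "\\n";
        else if (c == '\\') out += "\\\\";
        else out += c;
    }
    return out;
}

std::string serializeTextEscaped(const State & s) { return escapeForTSV(serializeToString(s)); }
std::string serializeTextQuoted(const State & s) { return "'" + serializeToString(s) + "'"; }  /// The real code also escapes quotes inside.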
void DataTypeAggregateFunction::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeString(serializeToString(function, column, row_num), ostr);
}
void DataTypeAggregateFunction::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeEscapedString(serializeToString(function, column, row_num), ostr);
}
void DataTypeAggregateFunction::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
String s;
readEscapedString(s, istr);
deserializeFromString(function, column, s);
}
void DataTypeAggregateFunction::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeQuotedString(serializeToString(function, column, row_num), ostr);
}
void DataTypeAggregateFunction::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
String s;
readQuotedStringWithSQLStyle(s, istr);
deserializeFromString(function, column, s);
}
void DataTypeAggregateFunction::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
String s;
readStringUntilEOF(s, istr);
deserializeFromString(function, column, s);
}
void DataTypeAggregateFunction::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeJSONString(serializeToString(function, column, row_num), ostr, settings);
}
void DataTypeAggregateFunction::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
String s;
readJSONString(s, istr);
deserializeFromString(function, column, s);
}
void DataTypeAggregateFunction::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeXMLStringForTextElement(serializeToString(function, column, row_num), ostr);
}
void DataTypeAggregateFunction::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeCSV(serializeToString(function, column, row_num), ostr);
}
void DataTypeAggregateFunction::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String s;
readCSV(s, istr, settings.csv);
deserializeFromString(function, column, s);
}
MutableColumnPtr DataTypeAggregateFunction::createColumn() const
{
return ColumnAggregateFunction::create(function);
@ -298,6 +98,11 @@ bool DataTypeAggregateFunction::equals(const IDataType & rhs) const
return typeid(rhs) == typeid(*this) && getName() == rhs.getName();
}
SerializationPtr DataTypeAggregateFunction::doGetDefaultSerialization() const
{
return std::make_shared<SerializationAggregateFunction>(function);
}
static DataTypePtr create(const ASTPtr & arguments)
{

View File

@ -39,27 +39,6 @@ public:
DataTypePtr getReturnTypeToPredict() const { return function->getReturnTypeToPredict(); }
DataTypes getArgumentsDataTypes() const { return argument_types; }
/// NOTE These two functions for serializing single values are incompatible with the functions below.
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
void deserializeBinary(Field & field, ReadBuffer & istr) const override;
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override;
void deserializeBinary(IColumn & column, ReadBuffer & istr) const override;
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
MutableColumnPtr createColumn() const override;
Field getDefault() const override;
@ -69,6 +48,8 @@ public:
bool isParametric() const override { return true; }
bool haveSubtypes() const override { return false; }
bool shouldAlignRightInPrettyFormats() const override { return false; }
SerializationPtr doGetDefaultSerialization() const override;
};

View File

@ -9,7 +9,9 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeOneElementTuple.h>
#include <DataTypes/Serializations/SerializationArray.h>
#include <DataTypes/Serializations/SerializationTupleElement.h>
#include <DataTypes/Serializations/SerializationNumber.h>
#include <Parsers/IAST.h>
@ -24,10 +26,7 @@ namespace DB
namespace ErrorCodes
{
extern const int CANNOT_READ_ALL_DATA;
extern const int CANNOT_READ_ARRAY_FROM_TEXT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int LOGICAL_ERROR;
}
@ -37,490 +36,6 @@ DataTypeArray::DataTypeArray(const DataTypePtr & nested_)
}
void DataTypeArray::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
const Array & a = get<const Array &>(field);
writeVarUInt(a.size(), ostr);
for (size_t i = 0; i < a.size(); ++i)
{
nested->serializeBinary(a[i], ostr);
}
}
void DataTypeArray::deserializeBinary(Field & field, ReadBuffer & istr) const
{
size_t size;
readVarUInt(size, istr);
field = Array(size);
Array & arr = get<Array &>(field);
for (size_t i = 0; i < size; ++i)
nested->deserializeBinary(arr[i], istr);
}
void DataTypeArray::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
{
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
const ColumnArray::Offsets & offsets = column_array.getOffsets();
size_t offset = offsets[row_num - 1];
size_t next_offset = offsets[row_num];
size_t size = next_offset - offset;
writeVarUInt(size, ostr);
const IColumn & nested_column = column_array.getData();
for (size_t i = offset; i < next_offset; ++i)
nested->serializeBinary(nested_column, i, ostr);
}
void DataTypeArray::deserializeBinary(IColumn & column, ReadBuffer & istr) const
{
ColumnArray & column_array = assert_cast<ColumnArray &>(column);
ColumnArray::Offsets & offsets = column_array.getOffsets();
size_t size;
readVarUInt(size, istr);
IColumn & nested_column = column_array.getData();
size_t i = 0;
try
{
for (; i < size; ++i)
nested->deserializeBinary(nested_column, istr);
}
catch (...)
{
if (i)
nested_column.popBack(i);
throw;
}
offsets.push_back(offsets.back() + size);
}
namespace
{
void serializeArraySizesPositionIndependent(const IColumn & column, WriteBuffer & ostr, UInt64 offset, UInt64 limit)
{
const ColumnArray & column_array = typeid_cast<const ColumnArray &>(column);
const ColumnArray::Offsets & offset_values = column_array.getOffsets();
size_t size = offset_values.size();
if (!size)
return;
size_t end = limit && (offset + limit < size)
? offset + limit
: size;
ColumnArray::Offset prev_offset = offset_values[offset - 1];
for (size_t i = offset; i < end; ++i)
{
ColumnArray::Offset current_offset = offset_values[i];
writeIntBinary(current_offset - prev_offset, ostr);
prev_offset = current_offset;
}
}
void deserializeArraySizesPositionIndependent(IColumn & column, ReadBuffer & istr, UInt64 limit)
{
ColumnArray & column_array = typeid_cast<ColumnArray &>(column);
ColumnArray::Offsets & offset_values = column_array.getOffsets();
size_t initial_size = offset_values.size();
offset_values.resize(initial_size + limit);
size_t i = initial_size;
ColumnArray::Offset current_offset = initial_size ? offset_values[initial_size - 1] : 0;
while (i < initial_size + limit && !istr.eof())
{
ColumnArray::Offset current_size = 0;
readIntBinary(current_size, istr);
current_offset += current_size;
offset_values[i] = current_offset;
++i;
}
offset_values.resize(i);
}
ColumnPtr arrayOffsetsToSizes(const IColumn & column)
{
const auto & column_offsets = assert_cast<const ColumnArray::ColumnOffsets &>(column);
MutableColumnPtr column_sizes = column_offsets.cloneEmpty();
if (column_offsets.empty())
return column_sizes;
const auto & offsets_data = column_offsets.getData();
auto & sizes_data = assert_cast<ColumnArray::ColumnOffsets &>(*column_sizes).getData();
sizes_data.resize(offsets_data.size());
IColumn::Offset prev_offset = 0;
for (size_t i = 0, size = offsets_data.size(); i < size; ++i)
{
auto current_offset = offsets_data[i];
sizes_data[i] = current_offset - prev_offset;
prev_offset = current_offset;
}
return column_sizes;
}
ColumnPtr arraySizesToOffsets(const IColumn & column)
{
const auto & column_sizes = assert_cast<const ColumnArray::ColumnOffsets &>(column);
MutableColumnPtr column_offsets = column_sizes.cloneEmpty();
if (column_sizes.empty())
return column_offsets;
const auto & sizes_data = column_sizes.getData();
auto & offsets_data = assert_cast<ColumnArray::ColumnOffsets &>(*column_offsets).getData();
offsets_data.resize(sizes_data.size());
IColumn::Offset prev_offset = 0;
for (size_t i = 0, size = sizes_data.size(); i < size; ++i)
{
prev_offset += sizes_data[i];
offsets_data[i] = prev_offset;
}
return column_offsets;
}
}
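The two conversions above are plain prefix-sum transforms: cumulative offsets [2, 5, 6] correspond to per-row sizes [2, 3, 1]. A self-contained sketch on raw vectors:

#include <cstdint>
#include <vector>

/// Cumulative offsets -> per-row sizes (difference of consecutive offsets).
std::vector<uint64_t> offsetsToSizes(const std::vector<uint64_t> & offsets)
{
    std::vector<uint64_t> sizes(offsets.size());
    uint64_t prev = 0;
    for (size_t i = 0; i < offsets.size(); ++i)
    {
        sizes[i] = offsets[i] - prev;
        prev = offsets[i];
    }
    return sizes;
}

/// Per-row sizes -> cumulative offsets (running sum).
std::vector<uint64_t> sizesToOffsets(const std::vector<uint64_t> & sizes)
{
    std::vector<uint64_t> offsets(sizes.size());
    uint64_t acc = 0;
    for (size_t i = 0; i < sizes.size(); ++i)
    {
        acc += sizes[i];
        offsets[i] = acc;
    }
    return offsets;
}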
void DataTypeArray::enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const
{
path.push_back(Substream::ArraySizes);
callback(path, *this);
path.back() = Substream::ArrayElements;
nested->enumerateStreams(callback, path);
path.pop_back();
}
void DataTypeArray::serializeBinaryBulkStatePrefixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
settings.path.push_back(Substream::ArrayElements);
nested->serializeBinaryBulkStatePrefix(settings, state);
settings.path.pop_back();
}
void DataTypeArray::serializeBinaryBulkStateSuffixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
settings.path.push_back(Substream::ArrayElements);
nested->serializeBinaryBulkStateSuffix(settings, state);
settings.path.pop_back();
}
void DataTypeArray::deserializeBinaryBulkStatePrefixImpl(
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const
{
settings.path.push_back(Substream::ArrayElements);
nested->deserializeBinaryBulkStatePrefix(settings, state);
settings.path.pop_back();
}
void DataTypeArray::serializeBinaryBulkWithMultipleStreamsImpl(
const IColumn & column,
size_t offset,
size_t limit,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
const ColumnArray & column_array = typeid_cast<const ColumnArray &>(column);
/// First serialize array sizes.
settings.path.push_back(Substream::ArraySizes);
if (auto * stream = settings.getter(settings.path))
{
if (settings.position_independent_encoding)
serializeArraySizesPositionIndependent(column, *stream, offset, limit);
else
DataTypeNumber<ColumnArray::Offset>().serializeBinaryBulk(*column_array.getOffsetsPtr(), *stream, offset, limit);
}
/// Then serialize contents of arrays.
settings.path.back() = Substream::ArrayElements;
const ColumnArray::Offsets & offset_values = column_array.getOffsets();
if (offset > offset_values.size())
return;
/** offset - from which array to write.
* limit - how many arrays should be written, or 0 to write everything there is.
* end - the array at which the written piece ends.
*
* nested_offset - from which nested element to write.
* nested_limit - how many nested elements to write, or 0 to write everything there is.
*/
size_t end = std::min(offset + limit, offset_values.size());
size_t nested_offset = offset ? offset_values[offset - 1] : 0;
size_t nested_limit = limit
? offset_values[end - 1] - nested_offset
: 0;
if (limit == 0 || nested_limit)
nested->serializeBinaryBulkWithMultipleStreams(column_array.getData(), nested_offset, nested_limit, settings, state);
settings.path.pop_back();
}
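A self-contained sketch of the offset/limit arithmetic above: for offsets [3, 5, 9, 10] (four arrays), writing arrays [1, 3) yields end = 3, nested_offset = offsets[0] = 3 and nested_limit = offsets[2] - 3 = 6.

#include <algorithm>
#include <cstdint>
#include <vector>

struct NestedRange { size_t nested_offset; size_t nested_limit; };

/// Translate a (row offset, row limit) window into the corresponding window
/// over the flattened nested elements, as in the function above.
NestedRange nestedRange(const std::vector<uint64_t> & offsets, size_t offset, size_t limit)
{
    size_t end = std::min(offset + limit, offsets.size());
    size_t nested_offset = offset ? offsets[offset - 1] : 0;
    size_t nested_limit = limit ? offsets[end - 1] - nested_offset : 0;
    return {nested_offset, nested_limit};
}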
void DataTypeArray::deserializeBinaryBulkWithMultipleStreamsImpl(
IColumn & column,
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * cache) const
{
ColumnArray & column_array = typeid_cast<ColumnArray &>(column);
settings.path.push_back(Substream::ArraySizes);
if (auto cached_column = getFromSubstreamsCache(cache, settings.path))
{
column_array.getOffsetsPtr() = arraySizesToOffsets(*cached_column);
}
else if (auto * stream = settings.getter(settings.path))
{
if (settings.position_independent_encoding)
deserializeArraySizesPositionIndependent(column, *stream, limit);
else
DataTypeNumber<ColumnArray::Offset>().deserializeBinaryBulk(column_array.getOffsetsColumn(), *stream, limit, 0);
addToSubstreamsCache(cache, settings.path, arrayOffsetsToSizes(column_array.getOffsetsColumn()));
}
settings.path.back() = Substream::ArrayElements;
ColumnArray::Offsets & offset_values = column_array.getOffsets();
ColumnPtr & nested_column = column_array.getDataPtr();
/// The number of values corresponding to `offset_values` must be read.
size_t last_offset = offset_values.back();
if (last_offset < nested_column->size())
throw Exception("Nested column is longer than last offset", ErrorCodes::LOGICAL_ERROR);
size_t nested_limit = last_offset - nested_column->size();
/// Adjust value size hint. Divide it by the average array size.
settings.avg_value_size_hint = nested_limit ? settings.avg_value_size_hint / nested_limit * offset_values.size() : 0;
nested->deserializeBinaryBulkWithMultipleStreams(nested_column, nested_limit, settings, state, cache);
settings.path.pop_back();
/// Check consistency between offsets and elements subcolumns.
/// But if elements column is empty - it's ok for columns of Nested types that was added by ALTER.
if (!nested_column->empty() && nested_column->size() != last_offset)
throw ParsingException("Cannot read all array values: read just " + toString(nested_column->size()) + " of " + toString(last_offset),
ErrorCodes::CANNOT_READ_ALL_DATA);
}
template <typename Writer>
static void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, Writer && write_nested)
{
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
const ColumnArray::Offsets & offsets = column_array.getOffsets();
size_t offset = offsets[row_num - 1];
size_t next_offset = offsets[row_num];
const IColumn & nested_column = column_array.getData();
writeChar('[', ostr);
for (size_t i = offset; i < next_offset; ++i)
{
if (i != offset)
writeChar(',', ostr);
write_nested(nested_column, i);
}
writeChar(']', ostr);
}
template <typename Reader>
static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && read_nested, bool allow_unenclosed)
{
ColumnArray & column_array = assert_cast<ColumnArray &>(column);
ColumnArray::Offsets & offsets = column_array.getOffsets();
IColumn & nested_column = column_array.getData();
size_t size = 0;
bool has_braces = false;
if (checkChar('[', istr))
has_braces = true;
else if (!allow_unenclosed)
throw Exception(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT, "Array does not start with '[' character");
try
{
bool first = true;
while (!istr.eof() && *istr.position() != ']')
{
if (!first)
{
if (*istr.position() == ',')
++istr.position();
else
throw ParsingException(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT,
"Cannot read array from text, expected comma or end of array, found '{}'",
*istr.position());
}
first = false;
skipWhitespaceIfAny(istr);
if (*istr.position() == ']')
break;
read_nested(nested_column);
++size;
skipWhitespaceIfAny(istr);
}
if (has_braces)
assertChar(']', istr);
else /// If the array is not enclosed in brackets, read until EOF.
assertEOF(istr);
}
catch (...)
{
if (size)
nested_column.popBack(size);
throw;
}
offsets.push_back(offsets.back() + size);
}
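A self-contained sketch of the bracket-and-comma scan above, specialized to integers and without the whitespace skipping; the real code delegates each element to the nested type's deserializer and rolls back the nested column on failure:

#include <stdexcept>
#include <string>
#include <vector>

std::vector<int> parseIntArray(const std::string & text, bool allow_unenclosed)
{
    size_t pos = 0;
    bool has_brackets = pos < text.size() && text[pos] == '[';
    if (has_brackets)
        ++pos;
    else if (!allow_unenclosed)
        throw std::runtime_error("Array does not start with '[' character");

    std::vector<int> result;
    bool first = true;
    while (pos < text.size() && text[pos] != ']')
    {
        /// After the first element, each element must be preceded by a comma.
        if (!first && text[pos++] != ',')
            throw std::runtime_error("Expected comma or end of array");
        first = false;

        size_t consumed = 0;
        result.push_back(std::stoi(text.substr(pos), &consumed));
        pos += consumed;
    }

    if (has_brackets)
    {
        if (pos >= text.size() || text[pos] != ']')
            throw std::runtime_error("Expected ']'");
    }
    else if (pos != text.size())  /// Unenclosed arrays must consume all input.
        throw std::runtime_error("Expected end of input");

    return result;
}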
void DataTypeArray::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeTextImpl(column, row_num, ostr,
[&](const IColumn & nested_column, size_t i)
{
nested->serializeAsTextQuoted(nested_column, i, ostr, settings);
});
}
void DataTypeArray::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextImpl(column, istr,
[&](IColumn & nested_column)
{
nested->deserializeAsTextQuoted(nested_column, istr, settings);
}, false);
}
void DataTypeArray::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
const ColumnArray::Offsets & offsets = column_array.getOffsets();
size_t offset = offsets[row_num - 1];
size_t next_offset = offsets[row_num];
const IColumn & nested_column = column_array.getData();
writeChar('[', ostr);
for (size_t i = offset; i < next_offset; ++i)
{
if (i != offset)
writeChar(',', ostr);
nested->serializeAsTextJSON(nested_column, i, ostr, settings);
}
writeChar(']', ostr);
}
void DataTypeArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextImpl(column, istr,
[&](IColumn & nested_column)
{
nested->deserializeAsTextJSON(nested_column, istr, settings);
}, false);
}
void DataTypeArray::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
const ColumnArray::Offsets & offsets = column_array.getOffsets();
size_t offset = offsets[row_num - 1];
size_t next_offset = offsets[row_num];
const IColumn & nested_column = column_array.getData();
writeCString("<array>", ostr);
for (size_t i = offset; i < next_offset; ++i)
{
writeCString("<elem>", ostr);
nested->serializeAsTextXML(nested_column, i, ostr, settings);
writeCString("</elem>", ostr);
}
writeCString("</array>", ostr);
}
void DataTypeArray::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
/// There is no good way to serialize an array in CSV. Therefore, we serialize it into a string, and then write the resulting string in CSV.
WriteBufferFromOwnString wb;
serializeText(column, row_num, wb, settings);
writeCSV(wb.str(), ostr);
}
void DataTypeArray::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String s;
readCSV(s, istr, settings.csv);
ReadBufferFromString rb(s);
if (settings.csv.input_format_arrays_as_nested_csv)
{
deserializeTextImpl(column, rb,
[&](IColumn & nested_column)
{
nested->deserializeAsTextCSV(nested_column, rb, settings);
}, true);
}
else
{
deserializeTextImpl(column, rb,
[&](IColumn & nested_column)
{
nested->deserializeAsTextQuoted(nested_column, rb, settings);
}, true);
}
}
MutableColumnPtr DataTypeArray::createColumn() const
{
return ColumnArray::create(nested->createColumn(), ColumnArray::ColumnOffsets::create());
@ -546,7 +61,7 @@ DataTypePtr DataTypeArray::tryGetSubcolumnType(const String & subcolumn_name) co
DataTypePtr DataTypeArray::tryGetSubcolumnTypeImpl(const String & subcolumn_name, size_t level) const
{
if (subcolumn_name == "size" + std::to_string(level))
return createOneElementTuple(std::make_shared<DataTypeUInt64>(), subcolumn_name, false);
return std::make_shared<DataTypeUInt64>();
DataTypePtr subcolumn;
if (const auto * nested_array = typeid_cast<const DataTypeArray *>(nested.get()))
@ -554,7 +69,10 @@ DataTypePtr DataTypeArray::tryGetSubcolumnTypeImpl(const String & subcolumn_name
else
subcolumn = nested->tryGetSubcolumnType(subcolumn_name);
return (subcolumn ? std::make_shared<DataTypeArray>(std::move(subcolumn)) : subcolumn);
if (subcolumn && subcolumn_name != MAIN_SUBCOLUMN_NAME)
subcolumn = std::make_shared<DataTypeArray>(std::move(subcolumn));
return subcolumn;
}
ColumnPtr DataTypeArray::getSubcolumn(const String & subcolumn_name, const IColumn & column) const
@ -577,6 +95,32 @@ ColumnPtr DataTypeArray::getSubcolumnImpl(const String & subcolumn_name, const I
return ColumnArray::create(subcolumn, column_array.getOffsetsPtr());
}
SerializationPtr DataTypeArray::getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const
{
return getSubcolumnSerializationImpl(subcolumn_name, base_serialization_getter, 0);
}
SerializationPtr DataTypeArray::getSubcolumnSerializationImpl(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter, size_t level) const
{
if (subcolumn_name == "size" + std::to_string(level))
return std::make_shared<SerializationTupleElement>(base_serialization_getter(DataTypeUInt64()), subcolumn_name, false);
SerializationPtr subcolumn;
if (const auto * nested_array = typeid_cast<const DataTypeArray *>(nested.get()))
subcolumn = nested_array->getSubcolumnSerializationImpl(subcolumn_name, base_serialization_getter, level + 1);
else
subcolumn = nested->getSubcolumnSerialization(subcolumn_name, base_serialization_getter);
return std::make_shared<SerializationArray>(subcolumn);
}
SerializationPtr DataTypeArray::doGetDefaultSerialization() const
{
return std::make_shared<SerializationArray>(nested->getDefaultSerialization());
}
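The "size" + level naming above means that for Array(Array(T)) the subcolumn size0 holds the outer array sizes and size1 the inner ones, with the level counter increasing while descending through nested arrays. A toy sketch of that resolution (ToyType and the string return are stand-ins, not the real API):

#include <optional>
#include <string>

struct ToyType { int array_depth = 0; };  /// Stand-in: Array(...Array(T)...) nested array_depth times.

/// Resolve "size<level>" subcolumn names; the sizes subcolumn is a plain UInt64 column.
std::optional<std::string> trySizeSubcolumn(const ToyType & type, const std::string & name)
{
    for (int level = 0; level < type.array_depth; ++level)
        if (name == "size" + std::to_string(level))
            return "UInt64";
    return std::nullopt;
}

/// For ToyType{2} (i.e. Array(Array(T))), both "size0" and "size1" resolve; "size2" does not.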
size_t DataTypeArray::getNumberOfDimensions() const
{
const DataTypeArray * nested_array = typeid_cast<const DataTypeArray *>(nested.get());

View File

@ -1,13 +1,14 @@
#pragma once
#include <DataTypes/DataTypeWithSimpleSerialization.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/Serializations/SerializationArray.h>
namespace DB
{
class DataTypeArray final : public DataTypeWithSimpleSerialization
class DataTypeArray final : public IDataType
{
private:
/// The type of array elements.
@ -35,56 +36,6 @@ public:
return false;
}
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
void deserializeBinary(Field & field, ReadBuffer & istr) const override;
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override;
void deserializeBinary(IColumn & column, ReadBuffer & istr) const override;
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
/** Streaming serialization of arrays is arranged in a special way:
* - elements placed in a row are written/read without array sizes;
* - the sizes are written/read in a separate stream.
* This is necessary because, when implementing nested structures, several arrays can have common sizes.
*/
void enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const override;
void serializeBinaryBulkStatePrefixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkStateSuffixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void deserializeBinaryBulkStatePrefixImpl(
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkWithMultipleStreamsImpl(
const IColumn & column,
size_t offset,
size_t limit,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void deserializeBinaryBulkWithMultipleStreamsImpl(
IColumn & column,
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * cache) const override;
MutableColumnPtr createColumn() const override;
Field getDefault() const override;
@ -105,6 +56,10 @@ public:
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
SerializationPtr getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override;
SerializationPtr doGetDefaultSerialization() const override;
const DataTypePtr & getNestedType() const { return nested; }
@ -114,6 +69,8 @@ public:
private:
ColumnPtr getSubcolumnImpl(const String & subcolumn_name, const IColumn & column, size_t level) const;
DataTypePtr tryGetSubcolumnTypeImpl(const String & subcolumn_name, size_t level) const;
SerializationPtr getSubcolumnSerializationImpl(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter, size_t level) const;
};
}

View File

@ -3,7 +3,7 @@
#include <memory>
#include <cstddef>
#include <Core/Types.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/Serializations/ISerialization.h>
namespace DB
{
@ -24,106 +24,20 @@ public:
virtual String getName() const = 0;
};
class IDataTypeCustomTextSerialization
{
public:
virtual ~IDataTypeCustomTextSerialization() {}
/** Text serialization for displaying on a terminal or saving into a text file, and the like.
* Without escaping or quoting.
*/
virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
/** Text deserialization without quoting or escaping.
*/
virtual void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
/** Text serialization with escaping but without quoting.
*/
virtual void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
virtual void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
/** Text serialization as a literal that may be inserted into a query.
*/
virtual void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
virtual void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
/** Text serialization for the CSV format.
*/
virtual void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
virtual void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
/** Text serialization intended for using in JSON format.
*/
virtual void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
virtual void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
/** Text serialization for putting into the XML format.
*/
virtual void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const = 0;
};
/** Allows customizing an existing data type by a representation with custom substreams.
* A customized data type will be serialized/deserialized to files with different names than the base type,
* but the binary and text representation will be unchanged.
* E.g. it can be used for reading single subcolumns of complex types.
*/
class IDataTypeCustomStreams
{
public:
virtual ~IDataTypeCustomStreams() = default;
virtual void enumerateStreams(
const IDataType::StreamCallback & callback,
IDataType::SubstreamPath & path) const = 0;
virtual void serializeBinaryBulkStatePrefix(
IDataType::SerializeBinaryBulkSettings & settings,
IDataType::SerializeBinaryBulkStatePtr & state) const = 0;
virtual void serializeBinaryBulkStateSuffix(
IDataType::SerializeBinaryBulkSettings & settings,
IDataType::SerializeBinaryBulkStatePtr & state) const = 0;
virtual void deserializeBinaryBulkStatePrefix(
IDataType::DeserializeBinaryBulkSettings & settings,
IDataType::DeserializeBinaryBulkStatePtr & state) const = 0;
virtual void serializeBinaryBulkWithMultipleStreams(
const IColumn & column,
size_t offset,
size_t limit,
IDataType::SerializeBinaryBulkSettings & settings,
IDataType::SerializeBinaryBulkStatePtr & state) const = 0;
virtual void deserializeBinaryBulkWithMultipleStreams(
ColumnPtr & column,
size_t limit,
IDataType::DeserializeBinaryBulkSettings & settings,
IDataType::DeserializeBinaryBulkStatePtr & state,
IDataType::SubstreamsCache * cache) const = 0;
};
using DataTypeCustomNamePtr = std::unique_ptr<const IDataTypeCustomName>;
using DataTypeCustomTextSerializationPtr = std::unique_ptr<const IDataTypeCustomTextSerialization>;
using DataTypeCustomStreamsPtr = std::unique_ptr<const IDataTypeCustomStreams>;
/** Describe a data type customization
*/
struct DataTypeCustomDesc
{
DataTypeCustomNamePtr name;
DataTypeCustomTextSerializationPtr text_serialization;
DataTypeCustomStreamsPtr streams;
SerializationPtr serialization;
DataTypeCustomDesc(
DataTypeCustomNamePtr name_,
DataTypeCustomTextSerializationPtr text_serialization_ = nullptr,
DataTypeCustomStreamsPtr streams_ = nullptr)
SerializationPtr serialization_ = nullptr)
: name(std::move(name_))
, text_serialization(std::move(text_serialization_))
, streams(std::move(streams_)) {}
, serialization(std::move(serialization_)) {}
};
using DataTypeCustomDescPtr = std::unique_ptr<DataTypeCustomDesc>;

View File

@ -1,10 +1,6 @@
#include <DataTypes/DataTypeCustomGeo.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeCustom.h>
#include <DataTypes/DataTypeCustomSimpleTextSerialization.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
@ -12,102 +8,20 @@
namespace DB
{
namespace
{
const auto point_data_type = std::make_shared<const DataTypeTuple>(
DataTypes{std::make_shared<const DataTypeFloat64>(), std::make_shared<const DataTypeFloat64>()}
);
const auto ring_data_type = std::make_shared<const DataTypeArray>(DataTypeCustomPointSerialization::nestedDataType());
const auto polygon_data_type = std::make_shared<const DataTypeArray>(DataTypeCustomRingSerialization::nestedDataType());
const auto multipolygon_data_type = std::make_shared<const DataTypeArray>(DataTypeCustomPolygonSerialization::nestedDataType());
}
void DataTypeCustomPointSerialization::serializeText(
const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
nestedDataType()->serializeAsText(column, row_num, ostr, settings);
}
void DataTypeCustomPointSerialization::deserializeText(
IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
nestedDataType()->deserializeAsWholeText(column, istr, settings);
}
DataTypePtr DataTypeCustomPointSerialization::nestedDataType()
{
return point_data_type;
}
void DataTypeCustomRingSerialization::serializeText(
const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
nestedDataType()->serializeAsText(column, row_num, ostr, settings);
}
void DataTypeCustomRingSerialization::deserializeText(
IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
nestedDataType()->deserializeAsWholeText(column, istr, settings);
}
DataTypePtr DataTypeCustomRingSerialization::nestedDataType()
{
return ring_data_type;
}
void DataTypeCustomPolygonSerialization::serializeText(
const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
nestedDataType()->serializeAsText(column, row_num, ostr, settings);
}
void DataTypeCustomPolygonSerialization::deserializeText(
IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
nestedDataType()->deserializeAsWholeText(column, istr, settings);
}
DataTypePtr DataTypeCustomPolygonSerialization::nestedDataType()
{
return polygon_data_type;
}
void DataTypeCustomMultiPolygonSerialization::serializeText(
const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
nestedDataType()->serializeAsText(column, row_num, ostr, settings);
}
void DataTypeCustomMultiPolygonSerialization::deserializeText(
IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
nestedDataType()->deserializeAsWholeText(column, istr, settings);
}
DataTypePtr DataTypeCustomMultiPolygonSerialization::nestedDataType()
{
return multipolygon_data_type;
}
void registerDataTypeDomainGeo(DataTypeFactory & factory)
{
// Custom type for point represented as its coordinates stored as Tuple(Float64, Float64)
factory.registerSimpleDataTypeCustom("Point", []
{
return std::make_pair(DataTypeFactory::instance().get("Tuple(Float64, Float64)"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeCustomFixedName>("Point"), std::make_unique<DataTypeCustomPointSerialization>()));
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypePointName>()));
});
// Custom type for simple polygon without holes stored as Array(Point)
factory.registerSimpleDataTypeCustom("Ring", []
{
return std::make_pair(DataTypeFactory::instance().get("Array(Point)"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeCustomFixedName>("Ring"), std::make_unique<DataTypeCustomRingSerialization>()));
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeRingName>()));
});
// Custom type for polygon with holes stored as Array(Ring)
@ -115,14 +29,14 @@ void registerDataTypeDomainGeo(DataTypeFactory & factory)
factory.registerSimpleDataTypeCustom("Polygon", []
{
return std::make_pair(DataTypeFactory::instance().get("Array(Ring)"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeCustomFixedName>("Polygon"), std::make_unique<DataTypeCustomPolygonSerialization>()));
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypePolygonName>()));
});
// Custom type for multiple polygons with holes stored as Array(Polygon)
factory.registerSimpleDataTypeCustom("MultiPolygon", []
{
return std::make_pair(DataTypeFactory::instance().get("Array(Polygon)"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeCustomFixedName>("MultiPolygon"), std::make_unique<DataTypeCustomMultiPolygonSerialization>()));
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeMultiPolygonName>()));
});
}

View File

@ -1,56 +1,32 @@
#pragma once
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeCustom.h>
#include <DataTypes/DataTypeCustomSimpleTextSerialization.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
namespace DB
{
class DataTypeCustomPointSerialization : public DataTypeCustomSimpleTextSerialization
class DataTypePointName : public DataTypeCustomFixedName
{
public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
static DataTypePtr nestedDataType();
DataTypePointName() : DataTypeCustomFixedName("Point") {}
};
class DataTypeCustomRingSerialization : public DataTypeCustomSimpleTextSerialization
class DataTypeRingName : public DataTypeCustomFixedName
{
public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
static DataTypePtr nestedDataType();
DataTypeRingName() : DataTypeCustomFixedName("Ring") {}
};
class DataTypeCustomPolygonSerialization : public DataTypeCustomSimpleTextSerialization
class DataTypePolygonName : public DataTypeCustomFixedName
{
public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
static DataTypePtr nestedDataType();
DataTypePolygonName() : DataTypeCustomFixedName("Polygon") {}
};
class DataTypeCustomMultiPolygonSerialization : public DataTypeCustomSimpleTextSerialization
class DataTypeMultiPolygonName : public DataTypeCustomFixedName
{
public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
static DataTypePtr nestedDataType();
DataTypeMultiPolygonName() : DataTypeCustomFixedName("MultiPolygon") {}
};
}

View File

@ -1,115 +1,24 @@
#include <Columns/ColumnsNumber.h>
#include <Common/Exception.h>
#include <Common/formatIPv6.h>
#include <DataTypes/DataTypeCustomSimpleTextSerialization.h>
#include <DataTypes/Serializations/SerializationIP.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeCustom.h>
#include <Functions/FunctionsCoding.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING;
}
namespace
{
class DataTypeCustomIPv4Serialization : public DataTypeCustomSimpleTextSerialization
{
public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override
{
const auto * col = checkAndGetColumn<ColumnUInt32>(&column);
if (!col)
{
throw Exception("IPv4 type can only serialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
char buffer[IPV4_MAX_TEXT_LENGTH + 1] = {'\0'};
char * ptr = buffer;
formatIPv4(reinterpret_cast<const unsigned char *>(&col->getData()[row_num]), ptr);
ostr.write(buffer, strlen(buffer));
}
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override
{
ColumnUInt32 * col = typeid_cast<ColumnUInt32 *>(&column);
if (!col)
{
throw Exception("IPv4 type can only deserialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
char buffer[IPV4_MAX_TEXT_LENGTH + 1] = {'\0'};
istr.read(buffer, sizeof(buffer) - 1);
UInt32 ipv4_value = 0;
if (!parseIPv4(buffer, reinterpret_cast<unsigned char *>(&ipv4_value)))
{
throw Exception("Invalid IPv4 value.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
}
col->insert(ipv4_value);
}
};
class DataTypeCustomIPv6Serialization : public DataTypeCustomSimpleTextSerialization
{
public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override
{
const auto * col = checkAndGetColumn<ColumnFixedString>(&column);
if (!col)
{
throw Exception("IPv6 type domain can only serialize columns of type FixedString(16)." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
char buffer[IPV6_MAX_TEXT_LENGTH + 1] = {'\0'};
char * ptr = buffer;
formatIPv6(reinterpret_cast<const unsigned char *>(col->getDataAt(row_num).data), ptr);
ostr.write(buffer, strlen(buffer));
}
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override
{
ColumnFixedString * col = typeid_cast<ColumnFixedString *>(&column);
if (!col)
{
throw Exception("IPv6 type domain can only deserialize columns of type FixedString(16)." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
char buffer[IPV6_MAX_TEXT_LENGTH + 1] = {'\0'};
istr.read(buffer, sizeof(buffer) - 1);
std::string ipv6_value(IPV6_BINARY_LENGTH, '\0');
if (!parseIPv6(buffer, reinterpret_cast<unsigned char *>(ipv6_value.data())))
{
throw Exception("Invalid IPv6 value.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
}
col->insertString(ipv6_value);
}
};
}
void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory)
{
factory.registerSimpleDataTypeCustom("IPv4", []
{
return std::make_pair(DataTypeFactory::instance().get("UInt32"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeCustomFixedName>("IPv4"), std::make_unique<DataTypeCustomIPv4Serialization>()));
auto type = DataTypeFactory::instance().get("UInt32");
return std::make_pair(type, std::make_unique<DataTypeCustomDesc>(
std::make_unique<DataTypeCustomFixedName>("IPv4"), std::make_unique<SerializationIPv4>(type->getDefaultSerialization())));
});
factory.registerSimpleDataTypeCustom("IPv6", []
{
return std::make_pair(DataTypeFactory::instance().get("FixedString(16)"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeCustomFixedName>("IPv6"), std::make_unique<DataTypeCustomIPv6Serialization>()));
auto type = DataTypeFactory::instance().get("FixedString(16)");
return std::make_pair(type, std::make_unique<DataTypeCustomDesc>(
std::make_unique<DataTypeCustomFixedName>("IPv6"), std::make_unique<SerializationIPv6>(type->getDefaultSerialization())));
});
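A self-contained sketch of the IPv4 text mapping that SerializationIPv4 provides over the UInt32 storage, assuming octets are packed most-significant-first (so 127.0.0.1 is 0x7F000001); the real code uses formatIPv4/parseIPv4 with fixed-size buffers:

#include <cstdint>
#include <string>

/// UInt32 -> dotted-quad text, highest octet first.
std::string ipv4ToText(uint32_t value)
{
    return std::to_string((value >> 24) & 0xFF) + '.'
         + std::to_string((value >> 16) & 0xFF) + '.'
         + std::to_string((value >> 8) & 0xFF) + '.'
         + std::to_string(value & 0xFF);
}

/// ipv4ToText(0x7F000001) == "127.0.0.1"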
/// MySQL, MariaDB

View File

@ -1,91 +0,0 @@
#include <DataTypes/DataTypeCustomSimpleTextSerialization.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
namespace
{
using namespace DB;
String serializeToString(const DataTypeCustomSimpleTextSerialization & domain, const IColumn & column, size_t row_num, const FormatSettings & settings)
{
WriteBufferFromOwnString buffer;
domain.serializeText(column, row_num, buffer, settings);
return buffer.str();
}
void deserializeFromString(const DataTypeCustomSimpleTextSerialization & domain, IColumn & column, const String & s, const FormatSettings & settings)
{
ReadBufferFromString istr(s);
domain.deserializeText(column, istr, settings);
}
}
namespace DB
{
void DataTypeCustomSimpleTextSerialization::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String str;
readString(str, istr);
deserializeFromString(*this, column, str, settings);
}
void DataTypeCustomSimpleTextSerialization::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeEscapedString(serializeToString(*this, column, row_num, settings), ostr);
}
void DataTypeCustomSimpleTextSerialization::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String str;
readEscapedString(str, istr);
deserializeFromString(*this, column, str, settings);
}
void DataTypeCustomSimpleTextSerialization::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeQuotedString(serializeToString(*this, column, row_num, settings), ostr);
}
void DataTypeCustomSimpleTextSerialization::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String str;
readQuotedString(str, istr);
deserializeFromString(*this, column, str, settings);
}
void DataTypeCustomSimpleTextSerialization::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeCSVString(serializeToString(*this, column, row_num, settings), ostr);
}
void DataTypeCustomSimpleTextSerialization::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String str;
readCSVString(str, istr, settings.csv);
deserializeFromString(*this, column, str, settings);
}
void DataTypeCustomSimpleTextSerialization::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeJSONString(serializeToString(*this, column, row_num, settings), ostr, settings);
}
void DataTypeCustomSimpleTextSerialization::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String str;
readJSONString(str, istr);
deserializeFromString(*this, column, str, settings);
}
void DataTypeCustomSimpleTextSerialization::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeXMLStringForTextElement(serializeToString(*this, column, row_num, settings), ostr);
}
}

View File

@ -1,18 +0,0 @@
#pragma once
#include <memory>
namespace DB
{
class IDataTypeCustomName;
class IDataTypeCustomTextSerialization;
class IDataTypeCustomStreams;
struct DataTypeCustomDesc;
using DataTypeCustomNamePtr = std::unique_ptr<const IDataTypeCustomName>;
using DataTypeCustomTextSerializationPtr = std::unique_ptr<const IDataTypeCustomTextSerialization>;
using DataTypeCustomStreamsPtr = std::unique_ptr<const IDataTypeCustomStreams>;
using DataTypeCustomDescPtr = std::unique_ptr<DataTypeCustomDesc>;
}

View File

@ -3,6 +3,7 @@
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/Serializations/SerializationDate.h>
#include <DataTypes/DataTypeFactory.h>
#include <Common/assert_cast.h>
@ -11,79 +12,15 @@
namespace DB
{
void DataTypeDate::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeDateText(DayNum(assert_cast<const ColumnUInt16 &>(column).getData()[row_num]), ostr);
}
void DataTypeDate::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextEscaped(column, istr, settings);
}
void DataTypeDate::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
DayNum x;
readDateText(x, istr);
assert_cast<ColumnUInt16 &>(column).getData().push_back(x);
}
void DataTypeDate::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeText(column, row_num, ostr, settings);
}
void DataTypeDate::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('\'', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('\'', ostr);
}
void DataTypeDate::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
DayNum x;
assertChar('\'', istr);
readDateText(x, istr);
assertChar('\'', istr);
assert_cast<ColumnUInt16 &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
}
void DataTypeDate::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('"', ostr);
}
void DataTypeDate::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
DayNum x;
assertChar('"', istr);
readDateText(x, istr);
assertChar('"', istr);
assert_cast<ColumnUInt16 &>(column).getData().push_back(x);
}
void DataTypeDate::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('"', ostr);
}
void DataTypeDate::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
LocalDate value;
readCSV(value, istr);
assert_cast<ColumnUInt16 &>(column).getData().push_back(value.getDayNum());
}
bool DataTypeDate::equals(const IDataType & rhs) const
{
return typeid(rhs) == typeid(*this);
}
SerializationPtr DataTypeDate::doGetDefaultSerialization() const
{
return std::make_shared<SerializationDate>();
}
void registerDataTypeDate(DataTypeFactory & factory)
{

View File

@ -14,21 +14,13 @@ public:
TypeIndex getTypeId() const override { return TypeIndex::Date; }
const char * getFamilyName() const override { return family_name; }
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool canBeUsedAsVersion() const override { return true; }
bool canBeInsideNullable() const override { return true; }
bool equals(const IDataType & rhs) const override;
protected:
SerializationPtr doGetDefaultSerialization() const override;
};
}

View File

@ -1,4 +1,5 @@
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/Serializations/SerializationDateTime.h>
#include <Columns/ColumnVector.h>
#include <Common/assert_cast.h>
@ -15,26 +16,6 @@
namespace DB
{
namespace
{
inline void readTextHelper(
time_t & x, ReadBuffer & istr, const FormatSettings & settings, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone)
{
switch (settings.date_time_input_format)
{
case FormatSettings::DateTimeInputFormat::Basic:
readDateTimeText(x, istr, time_zone);
return;
case FormatSettings::DateTimeInputFormat::BestEffort:
parseDateTimeBestEffort(x, istr, time_zone, utc_time_zone);
return;
}
}
}
TimezoneMixin::TimezoneMixin(const String & time_zone_name)
: has_explicit_time_zone(!time_zone_name.empty()),
time_zone(DateLUT::instance(time_zone_name)),
@ -62,124 +43,6 @@ String DataTypeDateTime::doGetName() const
return out.str();
}
void DataTypeDateTime::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
auto value = assert_cast<const ColumnType &>(column).getData()[row_num];
switch (settings.date_time_output_format)
{
case FormatSettings::DateTimeOutputFormat::Simple:
writeDateTimeText(value, ostr, time_zone);
return;
case FormatSettings::DateTimeOutputFormat::UnixTimestamp:
writeIntText(value, ostr);
return;
case FormatSettings::DateTimeOutputFormat::ISO:
writeDateTimeTextISO(value, ostr, utc_time_zone);
return;
}
}
void DataTypeDateTime::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeText(column, row_num, ostr, settings);
}
void DataTypeDateTime::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextEscaped(column, istr, settings);
}
void DataTypeDateTime::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
time_t x = 0;
readTextHelper(x, istr, settings, time_zone, utc_time_zone);
if (x < 0)
x = 0;
assert_cast<ColumnType &>(column).getData().push_back(x);
}
void DataTypeDateTime::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('\'', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('\'', ostr);
}
void DataTypeDateTime::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
time_t x = 0;
if (checkChar('\'', istr)) /// Cases: '2017-08-31 18:36:48' or '1504193808'
{
readTextHelper(x, istr, settings, time_zone, utc_time_zone);
assertChar('\'', istr);
}
else /// Just 1504193808 or 01504193808
{
readIntText(x, istr);
}
if (x < 0)
x = 0;
assert_cast<ColumnType &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
}
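/// A minimal standalone sketch of the "quoted value or bare integer" dispatch
/// used by deserializeTextQuoted above. parseQuotedOrBareTimestamp is a
/// hypothetical name; the real code works on ReadBuffer and accepts a full
/// datetime text inside the quotes, while this sketch handles only the
/// numeric form.
#include <cstdint>
#include <stdexcept>
#include <string>

int64_t parseQuotedOrBareTimestamp(const std::string & s)
{
    if (!s.empty() && s.front() == '\'') /// Cases: '2017-08-31 18:36:48' or '1504193808'
    {
        if (s.size() < 2 || s.back() != '\'')
            throw std::runtime_error("Unterminated quote");
        return std::stoll(s.substr(1, s.size() - 2)); /// Parse the part between the quotes.
    }
    return std::stoll(s); /// Just 1504193808 or 01504193808
}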
void DataTypeDateTime::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('"', ostr);
}
void DataTypeDateTime::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
time_t x = 0;
if (checkChar('"', istr))
{
readTextHelper(x, istr, settings, time_zone, utc_time_zone);
assertChar('"', istr);
}
else
{
readIntText(x, istr);
}
if (x < 0)
x = 0;
assert_cast<ColumnType &>(column).getData().push_back(x);
}
void DataTypeDateTime::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('"', ostr);
}
void DataTypeDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
time_t x = 0;
if (istr.eof())
throwReadAfterEOF();
char maybe_quote = *istr.position();
if (maybe_quote == '\'' || maybe_quote == '\"')
++istr.position();
readTextHelper(x, istr, settings, time_zone, utc_time_zone);
if (maybe_quote == '\'' || maybe_quote == '\"')
assertChar(maybe_quote, istr);
if (x < 0)
x = 0;
assert_cast<ColumnType &>(column).getData().push_back(x);
}
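/// A standalone sketch of the optional-quote handling in deserializeTextCSV
/// above: peek one character, skip a leading quote if present, parse the
/// value, then require the matching closing quote. parseCsvMaybeQuoted is a
/// hypothetical helper; the real code parses a full datetime here.
#include <sstream>
#include <stdexcept>

long parseCsvMaybeQuoted(std::istringstream & in)
{
    char maybe_quote = static_cast<char>(in.peek());
    if (maybe_quote == '\'' || maybe_quote == '"')
        in.get(); /// Skip the opening quote.
    long x = 0;
    in >> x;
    if (maybe_quote == '\'' || maybe_quote == '"')
        if (in.get() != maybe_quote)
            throw std::runtime_error("Mismatched quote in CSV field");
    return x;
}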
bool DataTypeDateTime::equals(const IDataType & rhs) const
{
/// DateTime with different timezones are equal, because:
@ -187,4 +50,9 @@ bool DataTypeDateTime::equals(const IDataType & rhs) const
return typeid(rhs) == typeid(*this);
}
SerializationPtr DataTypeDateTime::doGetDefaultSerialization() const
{
return std::make_shared<SerializationDateTime>(time_zone, utc_time_zone);
}
}

View File

@ -58,21 +58,12 @@ public:
String doGetName() const override;
TypeIndex getTypeId() const override { return TypeIndex::DateTime; }
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool canBeUsedAsVersion() const override { return true; }
bool canBeInsideNullable() const override { return true; }
bool equals(const IDataType & rhs) const override;
SerializationPtr doGetDefaultSerialization() const override;
};
}

View File

@ -1,4 +1,5 @@
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/Serializations/SerializationDateTime64.h>
#include <Columns/ColumnVector.h>
#include <Common/assert_cast.h>
@ -55,131 +56,6 @@ std::string DataTypeDateTime64::doGetName() const
return out.str();
}
void DataTypeDateTime64::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
auto value = assert_cast<const ColumnType &>(column).getData()[row_num];
switch (settings.date_time_output_format)
{
case FormatSettings::DateTimeOutputFormat::Simple:
writeDateTimeText(value, scale, ostr, time_zone);
return;
case FormatSettings::DateTimeOutputFormat::UnixTimestamp:
writeDateTimeUnixTimestamp(value, scale, ostr);
return;
case FormatSettings::DateTimeOutputFormat::ISO:
writeDateTimeTextISO(value, scale, ostr, utc_time_zone);
return;
}
}
void DataTypeDateTime64::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
DateTime64 result = 0;
readDateTime64Text(result, this->getScale(), istr, time_zone);
assert_cast<ColumnType &>(column).getData().push_back(result);
}
void DataTypeDateTime64::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextEscaped(column, istr, settings);
}
void DataTypeDateTime64::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeText(column, row_num, ostr, settings);
}
static inline void readText(DateTime64 & x, UInt32 scale, ReadBuffer & istr, const FormatSettings & settings, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone)
{
switch (settings.date_time_input_format)
{
case FormatSettings::DateTimeInputFormat::Basic:
readDateTime64Text(x, scale, istr, time_zone);
return;
case FormatSettings::DateTimeInputFormat::BestEffort:
parseDateTime64BestEffort(x, scale, istr, time_zone, utc_time_zone);
return;
}
}
void DataTypeDateTime64::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
DateTime64 x = 0;
readText(x, scale, istr, settings, time_zone, utc_time_zone);
assert_cast<ColumnType &>(column).getData().push_back(x);
}
void DataTypeDateTime64::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('\'', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('\'', ostr);
}
void DataTypeDateTime64::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
DateTime64 x = 0;
if (checkChar('\'', istr)) /// Cases: '2017-08-31 18:36:48' or '1504193808'
{
readText(x, scale, istr, settings, time_zone, utc_time_zone);
assertChar('\'', istr);
}
else /// Just 1504193808 or 01504193808
{
readIntText(x, istr);
}
assert_cast<ColumnType &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
}
void DataTypeDateTime64::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('"', ostr);
}
void DataTypeDateTime64::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
DateTime64 x = 0;
if (checkChar('"', istr))
{
readText(x, scale, istr, settings, time_zone, utc_time_zone);
assertChar('"', istr);
}
else
{
readIntText(x, istr);
}
assert_cast<ColumnType &>(column).getData().push_back(x);
}
void DataTypeDateTime64::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('"', ostr);
}
void DataTypeDateTime64::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
DateTime64 x = 0;
if (istr.eof())
throwReadAfterEOF();
char maybe_quote = *istr.position();
if (maybe_quote == '\'' || maybe_quote == '\"')
++istr.position();
readText(x, scale, istr, settings, time_zone, utc_time_zone);
if (maybe_quote == '\'' || maybe_quote == '\"')
assertChar(maybe_quote, istr);
assert_cast<ColumnType &>(column).getData().push_back(x);
}
bool DataTypeDateTime64::equals(const IDataType & rhs) const
{
if (const auto * ptype = typeid_cast<const DataTypeDateTime64 *>(&rhs))
@ -187,4 +63,9 @@ bool DataTypeDateTime64::equals(const IDataType & rhs) const
return false;
}
SerializationPtr DataTypeDateTime64::doGetDefaultSerialization() const
{
return std::make_shared<SerializationDateTime64>(time_zone, utc_time_zone, scale);
}
}

View File

@ -31,21 +31,12 @@ public:
std::string doGetName() const override;
TypeIndex getTypeId() const override { return type_id; }
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool equals(const IDataType & rhs) const override;
bool canBePromoted() const override { return false; }
protected:
SerializationPtr doGetDefaultSerialization() const override;
};
}

View File

@ -35,59 +35,6 @@ MutableColumnPtr DataTypeDecimalBase<T>::createColumn() const
return ColumnType::create(0, scale);
}
template <typename T>
void DataTypeDecimalBase<T>::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
FieldType x = get<DecimalField<T>>(field);
writeBinary(x, ostr);
}
template <typename T>
void DataTypeDecimalBase<T>::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
{
const FieldType & x = assert_cast<const ColumnType &>(column).getElement(row_num);
writeBinary(x, ostr);
}
template <typename T>
void DataTypeDecimalBase<T>::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
{
const typename ColumnType::Container & x = typeid_cast<const ColumnType &>(column).getData();
size_t size = x.size();
if (limit == 0 || offset + limit > size)
limit = size - offset;
ostr.write(reinterpret_cast<const char *>(&x[offset]), sizeof(FieldType) * limit);
}
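/// The offset/limit clamp used by the bulk serializers above, extracted into
/// a tiny sketch (effectiveLimit is a hypothetical name). limit == 0 means
/// "to the end of the column"; offset is assumed to be <= size.
#include <cstddef>

size_t effectiveLimit(size_t size, size_t offset, size_t limit)
{
    if (limit == 0 || offset + limit > size)
        limit = size - offset;
    return limit;
}

/// effectiveLimit(10, 2, 0) == 8  (rest of the column)
/// effectiveLimit(10, 2, 5) == 5  (fits as-is)
/// effectiveLimit(10, 8, 5) == 2  (clamped to the tail)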
template <typename T>
void DataTypeDecimalBase<T>::deserializeBinary(Field & field, ReadBuffer & istr) const
{
typename FieldType::NativeType x;
readBinary(x, istr);
field = DecimalField(T(x), this->scale);
}
template <typename T>
void DataTypeDecimalBase<T>::deserializeBinary(IColumn & column, ReadBuffer & istr) const
{
typename FieldType::NativeType x;
readBinary(x, istr);
assert_cast<ColumnType &>(column).getData().push_back(FieldType(x));
}
template <typename T>
void DataTypeDecimalBase<T>::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double) const
{
typename ColumnType::Container & x = typeid_cast<ColumnType &>(column).getData();
size_t initial_size = x.size();
x.resize(initial_size + limit);
size_t size = istr.readBig(reinterpret_cast<char*>(&x[initial_size]), sizeof(FieldType) * limit);
x.resize(initial_size + size / sizeof(FieldType));
}
template <typename T>
T DataTypeDecimalBase<T>::getScaleMultiplier(UInt32 scale_)
{

View File

@ -5,7 +5,6 @@
#include <Core/DecimalFunctions.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeWithSimpleSerialization.h>
#include <type_traits>
@ -55,7 +54,7 @@ inline UInt32 leastDecimalPrecisionFor(TypeIndex int_type)
/// P is one of (9, 18, 38, 76); equals the maximum precision for the biggest underlying type of operands.
/// S is the maximum scale of operands. The allowed values are [0, precision].
template <typename T>
class DataTypeDecimalBase : public DataTypeWithSimpleSerialization
class DataTypeDecimalBase : public IDataType
{
static_assert(IsDecimalNumber<T>);
@ -96,14 +95,6 @@ public:
bool canBeUsedInBooleanContext() const override { return true; }
bool canBeInsideNullable() const override { return true; }
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override;
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
void deserializeBinary(Field & field, ReadBuffer & istr) const override;
void deserializeBinary(IColumn & column, ReadBuffer & istr) const override;
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
/// Decimal specific
UInt32 getPrecision() const { return precision; }

View File

@ -1,6 +1,7 @@
#include <IO/WriteBufferFromString.h>
#include <Formats/FormatSettings.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/Serializations/SerializationEnum.h>
#include <DataTypes/DataTypeFactory.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTFunction.h>
@ -19,7 +20,6 @@ namespace DB
namespace ErrorCodes
{
extern const int BAD_TYPE_OF_FIELD;
extern const int SYNTAX_ERROR;
extern const int EMPTY_DATA_PASSED;
extern const int UNEXPECTED_AST_STRUCTURE;
extern const int ARGUMENT_OUT_OF_BOUND;
@ -65,203 +65,22 @@ std::string DataTypeEnum<Type>::generateName(const Values & values)
}
template <typename Type>
void DataTypeEnum<Type>::fillMaps()
DataTypeEnum<Type>::DataTypeEnum(const Values & values_)
: EnumValues<Type>(values_)
, type_name(generateName(this->getValues()))
{
for (const auto & name_and_value : values)
{
const auto inserted_value = name_to_value_map.insert(
{ StringRef{name_and_value.first}, name_and_value.second });
if (!inserted_value.second)
throw Exception{"Duplicate names in enum: '" + name_and_value.first + "' = " + toString(name_and_value.second)
+ " and " + toString(inserted_value.first->getMapped()),
ErrorCodes::SYNTAX_ERROR};
const auto inserted_name = value_to_name_map.insert(
{ name_and_value.second, StringRef{name_and_value.first} });
if (!inserted_name.second)
throw Exception{"Duplicate values in enum: '" + name_and_value.first + "' = " + toString(name_and_value.second)
+ " and '" + toString((*inserted_name.first).first) + "'",
ErrorCodes::SYNTAX_ERROR};
}
}
template <typename Type>
DataTypeEnum<Type>::DataTypeEnum(const Values & values_) : values{values_}
{
if (values.empty())
throw Exception{"DataTypeEnum enumeration cannot be empty", ErrorCodes::EMPTY_DATA_PASSED};
std::sort(std::begin(values), std::end(values), [] (auto & left, auto & right)
{
return left.second < right.second;
});
fillMaps();
type_name = generateName(values);
}
template <typename Type>
void DataTypeEnum<Type>::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
const FieldType x = get<FieldType>(field);
writeBinary(x, ostr);
}
template <typename Type>
void DataTypeEnum<Type>::deserializeBinary(Field & field, ReadBuffer & istr) const
{
FieldType x;
readBinary(x, istr);
field = castToNearestFieldType(x);
}
template <typename Type>
void DataTypeEnum<Type>::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
{
writeBinary(assert_cast<const ColumnType &>(column).getData()[row_num], ostr);
}
template <typename Type>
void DataTypeEnum<Type>::deserializeBinary(IColumn & column, ReadBuffer & istr) const
{
typename ColumnType::ValueType x;
readBinary(x, istr);
assert_cast<ColumnType &>(column).getData().push_back(x);
}
template <typename Type>
void DataTypeEnum<Type>::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeString(getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
}
template <typename Type>
void DataTypeEnum<Type>::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeEscapedString(getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
}
template <typename Type>
void DataTypeEnum<Type>::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (settings.tsv.input_format_enum_as_number)
assert_cast<ColumnType &>(column).getData().push_back(readValue(istr));
else
{
/// NOTE It would be nice to do without creating a temporary object - at least extract std::string out.
std::string field_name;
readEscapedString(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name), true));
}
}
template <typename Type>
void DataTypeEnum<Type>::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeQuotedString(getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
}
template <typename Type>
void DataTypeEnum<Type>::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
std::string field_name;
readQuotedStringWithSQLStyle(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
}
template <typename Type>
void DataTypeEnum<Type>::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (settings.tsv.input_format_enum_as_number)
assert_cast<ColumnType &>(column).getData().push_back(readValue(istr));
else
{
std::string field_name;
readString(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name), true));
}
}
template <typename Type>
void DataTypeEnum<Type>::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeJSONString(getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr, settings);
}
template <typename Type>
void DataTypeEnum<Type>::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeXMLStringForTextElement(getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
}
template <typename Type>
void DataTypeEnum<Type>::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
if (!istr.eof() && *istr.position() != '"')
assert_cast<ColumnType &>(column).getData().push_back(readValue(istr));
else
{
std::string field_name;
readJSONString(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
}
}
template <typename Type>
void DataTypeEnum<Type>::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeCSVString(getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
}
template <typename Type>
void DataTypeEnum<Type>::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (settings.csv.input_format_enum_as_number)
assert_cast<ColumnType &>(column).getData().push_back(readValue(istr));
else
{
std::string field_name;
readCSVString(field_name, istr, settings.csv);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name), true));
}
}
template <typename Type>
void DataTypeEnum<Type>::serializeBinaryBulk(
const IColumn & column, WriteBuffer & ostr, const size_t offset, size_t limit) const
{
const auto & x = typeid_cast<const ColumnType &>(column).getData();
const auto size = x.size();
if (limit == 0 || offset + limit > size)
limit = size - offset;
ostr.write(reinterpret_cast<const char *>(&x[offset]), sizeof(FieldType) * limit);
}
template <typename Type>
void DataTypeEnum<Type>::deserializeBinaryBulk(
IColumn & column, ReadBuffer & istr, const size_t limit, const double /*avg_value_size_hint*/) const
{
auto & x = typeid_cast<ColumnType &>(column).getData();
const auto initial_size = x.size();
x.resize(initial_size + limit);
const auto size = istr.readBig(reinterpret_cast<char*>(&x[initial_size]), sizeof(FieldType) * limit);
x.resize(initial_size + size / sizeof(FieldType));
}
template <typename Type>
Field DataTypeEnum<Type>::getDefault() const
{
return values.front().second;
return this->getValues().front().second;
}
template <typename Type>
void DataTypeEnum<Type>::insertDefaultInto(IColumn & column) const
{
assert_cast<ColumnType &>(column).getData().push_back(values.front().second);
assert_cast<ColumnType &>(column).getData().push_back(this->getValues().front().second);
}
template <typename Type>
@ -274,7 +93,7 @@ bool DataTypeEnum<Type>::equals(const IDataType & rhs) const
template <typename Type>
bool DataTypeEnum<Type>::textCanContainOnlyValidUTF8() const
{
for (const auto & elem : values)
for (const auto & elem : this->getValues())
{
const char * pos = elem.first.data();
const char * end = pos + elem.first.size();
@ -305,14 +124,14 @@ Field DataTypeEnum<Type>::castToName(const Field & value_or_name) const
{
if (value_or_name.getType() == Field::Types::String)
{
getValue(value_or_name.get<String>()); /// Check correctness
this->getValue(value_or_name.get<String>()); /// Check correctness
return value_or_name.get<String>();
}
else if (value_or_name.getType() == Field::Types::Int64)
{
Int64 value = value_or_name.get<Int64>();
checkOverflow<Type>(value);
return getNameForValue(static_cast<Type>(value)).toString();
return this->getNameForValue(static_cast<Type>(value)).toString();
}
else
throw Exception(String("DataTypeEnum: Unsupported type of field ") + value_or_name.getTypeName(), ErrorCodes::BAD_TYPE_OF_FIELD);
@ -323,14 +142,14 @@ Field DataTypeEnum<Type>::castToValue(const Field & value_or_name) const
{
if (value_or_name.getType() == Field::Types::String)
{
return getValue(value_or_name.get<String>());
return this->getValue(value_or_name.get<String>());
}
else if (value_or_name.getType() == Field::Types::Int64
|| value_or_name.getType() == Field::Types::UInt64)
{
Int64 value = value_or_name.get<Int64>();
checkOverflow<Type>(value);
getNameForValue(static_cast<Type>(value)); /// Check correctness
this->getNameForValue(static_cast<Type>(value)); /// Check correctness
return value;
}
else
@ -341,25 +160,19 @@ Field DataTypeEnum<Type>::castToValue(const Field & value_or_name) const
template <typename Type>
bool DataTypeEnum<Type>::contains(const IDataType & rhs) const
{
auto check = [&](const auto & value)
{
auto it = name_to_value_map.find(value.first);
/// If we don't have this name, then we have to be sure
/// that this value exists in the enum
if (it == name_to_value_map.end())
return value_to_name_map.count(value.second) > 0;
/// If we have this name, then it should have the same value
return it->value.second == value.second;
};
if (const auto * rhs_enum8 = typeid_cast<const DataTypeEnum8 *>(&rhs))
return std::all_of(rhs_enum8->getValues().begin(), rhs_enum8->getValues().end(), check);
return this->containsAll(rhs_enum8->getValues());
if (const auto * rhs_enum16 = typeid_cast<const DataTypeEnum16 *>(&rhs))
return std::all_of(rhs_enum16->getValues().begin(), rhs_enum16->getValues().end(), check);
return this->containsAll(rhs_enum16->getValues());
return false;
}
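/// A standalone restatement of the compatibility check that containsAll
/// performs (same rule as the removed lambda above), over plain std::map.
/// For every (name, value) pair of the right-hand enum: a known name must
/// keep its value; an unknown name is allowed only if its value already
/// exists on the left. containsAllSketch is a hypothetical name.
#include <map>
#include <string>

bool containsAllSketch(const std::map<std::string, int> & lhs_name_to_value,
                       const std::map<int, std::string> & lhs_value_to_name,
                       const std::map<std::string, int> & rhs)
{
    for (const auto & [name, value] : rhs)
    {
        auto it = lhs_name_to_value.find(name);
        if (it == lhs_name_to_value.end())
        {
            if (lhs_value_to_name.count(value) == 0)
                return false; /// New name with a value we don't have.
        }
        else if (it->second != value)
            return false; /// Same name mapped to a different value.
    }
    return true;
}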
template <typename Type>
SerializationPtr DataTypeEnum<Type>::doGetDefaultSerialization() const
{
return std::make_shared<SerializationEnum<Type>>(this->getValues());
}
/// Explicit instantiations.
template class DataTypeEnum<Int8>;

View File

@ -1,6 +1,7 @@
#pragma once
#include <DataTypes/IDataType.h>
#include <DataTypes/EnumValues.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnConst.h>
#include <Common/HashTable/HashMap.h>
@ -11,12 +12,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
class IDataTypeEnum : public IDataType
{
public:
@ -36,102 +31,37 @@ public:
template <typename Type>
class DataTypeEnum final : public IDataTypeEnum
class DataTypeEnum final : public IDataTypeEnum, public EnumValues<Type>
{
public:
using FieldType = Type;
using ColumnType = ColumnVector<FieldType>;
using Value = std::pair<std::string, FieldType>;
using Values = std::vector<Value>;
using NameToValueMap = HashMap<StringRef, FieldType, StringRefHash>;
using ValueToNameMap = std::unordered_map<FieldType, StringRef>;
using typename EnumValues<Type>::Values;
static constexpr bool is_parametric = true;
private:
Values values;
NameToValueMap name_to_value_map;
ValueToNameMap value_to_name_map;
std::string type_name;
static std::string generateName(const Values & values);
void fillMaps();
public:
explicit DataTypeEnum(const Values & values_);
const Values & getValues() const { return values; }
std::string doGetName() const override { return type_name; }
const char * getFamilyName() const override;
TypeIndex getTypeId() const override { return sizeof(FieldType) == 1 ? TypeIndex::Enum8 : TypeIndex::Enum16; }
auto findByValue(const FieldType & value) const
{
const auto it = value_to_name_map.find(value);
if (it == std::end(value_to_name_map))
throw Exception{"Unexpected value " + toString(value) + " for type " + getName(), ErrorCodes::BAD_ARGUMENTS};
return it;
}
const StringRef & getNameForValue(const FieldType & value) const
{
return findByValue(value)->second;
}
FieldType getValue(StringRef field_name, bool try_treat_as_id = false) const
{
const auto it = name_to_value_map.find(field_name);
if (!it)
{
/// It is used in CSV and TSV input formats. If we fail to find the given string
/// among the enum names, we will try to treat it as an enum id.
if (try_treat_as_id)
{
FieldType x;
ReadBufferFromMemory tmp_buf(field_name.data, field_name.size);
readText(x, tmp_buf);
/// Check that we reached the end of tmp_buf (otherwise field_name is not a number)
/// and try to find it among the enum ids.
if (tmp_buf.eof() && value_to_name_map.find(x) != value_to_name_map.end())
return x;
}
throw Exception{"Unknown element '" + field_name.toString() + "' for type " + getName(), ErrorCodes::BAD_ARGUMENTS};
}
return it->getMapped();
}
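/// A standalone sketch of the name-or-id lookup above (enumValueSketch is a
/// hypothetical name). If the string is not a known enum name and
/// try_treat_as_id is set, it must parse fully as a number that is a known
/// enum id, mirroring the tmp_buf.eof() check.
#include <charconv>
#include <stdexcept>
#include <string>
#include <unordered_map>

int enumValueSketch(const std::unordered_map<std::string, int> & name_to_value,
                    const std::unordered_map<int, std::string> & value_to_name,
                    const std::string & field_name, bool try_treat_as_id)
{
    if (auto it = name_to_value.find(field_name); it != name_to_value.end())
        return it->second;
    if (try_treat_as_id)
    {
        int x = 0;
        auto [ptr, ec] = std::from_chars(field_name.data(), field_name.data() + field_name.size(), x);
        /// The whole string must be consumed, and the id must exist.
        if (ec == std::errc() && ptr == field_name.data() + field_name.size() && value_to_name.count(x))
            return x;
    }
    throw std::invalid_argument("Unknown element '" + field_name + "'");
}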
FieldType readValue(ReadBuffer & istr) const
{
FieldType x;
readText(x, istr);
return findByValue(x)->first;
return this->findByValue(x)->first;
}
Field castToName(const Field & value_or_name) const override;
Field castToValue(const Field & value_or_name) const override;
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
void deserializeBinary(Field & field, ReadBuffer & istr) const override;
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override;
void deserializeBinary(IColumn & column, ReadBuffer & istr) const override;
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, const size_t offset, size_t limit) const override;
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, const size_t limit, const double avg_value_size_hint) const override;
MutableColumnPtr createColumn() const override { return ColumnType::create(); }
Field getDefault() const override;
@ -147,6 +77,8 @@ public:
/// Enum('a' = 1, 'b' = 2) -> Enum('c' = 1, 'b' = 2, 'd' = 3) OK
/// Enum('a' = 1, 'b' = 2) -> Enum('a' = 2, 'b' = 1) NOT OK
bool contains(const IDataType & rhs) const;
SerializationPtr doGetDefaultSerialization() const override;
};

View File

@ -3,7 +3,7 @@
#include <DataTypes/IDataType.h>
#include <Parsers/IAST_fwd.h>
#include <Common/IFactoryWithAliases.h>
#include <DataTypes/DataTypeCustom_fwd.h>
#include <DataTypes/DataTypeCustom.h>
#include <functional>
@ -86,6 +86,5 @@ void registerDataTypeLowCardinality(DataTypeFactory & factory);
void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory);
void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory);
void registerDataTypeDomainGeo(DataTypeFactory & factory);
void registerDataTypeOneElementTuple(DataTypeFactory & factory);
}

View File

@ -4,6 +4,7 @@
#include <Formats/FormatSettings.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/Serializations/SerializationFixedString.h>
#include <IO/WriteBuffer.h>
#include <IO/ReadHelpers.h>
@ -22,10 +23,8 @@ namespace DB
namespace ErrorCodes
{
extern const int CANNOT_READ_ALL_DATA;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int UNEXPECTED_AST_STRUCTURE;
extern const int TOO_LARGE_STRING_SIZE;
}
@ -34,184 +33,6 @@ std::string DataTypeFixedString::doGetName() const
return "FixedString(" + toString(n) + ")";
}
void DataTypeFixedString::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
const String & s = get<const String &>(field);
ostr.write(s.data(), std::min(s.size(), n));
if (s.size() < n)
for (size_t i = s.size(); i < n; ++i)
ostr.write(0);
}
void DataTypeFixedString::deserializeBinary(Field & field, ReadBuffer & istr) const
{
field = String();
String & s = get<String &>(field);
s.resize(n);
istr.readStrict(s.data(), n);
}
void DataTypeFixedString::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
{
ostr.write(reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]), n);
}
void DataTypeFixedString::deserializeBinary(IColumn & column, ReadBuffer & istr) const
{
ColumnFixedString::Chars & data = assert_cast<ColumnFixedString &>(column).getChars();
size_t old_size = data.size();
data.resize(old_size + n);
try
{
istr.readStrict(reinterpret_cast<char *>(data.data() + old_size), n);
}
catch (...)
{
data.resize_assume_reserved(old_size);
throw;
}
}
void DataTypeFixedString::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
{
const ColumnFixedString::Chars & data = typeid_cast<const ColumnFixedString &>(column).getChars();
size_t size = data.size() / n;
if (limit == 0 || offset + limit > size)
limit = size - offset;
if (limit)
ostr.write(reinterpret_cast<const char *>(&data[n * offset]), n * limit);
}
void DataTypeFixedString::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const
{
ColumnFixedString::Chars & data = typeid_cast<ColumnFixedString &>(column).getChars();
size_t initial_size = data.size();
size_t max_bytes = limit * n;
data.resize(initial_size + max_bytes);
size_t read_bytes = istr.readBig(reinterpret_cast<char *>(&data[initial_size]), max_bytes);
if (read_bytes % n != 0)
throw Exception("Cannot read all data of type FixedString. Bytes read:" + toString(read_bytes) + ". String size:" + toString(n) + ".",
ErrorCodes::CANNOT_READ_ALL_DATA);
data.resize(initial_size + read_bytes);
}
void DataTypeFixedString::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeString(reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]), n, ostr);
}
void DataTypeFixedString::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]);
writeAnyEscapedString<'\''>(pos, pos + n, ostr);
}
void DataTypeFixedString::alignStringLength(PaddedPODArray<UInt8> & chars, size_t old_size) const
{
size_t length = chars.size() - old_size;
if (length < n)
{
chars.resize_fill(old_size + n);
}
else if (length > n)
{
chars.resize_assume_reserved(old_size);
throw Exception("Too large value for FixedString(" + std::to_string(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE);
}
}
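/// The FixedString(n) length rule enforced by alignStringLength, as a
/// standalone sketch over std::vector (alignToFixedSketch is a hypothetical
/// name; the real code uses resize_fill/resize_assume_reserved on a
/// PaddedPODArray). Shorter values are zero-padded to n bytes, longer values
/// are rolled back and rejected.
#include <cstdint>
#include <stdexcept>
#include <vector>

void alignToFixedSketch(std::vector<uint8_t> & chars, size_t old_size, size_t n)
{
    size_t length = chars.size() - old_size;
    if (length < n)
        chars.resize(old_size + n, 0); /// Zero-pad up to n.
    else if (length > n)
    {
        chars.resize(old_size); /// Roll back the partial insert...
        throw std::length_error("Too large value for FixedString"); /// ...then report.
    }
}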
template <typename Reader>
static inline void read(const DataTypeFixedString & self, IColumn & column, Reader && reader)
{
ColumnFixedString::Chars & data = typeid_cast<ColumnFixedString &>(column).getChars();
size_t prev_size = data.size();
try
{
reader(data);
self.alignStringLength(data, prev_size);
}
catch (...)
{
data.resize_assume_reserved(prev_size);
throw;
}
}
void DataTypeFixedString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
read(*this, column, [&istr](ColumnFixedString::Chars & data) { readEscapedStringInto(data, istr); });
}
void DataTypeFixedString::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]);
writeAnyQuotedString<'\''>(pos, pos + n, ostr);
}
void DataTypeFixedString::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
read(*this, column, [&istr](ColumnFixedString::Chars & data) { readQuotedStringInto<true>(data, istr); });
}
void DataTypeFixedString::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
read(*this, column, [&istr](ColumnFixedString::Chars & data) { readStringInto(data, istr); });
}
void DataTypeFixedString::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]);
writeJSONString(pos, pos + n, ostr, settings);
}
void DataTypeFixedString::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
read(*this, column, [&istr](ColumnFixedString::Chars & data) { readJSONStringInto(data, istr); });
}
void DataTypeFixedString::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]);
writeXMLStringForTextElement(pos, pos + n, ostr);
}
void DataTypeFixedString::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]);
writeCSVString(pos, pos + n, ostr);
}
void DataTypeFixedString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
read(*this, column, [&istr, &csv = settings.csv](ColumnFixedString::Chars & data) { readCSVStringInto(data, istr, csv); });
}
MutableColumnPtr DataTypeFixedString::createColumn() const
{
return ColumnFixedString::create(n);
@ -227,6 +48,11 @@ bool DataTypeFixedString::equals(const IDataType & rhs) const
return typeid(rhs) == typeid(*this) && n == static_cast<const DataTypeFixedString &>(rhs).n;
}
SerializationPtr DataTypeFixedString::doGetDefaultSerialization() const
{
return std::make_shared<SerializationFixedString>(n);
}
static DataTypePtr create(const ASTPtr & arguments)
{

View File

@ -41,38 +41,14 @@ public:
return n;
}
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
void deserializeBinary(Field & field, ReadBuffer & istr) const override;
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override;
void deserializeBinary(IColumn & column, ReadBuffer & istr) const override;
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
MutableColumnPtr createColumn() const override;
Field getDefault() const override;
bool equals(const IDataType & rhs) const override;
SerializationPtr doGetDefaultSerialization() const override;
bool isParametric() const override { return true; }
bool haveSubtypes() const override { return false; }
bool isComparable() const override { return true; }

View File

@ -13,6 +13,7 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/Serializations/SerializationLowCardinality.h>
#include <Parsers/IAST.h>
namespace DB
@ -25,19 +26,6 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
namespace
{
const ColumnLowCardinality & getColumnLowCardinality(const IColumn & column)
{
return typeid_cast<const ColumnLowCardinality &>(column);
}
ColumnLowCardinality & getColumnLowCardinality(IColumn & column)
{
return typeid_cast<ColumnLowCardinality &>(column);
}
}
DataTypeLowCardinality::DataTypeLowCardinality(DataTypePtr dictionary_type_)
: dictionary_type(std::move(dictionary_type_))
{
@ -50,785 +38,6 @@ DataTypeLowCardinality::DataTypeLowCardinality(DataTypePtr dictionary_type_)
+ dictionary_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
void DataTypeLowCardinality::enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const
{
path.push_back(Substream::DictionaryKeys);
dictionary_type->enumerateStreams(callback, path);
path.back() = Substream::DictionaryIndexes;
callback(path, *this);
path.pop_back();
}
struct KeysSerializationVersion
{
enum Value
{
/// Version is written at the start of <name.dict.bin>.
/// Dictionary is written as a number N followed by N keys.
/// Dictionary can be shared for a continuous range of granules, so some marks may point to the same position.
/// Shared dictionary is stored in the state and is read once.
SharedDictionariesWithAdditionalKeys = 1,
};
Value value;
static void checkVersion(UInt64 version)
{
if (version != SharedDictionariesWithAdditionalKeys)
throw Exception("Invalid version for DataTypeLowCardinality key column.", ErrorCodes::LOGICAL_ERROR);
}
explicit KeysSerializationVersion(UInt64 version) : value(static_cast<Value>(version)) { checkVersion(version); }
};
/// Version is stored at the start of each granule. It's used to store indexes type and flags.
struct IndexesSerializationType
{
using SerializationType = UInt64;
/// Need to read the dictionary if it hasn't been read yet.
static constexpr SerializationType NeedGlobalDictionaryBit = 1u << 8u;
/// Need to read additional keys. Additional keys are stored before indexes as a number N followed by N keys.
static constexpr SerializationType HasAdditionalKeysBit = 1u << 9u;
/// Need to update the dictionary. It means that the previous granule has a different dictionary.
static constexpr SerializationType NeedUpdateDictionary = 1u << 10u;
enum Type
{
TUInt8 = 0,
TUInt16,
TUInt32,
TUInt64,
};
Type type;
bool has_additional_keys;
bool need_global_dictionary;
bool need_update_dictionary;
static constexpr SerializationType resetFlags(SerializationType type)
{
return type & (~(HasAdditionalKeysBit | NeedGlobalDictionaryBit | NeedUpdateDictionary));
}
static void checkType(SerializationType type)
{
UInt64 value = resetFlags(type);
if (value <= TUInt64)
return;
throw Exception("Invalid type for DataTypeLowCardinality index column.", ErrorCodes::LOGICAL_ERROR);
}
void serialize(WriteBuffer & buffer) const
{
SerializationType val = type;
if (has_additional_keys)
val |= HasAdditionalKeysBit;
if (need_global_dictionary)
val |= NeedGlobalDictionaryBit;
if (need_update_dictionary)
val |= NeedUpdateDictionary;
writeIntBinary(val, buffer);
}
void deserialize(ReadBuffer & buffer)
{
SerializationType val;
readIntBinary(val, buffer);
checkType(val);
has_additional_keys = (val & HasAdditionalKeysBit) != 0;
need_global_dictionary = (val & NeedGlobalDictionaryBit) != 0;
need_update_dictionary = (val & NeedUpdateDictionary) != 0;
type = static_cast<Type>(resetFlags(val));
}
IndexesSerializationType(const IColumn & column,
bool has_additional_keys_,
bool need_global_dictionary_,
bool enumerate_dictionaries)
: has_additional_keys(has_additional_keys_)
, need_global_dictionary(need_global_dictionary_)
, need_update_dictionary(enumerate_dictionaries)
{
if (typeid_cast<const ColumnUInt8 *>(&column))
type = TUInt8;
else if (typeid_cast<const ColumnUInt16 *>(&column))
type = TUInt16;
else if (typeid_cast<const ColumnUInt32 *>(&column))
type = TUInt32;
else if (typeid_cast<const ColumnUInt64 *>(&column))
type = TUInt64;
else
throw Exception("Invalid Indexes column for IndexesSerializationType. Expected ColumnUInt*, got "
+ column.getName(), ErrorCodes::LOGICAL_ERROR);
}
DataTypePtr getDataType() const
{
if (type == TUInt8)
return std::make_shared<DataTypeUInt8>();
if (type == TUInt16)
return std::make_shared<DataTypeUInt16>();
if (type == TUInt32)
return std::make_shared<DataTypeUInt32>();
if (type == TUInt64)
return std::make_shared<DataTypeUInt64>();
throw Exception("Can't create DataType from IndexesSerializationType.", ErrorCodes::LOGICAL_ERROR);
}
IndexesSerializationType() = default;
};
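/// A runnable sketch of the flag packing that
/// IndexesSerializationType::serialize/deserialize perform: the low byte
/// carries the index-width type, bits 8..10 carry the three flags. The
/// constants mirror the ones above; everything else is illustrative.
#include <cassert>
#include <cstdint>

int main()
{
    const uint64_t NeedGlobalDictionaryBit = 1u << 8u;
    const uint64_t HasAdditionalKeysBit = 1u << 9u;
    const uint64_t NeedUpdateDictionary = 1u << 10u;

    uint64_t val = 2; /// TUInt32
    val |= HasAdditionalKeysBit | NeedUpdateDictionary;

    /// Deserialization side: test the flag bits, then mask them off to recover the type.
    assert((val & HasAdditionalKeysBit) != 0);
    assert((val & NeedGlobalDictionaryBit) == 0);
    uint64_t type = val & ~(HasAdditionalKeysBit | NeedGlobalDictionaryBit | NeedUpdateDictionary);
    assert(type == 2);
    return 0;
}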
struct SerializeStateLowCardinality : public IDataType::SerializeBinaryBulkState
{
KeysSerializationVersion key_version;
MutableColumnUniquePtr shared_dictionary;
explicit SerializeStateLowCardinality(UInt64 key_version_) : key_version(key_version_) {}
};
struct DeserializeStateLowCardinality : public IDataType::DeserializeBinaryBulkState
{
KeysSerializationVersion key_version;
ColumnUniquePtr global_dictionary;
IndexesSerializationType index_type;
ColumnPtr additional_keys;
ColumnPtr null_map;
UInt64 num_pending_rows = 0;
/// Whether the dictionary should be updated.
/// Can happen if some granules were skipped while reading from MergeTree.
/// We have to store this flag in the State because, in case of a long block of
/// empty arrays, we may not need to read the dictionary on the first read.
bool need_update_dictionary = false;
explicit DeserializeStateLowCardinality(UInt64 key_version_) : key_version(key_version_) {}
};
static SerializeStateLowCardinality * checkAndGetLowCardinalitySerializeState(
IDataType::SerializeBinaryBulkStatePtr & state)
{
if (!state)
throw Exception("Got empty state for DataTypeLowCardinality.", ErrorCodes::LOGICAL_ERROR);
auto * low_cardinality_state = typeid_cast<SerializeStateLowCardinality *>(state.get());
if (!low_cardinality_state)
{
auto & state_ref = *state;
throw Exception("Invalid SerializeBinaryBulkState for DataTypeLowCardinality. Expected: "
+ demangle(typeid(SerializeStateLowCardinality).name()) + ", got "
+ demangle(typeid(state_ref).name()), ErrorCodes::LOGICAL_ERROR);
}
return low_cardinality_state;
}
static DeserializeStateLowCardinality * checkAndGetLowCardinalityDeserializeState(
IDataType::DeserializeBinaryBulkStatePtr & state)
{
if (!state)
throw Exception("Got empty state for DataTypeLowCardinality.", ErrorCodes::LOGICAL_ERROR);
auto * low_cardinality_state = typeid_cast<DeserializeStateLowCardinality *>(state.get());
if (!low_cardinality_state)
{
auto & state_ref = *state;
throw Exception("Invalid DeserializeBinaryBulkState for DataTypeLowCardinality. Expected: "
+ demangle(typeid(DeserializeStateLowCardinality).name()) + ", got "
+ demangle(typeid(state_ref).name()), ErrorCodes::LOGICAL_ERROR);
}
return low_cardinality_state;
}
void DataTypeLowCardinality::serializeBinaryBulkStatePrefixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
settings.path.push_back(Substream::DictionaryKeys);
auto * stream = settings.getter(settings.path);
settings.path.pop_back();
if (!stream)
throw Exception("Got empty stream in DataTypeLowCardinality::serializeBinaryBulkStatePrefix",
ErrorCodes::LOGICAL_ERROR);
/// Write version and create SerializeBinaryBulkState.
UInt64 key_version = KeysSerializationVersion::SharedDictionariesWithAdditionalKeys;
writeIntBinary(key_version, *stream);
state = std::make_shared<SerializeStateLowCardinality>(key_version);
}
void DataTypeLowCardinality::serializeBinaryBulkStateSuffixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
auto * low_cardinality_state = checkAndGetLowCardinalitySerializeState(state);
KeysSerializationVersion::checkVersion(low_cardinality_state->key_version.value);
if (low_cardinality_state->shared_dictionary && settings.low_cardinality_max_dictionary_size)
{
auto nested_column = low_cardinality_state->shared_dictionary->getNestedNotNullableColumn();
settings.path.push_back(Substream::DictionaryKeys);
auto * stream = settings.getter(settings.path);
settings.path.pop_back();
if (!stream)
throw Exception("Got empty stream in DataTypeLowCardinality::serializeBinaryBulkStateSuffix",
ErrorCodes::LOGICAL_ERROR);
UInt64 num_keys = nested_column->size();
writeIntBinary(num_keys, *stream);
removeNullable(dictionary_type)->serializeBinaryBulk(*nested_column, *stream, 0, num_keys);
low_cardinality_state->shared_dictionary = nullptr;
}
}
void DataTypeLowCardinality::deserializeBinaryBulkStatePrefixImpl(
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const
{
settings.path.push_back(Substream::DictionaryKeys);
auto * stream = settings.getter(settings.path);
settings.path.pop_back();
if (!stream)
return;
UInt64 keys_version;
readIntBinary(keys_version, *stream);
state = std::make_shared<DeserializeStateLowCardinality>(keys_version);
}
namespace
{
template <typename T>
PaddedPODArray<T> * getIndexesData(IColumn & indexes)
{
auto * column = typeid_cast<ColumnVector<T> *>(&indexes);
if (column)
return &column->getData();
return nullptr;
}
struct IndexMapsWithAdditionalKeys
{
MutableColumnPtr dictionary_map;
MutableColumnPtr additional_keys_map;
};
template <typename T>
IndexMapsWithAdditionalKeys mapIndexWithAdditionalKeysRef(PaddedPODArray<T> & index, size_t dict_size)
{
PaddedPODArray<T> copy(index.cbegin(), index.cend());
HashMap<T, T> dict_map;
HashMap<T, T> add_keys_map;
for (auto val : index)
{
if (val < dict_size)
dict_map.insert({val, dict_map.size()});
else
add_keys_map.insert({val, add_keys_map.size()});
}
auto dictionary_map = ColumnVector<T>::create(dict_map.size());
auto additional_keys_map = ColumnVector<T>::create(add_keys_map.size());
auto & dict_data = dictionary_map->getData();
auto & add_keys_data = additional_keys_map->getData();
for (auto val : dict_map)
dict_data[val.second] = val.first;
for (auto val : add_keys_map)
add_keys_data[val.second] = val.first - dict_size;
for (auto & val : index)
val = val < dict_size ? dict_map[val]
: add_keys_map[val] + dict_map.size();
for (size_t i = 0; i < index.size(); ++i)
{
T expected = index[i] < dict_data.size() ? dict_data[index[i]]
: add_keys_data[index[i] - dict_data.size()] + dict_size;
if (expected != copy[i])
throw Exception("Expected " + toString(expected) + ", but got " + toString(copy[i]), ErrorCodes::LOGICAL_ERROR);
}
return {std::move(dictionary_map), std::move(additional_keys_map)};
}
template <typename T>
IndexMapsWithAdditionalKeys mapIndexWithAdditionalKeys(PaddedPODArray<T> & index, size_t dict_size)
{
T max_less_dict_size = 0;
T max_value = 0;
auto size = index.size();
if (size == 0)
return {ColumnVector<T>::create(), ColumnVector<T>::create()};
for (size_t i = 0; i < size; ++i)
{
auto val = index[i];
if (val < dict_size)
max_less_dict_size = std::max(max_less_dict_size, val);
max_value = std::max(max_value, val);
}
auto map_size = UInt64(max_less_dict_size) + 1;
auto overflow_map_size = max_value >= dict_size ? (UInt64(max_value - dict_size) + 1) : 0;
PaddedPODArray<T> map(map_size, 0);
PaddedPODArray<T> overflow_map(overflow_map_size, 0);
T zero_pos_value = 0;
T zero_pos_overflowed_value = 0;
UInt64 cur_pos = 0;
UInt64 cur_overflowed_pos = 0;
for (size_t i = 0; i < size; ++i)
{
T val = index[i];
if (val < dict_size)
{
if (cur_pos == 0)
{
zero_pos_value = val;
++cur_pos;
}
else if (map[val] == 0 && val != zero_pos_value)
{
map[val] = cur_pos;
++cur_pos;
}
}
else
{
T shifted_val = val - dict_size;
if (cur_overflowed_pos == 0)
{
zero_pos_overflowed_value = shifted_val;
++cur_overflowed_pos;
}
else if (overflow_map[shifted_val] == 0 && shifted_val != zero_pos_overflowed_value)
{
overflow_map[shifted_val] = cur_overflowed_pos;
++cur_overflowed_pos;
}
}
}
auto dictionary_map = ColumnVector<T>::create(cur_pos);
auto additional_keys_map = ColumnVector<T>::create(cur_overflowed_pos);
auto & dict_data = dictionary_map->getData();
auto & add_keys_data = additional_keys_map->getData();
for (size_t i = 0; i < map_size; ++i)
if (map[i])
dict_data[map[i]] = static_cast<T>(i);
for (size_t i = 0; i < overflow_map_size; ++i)
if (overflow_map[i])
add_keys_data[overflow_map[i]] = static_cast<T>(i);
if (cur_pos)
dict_data[0] = zero_pos_value;
if (cur_overflowed_pos)
add_keys_data[0] = zero_pos_overflowed_value;
for (size_t i = 0; i < size; ++i)
{
T & val = index[i];
if (val < dict_size)
val = map[val];
else
val = overflow_map[val - dict_size] + cur_pos;
}
return {std::move(dictionary_map), std::move(additional_keys_map)};
}
/// Update column and return map with old indexes.
/// Let N be the number of distinct values which are less than dict_size;
/// old_column - column before function call;
/// new_column - column after function call:
/// * if old_column[i] < dict_size, then
///       dictionary_map[new_column[i]] = old_column[i]
/// * else
///       additional_keys_map[new_column[i] - N] = old_column[i] - dict_size
IndexMapsWithAdditionalKeys mapIndexWithAdditionalKeys(IColumn & column, size_t dict_size)
{
if (auto * data_uint8 = getIndexesData<UInt8>(column))
return mapIndexWithAdditionalKeys(*data_uint8, dict_size);
else if (auto * data_uint16 = getIndexesData<UInt16>(column))
return mapIndexWithAdditionalKeys(*data_uint16, dict_size);
else if (auto * data_uint32 = getIndexesData<UInt32>(column))
return mapIndexWithAdditionalKeys(*data_uint32, dict_size);
else if (auto * data_uint64 = getIndexesData<UInt64>(column))
return mapIndexWithAdditionalKeys(*data_uint64, dict_size);
else
throw Exception("Indexes column for mapIndexWithAdditionalKeys must be UInt, got" + column.getName(),
ErrorCodes::LOGICAL_ERROR);
}
}
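/// A standalone sketch of the remapping contract documented above, in the
/// spirit of the *Ref reference version, using std::unordered_map
/// (mapIndexSketch is a hypothetical name). Indexes below dict_size are
/// renumbered to compact dictionary positions; the rest become overflow
/// positions shifted past the dictionary part.
#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

void mapIndexSketch(std::vector<uint32_t> & index, size_t dict_size,
                    std::vector<uint32_t> & dictionary_map,
                    std::vector<uint32_t> & additional_keys_map)
{
    std::unordered_map<uint32_t, uint32_t> dict_map, add_keys_map;
    for (uint32_t val : index)
    {
        /// emplace keeps the first position at which each distinct value is seen.
        if (val < dict_size)
            dict_map.emplace(val, dict_map.size());
        else
            add_keys_map.emplace(val, add_keys_map.size());
    }
    dictionary_map.resize(dict_map.size());
    additional_keys_map.resize(add_keys_map.size());
    for (auto [old_pos, new_pos] : dict_map)
        dictionary_map[new_pos] = old_pos;
    for (auto [old_pos, new_pos] : add_keys_map)
        additional_keys_map[new_pos] = old_pos - dict_size;
    /// Rewrite the indexes in place: overflow positions go after the dictionary part.
    for (uint32_t & val : index)
        val = val < dict_size ? dict_map[val]
                              : add_keys_map[val] + dict_map.size();
}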
void DataTypeLowCardinality::serializeBinaryBulkWithMultipleStreamsImpl(
const IColumn & column,
size_t offset,
size_t limit,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
settings.path.push_back(Substream::DictionaryKeys);
auto * keys_stream = settings.getter(settings.path);
settings.path.back() = Substream::DictionaryIndexes;
auto * indexes_stream = settings.getter(settings.path);
settings.path.pop_back();
if (!keys_stream && !indexes_stream)
return;
if (!keys_stream)
throw Exception("Got empty stream for DataTypeLowCardinality keys.", ErrorCodes::LOGICAL_ERROR);
if (!indexes_stream)
throw Exception("Got empty stream for DataTypeLowCardinality indexes.", ErrorCodes::LOGICAL_ERROR);
const ColumnLowCardinality & low_cardinality_column = typeid_cast<const ColumnLowCardinality &>(column);
auto * low_cardinality_state = checkAndGetLowCardinalitySerializeState(state);
auto & global_dictionary = low_cardinality_state->shared_dictionary;
KeysSerializationVersion::checkVersion(low_cardinality_state->key_version.value);
bool need_update_dictionary = global_dictionary == nullptr;
if (need_update_dictionary)
global_dictionary = createColumnUnique(*dictionary_type);
size_t max_limit = column.size() - offset;
limit = limit ? std::min(limit, max_limit) : max_limit;
/// Do not write anything for empty column. (May happen while writing empty arrays.)
if (limit == 0)
return;
auto sub_column = low_cardinality_column.cutAndCompact(offset, limit);
ColumnPtr positions = sub_column->getIndexesPtr();
ColumnPtr keys = sub_column->getDictionary().getNestedColumn();
if (settings.low_cardinality_max_dictionary_size)
{
/// Insert the used keys into the global dictionary and remap the positions.
auto indexes_with_overflow = global_dictionary->uniqueInsertRangeWithOverflow(*keys, 0, keys->size(),
settings.low_cardinality_max_dictionary_size);
size_t max_size = settings.low_cardinality_max_dictionary_size + indexes_with_overflow.overflowed_keys->size();
ColumnLowCardinality::Index(indexes_with_overflow.indexes->getPtr()).check(max_size);
if (global_dictionary->size() > settings.low_cardinality_max_dictionary_size)
throw Exception("Got dictionary with size " + toString(global_dictionary->size()) +
" but max dictionary size is " + toString(settings.low_cardinality_max_dictionary_size),
ErrorCodes::LOGICAL_ERROR);
positions = indexes_with_overflow.indexes->index(*positions, 0);
keys = std::move(indexes_with_overflow.overflowed_keys);
if (global_dictionary->size() < settings.low_cardinality_max_dictionary_size && !keys->empty())
throw Exception("Has additional keys, but dict size is " + toString(global_dictionary->size()) +
" which is less then max dictionary size (" + toString(settings.low_cardinality_max_dictionary_size) + ")",
ErrorCodes::LOGICAL_ERROR);
}
if (const auto * nullable_keys = checkAndGetColumn<ColumnNullable>(*keys))
keys = nullable_keys->getNestedColumnPtr();
bool need_additional_keys = !keys->empty();
bool need_dictionary = settings.low_cardinality_max_dictionary_size != 0;
bool need_write_dictionary = !settings.low_cardinality_use_single_dictionary_for_part
&& global_dictionary->size() >= settings.low_cardinality_max_dictionary_size;
IndexesSerializationType index_version(*positions, need_additional_keys, need_dictionary, need_update_dictionary);
index_version.serialize(*indexes_stream);
if (need_write_dictionary)
{
const auto & nested_column = global_dictionary->getNestedNotNullableColumn();
UInt64 num_keys = nested_column->size();
writeIntBinary(num_keys, *keys_stream);
removeNullable(dictionary_type)->serializeBinaryBulk(*nested_column, *keys_stream, 0, num_keys);
low_cardinality_state->shared_dictionary = nullptr;
}
if (need_additional_keys)
{
UInt64 num_keys = keys->size();
writeIntBinary(num_keys, *indexes_stream);
removeNullable(dictionary_type)->serializeBinaryBulk(*keys, *indexes_stream, 0, num_keys);
}
UInt64 num_rows = positions->size();
writeIntBinary(num_rows, *indexes_stream);
index_version.getDataType()->serializeBinaryBulk(*positions, *indexes_stream, 0, num_rows);
}
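/// Rough per-granule layout written by the function above (inferred from this code;
/// a hedged sketch, not a normative description of the format):
///
///   DictionaryKeys stream, only when need_write_dictionary:
///       UInt64 num_keys
///       num_keys serialized keys (non-nullable representation of dictionary_type)
///
///   DictionaryIndexes stream:
///       index_version                     (IndexesSerializationType: flags + index width)
///       [UInt64 num_keys, keys]           only when need_additional_keys
///       UInt64 num_rows
///       num_rows indexes, of the width chosen by index_version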
void DataTypeLowCardinality::deserializeBinaryBulkWithMultipleStreamsImpl(
IColumn & column,
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * /* cache */) const
{
ColumnLowCardinality & low_cardinality_column = typeid_cast<ColumnLowCardinality &>(column);
settings.path.push_back(Substream::DictionaryKeys);
auto * keys_stream = settings.getter(settings.path);
settings.path.back() = Substream::DictionaryIndexes;
auto * indexes_stream = settings.getter(settings.path);
settings.path.pop_back();
if (!keys_stream && !indexes_stream)
return;
if (!keys_stream)
throw Exception("Got empty stream for DataTypeLowCardinality keys.", ErrorCodes::LOGICAL_ERROR);
if (!indexes_stream)
throw Exception("Got empty stream for DataTypeLowCardinality indexes.", ErrorCodes::LOGICAL_ERROR);
auto * low_cardinality_state = checkAndGetLowCardinalityDeserializeState(state);
KeysSerializationVersion::checkVersion(low_cardinality_state->key_version.value);
auto read_dictionary = [this, low_cardinality_state, keys_stream]()
{
UInt64 num_keys;
readIntBinary(num_keys, *keys_stream);
auto keys_type = removeNullable(dictionary_type);
auto global_dict_keys = keys_type->createColumn();
keys_type->deserializeBinaryBulk(*global_dict_keys, *keys_stream, num_keys, 0);
auto column_unique = createColumnUnique(*dictionary_type, std::move(global_dict_keys));
low_cardinality_state->global_dictionary = std::move(column_unique);
};
auto read_additional_keys = [this, low_cardinality_state, indexes_stream]()
{
UInt64 num_keys;
readIntBinary(num_keys, *indexes_stream);
auto keys_type = removeNullable(dictionary_type);
auto additional_keys = keys_type->createColumn();
keys_type->deserializeBinaryBulk(*additional_keys, *indexes_stream, num_keys, 0);
low_cardinality_state->additional_keys = std::move(additional_keys);
if (!low_cardinality_state->index_type.need_global_dictionary && dictionary_type->isNullable())
{
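/// Without a global dictionary the null value must live among the additional
/// keys themselves: by convention the key at position 0 stands for NULL,
/// so mark only that element in the null map.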
auto null_map = ColumnUInt8::create(num_keys, 0);
if (num_keys)
null_map->getElement(0) = 1;
low_cardinality_state->null_map = std::move(null_map);
}
};
auto read_indexes = [this, low_cardinality_state, indexes_stream, &low_cardinality_column](UInt64 num_rows)
{
auto indexes_type = low_cardinality_state->index_type.getDataType();
MutableColumnPtr indexes_column = indexes_type->createColumn();
indexes_type->deserializeBinaryBulk(*indexes_column, *indexes_stream, num_rows, 0);
auto & global_dictionary = low_cardinality_state->global_dictionary;
const auto & additional_keys = low_cardinality_state->additional_keys;
bool has_additional_keys = low_cardinality_state->index_type.has_additional_keys;
bool column_is_empty = low_cardinality_column.empty();
if (!low_cardinality_state->index_type.need_global_dictionary)
{
ColumnPtr keys_column = additional_keys;
if (low_cardinality_state->null_map)
keys_column = ColumnNullable::create(additional_keys, low_cardinality_state->null_map);
low_cardinality_column.insertRangeFromDictionaryEncodedColumn(*keys_column, *indexes_column);
}
else if (!has_additional_keys)
{
if (column_is_empty)
low_cardinality_column.setSharedDictionary(global_dictionary);
auto local_column = ColumnLowCardinality::create(global_dictionary, std::move(indexes_column));
low_cardinality_column.insertRangeFrom(*local_column, 0, num_rows);
}
else
{
auto maps = mapIndexWithAdditionalKeys(*indexes_column, global_dictionary->size());
ColumnLowCardinality::Index(maps.additional_keys_map->getPtr()).check(additional_keys->size());
ColumnLowCardinality::Index(indexes_column->getPtr()).check(
maps.dictionary_map->size() + maps.additional_keys_map->size());
auto used_keys = IColumn::mutate(global_dictionary->getNestedColumn()->index(*maps.dictionary_map, 0));
if (!maps.additional_keys_map->empty())
{
auto used_add_keys = additional_keys->index(*maps.additional_keys_map, 0);
if (dictionary_type->isNullable())
{
ColumnPtr null_map = ColumnUInt8::create(used_add_keys->size(), 0);
used_add_keys = ColumnNullable::create(used_add_keys, null_map);
}
used_keys->insertRangeFrom(*used_add_keys, 0, used_add_keys->size());
}
low_cardinality_column.insertRangeFromDictionaryEncodedColumn(*used_keys, *indexes_column);
}
};
if (!settings.continuous_reading)
{
low_cardinality_state->num_pending_rows = 0;
/// Remember in the state that some granules were skipped and we need to update the dictionary.
low_cardinality_state->need_update_dictionary = true;
}
while (limit)
{
if (low_cardinality_state->num_pending_rows == 0)
{
if (indexes_stream->eof())
break;
auto & index_type = low_cardinality_state->index_type;
auto & global_dictionary = low_cardinality_state->global_dictionary;
index_type.deserialize(*indexes_stream);
bool need_update_dictionary =
!global_dictionary || index_type.need_update_dictionary || low_cardinality_state->need_update_dictionary;
if (index_type.need_global_dictionary && need_update_dictionary)
{
read_dictionary();
low_cardinality_state->need_update_dictionary = false;
}
if (low_cardinality_state->index_type.has_additional_keys)
read_additional_keys();
else
low_cardinality_state->additional_keys = nullptr;
readIntBinary(low_cardinality_state->num_pending_rows, *indexes_stream);
}
size_t num_rows_to_read = std::min<UInt64>(limit, low_cardinality_state->num_pending_rows);
read_indexes(num_rows_to_read);
limit -= num_rows_to_read;
low_cardinality_state->num_pending_rows -= num_rows_to_read;
}
}
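/// A brief restatement of the loop above (inferred, not authoritative): num_pending_rows
/// counts the indexes of the current serialized block that are still unread. A call may
/// stop mid-block when `limit` is exhausted; a subsequent call with continuous_reading
/// resumes from num_pending_rows without re-reading the header, dictionary or additional
/// keys, while a seek (continuous_reading == false) forces the dictionary to be re-read.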
void DataTypeLowCardinality::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
dictionary_type->serializeBinary(field, ostr);
}
void DataTypeLowCardinality::deserializeBinary(Field & field, ReadBuffer & istr) const
{
dictionary_type->deserializeBinary(field, istr);
}
void DataTypeLowCardinality::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
{
serializeImpl(column, row_num, &IDataType::serializeBinary, ostr);
}
void DataTypeLowCardinality::deserializeBinary(IColumn & column, ReadBuffer & istr) const
{
deserializeImpl(column, &IDataType::deserializeBinary, istr);
}
void DataTypeLowCardinality::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeImpl(column, row_num, &IDataType::serializeAsTextEscaped, ostr, settings);
}
void DataTypeLowCardinality::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeImpl(column, &IDataType::deserializeAsTextEscaped, istr, settings);
}
void DataTypeLowCardinality::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeImpl(column, row_num, &IDataType::serializeAsTextQuoted, ostr, settings);
}
void DataTypeLowCardinality::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeImpl(column, &IDataType::deserializeAsTextQuoted, istr, settings);
}
void DataTypeLowCardinality::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeImpl(column, &IDataType::deserializeAsWholeText, istr, settings);
}
void DataTypeLowCardinality::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeImpl(column, row_num, &IDataType::serializeAsTextCSV, ostr, settings);
}
void DataTypeLowCardinality::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeImpl(column, &IDataType::deserializeAsTextCSV, istr, settings);
}
void DataTypeLowCardinality::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeImpl(column, row_num, &IDataType::serializeAsText, ostr, settings);
}
void DataTypeLowCardinality::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeImpl(column, row_num, &IDataType::serializeAsTextJSON, ostr, settings);
}
void DataTypeLowCardinality::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeImpl(column, &IDataType::deserializeAsTextJSON, istr, settings);
}
void DataTypeLowCardinality::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeImpl(column, row_num, &IDataType::serializeAsTextXML, ostr, settings);
}
template <typename... Params, typename... Args>
void DataTypeLowCardinality::serializeImpl(
const IColumn & column, size_t row_num, DataTypeLowCardinality::SerializeFunctionPtr<Params...> func, Args &&... args) const
{
const auto & low_cardinality_column = getColumnLowCardinality(column);
size_t unique_row_number = low_cardinality_column.getIndexes().getUInt(row_num);
(dictionary_type.get()->*func)(*low_cardinality_column.getDictionary().getNestedColumn(), unique_row_number, std::forward<Args>(args)...);
}
template <typename... Params, typename... Args>
void DataTypeLowCardinality::deserializeImpl(
IColumn & column, DataTypeLowCardinality::DeserializeFunctionPtr<Params...> func, Args &&... args) const
{
auto & low_cardinality_column= getColumnLowCardinality(column);
auto temp_column = low_cardinality_column.getDictionary().getNestedColumn()->cloneEmpty();
(dictionary_type.get()->*func)(*temp_column, std::forward<Args>(args)...);
low_cardinality_column.insertFromFullColumn(*temp_column, 0);
}
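/** A minimal standalone sketch (hypothetical types, compiled separately; not ClickHouse
  * code) of the pointer-to-member-function dispatch used by serializeImpl/deserializeImpl
  * above: one template forwards to any member of a fixed shape, chosen at the call site.
  */
#include <cstdio>
#include <utility>

struct Codec
{
    void writeText(int value, const char * tag) const { std::printf("%s:%d\n", tag, value); }
    void writeHex(int value, const char * tag) const { std::printf("%s:%08x\n", tag, static_cast<unsigned>(value)); }
};

/// Same shape as SerializeFunctionPtr: a pointer to a const member function
/// with fixed leading arguments and a parameter pack for the rest.
template <typename... Params>
using WriteFunctionPtr = void (Codec::*)(int, Params...) const;

template <typename... Params, typename... Args>
void writeImpl(const Codec & codec, int value, WriteFunctionPtr<Params...> func, Args &&... args)
{
    (codec.*func)(value, std::forward<Args>(args)...);  /// dispatch through the member pointer
}

int main()
{
    Codec codec;
    writeImpl(codec, 42, &Codec::writeText, "text");  /// prints "text:42"
    writeImpl(codec, 42, &Codec::writeHex, "hex");    /// prints "hex:0000002a"
}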
namespace
{
template <typename Creator>
@ -927,6 +136,11 @@ bool DataTypeLowCardinality::equals(const IDataType & rhs) const
return dictionary_type->equals(*low_cardinality_rhs.dictionary_type);
}
SerializationPtr DataTypeLowCardinality::doGetDefaultSerialization() const
{
return std::make_shared<SerializationLowCardinality>(dictionary_type);
}
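/// Hypothetical usage (assuming the public accessor introduced by this refactoring is
/// IDataType::getDefaultSerialization(); illustration only, not part of the diff):
///
///     SerializationPtr serialization = type->getDefaultSerialization();
///     serialization->serializeBinary(field, ostr);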
static DataTypePtr create(const ASTPtr & arguments)
{

View File

@ -24,50 +24,6 @@ public:
const char * getFamilyName() const override { return "LowCardinality"; }
TypeIndex getTypeId() const override { return TypeIndex::LowCardinality; }
void enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const override;
void serializeBinaryBulkStatePrefixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkStateSuffixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void deserializeBinaryBulkStatePrefixImpl(
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkWithMultipleStreamsImpl(
const IColumn & column,
size_t offset,
size_t limit,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void deserializeBinaryBulkWithMultipleStreamsImpl(
IColumn & column,
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * cache) const override;
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
void deserializeBinary(Field & field, ReadBuffer & istr) const override;
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override;
void deserializeBinary(IColumn & column, ReadBuffer & istr) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
MutableColumnPtr createColumn() const override;
Field getDefault() const override;
@ -100,6 +56,7 @@ public:
static MutableColumnUniquePtr createColumnUnique(const IDataType & keys_type, MutableColumnPtr && keys);
private:
SerializationPtr doGetDefaultSerialization() const override;
template <typename ... Params>
using SerializeFunctionPtr = void (IDataType::*)(const IColumn &, size_t, Params ...) const;

Some files were not shown because too many files have changed in this diff.