Merge remote-tracking branch 'origin' into integration-2

This commit is contained in:
Yatsishin Ilya 2021-02-25 12:53:49 +03:00
commit de20de8a2f
110 changed files with 4558 additions and 1596 deletions

2
contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit 7adf7ae33e7d5c307342431b577c8ab1025ee793
Subproject commit 9a0d78de4b90546368d954b6434f0e9a823e8d80

View File

@ -70,6 +70,7 @@ function start_server
--path "$FASTTEST_DATA"
--user_files_path "$FASTTEST_DATA/user_files"
--top_level_domains_path "$FASTTEST_DATA/top_level_domains"
--test_keeper_server.log_storage_path "$FASTTEST_DATA/coordination"
)
clickhouse-server "${opts[@]}" &>> "$FASTTEST_OUTPUT/server.log" &
server_pid=$!
@ -375,7 +376,7 @@ function run_tests
stop_server ||:
# Clean the data so that there is no interference from the previous test run.
rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files} ||:
rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files,coordination} ||:
start_server

View File

@ -5,7 +5,7 @@ toc_title: Brown University Benchmark
# Brown University Benchmark
MgBench - A new analytical benchmark for machine-generated log data, [Andrew Crotty](http://cs.brown.edu/people/acrotty/).
`MgBench` is a new analytical benchmark for machine-generated log data, [Andrew Crotty](http://cs.brown.edu/people/acrotty/).
Download the data:
```
@ -153,7 +153,7 @@ ORDER BY dt,
hr;
-- Q1.4: Over a 1-month period, how often was each server blocked on disk I/O?
-- Q1.4: Over 1 month, how often was each server blocked on disk I/O?
SELECT machine_name,
COUNT(*) AS spikes
@ -301,7 +301,7 @@ WHERE event_type = 'temperature'
AND log_time >= '2019-11-29 17:00:00.000';
-- Q3.4: Over the past 6 months, how frequently was each door opened?
-- Q3.4: Over the past 6 months, how frequently were each door opened?
SELECT device_name,
device_floor,
@ -412,3 +412,5 @@ ORDER BY yr,
```
The data is also available for interactive queries in the [Playground](https://gh-api.clickhouse.tech/play?user=play), [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==).
[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets/brown-benchmark/) <!--hide-->

View File

@ -9,7 +9,7 @@ Calculates the arithmetic mean.
**Syntax**
``` sql
avgWeighted(x)
avg(x)
```
**Arguments**

View File

@ -693,6 +693,178 @@ Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it r
Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it returns zero date or zero date time when it encounters a date format that cannot be processed.
## parseDateTimeBestEffortUSOrNull {#parsedatetimebesteffortusornull}
Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except that it returns `NULL` when it encounters a date format that cannot be processed.
**Syntax**
``` sql
parseDateTimeBestEffortUSOrNull(time_string[, time_zone])
```
**Parameters**
- `time_string` — String containing a date or date with time to convert. The date must be in the US date format (`MM/DD/YYYY`, etc). [String](../../sql-reference/data-types/string.md).
- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).
**Supported non-standard formats**
- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time).
- A string with a date and a time components: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc.
- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY`, etc.
- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case, `YYYY-MM` are substituted with `2000-01`.
- A string that includes date and time along with timezone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`.
**Returned values**
- `time_string` converted to the [DateTime](../../sql-reference/data-types/datetime.md) data type.
- `NULL` if the input string cannot be converted to the `DateTime` data type.
**Examples**
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrNull;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ 2021-02-10 21:12:57 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrNull;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ 2021-02-11 00:12:57 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('02.10.2021') AS parseDateTimeBestEffortUSOrNull;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ 2021-02-10 00:00:00 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('10.2021') AS parseDateTimeBestEffortUSOrNull;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ ᴺᵁᴸᴸ │
└─────────────────────────────────┘
```
## parseDateTimeBestEffortUSOrZero {#parsedatetimebesteffortusorzero}
Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except that it returns zero date (`1970-01-01`) or zero date with time (`1970-01-01 00:00:00`) when it encounters a date format that cannot be processed.
**Syntax**
``` sql
parseDateTimeBestEffortUSOrZero(time_string[, time_zone])
```
**Parameters**
- `time_string` — String containing a date or date with time to convert. The date must be in the US date format (`MM/DD/YYYY`, etc). [String](../../sql-reference/data-types/string.md).
- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).
**Supported non-standard formats**
- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time).
- A string with a date and a time components: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc.
- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY`, etc.
- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case, `YYYY-MM` are substituted with `2000-01`.
- A string that includes date and time along with timezone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`.
**Returned values**
- `time_string` converted to the [DateTime](../../sql-reference/data-types/datetime.md) data type.
- Zero date or zero date with time if the input string cannot be converted to the `DateTime` data type.
**Examples**
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrZero;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 2021-02-10 21:12:57 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrZero;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 2021-02-11 00:12:57 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02.10.2021') AS parseDateTimeBestEffortUSOrZero;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 2021-02-10 00:00:00 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02.2021') AS parseDateTimeBestEffortUSOrZero;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 1970-01-01 00:00:00 │
└─────────────────────────────────┘
```
## toLowCardinality {#tolowcardinality}
Converts input parameter to the [LowCardianlity](../../sql-reference/data-types/lowcardinality.md) version of same data type.

View File

@ -44,7 +44,7 @@ The rest of the conditions and the `LIMIT` sampling constraint are executed in C
A table object with the same columns as the original MySQL table.
!!! info "Note"
In the `INSERT` query to distinguish table function `mysql(...)` from table name with column names list you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below.
In the `INSERT` query to distinguish table function `mysql(...)` from table name with column names list, you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below.
**Examples**

View File

@ -0,0 +1,416 @@
---
toc_priority: 20
toc_title: Brown University Benchmark
---
# Brown University Benchmark
`MgBench` — это аналитический тест производительности для данных журнала событий, сгенерированных машиной. Бенчмарк разработан [Andrew Crotty](http://cs.brown.edu/people/acrotty/).
Скачать данные:
```
wget https://datasets.clickhouse.tech/mgbench{1..3}.csv.xz
```
Распаковать данные:
```
xz -v -d mgbench{1..3}.csv.xz
```
Создание таблиц:
```
CREATE DATABASE mgbench;
CREATE TABLE mgbench.logs1 (
log_time DateTime,
machine_name LowCardinality(String),
machine_group LowCardinality(String),
cpu_idle Nullable(Float32),
cpu_nice Nullable(Float32),
cpu_system Nullable(Float32),
cpu_user Nullable(Float32),
cpu_wio Nullable(Float32),
disk_free Nullable(Float32),
disk_total Nullable(Float32),
part_max_used Nullable(Float32),
load_fifteen Nullable(Float32),
load_five Nullable(Float32),
load_one Nullable(Float32),
mem_buffers Nullable(Float32),
mem_cached Nullable(Float32),
mem_free Nullable(Float32),
mem_shared Nullable(Float32),
swap_free Nullable(Float32),
bytes_in Nullable(Float32),
bytes_out Nullable(Float32)
)
ENGINE = MergeTree()
ORDER BY (machine_group, machine_name, log_time);
CREATE TABLE mgbench.logs2 (
log_time DateTime,
client_ip IPv4,
request String,
status_code UInt16,
object_size UInt64
)
ENGINE = MergeTree()
ORDER BY log_time;
CREATE TABLE mgbench.logs3 (
log_time DateTime64,
device_id FixedString(15),
device_name LowCardinality(String),
device_type LowCardinality(String),
device_floor UInt8,
event_type LowCardinality(String),
event_unit FixedString(1),
event_value Nullable(Float32)
)
ENGINE = MergeTree()
ORDER BY (event_type, log_time);
```
Вставка данных:
```
clickhouse-client --query "INSERT INTO mgbench.logs1 FORMAT CSVWithNames" < mgbench1.csv
clickhouse-client --query "INSERT INTO mgbench.logs2 FORMAT CSVWithNames" < mgbench2.csv
clickhouse-client --query "INSERT INTO mgbench.logs3 FORMAT CSVWithNames" < mgbench3.csv
```
Запуск тестов производительности:
```
-- Q1.1: What is the CPU/network utilization for each web server since midnight?
SELECT machine_name,
MIN(cpu) AS cpu_min,
MAX(cpu) AS cpu_max,
AVG(cpu) AS cpu_avg,
MIN(net_in) AS net_in_min,
MAX(net_in) AS net_in_max,
AVG(net_in) AS net_in_avg,
MIN(net_out) AS net_out_min,
MAX(net_out) AS net_out_max,
AVG(net_out) AS net_out_avg
FROM (
SELECT machine_name,
COALESCE(cpu_user, 0.0) AS cpu,
COALESCE(bytes_in, 0.0) AS net_in,
COALESCE(bytes_out, 0.0) AS net_out
FROM logs1
WHERE machine_name IN ('anansi','aragog','urd')
AND log_time >= TIMESTAMP '2017-01-11 00:00:00'
) AS r
GROUP BY machine_name;
-- Q1.2: Which computer lab machines have been offline in the past day?
SELECT machine_name,
log_time
FROM logs1
WHERE (machine_name LIKE 'cslab%' OR
machine_name LIKE 'mslab%')
AND load_one IS NULL
AND log_time >= TIMESTAMP '2017-01-10 00:00:00'
ORDER BY machine_name,
log_time;
-- Q1.3: What are the hourly average metrics during the past 10 days for a specific workstation?
SELECT dt,
hr,
AVG(load_fifteen) AS load_fifteen_avg,
AVG(load_five) AS load_five_avg,
AVG(load_one) AS load_one_avg,
AVG(mem_free) AS mem_free_avg,
AVG(swap_free) AS swap_free_avg
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(HOUR FROM log_time) AS hr,
load_fifteen,
load_five,
load_one,
mem_free,
swap_free
FROM logs1
WHERE machine_name = 'babbage'
AND load_fifteen IS NOT NULL
AND load_five IS NOT NULL
AND load_one IS NOT NULL
AND mem_free IS NOT NULL
AND swap_free IS NOT NULL
AND log_time >= TIMESTAMP '2017-01-01 00:00:00'
) AS r
GROUP BY dt,
hr
ORDER BY dt,
hr;
-- Q1.4: Over 1 month, how often was each server blocked on disk I/O?
SELECT machine_name,
COUNT(*) AS spikes
FROM logs1
WHERE machine_group = 'Servers'
AND cpu_wio > 0.99
AND log_time >= TIMESTAMP '2016-12-01 00:00:00'
AND log_time < TIMESTAMP '2017-01-01 00:00:00'
GROUP BY machine_name
ORDER BY spikes DESC
LIMIT 10;
-- Q1.5: Which externally reachable VMs have run low on memory?
SELECT machine_name,
dt,
MIN(mem_free) AS mem_free_min
FROM (
SELECT machine_name,
CAST(log_time AS DATE) AS dt,
mem_free
FROM logs1
WHERE machine_group = 'DMZ'
AND mem_free IS NOT NULL
) AS r
GROUP BY machine_name,
dt
HAVING MIN(mem_free) < 10000
ORDER BY machine_name,
dt;
-- Q1.6: What is the total hourly network traffic across all file servers?
SELECT dt,
hr,
SUM(net_in) AS net_in_sum,
SUM(net_out) AS net_out_sum,
SUM(net_in) + SUM(net_out) AS both_sum
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(HOUR FROM log_time) AS hr,
COALESCE(bytes_in, 0.0) / 1000000000.0 AS net_in,
COALESCE(bytes_out, 0.0) / 1000000000.0 AS net_out
FROM logs1
WHERE machine_name IN ('allsorts','andes','bigred','blackjack','bonbon',
'cadbury','chiclets','cotton','crows','dove','fireball','hearts','huey',
'lindt','milkduds','milkyway','mnm','necco','nerds','orbit','peeps',
'poprocks','razzles','runts','smarties','smuggler','spree','stride',
'tootsie','trident','wrigley','york')
) AS r
GROUP BY dt,
hr
ORDER BY both_sum DESC
LIMIT 10;
-- Q2.1: Which requests have caused server errors within the past 2 weeks?
SELECT *
FROM logs2
WHERE status_code >= 500
AND log_time >= TIMESTAMP '2012-12-18 00:00:00'
ORDER BY log_time;
-- Q2.2: During a specific 2-week period, was the user password file leaked?
SELECT *
FROM logs2
WHERE status_code >= 200
AND status_code < 300
AND request LIKE '%/etc/passwd%'
AND log_time >= TIMESTAMP '2012-05-06 00:00:00'
AND log_time < TIMESTAMP '2012-05-20 00:00:00';
-- Q2.3: What was the average path depth for top-level requests in the past month?
SELECT top_level,
AVG(LENGTH(request) - LENGTH(REPLACE(request, '/', ''))) AS depth_avg
FROM (
SELECT SUBSTRING(request FROM 1 FOR len) AS top_level,
request
FROM (
SELECT POSITION(SUBSTRING(request FROM 2), '/') AS len,
request
FROM logs2
WHERE status_code >= 200
AND status_code < 300
AND log_time >= TIMESTAMP '2012-12-01 00:00:00'
) AS r
WHERE len > 0
) AS s
WHERE top_level IN ('/about','/courses','/degrees','/events',
'/grad','/industry','/news','/people',
'/publications','/research','/teaching','/ugrad')
GROUP BY top_level
ORDER BY top_level;
-- Q2.4: During the last 3 months, which clients have made an excessive number of requests?
SELECT client_ip,
COUNT(*) AS num_requests
FROM logs2
WHERE log_time >= TIMESTAMP '2012-10-01 00:00:00'
GROUP BY client_ip
HAVING COUNT(*) >= 100000
ORDER BY num_requests DESC;
-- Q2.5: What are the daily unique visitors?
SELECT dt,
COUNT(DISTINCT client_ip)
FROM (
SELECT CAST(log_time AS DATE) AS dt,
client_ip
FROM logs2
) AS r
GROUP BY dt
ORDER BY dt;
-- Q2.6: What are the average and maximum data transfer rates (Gbps)?
SELECT AVG(transfer) / 125000000.0 AS transfer_avg,
MAX(transfer) / 125000000.0 AS transfer_max
FROM (
SELECT log_time,
SUM(object_size) AS transfer
FROM logs2
GROUP BY log_time
) AS r;
-- Q3.1: Did the indoor temperature reach freezing over the weekend?
SELECT *
FROM logs3
WHERE event_type = 'temperature'
AND event_value <= 32.0
AND log_time >= '2019-11-29 17:00:00.000';
-- Q3.4: Over the past 6 months, how frequently were each door opened?
SELECT device_name,
device_floor,
COUNT(*) AS ct
FROM logs3
WHERE event_type = 'door_open'
AND log_time >= '2019-06-01 00:00:00.000'
GROUP BY device_name,
device_floor
ORDER BY ct DESC;
-- Q3.5: Where in the building do large temperature variations occur in winter and summer?
WITH temperature AS (
SELECT dt,
device_name,
device_type,
device_floor
FROM (
SELECT dt,
hr,
device_name,
device_type,
device_floor,
AVG(event_value) AS temperature_hourly_avg
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(HOUR FROM log_time) AS hr,
device_name,
device_type,
device_floor,
event_value
FROM logs3
WHERE event_type = 'temperature'
) AS r
GROUP BY dt,
hr,
device_name,
device_type,
device_floor
) AS s
GROUP BY dt,
device_name,
device_type,
device_floor
HAVING MAX(temperature_hourly_avg) - MIN(temperature_hourly_avg) >= 25.0
)
SELECT DISTINCT device_name,
device_type,
device_floor,
'WINTER'
FROM temperature
WHERE dt >= DATE '2018-12-01'
AND dt < DATE '2019-03-01'
UNION
SELECT DISTINCT device_name,
device_type,
device_floor,
'SUMMER'
FROM temperature
WHERE dt >= DATE '2019-06-01'
AND dt < DATE '2019-09-01';
-- Q3.6: For each device category, what are the monthly power consumption metrics?
SELECT yr,
mo,
SUM(coffee_hourly_avg) AS coffee_monthly_sum,
AVG(coffee_hourly_avg) AS coffee_monthly_avg,
SUM(printer_hourly_avg) AS printer_monthly_sum,
AVG(printer_hourly_avg) AS printer_monthly_avg,
SUM(projector_hourly_avg) AS projector_monthly_sum,
AVG(projector_hourly_avg) AS projector_monthly_avg,
SUM(vending_hourly_avg) AS vending_monthly_sum,
AVG(vending_hourly_avg) AS vending_monthly_avg
FROM (
SELECT dt,
yr,
mo,
hr,
AVG(coffee) AS coffee_hourly_avg,
AVG(printer) AS printer_hourly_avg,
AVG(projector) AS projector_hourly_avg,
AVG(vending) AS vending_hourly_avg
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(YEAR FROM log_time) AS yr,
EXTRACT(MONTH FROM log_time) AS mo,
EXTRACT(HOUR FROM log_time) AS hr,
CASE WHEN device_name LIKE 'coffee%' THEN event_value END AS coffee,
CASE WHEN device_name LIKE 'printer%' THEN event_value END AS printer,
CASE WHEN device_name LIKE 'projector%' THEN event_value END AS projector,
CASE WHEN device_name LIKE 'vending%' THEN event_value END AS vending
FROM logs3
WHERE device_type = 'meter'
) AS r
GROUP BY dt,
yr,
mo,
hr
) AS s
GROUP BY yr,
mo
ORDER BY yr,
mo;
```
Данные также доступны для работы с интерактивными запросами через [Playground](https://gh-api.clickhouse.tech/play?user=play), [пример](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==).
[Оригинальная статья](https://clickhouse.tech/docs/ru/getting_started/example_datasets/brown-benchmark/) <!--hide-->

View File

@ -1355,6 +1355,52 @@ SELECT arrayAvg(x -> (x * x), [2, 4]) AS res;
└─────┘
```
**Синтаксис**
``` sql
arraySum(arr)
```
**Возвращаемое значение**
- Число.
Тип: [Int](../../sql-reference/data-types/int-uint.md) или [Float](../../sql-reference/data-types/float.md).
**Параметры**
- `arr` — [Массив](../../sql-reference/data-types/array.md).
**Примеры**
Запрос:
```sql
SELECT arraySum([2,3]) AS res;
```
Результат:
``` text
┌─res─┐
│ 5 │
└─────┘
```
Запрос:
``` sql
SELECT arraySum(x -> x*x, [2, 3]) AS res;
```
Результат:
``` text
┌─res─┐
│ 13 │
└─────┘
```
## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1}
Возвращает массив из частичных сумм элементов исходного массива (сумма с накоплением). Если указана функция `func`, то значения элементов массива преобразуются этой функцией перед суммированием.

View File

@ -658,6 +658,178 @@ AS parseDateTimeBestEffortUS;
└─────────────────────────——┘
```
## parseDateTimeBestEffortUSOrNull {#parsedatetimebesteffortusornull}
Работает аналогично функции [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS), но в отличие от нее возвращает `NULL`, если входная строка не может быть преобразована в тип данных [DateTime](../../sql-reference/data-types/datetime.md).
**Синтаксис**
``` sql
parseDateTimeBestEffortUSOrNull(time_string[, time_zone])
```
**Параметры**
- `time_string` — строка, содержащая дату или дату со временем для преобразования. Дата должна быть в американском формате (`MM/DD/YYYY` и т.д.). [String](../../sql-reference/data-types/string.md).
- `time_zone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). Функция анализирует `time_string` в соответствии с заданным часовым поясом. Опциональный параметр. [String](../../sql-reference/data-types/string.md).
**Поддерживаемые нестандартные форматы**
- Строка в формате [unix timestamp](https://en.wikipedia.org/wiki/Unix_time), содержащая 9-10 цифр.
- Строка, содержащая дату и время: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss` и т.д.
- Строка, содержащая дату без времени: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY` и т.д.
- Строка, содержащая день и время: `DD`, `DD hh`, `DD hh:mm`. В этом случае `YYYY-MM` заменяется на `2000-01`.
- Строка, содержащая дату и время, а также информацию о часовом поясе: `YYYY-MM-DD hh:mm:ss ±h:mm` и т.д. Например, `2020-12-12 17:36:00 -5:00`.
**Возвращаемые значения**
- `time_string`, преобразованная в тип данных `DateTime`.
- `NULL`, если входная строка не может быть преобразована в тип данных `DateTime`.
**Примеры**
Запрос:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrNull;
```
Результат:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ 2021-02-10 21:12:57 │
└─────────────────────────────────┘
```
Запрос:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrNull;
```
Результат:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ 2021-02-11 00:12:57 │
└─────────────────────────────────┘
```
Запрос:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('02.10.2021') AS parseDateTimeBestEffortUSOrNull;
```
Результат:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ 2021-02-10 00:00:00 │
└─────────────────────────────────┘
```
Запрос:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('10.2021') AS parseDateTimeBestEffortUSOrNull;
```
Результат:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ ᴺᵁᴸᴸ │
└─────────────────────────────────┘
```
## parseDateTimeBestEffortUSOrZero {#parsedatetimebesteffortusorzero}
Работает аналогично функции [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS), но в отличие от нее возвращает нулевую дату (`1970-01-01`) или нулевую дату со временем (`1970-01-01 00:00:00`), если входная строка не может быть преобразована в тип данных [DateTime](../../sql-reference/data-types/datetime.md).
**Синтаксис**
``` sql
parseDateTimeBestEffortUSOrZero(time_string[, time_zone])
```
**Параметры**
- `time_string` — строка, содержащая дату или дату со временем для преобразования. Дата должна быть в американском формате (`MM/DD/YYYY` и т.д.). [String](../../sql-reference/data-types/string.md).
- `time_zone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). Функция анализирует `time_string` в соответствии с заданным часовым поясом. Опциональный параметр. [String](../../sql-reference/data-types/string.md).
**Поддерживаемые нестандартные форматы**
- Строка в формате [unix timestamp](https://en.wikipedia.org/wiki/Unix_time), содержащая 9-10 цифр.
- Строка, содержащая дату и время: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss` и т.д.
- Строка, содержащая дату без времени: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY` и т.д.
- Строка, содержащая день и время: `DD`, `DD hh`, `DD hh:mm`. В этом случае `YYYY-MM` заменяется на `2000-01`.
- Строка, содержащая дату и время, а также информацию о часовом поясе: `YYYY-MM-DD hh:mm:ss ±h:mm` и т.д. Например, `2020-12-12 17:36:00 -5:00`.
**Возвращаемые значения**
- `time_string`, преобразованная в тип данных `DateTime`.
- Нулевая дата или нулевая дата со временем, если входная строка не может быть преобразована в тип данных `DateTime`.
**Примеры**
Запрос:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrZero;
```
Результат:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 2021-02-10 21:12:57 │
└─────────────────────────────────┘
```
Запрос:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrZero;
```
Результат:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 2021-02-11 00:12:57 │
└─────────────────────────────────┘
```
Запрос:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02.10.2021') AS parseDateTimeBestEffortUSOrZero;
```
Результат:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 2021-02-10 00:00:00 │
└─────────────────────────────────┘
```
Запрос:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02.2021') AS parseDateTimeBestEffortUSOrZero;
```
Результат:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 1970-01-01 00:00:00 │
└─────────────────────────────────┘
```
## toUnixTimestamp64Milli
## toUnixTimestamp64Micro
## toUnixTimestamp64Nano

View File

@ -7,6 +7,8 @@ toc_title: mysql
Позволяет выполнять запросы `SELECT` над данными, хранящимися на удалённом MySQL сервере.
**Синтаксис**
``` sql
mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']);
```
@ -23,13 +25,13 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_
- `password` — пароль пользователя.
- `replace_query` — флаг, отвечающий за преобразование запросов `INSERT INTO` в `REPLACE INTO`. Если `replace_query=1`, то запрос заменяется.
- `replace_query` — флаг, отвечающий за преобразование запросов `INSERT INTO` в `REPLACE INTO`. Возможные значения:
- `0` - выполняется запрос `INSERT INTO`.
- `1` - выполняется запрос `REPLACE INTO`.
- `on_duplicate_clause` — выражение `ON DUPLICATE KEY on_duplicate_clause`, добавляемое в запрос `INSERT`.
- `on_duplicate_clause` — выражение `ON DUPLICATE KEY on_duplicate_clause`, добавляемое в запрос `INSERT`. Может быть передано только с помощью `replace_query = 0` (если вы одновременно передадите `replace_query = 1` и `on_duplicate_clause`, будет сгенерировано исключение).
Пример: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, где `on_duplicate_clause` это `UPDATE c2 = c2 + 1`. Чтобы узнать какие `on_duplicate_clause` можно использовать с секцией `ON DUPLICATE KEY` обратитесь к документации MySQL.
Чтобы указать `'on_duplicate_clause'` необходимо передать `0` в параметр `replace_query`. Если одновременно передать `replace_query = 1` и `'on_duplicate_clause'`, то ClickHouse сгенерирует исключение.
Пример: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, где `on_duplicate_clause` это `UPDATE c2 = c2 + 1;`
Простые условия `WHERE` такие как `=, !=, >, >=, <, =` выполняются на стороне сервера MySQL.
@ -39,46 +41,59 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_
Объект таблицы с теми же столбцами, что и в исходной таблице MySQL.
## Пример использования {#primer-ispolzovaniia}
!!! note "Примечание"
Чтобы отличить табличную функцию `mysql (...)` в запросе `INSERT` от имени таблицы со списком имен столбцов, используйте ключевые слова `FUNCTION` или `TABLE FUNCTION`. См. примеры ниже.
**Примеры**
Таблица в MySQL:
``` text
mysql> CREATE TABLE `test`.`test` (
-> `int_id` INT NOT NULL AUTO_INCREMENT,
-> `int_nullable` INT NULL DEFAULT NULL,
-> `float` FLOAT NOT NULL,
-> `float_nullable` FLOAT NULL DEFAULT NULL,
-> PRIMARY KEY (`int_id`));
Query OK, 0 rows affected (0,09 sec)
mysql> insert into test (`int_id`, `float`) VALUES (1,2);
Query OK, 1 row affected (0,00 sec)
mysql> INSERT INTO test (`int_id`, `float`) VALUES (1,2);
mysql> select * from test;
+--------+--------------+-------+----------------+
| int_id | int_nullable | float | float_nullable |
+--------+--------------+-------+----------------+
| 1 | NULL | 2 | NULL |
+--------+--------------+-------+----------------+
1 row in set (0,00 sec)
mysql> SELECT * FROM test;
+--------+-------+
| int_id | float |
+--------+-------+
| 1 | 2 |
+--------+-------+
```
Получение данных в ClickHouse:
``` sql
SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123')
SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123');
```
``` text
┌─int_id─┬─int_nullable─┬─float─┬─float_nullable─┐
│ 1 │ ᴺᵁᴸᴸ │ 2 │ ᴺᵁᴸᴸ │
└────────┴──────────────┴───────┴────────────────
┌─int_id─┬─float─┐
│ 1 │ 2 │
└────────┴───────┘
```
## Смотрите также {#smotrite-takzhe}
Замена и вставка:
```sql
INSERT INTO FUNCTION mysql('localhost:3306', 'test', 'test', 'bayonet', '123', 1) (int_id, float) VALUES (1, 3);
INSERT INTO TABLE FUNCTION mysql('localhost:3306', 'test', 'test', 'bayonet', '123', 0, 'UPDATE int_id = int_id + 1') (int_id, float) VALUES (1, 4);
SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123');
```
``` text
┌─int_id─┬─float─┐
│ 1 │ 3 │
│ 2 │ 4 │
└────────┴───────┘
```
**Смотрите также**
- [Движок таблиц MySQL](../../sql-reference/table-functions/mysql.md)
- [Использование MySQL как источника данных для внешнего словаря](../../sql-reference/table-functions/mysql.md#dicts-external_dicts_dict_sources-mysql)
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/table_functions/mysql/) <!--hide-->
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table_functions/mysql/) <!--hide-->

View File

@ -106,6 +106,7 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
bool has_null_arguments) const
{
String name = getAliasToOrName(name_param);
bool is_case_insensitive = false;
Value found;
/// Find by exact match.
@ -115,7 +116,10 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
}
if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end())
{
found = jt->second;
is_case_insensitive = true;
}
const Context * query_context = nullptr;
if (CurrentThread::isInitialized())
@ -126,7 +130,8 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
out_properties = found.properties;
if (query_context && query_context->getSettingsRef().log_queries)
query_context->addQueryFactoriesInfo(Context::QueryLogFactories::AggregateFunction, name);
query_context->addQueryFactoriesInfo(
Context::QueryLogFactories::AggregateFunction, is_case_insensitive ? Poco::toLower(name) : name);
/// The case when aggregate function should return NULL on NULL arguments. This case is handled in "get" method.
if (!out_properties.returns_default_when_only_null && has_null_arguments)

View File

@ -56,7 +56,7 @@ class ReservoirSamplerDeterministic
{
bool good(const UInt32 hash)
{
return hash == ((hash >> skip_degree) << skip_degree);
return !(hash & skip_mask);
}
public:
@ -135,11 +135,8 @@ public:
throw Poco::Exception("Cannot merge ReservoirSamplerDeterministic's with different max sample size");
sorted = false;
if (b.skip_degree > skip_degree)
{
skip_degree = b.skip_degree;
thinOut();
}
if (skip_degree < b.skip_degree)
setSkipDegree(b.skip_degree);
for (const auto & sample : b.samples)
if (good(sample.second))
@ -184,22 +181,39 @@ private:
size_t total_values = 0; /// How many values were inserted (regardless if they remain in sample or not).
bool sorted = false;
Array samples;
UInt8 skip_degree = 0; /// The number N determining that we save only one per 2^N elements in average.
/// The number N determining that we store only one per 2^N elements in average.
UInt8 skip_degree = 0;
/// skip_mask is calculated as (2 ^ skip_degree - 1). We store an element only if (hash & skip_mask) == 0.
/// For example, if skip_degree==0 then skip_mask==0 means we store each element;
/// if skip_degree==1 then skip_mask==0b0001 means we store one per 2 elements in average;
/// if skip_degree==4 then skip_mask==0b1111 means we store one per 16 elements in average.
UInt32 skip_mask = 0;
void insertImpl(const T & v, const UInt32 hash)
{
/// Make a room for plus one element.
while (samples.size() >= max_sample_size)
{
++skip_degree;
if (skip_degree > detail::MAX_SKIP_DEGREE)
throw DB::Exception{"skip_degree exceeds maximum value", DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED};
thinOut();
}
setSkipDegree(skip_degree + 1);
samples.emplace_back(v, hash);
}
void setSkipDegree(UInt8 skip_degree_)
{
if (skip_degree_ == skip_degree)
return;
if (skip_degree_ > detail::MAX_SKIP_DEGREE)
throw DB::Exception{"skip_degree exceeds maximum value", DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED};
skip_degree = skip_degree_;
if (skip_degree == detail::MAX_SKIP_DEGREE)
skip_mask = static_cast<UInt32>(-1);
else
skip_mask = (1 << skip_degree) - 1;
thinOut();
}
void thinOut()
{
samples.resize(std::distance(samples.begin(),

View File

@ -474,19 +474,4 @@ ColumnPtr ColumnFixedString::compress() const
});
}
void ColumnFixedString::alignStringLength(ColumnFixedString::Chars & data, size_t n, size_t old_size)
{
size_t length = data.size() - old_size;
if (length < n)
{
data.resize_fill(old_size + n);
}
else if (length > n)
{
data.resize_assume_reserved(old_size);
throw Exception("Too large value for FixedString(" + std::to_string(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE);
}
}
}

View File

@ -184,8 +184,6 @@ public:
const Chars & getChars() const { return chars; }
size_t getN() const { return n; }
static void alignStringLength(ColumnFixedString::Chars & data, size_t n, size_t old_size);
};
}

View File

@ -37,8 +37,12 @@ public:
static constexpr size_t capacity =
#ifndef NDEBUG
/* The stacks are normally larger in debug version due to less inlining. */
64
/* The stacks are normally larger in debug version due to less inlining.
*
* NOTE: it cannot be larger then 56 right now, since otherwise it will
* not fit into minimal PIPE_BUF (512) in TraceCollector.
*/
56
#else
32
#endif

View File

@ -22,7 +22,9 @@ namespace
{
/// Normally query_id is a UUID (string with a fixed length) but user can provide custom query_id.
/// Thus upper bound on query_id length should be introduced to avoid buffer overflow in signal handler.
constexpr size_t QUERY_ID_MAX_LEN = 1024;
///
/// And it cannot be large, since otherwise it will not fit into PIPE_BUF.
constexpr size_t QUERY_ID_MAX_LEN = sizeof("00000000-0000-0000-0000-000000000000") - 1; // 36
}
LazyPipeFDs pipe;
@ -60,10 +62,14 @@ void TraceCollector::collect(TraceType trace_type, const StackTrace & stack_trac
8 * sizeof(char) + // maximum VarUInt length for string size
QUERY_ID_MAX_LEN * sizeof(char) + // maximum query_id length
sizeof(UInt8) + // number of stack frames
sizeof(StackTrace::Frames) + // collected stack trace, maximum capacity
sizeof(StackTrace::FramePointers) + // collected stack trace, maximum capacity
sizeof(TraceType) + // trace type
sizeof(UInt64) + // thread_id
sizeof(Int64); // size
/// Write should be atomic to avoid overlaps
/// (since recursive collect() is possible)
static_assert(buf_size < PIPE_BUF, "Only write of PIPE_BUF to pipe is atomic");
char buffer[buf_size];
WriteBufferFromFileDescriptorDiscardOnFailure out(pipe.fds_rw[1], buf_size, buffer);

View File

@ -0,0 +1,557 @@
#include <Coordination/Changelog.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromFile.h>
#include <filesystem>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/join.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <Common/Exception.h>
#include <Common/SipHash.h>
#include <common/logger_useful.h>
namespace DB
{
namespace ErrorCodes
{
extern const int CHECKSUM_DOESNT_MATCH;
extern const int CORRUPTED_DATA;
extern const int UNKNOWN_FORMAT_VERSION;
extern const int LOGICAL_ERROR;
}
namespace
{
constexpr auto DEFAULT_PREFIX = "changelog";
std::string formatChangelogPath(const std::string & prefix, const ChangelogFileDescription & name)
{
std::filesystem::path path(prefix);
path /= std::filesystem::path(name.prefix + "_" + std::to_string(name.from_log_index) + "_" + std::to_string(name.to_log_index) + ".bin");
return path;
}
ChangelogFileDescription getChangelogFileDescription(const std::string & path_str)
{
std::filesystem::path path(path_str);
std::string filename = path.stem();
Strings filename_parts;
boost::split(filename_parts, filename, boost::is_any_of("_"));
if (filename_parts.size() < 3)
throw Exception(ErrorCodes::CORRUPTED_DATA, "Invalid changelog {}", path_str);
ChangelogFileDescription result;
result.prefix = filename_parts[0];
result.from_log_index = parse<size_t>(filename_parts[1]);
result.to_log_index = parse<size_t>(filename_parts[2]);
result.path = path_str;
return result;
}
LogEntryPtr makeClone(const LogEntryPtr & entry)
{
return cs_new<nuraft::log_entry>(entry->get_term(), nuraft::buffer::clone(entry->get_buf()), entry->get_val_type());
}
Checksum computeRecordChecksum(const ChangelogRecord & record)
{
SipHash hash;
hash.update(record.header.version);
hash.update(record.header.index);
hash.update(record.header.term);
hash.update(record.header.value_type);
hash.update(record.header.blob_size);
if (record.header.blob_size != 0)
hash.update(reinterpret_cast<char *>(record.blob->data_begin()), record.blob->size());
return hash.get64();
}
}
class ChangelogWriter
{
public:
ChangelogWriter(const std::string & filepath_, WriteMode mode, size_t start_index_)
: filepath(filepath_)
, plain_buf(filepath, DBMS_DEFAULT_BUFFER_SIZE, mode == WriteMode::Rewrite ? -1 : (O_APPEND | O_CREAT | O_WRONLY))
, start_index(start_index_)
{}
off_t appendRecord(ChangelogRecord && record, bool sync)
{
off_t result = plain_buf.count();
writeIntBinary(computeRecordChecksum(record), plain_buf);
writeIntBinary(record.header.version, plain_buf);
writeIntBinary(record.header.index, plain_buf);
writeIntBinary(record.header.term, plain_buf);
writeIntBinary(record.header.value_type, plain_buf);
writeIntBinary(record.header.blob_size, plain_buf);
if (record.header.blob_size != 0)
plain_buf.write(reinterpret_cast<char *>(record.blob->data_begin()), record.blob->size());
entries_written++;
if (sync)
plain_buf.sync();
return result;
}
void truncateToLength(off_t new_length)
{
flush();
plain_buf.truncate(new_length);
plain_buf.seek(new_length, SEEK_SET);
}
void flush()
{
plain_buf.sync();
}
size_t getEntriesWritten() const
{
return entries_written;
}
void setEntriesWritten(size_t entries_written_)
{
entries_written = entries_written_;
}
size_t getStartIndex() const
{
return start_index;
}
void setStartIndex(size_t start_index_)
{
start_index = start_index_;
}
private:
std::string filepath;
WriteBufferFromFile plain_buf;
size_t entries_written = 0;
size_t start_index;
};
struct ChangelogReadResult
{
size_t entries_read;
off_t last_position;
bool error;
};
class ChangelogReader
{
public:
explicit ChangelogReader(const std::string & filepath_)
: filepath(filepath_)
, read_buf(filepath)
{}
ChangelogReadResult readChangelog(IndexToLogEntry & logs, size_t start_log_index, IndexToOffset & index_to_offset, Poco::Logger * log)
{
size_t previous_index = 0;
ChangelogReadResult result{};
try
{
while (!read_buf.eof())
{
result.last_position = read_buf.count();
Checksum record_checksum;
readIntBinary(record_checksum, read_buf);
/// Initialization is required, otherwise checksums may fail
ChangelogRecord record;
readIntBinary(record.header.version, read_buf);
readIntBinary(record.header.index, read_buf);
readIntBinary(record.header.term, read_buf);
readIntBinary(record.header.value_type, read_buf);
readIntBinary(record.header.blob_size, read_buf);
if (record.header.version > CURRENT_CHANGELOG_VERSION)
throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported changelog version {} on path {}", record.header.version, filepath);
if (record.header.blob_size != 0)
{
auto buffer = nuraft::buffer::alloc(record.header.blob_size);
auto * buffer_begin = reinterpret_cast<char *>(buffer->data_begin());
read_buf.readStrict(buffer_begin, record.header.blob_size);
record.blob = buffer;
}
else
record.blob = nullptr;
if (previous_index != 0 && previous_index + 1 != record.header.index)
throw Exception(ErrorCodes::CORRUPTED_DATA, "Previous log entry {}, next log entry {}, seems like some entries skipped", previous_index, record.header.index);
previous_index = record.header.index;
Checksum checksum = computeRecordChecksum(record);
if (checksum != record_checksum)
{
throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH,
"Checksums doesn't match for log {} (version {}), index {}, blob_size {}",
filepath, record.header.version, record.header.index, record.header.blob_size);
}
if (logs.count(record.header.index) != 0)
throw Exception(ErrorCodes::CORRUPTED_DATA, "Duplicated index id {} in log {}", record.header.index, filepath);
result.entries_read += 1;
if (record.header.index < start_log_index)
continue;
auto log_entry = nuraft::cs_new<nuraft::log_entry>(record.header.term, record.blob, record.header.value_type);
logs.emplace(record.header.index, log_entry);
index_to_offset[record.header.index] = result.last_position;
if (result.entries_read % 50000 == 0)
LOG_TRACE(log, "Reading changelog from path {}, entries {}", filepath, result.entries_read);
}
}
catch (const Exception & ex)
{
if (ex.code() == ErrorCodes::UNKNOWN_FORMAT_VERSION)
throw ex;
result.error = true;
LOG_WARNING(log, "Cannot completely read changelog on path {}, error: {}", filepath, ex.message());
}
catch (...)
{
result.error = true;
tryLogCurrentException(log);
}
LOG_TRACE(log, "Totally read from changelog {} {} entries", filepath, result.entries_read);
return result;
}
private:
std::string filepath;
ReadBufferFromFile read_buf;
};
Changelog::Changelog(const std::string & changelogs_dir_, size_t rotate_interval_, Poco::Logger * log_)
: changelogs_dir(changelogs_dir_)
, rotate_interval(rotate_interval_)
, log(log_)
{
namespace fs = std::filesystem;
if (!fs::exists(changelogs_dir))
fs::create_directories(changelogs_dir);
for (const auto & p : fs::directory_iterator(changelogs_dir))
{
auto file_description = getChangelogFileDescription(p.path());
existing_changelogs[file_description.from_log_index] = file_description;
}
}
void Changelog::readChangelogAndInitWriter(size_t from_log_index)
{
start_index = from_log_index == 0 ? 1 : from_log_index;
size_t total_read = 0;
size_t entries_in_last = 0;
size_t incomplete_log_index = 0;
ChangelogReadResult result{};
bool started = false;
for (const auto & [changelog_start_index, changelog_description] : existing_changelogs)
{
entries_in_last = changelog_description.to_log_index - changelog_description.from_log_index + 1;
if (changelog_description.to_log_index >= from_log_index)
{
if (!started)
{
if (changelog_description.from_log_index > start_index)
throw Exception(ErrorCodes::CORRUPTED_DATA, "Cannot read changelog from index {}, smallest available index {}", start_index, changelog_description.from_log_index);
started = true;
}
ChangelogReader reader(changelog_description.path);
result = reader.readChangelog(logs, from_log_index, index_to_start_pos, log);
total_read += result.entries_read;
/// May happen after truncate, crash or simply unfinished log
if (result.entries_read < entries_in_last)
{
incomplete_log_index = changelog_start_index;
break;
}
}
}
if (!started && start_index != 1)
throw Exception(ErrorCodes::CORRUPTED_DATA, "Required to read data from {}, but we don't have any active changelogs", from_log_index);
if (incomplete_log_index != 0)
{
/// All subsequent logs shouldn't exist. But they may exist if we crashed after writeAt started. Remove them.
for (auto itr = existing_changelogs.upper_bound(incomplete_log_index); itr != existing_changelogs.end();)
{
LOG_WARNING(log, "Removing changelog {}, because it's goes after broken changelog entry", itr->second.path);
std::filesystem::remove(itr->second.path);
itr = existing_changelogs.erase(itr);
}
/// Continue to write into existing log
if (!existing_changelogs.empty())
{
auto description = existing_changelogs.rbegin()->second;
LOG_TRACE(log, "Continue to write into {}", description.path);
current_writer = std::make_unique<ChangelogWriter>(description.path, WriteMode::Append, description.from_log_index);
current_writer->setEntriesWritten(result.entries_read);
/// Truncate all broken entries from log
if (result.error)
{
LOG_WARNING(log, "Read finished with error, truncating all broken log entries");
current_writer->truncateToLength(result.last_position);
}
}
}
/// Start new log if we don't initialize writer from previous log
if (!current_writer)
rotate(start_index + total_read);
}
void Changelog::rotate(size_t new_start_log_index)
{
//// doesn't exist on init
if (current_writer)
current_writer->flush();
ChangelogFileDescription new_description;
new_description.prefix = DEFAULT_PREFIX;
new_description.from_log_index = new_start_log_index;
new_description.to_log_index = new_start_log_index + rotate_interval - 1;
new_description.path = formatChangelogPath(changelogs_dir, new_description);
LOG_TRACE(log, "Starting new changelog {}", new_description.path);
existing_changelogs[new_start_log_index] = new_description;
current_writer = std::make_unique<ChangelogWriter>(new_description.path, WriteMode::Rewrite, new_start_log_index);
}
ChangelogRecord Changelog::buildRecord(size_t index, const LogEntryPtr & log_entry)
{
ChangelogRecord record;
record.header.version = ChangelogVersion::V0;
record.header.index = index;
record.header.term = log_entry->get_term();
record.header.value_type = log_entry->get_val_type();
auto buffer = log_entry->get_buf_ptr();
if (buffer)
record.header.blob_size = buffer->size();
else
record.header.blob_size = 0;
record.blob = buffer;
return record;
}
void Changelog::appendEntry(size_t index, const LogEntryPtr & log_entry, bool force_sync)
{
if (!current_writer)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Changelog must be initialized before appending records");
if (logs.empty())
start_index = index;
if (current_writer->getEntriesWritten() == rotate_interval)
rotate(index);
auto offset = current_writer->appendRecord(buildRecord(index, log_entry), force_sync);
if (!index_to_start_pos.try_emplace(index, offset).second)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Record with index {} already exists", index);
logs[index] = makeClone(log_entry);
}
void Changelog::writeAt(size_t index, const LogEntryPtr & log_entry, bool force_sync)
{
if (index_to_start_pos.count(index) == 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot write at index {} because changelog doesn't contain it", index);
bool go_to_previous_file = index < current_writer->getStartIndex();
if (go_to_previous_file)
{
auto index_changelog = existing_changelogs.lower_bound(index);
ChangelogFileDescription description;
if (index_changelog->first == index)
description = index_changelog->second;
else
description = std::prev(index_changelog)->second;
current_writer = std::make_unique<ChangelogWriter>(description.path, WriteMode::Append, index_changelog->first);
current_writer->setEntriesWritten(description.to_log_index - description.from_log_index + 1);
}
auto entries_written = current_writer->getEntriesWritten();
current_writer->truncateToLength(index_to_start_pos[index]);
if (go_to_previous_file)
{
/// Remove all subsequent files
auto to_remove_itr = existing_changelogs.upper_bound(index);
for (auto itr = to_remove_itr; itr != existing_changelogs.end();)
{
std::filesystem::remove(itr->second.path);
itr = existing_changelogs.erase(itr);
}
}
/// Remove redundant logs from memory
for (size_t i = index; ; ++i)
{
auto log_itr = logs.find(i);
if (log_itr == logs.end())
break;
logs.erase(log_itr);
index_to_start_pos.erase(i);
entries_written--;
}
current_writer->setEntriesWritten(entries_written);
appendEntry(index, log_entry, force_sync);
}
void Changelog::compact(size_t up_to_log_index)
{
for (auto itr = existing_changelogs.begin(); itr != existing_changelogs.end();)
{
/// Remove all completely outdated changelog files
if (itr->second.to_log_index <= up_to_log_index)
{
LOG_INFO(log, "Removing changelog {} because of compaction", itr->second.path);
std::erase_if(index_to_start_pos, [right_index = itr->second.to_log_index] (const auto & item) { return item.first <= right_index; });
std::filesystem::remove(itr->second.path);
itr = existing_changelogs.erase(itr);
}
else /// Files are ordered, so all subsequent should exist
break;
}
start_index = up_to_log_index + 1;
std::erase_if(logs, [up_to_log_index] (const auto & item) { return item.first <= up_to_log_index; });
}
LogEntryPtr Changelog::getLastEntry() const
{
static LogEntryPtr fake_entry = nuraft::cs_new<nuraft::log_entry>(0, nuraft::buffer::alloc(sizeof(size_t)));
size_t next_index = getNextEntryIndex() - 1;
auto entry = logs.find(next_index);
if (entry == logs.end())
return fake_entry;
return entry->second;
}
LogEntriesPtr Changelog::getLogEntriesBetween(size_t start, size_t end)
{
LogEntriesPtr ret = nuraft::cs_new<std::vector<nuraft::ptr<nuraft::log_entry>>>();
ret->resize(end - start);
size_t result_pos = 0;
for (size_t i = start; i < end; ++i)
{
(*ret)[result_pos] = entryAt(i);
result_pos++;
}
return ret;
}
LogEntryPtr Changelog::entryAt(size_t index)
{
nuraft::ptr<nuraft::log_entry> src = nullptr;
auto entry = logs.find(index);
if (entry == logs.end())
return nullptr;
src = entry->second;
return src;
}
nuraft::ptr<nuraft::buffer> Changelog::serializeEntriesToBuffer(size_t index, int32_t count)
{
std::vector<nuraft::ptr<nuraft::buffer>> returned_logs;
size_t size_total = 0;
for (size_t i = index; i < index + count; ++i)
{
auto entry = logs.find(i);
if (entry == logs.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Don't have log entry {}", i);
nuraft::ptr<nuraft::buffer> buf = entry->second->serialize();
size_total += buf->size();
returned_logs.push_back(buf);
}
nuraft::ptr<nuraft::buffer> buf_out = nuraft::buffer::alloc(sizeof(int32_t) + count * sizeof(int32_t) + size_total);
buf_out->pos(0);
buf_out->put(static_cast<int32_t>(count));
for (auto & entry : returned_logs)
{
nuraft::ptr<nuraft::buffer> & bb = entry;
buf_out->put(static_cast<int32_t>(bb->size()));
buf_out->put(*bb);
}
return buf_out;
}
void Changelog::applyEntriesFromBuffer(size_t index, nuraft::buffer & buffer, bool force_sync)
{
buffer.pos(0);
int num_logs = buffer.get_int();
for (int i = 0; i < num_logs; ++i)
{
size_t cur_index = index + i;
int buf_size = buffer.get_int();
nuraft::ptr<nuraft::buffer> buf_local = nuraft::buffer::alloc(buf_size);
buffer.get(buf_local);
LogEntryPtr log_entry = nuraft::log_entry::deserialize(*buf_local);
if (i == 0 && logs.count(cur_index))
writeAt(cur_index, log_entry, force_sync);
else
appendEntry(cur_index, log_entry, force_sync);
}
}
void Changelog::flush()
{
current_writer->flush();
}
Changelog::~Changelog()
{
try
{
if (current_writer)
current_writer->flush();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
}

View File

@ -0,0 +1,136 @@
#pragma once
#include <libnuraft/nuraft.hxx> // Y_IGNORE
#include <city.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/HashingWriteBuffer.h>
#include <Compression/CompressedWriteBuffer.h>
#include <Disks/IDisk.h>
namespace DB
{
using Checksum = UInt64;
using LogEntryPtr = nuraft::ptr<nuraft::log_entry>;
using LogEntries = std::vector<LogEntryPtr>;
using LogEntriesPtr = nuraft::ptr<LogEntries>;
using BufferPtr = nuraft::ptr<nuraft::buffer>;
using IndexToOffset = std::unordered_map<size_t, off_t>;
using IndexToLogEntry = std::unordered_map<size_t, LogEntryPtr>;
enum class ChangelogVersion : uint8_t
{
V0 = 0,
};
static constexpr auto CURRENT_CHANGELOG_VERSION = ChangelogVersion::V0;
struct ChangelogRecordHeader
{
ChangelogVersion version = CURRENT_CHANGELOG_VERSION;
size_t index; /// entry log number
size_t term;
nuraft::log_val_type value_type;
size_t blob_size;
};
/// Changelog record on disk
struct ChangelogRecord
{
ChangelogRecordHeader header;
nuraft::ptr<nuraft::buffer> blob;
};
/// changelog_fromindex_toindex.bin
/// [fromindex, toindex] <- inclusive
struct ChangelogFileDescription
{
std::string prefix;
size_t from_log_index;
size_t to_log_index;
std::string path;
};
class ChangelogWriter;
/// Simplest changelog with files rotation.
/// No compression, no metadata, just entries with headers one by one
/// Able to read broken files/entries and discard them.
class Changelog
{
public:
Changelog(const std::string & changelogs_dir_, size_t rotate_interval_, Poco::Logger * log_);
/// Read changelog from files on changelogs_dir_ skipping all entries before from_log_index
/// Truncate broken entries, remove files after broken entries.
void readChangelogAndInitWriter(size_t from_log_index);
/// Add entry to log with index. Call fsync if force_sync true.
void appendEntry(size_t index, const LogEntryPtr & log_entry, bool force_sync);
/// Write entry at index and truncate all subsequent entries.
void writeAt(size_t index, const LogEntryPtr & log_entry, bool force_sync);
/// Remove log files with to_log_index <= up_to_log_index.
void compact(size_t up_to_log_index);
size_t getNextEntryIndex() const
{
return start_index + logs.size();
}
size_t getStartIndex() const
{
return start_index;
}
/// Last entry in log, or fake entry with term 0 if log is empty
LogEntryPtr getLastEntry() const;
/// Return log entries between [start, end)
LogEntriesPtr getLogEntriesBetween(size_t start_index, size_t end_index);
/// Return entry at position index
LogEntryPtr entryAt(size_t index);
/// Serialize entries from index into buffer
BufferPtr serializeEntriesToBuffer(size_t index, int32_t count);
/// Apply entries from buffer overriding existing entries
void applyEntriesFromBuffer(size_t index, nuraft::buffer & buffer, bool force_sync);
/// Fsync log to disk
void flush();
size_t size() const
{
return logs.size();
}
/// Fsync log to disk
~Changelog();
private:
/// Pack log_entry into changelog record
static ChangelogRecord buildRecord(size_t index, const LogEntryPtr & log_entry);
/// Starts new file [new_start_log_index, new_start_log_index + rotate_interval]
void rotate(size_t new_start_log_index);
private:
const std::string changelogs_dir;
const size_t rotate_interval;
Poco::Logger * log;
std::map<size_t, ChangelogFileDescription> existing_changelogs;
std::unique_ptr<ChangelogWriter> current_writer;
IndexToOffset index_to_start_pos;
IndexToLogEntry logs;
size_t start_index = 0;
};
}

View File

@ -22,13 +22,15 @@ struct Settings;
M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \
M(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too often leader elections)", 0) \
M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Lower bound of election timer (avoid too often leader elections)", 0) \
M(UInt64, reserved_log_items, 5000, "How many log items to store (don't remove during compaction)", 0) \
M(UInt64, snapshot_distance, 5000, "How many log items we have to collect to write new snapshot", 0) \
M(UInt64, reserved_log_items, 50000, "How many log items to store (don't remove during compaction)", 0) \
M(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \
M(UInt64, max_stored_snapshots, 3, "How many snapshots we want to store", 0) \
M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \
M(Milliseconds, shutdown_timeout, 5000, "How many time we will until RAFT shutdown", 0) \
M(Milliseconds, startup_timeout, 30000, "How many time we will until RAFT to start", 0) \
M(LogsLevel, raft_logs_level, LogsLevel::information, "Log internal RAFT logs into main server log level. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0)
M(LogsLevel, raft_logs_level, LogsLevel::information, "Log internal RAFT logs into main server log level. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \
M(UInt64, rotate_log_storage_interval, 100000, "How many records will be stored in one log storage file", 0) \
M(Bool, force_sync, true, " Call fsync on each change in RAFT changelog", 0)
DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)

View File

@ -72,12 +72,12 @@ nuraft::ptr<std::vector<nuraft::ptr<nuraft::log_entry>>> InMemoryLogStore::log_e
ret->resize(end - start);
size_t cc = 0;
for (size_t ii = start; ii < end; ++ii)
for (size_t i = start; i < end; ++i)
{
nuraft::ptr<nuraft::log_entry> src = nullptr;
{
std::lock_guard<std::mutex> l(logs_lock);
auto entry = logs.find(ii);
auto entry = logs.find(i);
if (entry == logs.end())
{
entry = logs.find(0);
@ -152,9 +152,9 @@ void InMemoryLogStore::apply_pack(size_t index, nuraft::buffer & pack)
pack.pos(0);
Int32 num_logs = pack.get_int();
for (Int32 ii = 0; ii < num_logs; ++ii)
for (Int32 i = 0; i < num_logs; ++i)
{
size_t cur_idx = index + ii;
size_t cur_idx = index + i;
Int32 buf_size = pack.get_int();
nuraft::ptr<nuraft::buffer> buf_local = nuraft::buffer::alloc(buf_size);

View File

@ -9,12 +9,26 @@ namespace DB
class LoggerWrapper : public nuraft::logger
{
private:
static inline const std::unordered_map<LogsLevel, Poco::Message::Priority> LEVELS =
{
{LogsLevel::trace, Poco::Message::Priority::PRIO_TRACE},
{LogsLevel::debug, Poco::Message::Priority::PRIO_DEBUG},
{LogsLevel::information, Poco::Message::PRIO_INFORMATION},
{LogsLevel::warning, Poco::Message::PRIO_WARNING},
{LogsLevel::error, Poco::Message::PRIO_ERROR},
{LogsLevel::fatal, Poco::Message::PRIO_FATAL}
};
static inline const int LEVEL_MAX = static_cast<int>(LogsLevel::trace);
static inline const int LEVEL_MIN = static_cast<int>(LogsLevel::none);
public:
LoggerWrapper(const std::string & name, LogsLevel level_)
: log(&Poco::Logger::get(name))
, level(static_cast<int>(level_))
, level(level_)
{
log->setLevel(level);
log->setLevel(static_cast<int>(LEVELS.at(level)));
}
void put_details(
@ -24,24 +38,26 @@ public:
size_t /* line_number */,
const std::string & msg) override
{
LOG_IMPL(log, static_cast<DB::LogsLevel>(level_), static_cast<Poco::Message::Priority>(level_), msg);
LogsLevel db_level = static_cast<LogsLevel>(level_);
LOG_IMPL(log, db_level, LEVELS.at(db_level), msg);
}
void set_level(int level_) override
{
level_ = std::min(6, std::max(1, level_));
log->setLevel(level_);
level = level_;
level_ = std::min(LEVEL_MAX, std::max(LEVEL_MIN, level_));
level = static_cast<LogsLevel>(level_);
log->setLevel(static_cast<int>(LEVELS.at(level)));
}
int get_level() override
{
return level;
LogsLevel lvl = level;
return static_cast<int>(lvl);
}
private:
Poco::Logger * log;
std::atomic<int> level;
std::atomic<LogsLevel> level;
};
}

View File

@ -0,0 +1,105 @@
#include <Coordination/NuKeeperLogStore.h>
namespace DB
{
NuKeeperLogStore::NuKeeperLogStore(const std::string & changelogs_path, size_t rotate_interval_, bool force_sync_)
: log(&Poco::Logger::get("NuKeeperLogStore"))
, changelog(changelogs_path, rotate_interval_, log)
, force_sync(force_sync_)
{
}
size_t NuKeeperLogStore::start_index() const
{
std::lock_guard lock(changelog_lock);
return changelog.getStartIndex();
}
void NuKeeperLogStore::init(size_t from_log_idx)
{
std::lock_guard lock(changelog_lock);
changelog.readChangelogAndInitWriter(from_log_idx);
}
size_t NuKeeperLogStore::next_slot() const
{
std::lock_guard lock(changelog_lock);
return changelog.getNextEntryIndex();
}
nuraft::ptr<nuraft::log_entry> NuKeeperLogStore::last_entry() const
{
std::lock_guard lock(changelog_lock);
return changelog.getLastEntry();
}
size_t NuKeeperLogStore::append(nuraft::ptr<nuraft::log_entry> & entry)
{
std::lock_guard lock(changelog_lock);
size_t idx = changelog.getNextEntryIndex();
changelog.appendEntry(idx, entry, force_sync);
return idx;
}
void NuKeeperLogStore::write_at(size_t index, nuraft::ptr<nuraft::log_entry> & entry)
{
std::lock_guard lock(changelog_lock);
changelog.writeAt(index, entry, force_sync);
}
nuraft::ptr<std::vector<nuraft::ptr<nuraft::log_entry>>> NuKeeperLogStore::log_entries(size_t start, size_t end)
{
std::lock_guard lock(changelog_lock);
return changelog.getLogEntriesBetween(start, end);
}
nuraft::ptr<nuraft::log_entry> NuKeeperLogStore::entry_at(size_t index)
{
std::lock_guard lock(changelog_lock);
return changelog.entryAt(index);
}
size_t NuKeeperLogStore::term_at(size_t index)
{
std::lock_guard lock(changelog_lock);
auto entry = changelog.entryAt(index);
if (entry)
return entry->get_term();
return 0;
}
nuraft::ptr<nuraft::buffer> NuKeeperLogStore::pack(size_t index, int32_t cnt)
{
std::lock_guard lock(changelog_lock);
return changelog.serializeEntriesToBuffer(index, cnt);
}
bool NuKeeperLogStore::compact(size_t last_log_index)
{
std::lock_guard lock(changelog_lock);
changelog.compact(last_log_index);
return true;
}
bool NuKeeperLogStore::flush()
{
std::lock_guard lock(changelog_lock);
changelog.flush();
return true;
}
void NuKeeperLogStore::apply_pack(size_t index, nuraft::buffer & pack)
{
std::lock_guard lock(changelog_lock);
changelog.applyEntriesFromBuffer(index, pack, force_sync);
}
size_t NuKeeperLogStore::size() const
{
std::lock_guard lock(changelog_lock);
return changelog.size();
}
}

View File

@ -0,0 +1,52 @@
#pragma once
#include <libnuraft/log_store.hxx> // Y_IGNORE
#include <map>
#include <mutex>
#include <Core/Types.h>
#include <Coordination/Changelog.h>
#include <common/logger_useful.h>
namespace DB
{
class NuKeeperLogStore : public nuraft::log_store
{
public:
NuKeeperLogStore(const std::string & changelogs_path, size_t rotate_interval_, bool force_sync_);
void init(size_t from_log_idx);
size_t start_index() const override;
size_t next_slot() const override;
nuraft::ptr<nuraft::log_entry> last_entry() const override;
size_t append(nuraft::ptr<nuraft::log_entry> & entry) override;
void write_at(size_t index, nuraft::ptr<nuraft::log_entry> & entry) override;
nuraft::ptr<std::vector<nuraft::ptr<nuraft::log_entry>>> log_entries(size_t start, size_t end) override;
nuraft::ptr<nuraft::log_entry> entry_at(size_t index) override;
size_t term_at(size_t index) override;
nuraft::ptr<nuraft::buffer> pack(size_t index, int32_t cnt) override;
void apply_pack(size_t index, nuraft::buffer & pack) override;
bool compact(size_t last_log_index) override;
bool flush() override;
size_t size() const;
private:
mutable std::mutex changelog_lock;
Poco::Logger * log;
Changelog changelog;
bool force_sync;
};
}

View File

@ -1,7 +1,7 @@
#include <Coordination/NuKeeperServer.h>
#include <Coordination/LoggerWrapper.h>
#include <Coordination/NuKeeperStateMachine.h>
#include <Coordination/InMemoryStateManager.h>
#include <Coordination/NuKeeperStateManager.h>
#include <Coordination/WriteBufferFromNuraftBuffer.h>
#include <Coordination/ReadBufferFromNuraftBuffer.h>
#include <IO/ReadHelpers.h>
@ -26,13 +26,16 @@ NuKeeperServer::NuKeeperServer(
: server_id(server_id_)
, coordination_settings(coordination_settings_)
, state_machine(nuraft::cs_new<NuKeeperStateMachine>(responses_queue_, coordination_settings))
, state_manager(nuraft::cs_new<InMemoryStateManager>(server_id, "test_keeper_server.raft_configuration", config))
, state_manager(nuraft::cs_new<NuKeeperStateManager>(server_id, "test_keeper_server", config, coordination_settings))
, responses_queue(responses_queue_)
{
}
void NuKeeperServer::startup()
{
state_manager->loadLogStore(state_machine->last_commit_index());
nuraft::raft_params params;
params.heart_beat_interval_ = coordination_settings->heart_beat_interval_ms.totalMilliseconds();
params.election_timeout_lower_bound_ = coordination_settings->election_timeout_lower_bound_ms.totalMilliseconds();
@ -64,6 +67,7 @@ void NuKeeperServer::startup()
void NuKeeperServer::shutdown()
{
state_machine->shutdownStorage();
state_manager->flushLogStore();
if (!launcher.shutdown(coordination_settings->shutdown_timeout.totalSeconds()))
LOG_WARNING(&Poco::Logger::get("NuKeeperServer"), "Failed to shutdown RAFT server in {} seconds", 5);
}
@ -157,7 +161,7 @@ bool NuKeeperServer::isLeaderAlive() const
nuraft::cb_func::ReturnCode NuKeeperServer::callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * /* param */)
{
if (type == nuraft::cb_func::Type::BecomeFresh || type == nuraft::cb_func::Type::BecomeLeader)
if ((type == nuraft::cb_func::InitialBatchCommited && isLeader()) || type == nuraft::cb_func::BecomeFresh)
{
std::unique_lock lock(initialized_mutex);
initialized_flag = true;

View File

@ -2,7 +2,7 @@
#include <libnuraft/nuraft.hxx> // Y_IGNORE
#include <Coordination/InMemoryLogStore.h>
#include <Coordination/InMemoryStateManager.h>
#include <Coordination/NuKeeperStateManager.h>
#include <Coordination/NuKeeperStateMachine.h>
#include <Coordination/NuKeeperStorage.h>
#include <Coordination/CoordinationSettings.h>
@ -20,7 +20,7 @@ private:
nuraft::ptr<NuKeeperStateMachine> state_machine;
nuraft::ptr<InMemoryStateManager> state_manager;
nuraft::ptr<NuKeeperStateManager> state_manager;
nuraft::raft_launcher launcher;

View File

@ -46,7 +46,7 @@ NuKeeperStateMachine::NuKeeperStateMachine(ResponsesQueue & responses_queue_, co
, storage(coordination_settings->dead_session_check_period_ms.totalMilliseconds())
, responses_queue(responses_queue_)
, last_committed_idx(0)
, log(&Poco::Logger::get("NuRaftStateMachine"))
, log(&Poco::Logger::get("NuKeeperStateMachine"))
{
LOG_DEBUG(log, "Created nukeeper state machine");
}

View File

@ -1,4 +1,4 @@
#include <Coordination/InMemoryStateManager.h>
#include <Coordination/NuKeeperStateManager.h>
#include <Common/Exception.h>
namespace DB
@ -9,30 +9,34 @@ namespace ErrorCodes
extern const int RAFT_ERROR;
}
InMemoryStateManager::InMemoryStateManager(int server_id_, const std::string & host, int port)
NuKeeperStateManager::NuKeeperStateManager(int server_id_, const std::string & host, int port, const std::string & logs_path)
: my_server_id(server_id_)
, my_port(port)
, log_store(nuraft::cs_new<InMemoryLogStore>())
, log_store(nuraft::cs_new<NuKeeperLogStore>(logs_path, 5000, false))
, cluster_config(nuraft::cs_new<nuraft::cluster_config>())
{
auto peer_config = nuraft::cs_new<nuraft::srv_config>(my_server_id, host + ":" + std::to_string(port));
cluster_config->get_servers().push_back(peer_config);
}
InMemoryStateManager::InMemoryStateManager(
NuKeeperStateManager::NuKeeperStateManager(
int my_server_id_,
const std::string & config_prefix,
const Poco::Util::AbstractConfiguration & config)
const Poco::Util::AbstractConfiguration & config,
const CoordinationSettingsPtr & coordination_settings)
: my_server_id(my_server_id_)
, log_store(nuraft::cs_new<InMemoryLogStore>())
, log_store(nuraft::cs_new<NuKeeperLogStore>(
config.getString(config_prefix + ".log_storage_path"),
coordination_settings->rotate_log_storage_interval, coordination_settings->force_sync))
, cluster_config(nuraft::cs_new<nuraft::cluster_config>())
{
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(config_prefix, keys);
config.keys(config_prefix + ".raft_configuration", keys);
for (const auto & server_key : keys)
{
std::string full_prefix = config_prefix + "." + server_key;
std::string full_prefix = config_prefix + ".raft_configuration." + server_key;
int server_id = config.getInt(full_prefix + ".id");
std::string hostname = config.getString(full_prefix + ".hostname");
int port = config.getInt(full_prefix + ".port");
@ -53,13 +57,23 @@ InMemoryStateManager::InMemoryStateManager(
cluster_config->get_servers().push_back(peer_config);
}
if (!my_server_config)
throw Exception(ErrorCodes::RAFT_ERROR, "Our server id {} not found in raft_configuration section");
throw Exception(ErrorCodes::RAFT_ERROR, "Our server id {} not found in raft_configuration section", my_server_id);
if (start_as_follower_servers.size() == cluster_config->get_servers().size())
throw Exception(ErrorCodes::RAFT_ERROR, "At least one of servers should be able to start as leader (without <start_as_follower>)");
}
void InMemoryStateManager::save_config(const nuraft::cluster_config & config)
void NuKeeperStateManager::loadLogStore(size_t start_log_index)
{
log_store->init(start_log_index);
}
void NuKeeperStateManager::flushLogStore()
{
log_store->flush();
}
void NuKeeperStateManager::save_config(const nuraft::cluster_config & config)
{
// Just keep in memory in this example.
// Need to write to disk here, if want to make it durable.
@ -67,7 +81,7 @@ void InMemoryStateManager::save_config(const nuraft::cluster_config & config)
cluster_config = nuraft::cluster_config::deserialize(*buf);
}
void InMemoryStateManager::save_state(const nuraft::srv_state & state)
void NuKeeperStateManager::save_state(const nuraft::srv_state & state)
{
// Just keep in memory in this example.
// Need to write to disk here, if want to make it durable.

View File

@ -2,25 +2,32 @@
#include <Core/Types.h>
#include <string>
#include <Coordination/InMemoryLogStore.h>
#include <Coordination/NuKeeperLogStore.h>
#include <Coordination/CoordinationSettings.h>
#include <libnuraft/nuraft.hxx> // Y_IGNORE
#include <Poco/Util/AbstractConfiguration.h>
namespace DB
{
class InMemoryStateManager : public nuraft::state_mgr
class NuKeeperStateManager : public nuraft::state_mgr
{
public:
InMemoryStateManager(
NuKeeperStateManager(
int server_id_,
const std::string & config_prefix,
const Poco::Util::AbstractConfiguration & config);
const Poco::Util::AbstractConfiguration & config,
const CoordinationSettingsPtr & coordination_settings);
InMemoryStateManager(
NuKeeperStateManager(
int server_id_,
const std::string & host,
int port);
int port,
const std::string & logs_path);
void loadLogStore(size_t start_log_index);
void flushLogStore();
nuraft::ptr<nuraft::cluster_config> load_config() override { return cluster_config; }
@ -49,7 +56,7 @@ private:
int my_server_id;
int my_port;
std::unordered_set<int> start_as_follower_servers;
nuraft::ptr<InMemoryLogStore> log_store;
nuraft::ptr<NuKeeperLogStore> log_store;
nuraft::ptr<nuraft::srv_config> my_server_config;
nuraft::ptr<nuraft::cluster_config> cluster_config;
nuraft::ptr<nuraft::srv_state> server_state;

View File

@ -25,10 +25,10 @@ static String parentPath(const String & path)
return "/";
}
static String baseName(const String & path)
static std::string getBaseName(const String & path)
{
auto rslash_pos = path.rfind('/');
return path.substr(rslash_pos + 1);
size_t basename_start = path.rfind('/');
return std::string{&path[basename_start + 1], path.length() - basename_start - 1};
}
static NuKeeperStorage::ResponsesForSessions processWatchesImpl(const String & path, NuKeeperStorage::Watches & watches, NuKeeperStorage::Watches & list_watches, Coordination::Event event_type)
@ -167,14 +167,17 @@ struct NuKeeperStorageCreateRequest final : public NuKeeperStorageRequest
/// Increment sequential number even if node is not sequential
++it->second.seq_num;
response.path_created = path_created;
container.emplace(path_created, std::move(created_node));
auto child_path = getBaseName(path_created);
it->second.children.insert(child_path);
if (request.is_ephemeral)
ephemerals[session_id].emplace(path_created);
undo = [&container, &ephemerals, session_id, path_created, is_ephemeral = request.is_ephemeral, parent_path = it->first]
undo = [&container, &ephemerals, session_id, path_created, is_ephemeral = request.is_ephemeral, parent_path = it->first, child_path]
{
container.erase(path_created);
if (is_ephemeral)
@ -183,6 +186,7 @@ struct NuKeeperStorageCreateRequest final : public NuKeeperStorageRequest
--undo_parent.stat.cversion;
--undo_parent.stat.numChildren;
--undo_parent.seq_num;
undo_parent.children.erase(child_path);
};
++it->second.stat.cversion;
@ -250,13 +254,16 @@ struct NuKeeperStorageRemoveRequest final : public NuKeeperStorageRequest
if (prev_node.is_ephemeral)
ephemerals[session_id].erase(request.path);
container.erase(it);
auto child_basename = getBaseName(it->first);
auto & parent = container.at(parentPath(request.path));
--parent.stat.numChildren;
++parent.stat.cversion;
parent.children.erase(child_basename);
response.error = Coordination::Error::ZOK;
undo = [prev_node, &container, &ephemerals, session_id, path = request.path]
container.erase(it);
undo = [prev_node, &container, &ephemerals, session_id, path = request.path, child_basename]
{
if (prev_node.is_ephemeral)
ephemerals[session_id].emplace(path);
@ -265,6 +272,7 @@ struct NuKeeperStorageRemoveRequest final : public NuKeeperStorageRequest
auto & undo_parent = container.at(parentPath(path));
++undo_parent.stat.numChildren;
--undo_parent.stat.cversion;
undo_parent.children.insert(child_basename);
};
}
@ -370,17 +378,9 @@ struct NuKeeperStorageListRequest final : public NuKeeperStorageRequest
if (path_prefix.empty())
throw DB::Exception("Logical error: path cannot be empty", ErrorCodes::LOGICAL_ERROR);
if (path_prefix.back() != '/')
path_prefix += '/';
response.names.insert(response.names.end(), it->second.children.begin(), it->second.children.end());
/// Fairly inefficient.
for (auto child_it = container.upper_bound(path_prefix);
child_it != container.end() && startsWith(child_it->first, path_prefix);
++child_it)
{
if (parentPath(child_it->first) == request.path)
response.names.emplace_back(baseName(child_it->first));
}
std::sort(response.names.begin(), response.names.end());
response.stat = it->second.stat;
response.error = Coordination::Error::ZOK;

View File

@ -16,6 +16,7 @@ using namespace DB;
struct NuKeeperStorageRequest;
using NuKeeperStorageRequestPtr = std::shared_ptr<NuKeeperStorageRequest>;
using ResponseCallback = std::function<void(const Coordination::ZooKeeperResponsePtr &)>;
using ChildrenSet = std::unordered_set<std::string>;
class NuKeeperStorage
{
@ -30,6 +31,7 @@ public:
bool is_sequental = false;
Coordination::Stat stat{};
int32_t seq_num = 0;
ChildrenSet children{};
};
struct ResponseForSession
@ -48,9 +50,9 @@ public:
using RequestsForSessions = std::vector<RequestForSession>;
using Container = std::map<std::string, Node>;
using Ephemerals = std::unordered_map<int64_t, std::unordered_set<String>>;
using SessionAndWatcher = std::unordered_map<int64_t, std::unordered_set<String>>;
using Container = std::unordered_map<std::string, Node>;
using Ephemerals = std::unordered_map<int64_t, std::unordered_set<std::string>>;
using SessionAndWatcher = std::unordered_map<int64_t, std::unordered_set<std::string>>;
using SessionAndTimeout = std::unordered_map<int64_t, long>;
using SessionIDs = std::vector<int64_t>;

View File

@ -59,13 +59,16 @@ void NuKeeperStorageSerializer::deserialize(NuKeeperStorage & storage, ReadBuffe
size_t container_size;
Coordination::read(container_size, in);
while (storage.container.size() < container_size)
size_t current_size = 0;
while (current_size < container_size)
{
std::string path;
Coordination::read(path, in);
NuKeeperStorage::Node node;
readNode(node, in);
storage.container[path] = node;
current_size++;
}
size_t ephemerals_size;
Coordination::read(ephemerals_size, in);

View File

@ -6,9 +6,10 @@
#endif
#if USE_NURAFT
#include <Poco/ConsoleChannel.h>
#include <Poco/Logger.h>
#include <Coordination/InMemoryLogStore.h>
#include <Coordination/InMemoryStateManager.h>
#include <Coordination/NuKeeperStateManager.h>
#include <Coordination/NuKeeperStorageSerializer.h>
#include <Coordination/SummingStateMachine.h>
#include <Coordination/NuKeeperStateMachine.h>
@ -20,9 +21,35 @@
#include <Common/ZooKeeper/ZooKeeperCommon.h>
#include <Common/ZooKeeper/ZooKeeperIO.h>
#include <Common/Exception.h>
#include <common/logger_useful.h>
#include <libnuraft/nuraft.hxx> // Y_IGNORE
#include <thread>
#include <Coordination/NuKeeperLogStore.h>
#include <Coordination/Changelog.h>
#include <filesystem>
namespace fs = std::filesystem;
struct ChangelogDirTest
{
std::string path;
bool drop;
explicit ChangelogDirTest(std::string path_, bool drop_ = true)
: path(path_)
, drop(drop_)
{
if (fs::exists(path))
{
EXPECT_TRUE(false) << "Path " << path << " already exists, remove it to run test";
}
fs::create_directory(path);
}
~ChangelogDirTest()
{
if (fs::exists(path) && drop)
fs::remove_all(path);
}
};
TEST(CoordinationTest, BuildTest)
{
@ -67,14 +94,15 @@ TEST(CoordinationTest, BufferSerde)
template <typename StateMachine>
struct SimpliestRaftServer
{
SimpliestRaftServer(int server_id_, const std::string & hostname_, int port_)
SimpliestRaftServer(int server_id_, const std::string & hostname_, int port_, const std::string & logs_path)
: server_id(server_id_)
, hostname(hostname_)
, port(port_)
, endpoint(hostname + ":" + std::to_string(port))
, state_machine(nuraft::cs_new<StateMachine>())
, state_manager(nuraft::cs_new<DB::InMemoryStateManager>(server_id, hostname, port))
, state_manager(nuraft::cs_new<DB::NuKeeperStateManager>(server_id, hostname, port, logs_path))
{
state_manager->loadLogStore(1);
nuraft::raft_params params;
params.heart_beat_interval_ = 100;
params.election_timeout_lower_bound_ = 200;
@ -90,10 +118,10 @@ struct SimpliestRaftServer
if (!raft_instance)
{
std::cerr << "Failed to initialize launcher (see the message "
"in the log file)." << std::endl;
std::cerr << "Failed to initialize launcher" << std::endl;
exit(-1);
}
std::cout << "init Raft instance " << server_id;
for (size_t ii = 0; ii < 20; ++ii)
{
@ -123,7 +151,7 @@ struct SimpliestRaftServer
nuraft::ptr<StateMachine> state_machine;
// State manager.
nuraft::ptr<nuraft::state_mgr> state_manager;
nuraft::ptr<DB::NuKeeperStateManager> state_manager;
// Raft launcher.
nuraft::raft_launcher launcher;
@ -134,11 +162,10 @@ struct SimpliestRaftServer
using SummingRaftServer = SimpliestRaftServer<DB::SummingStateMachine>;
nuraft::ptr<nuraft::buffer> getLogEntry(int64_t number)
nuraft::ptr<nuraft::buffer> getBuffer(int64_t number)
{
nuraft::ptr<nuraft::buffer> ret = nuraft::buffer::alloc(sizeof(number));
nuraft::buffer_serializer bs(ret);
// WARNING: We don't consider endian-safety in this example.
bs.put_raw(&number, sizeof(number));
return ret;
}
@ -146,12 +173,13 @@ nuraft::ptr<nuraft::buffer> getLogEntry(int64_t number)
TEST(CoordinationTest, TestSummingRaft1)
{
SummingRaftServer s1(1, "localhost", 44444);
ChangelogDirTest test("./logs");
SummingRaftServer s1(1, "localhost", 44444, "./logs");
/// Single node is leader
EXPECT_EQ(s1.raft_instance->get_leader(), 1);
auto entry1 = getLogEntry(143);
auto entry1 = getBuffer(143);
auto ret = s1.raft_instance->append_entries({entry1});
EXPECT_TRUE(ret->get_accepted()) << "failed to replicate: entry 1" << ret->get_result_code();
EXPECT_EQ(ret->get_result_code(), nuraft::cmd_result_code::OK) << "failed to replicate: entry 1" << ret->get_result_code();
@ -169,17 +197,23 @@ TEST(CoordinationTest, TestSummingRaft1)
TEST(CoordinationTest, TestSummingRaft3)
{
SummingRaftServer s1(1, "localhost", 44444);
SummingRaftServer s2(2, "localhost", 44445);
SummingRaftServer s3(3, "localhost", 44446);
ChangelogDirTest test1("./logs1");
SummingRaftServer s1(1, "localhost", 44444, "./logs1");
ChangelogDirTest test2("./logs2");
SummingRaftServer s2(2, "localhost", 44445, "./logs2");
ChangelogDirTest test3("./logs3");
SummingRaftServer s3(3, "localhost", 44446, "./logs3");
nuraft::srv_config first_config(1, "localhost:44444");
nuraft::srv_config first_config(1, 0, "localhost:44444", "", false, 0);
auto ret1 = s2.raft_instance->add_srv(first_config);
if (!ret1->get_accepted())
while (!ret1->get_accepted())
{
std::cout << "failed to add server: "
<< ret1->get_result_str() << std::endl;
EXPECT_TRUE(false);
std::this_thread::sleep_for(std::chrono::milliseconds(100));
ret1 = s2.raft_instance->add_srv(first_config);
}
while (s1.raft_instance->get_leader() != 2)
@ -188,13 +222,15 @@ TEST(CoordinationTest, TestSummingRaft3)
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
nuraft::srv_config third_config(3, "localhost:44446");
nuraft::srv_config third_config(3, 0, "localhost:44446", "", false, 0);
auto ret3 = s2.raft_instance->add_srv(third_config);
if (!ret3->get_accepted())
{
std::cout << "failed to add server: "
<< ret3->get_result_str() << std::endl;
EXPECT_TRUE(false);
std::this_thread::sleep_for(std::chrono::milliseconds(100));
ret3 = s2.raft_instance->add_srv(third_config);
}
while (s3.raft_instance->get_leader() != 2)
@ -209,10 +245,13 @@ TEST(CoordinationTest, TestSummingRaft3)
EXPECT_EQ(s3.raft_instance->get_leader(), 2);
std::cerr << "Starting to add entries\n";
auto entry = getLogEntry(1);
auto entry = getBuffer(1);
auto ret = s2.raft_instance->append_entries({entry});
EXPECT_TRUE(ret->get_accepted()) << "failed to replicate: entry 1" << ret->get_result_code();
EXPECT_EQ(ret->get_result_code(), nuraft::cmd_result_code::OK) << "failed to replicate: entry 1" << ret->get_result_code();
while (!ret->get_accepted() || ret->get_result_code() != nuraft::cmd_result_code::OK)
{
std::cerr << ret->get_accepted() << "failed to replicate: entry 1" << ret->get_result_code() << std::endl;
ret = s2.raft_instance->append_entries({entry});
}
while (s1.state_machine->getValue() != 1)
{
@ -236,7 +275,7 @@ TEST(CoordinationTest, TestSummingRaft3)
EXPECT_EQ(s2.state_machine->getValue(), 1);
EXPECT_EQ(s3.state_machine->getValue(), 1);
auto non_leader_entry = getLogEntry(3);
auto non_leader_entry = getBuffer(3);
auto ret_non_leader1 = s1.raft_instance->append_entries({non_leader_entry});
EXPECT_FALSE(ret_non_leader1->get_accepted());
@ -245,10 +284,13 @@ TEST(CoordinationTest, TestSummingRaft3)
EXPECT_FALSE(ret_non_leader3->get_accepted());
auto leader_entry = getLogEntry(77);
auto leader_entry = getBuffer(77);
auto ret_leader = s2.raft_instance->append_entries({leader_entry});
EXPECT_TRUE(ret_leader->get_accepted()) << "failed to replicate: entry 78" << ret_leader->get_result_code();
EXPECT_EQ(ret_leader->get_result_code(), nuraft::cmd_result_code::OK) << "failed to replicate: entry 78" << ret_leader->get_result_code();
while (!ret_leader->get_accepted() || ret_leader->get_result_code() != nuraft::cmd_result_code::OK)
{
std::cerr << "failed to replicate: entry 78" << ret_leader->get_result_code() << std::endl;
ret_leader = s2.raft_instance->append_entries({leader_entry});
}
while (s1.state_machine->getValue() != 78)
{
@ -333,4 +375,586 @@ TEST(CoordinationTest, TestStorageSerialization)
EXPECT_EQ(new_storage.ephemerals[1].size(), 1);
}
DB::LogEntryPtr getLogEntry(const std::string & s, size_t term)
{
DB::WriteBufferFromNuraftBuffer bufwriter;
writeText(s, bufwriter);
return nuraft::cs_new<nuraft::log_entry>(term, bufwriter.getBuffer());
}
TEST(CoordinationTest, ChangelogTestSimple)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 5, true);
changelog.init(1);
auto entry = getLogEntry("hello world", 77);
changelog.append(entry);
EXPECT_EQ(changelog.next_slot(), 2);
EXPECT_EQ(changelog.start_index(), 1);
EXPECT_EQ(changelog.last_entry()->get_term(), 77);
EXPECT_EQ(changelog.entry_at(1)->get_term(), 77);
EXPECT_EQ(changelog.log_entries(1, 2)->size(), 1);
}
TEST(CoordinationTest, ChangelogTestFile)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 5, true);
changelog.init(1);
auto entry = getLogEntry("hello world", 77);
changelog.append(entry);
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
for (const auto & p : fs::directory_iterator("./logs"))
EXPECT_EQ(p.path(), "./logs/changelog_1_5.bin");
changelog.append(entry);
changelog.append(entry);
changelog.append(entry);
changelog.append(entry);
changelog.append(entry);
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
}
TEST(CoordinationTest, ChangelogReadWrite)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 1000, true);
changelog.init(1);
for (size_t i = 0; i < 10; ++i)
{
auto entry = getLogEntry("hello world", i * 10);
changelog.append(entry);
}
EXPECT_EQ(changelog.size(), 10);
DB::NuKeeperLogStore changelog_reader("./logs", 1000, true);
changelog_reader.init(1);
EXPECT_EQ(changelog_reader.size(), 10);
EXPECT_EQ(changelog_reader.last_entry()->get_term(), changelog.last_entry()->get_term());
EXPECT_EQ(changelog_reader.start_index(), changelog.start_index());
EXPECT_EQ(changelog_reader.next_slot(), changelog.next_slot());
for (size_t i = 0; i < 10; ++i)
EXPECT_EQ(changelog_reader.entry_at(i + 1)->get_term(), changelog.entry_at(i + 1)->get_term());
auto entries_from_range_read = changelog_reader.log_entries(1, 11);
auto entries_from_range = changelog.log_entries(1, 11);
EXPECT_EQ(entries_from_range_read->size(), entries_from_range->size());
EXPECT_EQ(10, entries_from_range->size());
}
TEST(CoordinationTest, ChangelogWriteAt)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 1000, true);
changelog.init(1);
for (size_t i = 0; i < 10; ++i)
{
auto entry = getLogEntry("hello world", i * 10);
changelog.append(entry);
}
EXPECT_EQ(changelog.size(), 10);
auto entry = getLogEntry("writer", 77);
changelog.write_at(7, entry);
EXPECT_EQ(changelog.size(), 7);
EXPECT_EQ(changelog.last_entry()->get_term(), 77);
EXPECT_EQ(changelog.entry_at(7)->get_term(), 77);
EXPECT_EQ(changelog.next_slot(), 8);
DB::NuKeeperLogStore changelog_reader("./logs", 1000, true);
changelog_reader.init(1);
EXPECT_EQ(changelog_reader.size(), changelog.size());
EXPECT_EQ(changelog_reader.last_entry()->get_term(), changelog.last_entry()->get_term());
EXPECT_EQ(changelog_reader.start_index(), changelog.start_index());
EXPECT_EQ(changelog_reader.next_slot(), changelog.next_slot());
}
TEST(CoordinationTest, ChangelogTestAppendAfterRead)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 5, true);
changelog.init(1);
for (size_t i = 0; i < 7; ++i)
{
auto entry = getLogEntry("hello world", i * 10);
changelog.append(entry);
}
EXPECT_EQ(changelog.size(), 7);
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
DB::NuKeeperLogStore changelog_reader("./logs", 5, true);
changelog_reader.init(1);
EXPECT_EQ(changelog_reader.size(), 7);
for (size_t i = 7; i < 10; ++i)
{
auto entry = getLogEntry("hello world", i * 10);
changelog_reader.append(entry);
}
EXPECT_EQ(changelog_reader.size(), 10);
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
size_t logs_count = 0;
for (const auto & _ [[maybe_unused]]: fs::directory_iterator("./logs"))
logs_count++;
EXPECT_EQ(logs_count, 2);
auto entry = getLogEntry("someentry", 77);
changelog_reader.append(entry);
EXPECT_EQ(changelog_reader.size(), 11);
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin"));
logs_count = 0;
for (const auto & _ [[maybe_unused]]: fs::directory_iterator("./logs"))
logs_count++;
EXPECT_EQ(logs_count, 3);
}
TEST(CoordinationTest, ChangelogTestCompaction)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 5, true);
changelog.init(1);
for (size_t i = 0; i < 3; ++i)
{
auto entry = getLogEntry("hello world", i * 10);
changelog.append(entry);
}
EXPECT_EQ(changelog.size(), 3);
changelog.compact(2);
EXPECT_EQ(changelog.size(), 1);
EXPECT_EQ(changelog.start_index(), 3);
EXPECT_EQ(changelog.next_slot(), 4);
EXPECT_EQ(changelog.last_entry()->get_term(), 20);
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
auto e1 = getLogEntry("hello world", 30);
changelog.append(e1);
auto e2 = getLogEntry("hello world", 40);
changelog.append(e2);
auto e3 = getLogEntry("hello world", 50);
changelog.append(e3);
auto e4 = getLogEntry("hello world", 60);
changelog.append(e4);
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
changelog.compact(6);
EXPECT_FALSE(fs::exists("./logs/changelog_1_5.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
EXPECT_EQ(changelog.size(), 1);
EXPECT_EQ(changelog.start_index(), 7);
EXPECT_EQ(changelog.next_slot(), 8);
EXPECT_EQ(changelog.last_entry()->get_term(), 60);
/// And we able to read it
DB::NuKeeperLogStore changelog_reader("./logs", 5, true);
changelog_reader.init(7);
EXPECT_EQ(changelog_reader.size(), 1);
EXPECT_EQ(changelog_reader.start_index(), 7);
EXPECT_EQ(changelog_reader.next_slot(), 8);
EXPECT_EQ(changelog_reader.last_entry()->get_term(), 60);
}
TEST(CoordinationTest, ChangelogTestBatchOperations)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 100, true);
changelog.init(1);
for (size_t i = 0; i < 10; ++i)
{
auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10);
changelog.append(entry);
}
EXPECT_EQ(changelog.size(), 10);
auto entries = changelog.pack(1, 5);
DB::NuKeeperLogStore apply_changelog("./logs", 100, true);
apply_changelog.init(1);
for (size_t i = 0; i < 10; ++i)
{
EXPECT_EQ(apply_changelog.entry_at(i + 1)->get_term(), i * 10);
}
EXPECT_EQ(apply_changelog.size(), 10);
apply_changelog.apply_pack(8, *entries);
EXPECT_EQ(apply_changelog.size(), 12);
EXPECT_EQ(apply_changelog.start_index(), 1);
EXPECT_EQ(apply_changelog.next_slot(), 13);
for (size_t i = 0; i < 7; ++i)
{
EXPECT_EQ(apply_changelog.entry_at(i + 1)->get_term(), i * 10);
}
EXPECT_EQ(apply_changelog.entry_at(8)->get_term(), 0);
EXPECT_EQ(apply_changelog.entry_at(9)->get_term(), 10);
EXPECT_EQ(apply_changelog.entry_at(10)->get_term(), 20);
EXPECT_EQ(apply_changelog.entry_at(11)->get_term(), 30);
EXPECT_EQ(apply_changelog.entry_at(12)->get_term(), 40);
}
TEST(CoordinationTest, ChangelogTestBatchOperationsEmpty)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 100, true);
changelog.init(1);
for (size_t i = 0; i < 10; ++i)
{
auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10);
changelog.append(entry);
}
EXPECT_EQ(changelog.size(), 10);
auto entries = changelog.pack(5, 5);
ChangelogDirTest test1("./logs1");
DB::NuKeeperLogStore changelog_new("./logs1", 100, true);
changelog_new.init(1);
EXPECT_EQ(changelog_new.size(), 0);
changelog_new.apply_pack(5, *entries);
EXPECT_EQ(changelog_new.size(), 5);
EXPECT_EQ(changelog_new.start_index(), 5);
EXPECT_EQ(changelog_new.next_slot(), 10);
for (size_t i = 4; i < 9; ++i)
EXPECT_EQ(changelog_new.entry_at(i + 1)->get_term(), i * 10);
auto e = getLogEntry("hello_world", 110);
changelog_new.append(e);
EXPECT_EQ(changelog_new.size(), 6);
EXPECT_EQ(changelog_new.start_index(), 5);
EXPECT_EQ(changelog_new.next_slot(), 11);
DB::NuKeeperLogStore changelog_reader("./logs1", 100, true);
changelog_reader.init(5);
}
TEST(CoordinationTest, ChangelogTestWriteAtPreviousFile)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 5, true);
changelog.init(1);
for (size_t i = 0; i < 33; ++i)
{
auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10);
changelog.append(entry);
}
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin"));
EXPECT_EQ(changelog.size(), 33);
auto e1 = getLogEntry("helloworld", 5555);
changelog.write_at(7, e1);
EXPECT_EQ(changelog.size(), 7);
EXPECT_EQ(changelog.start_index(), 1);
EXPECT_EQ(changelog.next_slot(), 8);
EXPECT_EQ(changelog.last_entry()->get_term(), 5555);
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_11_15.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin"));
DB::NuKeeperLogStore changelog_read("./logs", 5, true);
changelog_read.init(1);
EXPECT_EQ(changelog_read.size(), 7);
EXPECT_EQ(changelog_read.start_index(), 1);
EXPECT_EQ(changelog_read.next_slot(), 8);
EXPECT_EQ(changelog_read.last_entry()->get_term(), 5555);
}
TEST(CoordinationTest, ChangelogTestWriteAtFileBorder)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 5, true);
changelog.init(1);
for (size_t i = 0; i < 33; ++i)
{
auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10);
changelog.append(entry);
}
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin"));
EXPECT_EQ(changelog.size(), 33);
auto e1 = getLogEntry("helloworld", 5555);
changelog.write_at(11, e1);
EXPECT_EQ(changelog.size(), 11);
EXPECT_EQ(changelog.start_index(), 1);
EXPECT_EQ(changelog.next_slot(), 12);
EXPECT_EQ(changelog.last_entry()->get_term(), 5555);
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin"));
DB::NuKeeperLogStore changelog_read("./logs", 5, true);
changelog_read.init(1);
EXPECT_EQ(changelog_read.size(), 11);
EXPECT_EQ(changelog_read.start_index(), 1);
EXPECT_EQ(changelog_read.next_slot(), 12);
EXPECT_EQ(changelog_read.last_entry()->get_term(), 5555);
}
TEST(CoordinationTest, ChangelogTestWriteAtAllFiles)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 5, true);
changelog.init(1);
for (size_t i = 0; i < 33; ++i)
{
auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10);
changelog.append(entry);
}
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin"));
EXPECT_EQ(changelog.size(), 33);
auto e1 = getLogEntry("helloworld", 5555);
changelog.write_at(1, e1);
EXPECT_EQ(changelog.size(), 1);
EXPECT_EQ(changelog.start_index(), 1);
EXPECT_EQ(changelog.next_slot(), 2);
EXPECT_EQ(changelog.last_entry()->get_term(), 5555);
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_6_10.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_11_15.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin"));
}
TEST(CoordinationTest, ChangelogTestStartNewLogAfterRead)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 5, true);
changelog.init(1);
for (size_t i = 0; i < 35; ++i)
{
auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10);
changelog.append(entry);
}
EXPECT_EQ(changelog.size(), 35);
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_36_40.bin"));
DB::NuKeeperLogStore changelog_reader("./logs", 5, true);
changelog_reader.init(1);
auto entry = getLogEntry("36_hello_world", 360);
changelog_reader.append(entry);
EXPECT_EQ(changelog_reader.size(), 36);
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_36_40.bin"));
}
TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 5, true);
changelog.init(1);
for (size_t i = 0; i < 35; ++i)
{
auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10);
changelog.append(entry);
}
EXPECT_EQ(changelog.size(), 35);
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin"));
DB::WriteBufferFromFile plain_buf("./logs/changelog_11_15.bin", DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
plain_buf.truncate(0);
DB::NuKeeperLogStore changelog_reader("./logs", 5, true);
changelog_reader.init(1);
EXPECT_EQ(changelog_reader.size(), 10);
EXPECT_EQ(changelog_reader.last_entry()->get_term(), 90);
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin"));
auto entry = getLogEntry("h", 7777);
changelog_reader.append(entry);
EXPECT_EQ(changelog_reader.size(), 11);
EXPECT_EQ(changelog_reader.last_entry()->get_term(), 7777);
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin"));
DB::NuKeeperLogStore changelog_reader2("./logs", 5, true);
changelog_reader2.init(1);
EXPECT_EQ(changelog_reader2.size(), 11);
EXPECT_EQ(changelog_reader2.last_entry()->get_term(), 7777);
}
TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 20, true);
changelog.init(1);
for (size_t i = 0; i < 35; ++i)
{
auto entry = getLogEntry(std::to_string(i) + "_hello_world", (i + 44) * 10);
changelog.append(entry);
}
EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin"));
DB::WriteBufferFromFile plain_buf("./logs/changelog_1_20.bin", DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
plain_buf.truncate(140);
DB::NuKeeperLogStore changelog_reader("./logs", 20, true);
changelog_reader.init(1);
EXPECT_EQ(changelog_reader.size(), 2);
EXPECT_EQ(changelog_reader.last_entry()->get_term(), 450);
EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin"));
EXPECT_FALSE(fs::exists("./logs/changelog_21_40.bin"));
auto entry = getLogEntry("hello_world", 7777);
changelog_reader.append(entry);
EXPECT_EQ(changelog_reader.size(), 3);
EXPECT_EQ(changelog_reader.last_entry()->get_term(), 7777);
DB::NuKeeperLogStore changelog_reader2("./logs", 20, true);
changelog_reader2.init(1);
EXPECT_EQ(changelog_reader2.size(), 3);
EXPECT_EQ(changelog_reader2.last_entry()->get_term(), 7777);
}
TEST(CoordinationTest, ChangelogTestLostFiles)
{
ChangelogDirTest test("./logs");
DB::NuKeeperLogStore changelog("./logs", 20, true);
changelog.init(1);
for (size_t i = 0; i < 35; ++i)
{
auto entry = getLogEntry(std::to_string(i) + "_hello_world", (i + 44) * 10);
changelog.append(entry);
}
EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin"));
EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin"));
fs::remove("./logs/changelog_1_20.bin");
DB::NuKeeperLogStore changelog_reader("./logs", 20, true);
EXPECT_THROW(changelog_reader.init(5), DB::Exception);
fs::remove("./logs/changelog_21_40.bin");
EXPECT_THROW(changelog_reader.init(3), DB::Exception);
}
int main(int argc, char ** argv)
{
Poco::AutoPtr<Poco::ConsoleChannel> channel(new Poco::ConsoleChannel(std::cerr));
Poco::Logger::root().setChannel(channel);
Poco::Logger::root().setLevel("trace");
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
#endif

View File

@ -84,16 +84,7 @@ DataTypePtr DataTypeFactory::get(const String & family_name_param, const ASTPtr
return get("LowCardinality", low_cardinality_params);
}
DataTypePtr res = findCreatorByName(family_name)(parameters);
if (CurrentThread::isInitialized())
{
const auto * query_context = CurrentThread::get().getQueryContext();
if (query_context && query_context->getSettingsRef().log_queries)
query_context->addQueryFactoriesInfo(Context::QueryLogFactories::DataType, family_name);
}
return res;
return findCreatorByName(family_name)(parameters);
}
DataTypePtr DataTypeFactory::getCustom(DataTypeCustomDescPtr customization) const
@ -165,19 +156,31 @@ void DataTypeFactory::registerSimpleDataTypeCustom(const String &name, SimpleCre
const DataTypeFactory::Value & DataTypeFactory::findCreatorByName(const String & family_name) const
{
const Context * query_context = nullptr;
if (CurrentThread::isInitialized())
query_context = CurrentThread::get().getQueryContext();
{
DataTypesDictionary::const_iterator it = data_types.find(family_name);
if (data_types.end() != it)
{
if (query_context && query_context->getSettingsRef().log_queries)
query_context->addQueryFactoriesInfo(Context::QueryLogFactories::DataType, family_name);
return it->second;
}
}
String family_name_lowercase = Poco::toLower(family_name);
{
DataTypesDictionary::const_iterator it = case_insensitive_data_types.find(family_name_lowercase);
if (case_insensitive_data_types.end() != it)
{
if (query_context && query_context->getSettingsRef().log_queries)
query_context->addQueryFactoriesInfo(Context::QueryLogFactories::DataType, family_name_lowercase);
return it->second;
}
}
auto hints = this->getHints(family_name);
if (!hints.empty())

View File

@ -25,6 +25,7 @@ namespace ErrorCodes
extern const int CANNOT_READ_ALL_DATA;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int UNEXPECTED_AST_STRUCTURE;
extern const int TOO_LARGE_STRING_SIZE;
}
@ -120,13 +121,21 @@ void DataTypeFixedString::serializeTextEscaped(const IColumn & column, size_t ro
}
static inline void alignStringLength(const DataTypeFixedString & type,
ColumnFixedString::Chars & data,
size_t string_start)
void DataTypeFixedString::alignStringLength(PaddedPODArray<UInt8> & chars, size_t old_size) const
{
ColumnFixedString::alignStringLength(data, type.getN(), string_start);
size_t length = chars.size() - old_size;
if (length < n)
{
chars.resize_fill(old_size + n);
}
else if (length > n)
{
chars.resize_assume_reserved(old_size);
throw Exception("Too large value for FixedString(" + std::to_string(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE);
}
}
template <typename Reader>
static inline void read(const DataTypeFixedString & self, IColumn & column, Reader && reader)
{
@ -135,7 +144,7 @@ static inline void read(const DataTypeFixedString & self, IColumn & column, Read
try
{
reader(data);
alignStringLength(self, data, prev_size);
self.alignStringLength(data, prev_size);
}
catch (...)
{

View File

@ -1,6 +1,7 @@
#pragma once
#include <DataTypes/IDataType.h>
#include <Common/PODArray_fwd.h>
#define MAX_FIXEDSTRING_SIZE 0xFFFFFF
@ -82,6 +83,11 @@ public:
bool isCategorial() const override { return true; }
bool canBeInsideNullable() const override { return true; }
bool canBeInsideLowCardinality() const override { return true; }
/// Makes sure that the length of a newly inserted string to `chars` is equal to getN().
/// If the length is less than getN() the function will add zero characters up to getN().
/// If the length is greater than getN() the function will throw an exception.
void alignStringLength(PaddedPODArray<UInt8> & chars, size_t old_size) const;
};
}

View File

@ -525,16 +525,16 @@ namespace
{
public:
using ColumnType = std::conditional_t<is_fixed_string, ColumnFixedString, ColumnString>;
using StringDataType = std::conditional_t<is_fixed_string, DataTypeFixedString, DataTypeString>;
ProtobufSerializerString(
const StringDataType & string_data_type_,
const std::shared_ptr<const DataTypeFixedString> & fixed_string_data_type_,
const google::protobuf::FieldDescriptor & field_descriptor_,
const ProtobufReaderOrWriter & reader_or_writer_)
: ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_)
, fixed_string_data_type(fixed_string_data_type_)
, n(fixed_string_data_type->getN())
{
static_assert(is_fixed_string, "This constructor for FixedString only");
n = string_data_type_.getN();
setFunctions();
prepareEnumMapping();
}
@ -583,11 +583,11 @@ namespace
{
if (row_num < old_size)
{
ColumnFixedString::alignStringLength(text_buffer, n, 0);
fixed_string_data_type->alignStringLength(text_buffer, 0);
memcpy(data.data() + row_num * n, text_buffer.data(), n);
}
else
ColumnFixedString::alignStringLength(data, n, old_data_size);
fixed_string_data_type->alignStringLength(data, old_data_size);
}
else
{
@ -817,7 +817,7 @@ namespace
auto str = default_function();
arr.insert(str.data(), str.data() + str.size());
if constexpr (is_fixed_string)
ColumnFixedString::alignStringLength(arr, n, 0);
fixed_string_data_type->alignStringLength(arr, 0);
default_string = std::move(arr);
}
return *default_string;
@ -865,7 +865,8 @@ namespace
str.insert(name.data(), name.data() + name.length());
}
size_t n = 0;
const std::shared_ptr<const DataTypeFixedString> fixed_string_data_type;
const size_t n = 0;
std::function<void(const std::string_view &)> write_function;
std::function<void(PaddedPODArray<UInt8> &)> read_function;
std::function<String()> default_function;
@ -2765,7 +2766,7 @@ namespace
case TypeIndex::DateTime: return std::make_unique<ProtobufSerializerDateTime>(field_descriptor, reader_or_writer);
case TypeIndex::DateTime64: return std::make_unique<ProtobufSerializerDateTime64>(assert_cast<const DataTypeDateTime64 &>(*data_type), field_descriptor, reader_or_writer);
case TypeIndex::String: return std::make_unique<ProtobufSerializerString<false>>(field_descriptor, reader_or_writer);
case TypeIndex::FixedString: return std::make_unique<ProtobufSerializerString<true>>(assert_cast<const DataTypeFixedString &>(*data_type), field_descriptor, reader_or_writer);
case TypeIndex::FixedString: return std::make_unique<ProtobufSerializerString<true>>(typeid_cast<std::shared_ptr<const DataTypeFixedString>>(data_type), field_descriptor, reader_or_writer);
case TypeIndex::Enum8: return std::make_unique<ProtobufSerializerEnum<Int8>>(typeid_cast<std::shared_ptr<const DataTypeEnum8>>(data_type), field_descriptor, reader_or_writer);
case TypeIndex::Enum16: return std::make_unique<ProtobufSerializerEnum<Int16>>(typeid_cast<std::shared_ptr<const DataTypeEnum16>>(data_type), field_descriptor, reader_or_writer);
case TypeIndex::Decimal32: return std::make_unique<ProtobufSerializerDecimal<Decimal32>>(assert_cast<const DataTypeDecimal<Decimal32> &>(*data_type), field_descriptor, reader_or_writer);
@ -2810,12 +2811,7 @@ namespace
const auto & array_data_type = assert_cast<const DataTypeArray &>(*data_type);
if (!allow_repeat)
{
throw Exception(
"The field " + quoteString(field_descriptor.full_name())
+ " must be repeated in the protobuf schema to match the column " + backQuote(StringRef{column_name}),
ErrorCodes::PROTOBUF_FIELD_NOT_REPEATED);
}
throwFieldNotRepeated(field_descriptor, column_name);
auto nested_serializer = buildFieldSerializer(column_name, array_data_type.getNestedType(), field_descriptor,
/* allow_repeat = */ false); // We do our repeating now, so for nested type we forget about the repeating.
@ -2860,12 +2856,7 @@ namespace
/// Serialize as a repeated field.
if (!allow_repeat && (size_of_tuple > 1))
{
throw Exception(
"The field " + quoteString(field_descriptor.full_name())
+ " must be repeated in the protobuf schema to match the column " + backQuote(StringRef{column_name}),
ErrorCodes::PROTOBUF_FIELD_NOT_REPEATED);
}
throwFieldNotRepeated(field_descriptor, column_name);
std::vector<std::unique_ptr<ProtobufSerializer>> nested_serializers;
for (const auto & nested_data_type : tuple_data_type.getElements())
@ -2891,6 +2882,21 @@ namespace
}
}
[[noreturn]] static void throwFieldNotRepeated(const FieldDescriptor & field_descriptor, const std::string_view & column_name)
{
if (!field_descriptor.is_repeated())
throw Exception(
"The field " + quoteString(field_descriptor.full_name())
+ " must be repeated in the protobuf schema to match the column " + backQuote(StringRef{column_name}),
ErrorCodes::PROTOBUF_FIELD_NOT_REPEATED);
throw Exception(
"The field " + quoteString(field_descriptor.full_name())
+ " is repeated but the level of repeatedness is not enough to serialize a multidimensional array from the column "
+ backQuote(StringRef{column_name}) + ". It's recommended to make the parent field repeated as well.",
ErrorCodes::PROTOBUF_FIELD_NOT_REPEATED);
}
const ProtobufReaderOrWriter reader_or_writer;
};
}

View File

@ -99,7 +99,8 @@ FunctionOverloadResolverImplPtr FunctionFactory::tryGetImpl(
res = it->second(context);
else
{
it = case_insensitive_functions.find(Poco::toLower(name));
name = Poco::toLower(name);
it = case_insensitive_functions.find(name);
if (case_insensitive_functions.end() != it)
res = it->second(context);
}

View File

@ -2586,7 +2586,7 @@ private:
WrapperType createTupleToMapWrapper(const DataTypes & from_kv_types, const DataTypes & to_kv_types) const
{
return [element_wrappers = getElementWrappers(from_kv_types, to_kv_types), from_kv_types, to_kv_types]
(ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t input_rows_count) -> ColumnPtr
(ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr
{
const auto * col = arguments.front().column.get();
const auto & column_tuple = assert_cast<const ColumnTuple &>(*col);
@ -2597,7 +2597,7 @@ private:
{
const auto & column_array = assert_cast<const ColumnArray &>(column_tuple.getColumn(i));
ColumnsWithTypeAndName element = {{column_array.getDataPtr(), from_kv_types[i], ""}};
converted_columns[i] = element_wrappers[i](element, to_kv_types[i], nullable_source, input_rows_count);
converted_columns[i] = element_wrappers[i](element, to_kv_types[i], nullable_source, (element[0].column)->size());
offsets[i] = column_array.getOffsetsPtr();
}

View File

@ -1232,8 +1232,8 @@ DataTypePtr HashJoin::joinGetCheckAndGetReturnType(const DataTypes & data_types,
{
const auto & left_type_origin = data_types[i];
const auto & [c2, right_type_origin, right_name] = right_table_keys.safeGetByPosition(i);
auto left_type = removeNullable(left_type_origin);
auto right_type = removeNullable(right_type_origin);
auto left_type = removeNullable(recursiveRemoveLowCardinality(left_type_origin));
auto right_type = removeNullable(recursiveRemoveLowCardinality(right_type_origin));
if (!left_type->equals(*right_type))
throw Exception(
"Type mismatch in joinGet key " + toString(i) + ": found type " + left_type->getName() + ", while the needed type is "

View File

@ -340,9 +340,15 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx
bool found_in_cache = false;
const auto & result_type = header.getByPosition(column_idx).type;
const char * delimiter = (column_idx + 1 == num_columns) ? ")" : ",";
auto structure = templates_cache.getFromCacheOrConstruct(result_type, format_settings.null_as_default,
TokenIterator(tokens), token_iterator,
ast, *context, &found_in_cache, delimiter);
auto structure = templates_cache.getFromCacheOrConstruct(
result_type,
!result_type->isNullable() && format_settings.null_as_default,
TokenIterator(tokens),
token_iterator,
ast,
*context,
&found_in_cache,
delimiter);
templates[column_idx].emplace(structure);
if (found_in_cache)
++attempts_to_deduce_template_cached[column_idx];

View File

@ -40,7 +40,7 @@ namespace ErrorCodes
struct PollResult
{
size_t ready_responses_count{0};
size_t responses_count{0};
bool has_requests{false};
bool error{false};
};
@ -70,14 +70,14 @@ struct SocketInterruptablePollWrapper
if (epollfd < 0)
throwFromErrno("Cannot epoll_create", ErrorCodes::SYSTEM_ERROR);
socket_event.events = EPOLLIN | EPOLLERR;
socket_event.events = EPOLLIN | EPOLLERR | EPOLLPRI;
socket_event.data.fd = sockfd;
if (epoll_ctl(epollfd, EPOLL_CTL_ADD, sockfd, &socket_event) < 0)
{
::close(epollfd);
throwFromErrno("Cannot insert socket into epoll queue", ErrorCodes::SYSTEM_ERROR);
}
pipe_event.events = EPOLLIN | EPOLLERR;
pipe_event.events = EPOLLIN | EPOLLERR | EPOLLPRI;
pipe_event.data.fd = pipe.fds_rw[0];
if (epoll_ctl(epollfd, EPOLL_CTL_ADD, pipe.fds_rw[0], &pipe_event) < 0)
{
@ -92,13 +92,24 @@ struct SocketInterruptablePollWrapper
return pipe.fds_rw[1];
}
PollResult poll(Poco::Timespan remaining_time)
PollResult poll(Poco::Timespan remaining_time, const std::shared_ptr<ReadBufferFromPocoSocket> & in)
{
bool socket_ready = false;
bool fd_ready = false;
if (in->available() != 0)
socket_ready = true;
if (response_in.available() != 0)
fd_ready = true;
int rc = 0;
if (!fd_ready)
{
std::array<int, 2> outputs = {-1, -1};
#if defined(POCO_HAVE_FD_EPOLL)
int rc;
epoll_event evout[2];
memset(evout, 0, sizeof(evout));
evout[0].data.fd = evout[1].data.fd = -1;
do
{
Poco::Timestamp start;
@ -115,10 +126,13 @@ struct SocketInterruptablePollWrapper
}
while (rc < 0 && errno == EINTR);
if (rc >= 1 && evout[0].events & EPOLLIN)
outputs[0] = evout[0].data.fd;
if (rc == 2 && evout[1].events & EPOLLIN)
outputs[1] = evout[1].data.fd;
for (int i = 0; i < rc; ++i)
{
if (evout[i].data.fd == sockfd)
socket_ready = true;
if (evout[i].data.fd == pipe.fds_rw[0])
fd_ready = true;
}
#else
pollfd poll_buf[2];
poll_buf[0].fd = sockfd;
@ -126,7 +140,6 @@ struct SocketInterruptablePollWrapper
poll_buf[1].fd = pipe.fds_rw[0];
poll_buf[1].events = POLLIN;
int rc;
do
{
Poco::Timestamp start;
@ -142,47 +155,29 @@ struct SocketInterruptablePollWrapper
}
}
while (rc < 0 && errno == POCO_EINTR);
if (rc >= 1 && poll_buf[0].revents & POLLIN)
outputs[0] = sockfd;
socket_ready = true;
if (rc == 2 && poll_buf[1].revents & POLLIN)
outputs[1] = pipe.fds_rw[0];
fd_ready = true;
#endif
}
PollResult result{};
if (rc < 0)
{
result.error = true;
return result;
}
else if (rc == 0)
{
return result;
}
else
{
for (auto fd : outputs)
{
if (fd != -1)
{
if (fd == sockfd)
result.has_requests = true;
else
result.has_requests = socket_ready;
if (fd_ready)
{
UInt8 dummy;
do
{
/// All ready responses stored in responses queue,
/// but we have to count amount of ready responses in pipe
/// and process them only. Otherwise states of response_in
/// and response queue will be inconsistent and race condition is possible.
readIntBinary(dummy, response_in);
result.ready_responses_count++;
}
while (response_in.available());
}
}
}
result.responses_count = 1;
auto available = response_in.available();
response_in.ignore(available);
result.responses_count += available;
}
if (rc < 0)
result.error = true;
return result;
}
@ -339,10 +334,8 @@ void NuKeeperTCPHandler::runImpl()
{
using namespace std::chrono_literals;
PollResult result = poll_wrapper->poll(session_timeout);
PollResult result = poll_wrapper->poll(session_timeout, in);
if (result.has_requests && !close_received)
{
do
{
auto [received_op, received_xid] = receiveRequest();
@ -351,7 +344,6 @@ void NuKeeperTCPHandler::runImpl()
LOG_DEBUG(log, "Received close event with xid {} for session id #{}", received_xid, session_id);
close_xid = received_xid;
close_received = true;
break;
}
else if (received_op == Coordination::OpNum::Heartbeat)
{
@ -359,23 +351,23 @@ void NuKeeperTCPHandler::runImpl()
session_stopwatch.restart();
}
}
while (in->available());
}
/// Process exact amount of responses from pipe
/// otherwise state of responses queue and signaling pipe
/// became inconsistent and race condition is possible.
while (result.ready_responses_count != 0)
while (result.responses_count != 0)
{
Coordination::ZooKeeperResponsePtr response;
if (!responses->tryPop(response))
throw Exception(ErrorCodes::LOGICAL_ERROR, "We must have at least {} ready responses, but queue is empty. It's a bug.", result.ready_responses_count);
throw Exception(ErrorCodes::LOGICAL_ERROR, "We must have ready response, but queue is empty. It's a bug.");
if (response->xid == close_xid)
{
LOG_DEBUG(log, "Session #{} successfully closed", session_id);
return;
}
response->write(*out);
if (response->error == Coordination::Error::ZSESSIONEXPIRED)
{
@ -383,7 +375,8 @@ void NuKeeperTCPHandler::runImpl()
nu_keeper_storage_dispatcher->finishSession(session_id);
return;
}
result.ready_responses_count--;
result.responses_count--;
}
if (result.error)

View File

@ -2,12 +2,15 @@
<test_keeper_server>
<tcp_port>9181</tcp_port>
<server_id>1</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<coordination_settings>
<operation_timeout_ms>10000</operation_timeout_ms>
<session_timeout_ms>30000</session_timeout_ms>
<snapshot_distance>0</snapshot_distance>
<reserved_log_items>0</reserved_log_items>
<force_sync>false</force_sync>
<startup_timeout>60000</startup_timeout>
</coordination_settings>
<raft_configuration>

View File

@ -2,11 +2,13 @@
<test_keeper_server>
<tcp_port>9181</tcp_port>
<server_id>1</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>
<session_timeout_ms>10000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
<force_sync>false</force_sync>
</coordination_settings>
<raft_configuration>

View File

@ -0,0 +1,8 @@
<yandex>
<zookeeper>
<node index="1">
<host>node1</host>
<port>9181</port>
</node>
</zookeeper>
</yandex>

View File

@ -8,27 +8,18 @@ from multiprocessing.dummy import Pool
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', main_configs=['configs/enable_test_keeper.xml', 'configs/logs_conf.xml'], with_zookeeper=True)
from kazoo.client import KazooClient, KazooState
_genuine_zk_instance = None
_fake_zk_instance = None
from kazoo.client import KazooClient, KazooState, KeeperState
def get_genuine_zk():
global _genuine_zk_instance
if not _genuine_zk_instance:
print("Zoo1", cluster.get_instance_ip("zoo1"))
_genuine_zk_instance = cluster.get_kazoo_client('zoo1')
return _genuine_zk_instance
return cluster.get_kazoo_client('zoo1')
def get_fake_zk():
global _fake_zk_instance
if not _fake_zk_instance:
print("node", cluster.get_instance_ip("node"))
_fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip("node") + ":9181", timeout=30.0)
def reset_last_zxid_listener(state):
print("Fake zk callback called for state", state)
global _fake_zk_instance
nonlocal _fake_zk_instance
if state != KazooState.CONNECTED:
_fake_zk_instance._reset()
@ -44,6 +35,15 @@ def create_random_path(prefix="", depth=1):
return prefix
return create_random_path(os.path.join(prefix, random_string(3)), depth - 1)
def stop_zk(zk):
try:
if zk:
zk.stop()
zk.close()
except:
pass
@pytest.fixture(scope="module")
def started_cluster():
try:
@ -53,15 +53,10 @@ def started_cluster():
finally:
cluster.shutdown()
if _genuine_zk_instance:
_genuine_zk_instance.stop()
_genuine_zk_instance.close()
if _fake_zk_instance:
_fake_zk_instance.stop()
_fake_zk_instance.close()
def test_simple_commands(started_cluster):
try:
genuine_zk = get_genuine_zk()
fake_zk = get_fake_zk()
@ -75,9 +70,13 @@ def test_simple_commands(started_cluster):
assert zk.exists("/test_simple_commands/somenode1")
print(zk.get("/test_simple_commands/somenode1"))
assert zk.get("/test_simple_commands/somenode1")[0] == b"world"
finally:
for zk in [genuine_zk, fake_zk]:
stop_zk(zk)
def test_sequential_nodes(started_cluster):
try:
genuine_zk = get_genuine_zk()
fake_zk = get_fake_zk()
genuine_zk.create("/test_sequential_nodes")
@ -91,6 +90,9 @@ def test_sequential_nodes(started_cluster):
genuine_childs = list(sorted(genuine_zk.get_children("/test_sequential_nodes")))
fake_childs = list(sorted(fake_zk.get_children("/test_sequential_nodes")))
assert genuine_childs == fake_childs
finally:
for zk in [genuine_zk, fake_zk]:
stop_zk(zk)
def assert_eq_stats(stat1, stat2):
@ -102,6 +104,7 @@ def assert_eq_stats(stat1, stat2):
assert stat1.numChildren == stat2.numChildren
def test_stats(started_cluster):
try:
genuine_zk = get_genuine_zk()
fake_zk = get_fake_zk()
genuine_zk.create("/test_stats_nodes")
@ -139,8 +142,12 @@ def test_stats(started_cluster):
print(genuine_stats)
print(fake_stats)
assert_eq_stats(genuine_stats, fake_stats)
finally:
for zk in [genuine_zk, fake_zk]:
stop_zk(zk)
def test_watchers(started_cluster):
try:
genuine_zk = get_genuine_zk()
fake_zk = get_fake_zk()
genuine_zk.create("/test_data_watches")
@ -196,8 +203,12 @@ def test_watchers(started_cluster):
print("Genuine children", genuine_children)
print("Fake children", fake_children)
assert genuine_children == fake_children
finally:
for zk in [genuine_zk, fake_zk]:
stop_zk(zk)
def test_multitransactions(started_cluster):
try:
genuine_zk = get_genuine_zk()
fake_zk = get_fake_zk()
for zk in [genuine_zk, fake_zk]:
@ -225,7 +236,9 @@ def test_multitransactions(started_cluster):
assert zk.exists('/test_multitransactions/q') is None
assert zk.exists('/test_multitransactions/a') is None
assert zk.exists('/test_multitransactions/x') is None
finally:
for zk in [genuine_zk, fake_zk]:
stop_zk(zk)
def exists(zk, path):
result = zk.exists(path)
@ -278,13 +291,13 @@ class Request(object):
arg_str = ', '.join([str(k) + "=" + str(v) for k, v in self.arguments.items()])
return "ZKRequest name {} with arguments {}".format(self.name, arg_str)
def generate_requests(iters=1):
def generate_requests(prefix="/", iters=1):
requests = []
existing_paths = []
for i in range(iters):
for _ in range(100):
rand_length = random.randint(0, 10)
path = "/"
path = prefix
for j in range(1, rand_length):
path = create_random_path(path, 1)
existing_paths.append(path)
@ -322,9 +335,13 @@ def generate_requests(iters=1):
def test_random_requests(started_cluster):
requests = generate_requests(10)
try:
requests = generate_requests("/test_random_requests", 10)
print("Generated", len(requests), "requests")
genuine_zk = get_genuine_zk()
fake_zk = get_fake_zk()
genuine_zk.create("/test_random_requests")
fake_zk.create("/test_random_requests")
for i, request in enumerate(requests):
genuine_throw = False
fake_throw = False
@ -333,20 +350,28 @@ def test_random_requests(started_cluster):
try:
genuine_result = request.callback(genuine_zk)
except Exception as ex:
print("i", i, "request", request)
print("Genuine exception", str(ex))
genuine_throw = True
try:
fake_result = request.callback(fake_zk)
except Exception as ex:
print("i", i, "request", request)
print("Fake exception", str(ex))
fake_throw = True
assert fake_throw == genuine_throw, "Fake throw genuine not or vise versa"
assert fake_throw == genuine_throw, "Fake throw genuine not or vise versa request {}"
assert fake_result == genuine_result, "Zookeeper results differ"
root_children_genuine = [elem for elem in list(sorted(genuine_zk.get_children("/"))) if elem not in ('clickhouse', 'zookeeper')]
root_children_fake = [elem for elem in list(sorted(fake_zk.get_children("/"))) if elem not in ('clickhouse', 'zookeeper')]
root_children_genuine = [elem for elem in list(sorted(genuine_zk.get_children("/test_random_requests"))) if elem not in ('clickhouse', 'zookeeper')]
root_children_fake = [elem for elem in list(sorted(fake_zk.get_children("/test_random_requests"))) if elem not in ('clickhouse', 'zookeeper')]
assert root_children_fake == root_children_genuine
finally:
for zk in [genuine_zk, fake_zk]:
stop_zk(zk)
def test_end_of_session(started_cluster):
fake_zk1 = None
fake_zk2 = None
genuine_zk1 = None
@ -401,13 +426,8 @@ def test_end_of_session(started_cluster):
assert fake_ephemeral_event == genuine_ephemeral_event
finally:
try:
for zk in [fake_zk1, fake_zk2, genuine_zk1, genuine_zk2]:
if zk:
zk.stop()
zk.close()
except:
pass
stop_zk(zk)
def test_end_of_watches_session(started_cluster):
fake_zk1 = None
@ -442,15 +462,11 @@ def test_end_of_watches_session(started_cluster):
assert dummy_set == 2
finally:
try:
for zk in [fake_zk1, fake_zk2]:
if zk:
zk.stop()
zk.close()
except:
pass
stop_zk(zk)
def test_concurrent_watches(started_cluster):
try:
fake_zk = get_fake_zk()
fake_zk.restart()
global_path = "/test_concurrent_watches_0"
@ -530,3 +546,5 @@ def test_concurrent_watches(started_cluster):
print("Diff", list(set(all_paths_created) - set(all_paths_triggered)))
assert dumb_watch_triggered_counter == watches_must_be_triggered
finally:
stop_zk(fake_zk)

View File

@ -2,6 +2,7 @@
<test_keeper_server>
<tcp_port>9181</tcp_port>
<server_id>1</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>

View File

@ -2,6 +2,7 @@
<test_keeper_server>
<tcp_port>9181</tcp_port>
<server_id>2</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>

View File

@ -2,6 +2,7 @@
<test_keeper_server>
<tcp_port>9181</tcp_port>
<server_id>3</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>

View File

@ -6,6 +6,7 @@ import os
import time
from multiprocessing.dummy import Pool
from helpers.network import PartitionManager
from helpers.test_tools import assert_eq_with_retry
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', main_configs=['configs/enable_test_keeper1.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True)
@ -14,6 +15,18 @@ node3 = cluster.add_instance('node3', main_configs=['configs/enable_test_keeper3
from kazoo.client import KazooClient, KazooState
"""
In this test, we blockade RAFT leader and check that the whole system is
able to recover. It's not a good test because we use ClickHouse's replicated
tables to check connectivity, but they may require special operations (or a long
wait) after session expiration. We don't use kazoo, because this client pretends
to be very smart: SUSPEND sessions, try to recover them, and so on. The test
will be even less predictable than with ClickHouse tables.
TODO find (or write) not so smart python client.
TODO remove this when jepsen tests will be written.
"""
@pytest.fixture(scope="module")
def started_cluster():
try:
@ -55,7 +68,6 @@ def get_fake_zk(nodename, timeout=30.0):
_fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout)
def reset_listener(state):
nonlocal _fake_zk_instance
print("Fake zk callback called for state", state)
if state != KazooState.CONNECTED:
_fake_zk_instance._reset()
@ -67,19 +79,25 @@ def get_fake_zk(nodename, timeout=30.0):
# in extremely rare case it can take more than 5 minutes in debug build with sanitizer
@pytest.mark.timeout(600)
def test_blocade_leader(started_cluster):
for i in range(100):
wait_nodes()
try:
for i, node in enumerate([node1, node2, node3]):
node.query("CREATE DATABASE IF NOT EXISTS ordinary ENGINE=Ordinary")
node.query("CREATE TABLE ordinary.t1 (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t1', '{}') ORDER BY tuple()".format(i + 1))
node.query("CREATE TABLE IF NOT EXISTS ordinary.t1 (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t1', '{}') ORDER BY tuple()".format(i + 1))
break
except Exception as ex:
print("Got exception from node", smaller_exception(ex))
time.sleep(0.1)
node2.query("INSERT INTO ordinary.t1 SELECT number FROM numbers(10)")
node1.query("SYSTEM SYNC REPLICA ordinary.t1", timeout=10)
node3.query("SYSTEM SYNC REPLICA ordinary.t1", timeout=10)
assert node1.query("SELECT COUNT() FROM ordinary.t1") == "10\n"
assert node2.query("SELECT COUNT() FROM ordinary.t1") == "10\n"
assert node3.query("SELECT COUNT() FROM ordinary.t1") == "10\n"
assert_eq_with_retry(node1, "SELECT COUNT() FROM ordinary.t1", "10")
assert_eq_with_retry(node2, "SELECT COUNT() FROM ordinary.t1", "10")
assert_eq_with_retry(node3, "SELECT COUNT() FROM ordinary.t1", "10")
with PartitionManager() as pm:
pm.partition_instances(node2, node1)
@ -87,7 +105,7 @@ def test_blocade_leader(started_cluster):
for i in range(100):
try:
node2.query("SYSTEM RESTART REPLICA ordinary.t1")
restart_replica_for_sure(node2, "ordinary.t1", "/clickhouse/t1/replicas/2")
node2.query("INSERT INTO ordinary.t1 SELECT rand() FROM numbers(100)")
break
except Exception as ex:
@ -104,7 +122,7 @@ def test_blocade_leader(started_cluster):
for i in range(100):
try:
node3.query("SYSTEM RESTART REPLICA ordinary.t1")
restart_replica_for_sure(node3, "ordinary.t1", "/clickhouse/t1/replicas/3")
node3.query("INSERT INTO ordinary.t1 SELECT rand() FROM numbers(100)")
break
except Exception as ex:
@ -122,7 +140,7 @@ def test_blocade_leader(started_cluster):
for n, node in enumerate([node1, node2, node3]):
for i in range(100):
try:
node.query("SYSTEM RESTART REPLICA ordinary.t1")
restart_replica_for_sure(node, "ordinary.t1", "/clickhouse/t1/replicas/{}".format(n + 1))
break
except Exception as ex:
try:
@ -150,7 +168,7 @@ def test_blocade_leader(started_cluster):
for n, node in enumerate([node1, node2, node3]):
for i in range(100):
try:
node.query("SYSTEM RESTART REPLICA ordinary.t1")
restart_replica_for_sure(node, "ordinary.t1", "/clickhouse/t1/replicas/{}".format(n + 1))
node.query("SYSTEM SYNC REPLICA ordinary.t1", timeout=10)
break
except Exception as ex:
@ -170,9 +188,9 @@ def test_blocade_leader(started_cluster):
for num, node in enumerate([node1, node2, node3]):
dump_zk(node, '/clickhouse/t1', '/clickhouse/t1/replicas/{}'.format(num + 1))
assert node1.query("SELECT COUNT() FROM ordinary.t1") == "310\n"
assert node2.query("SELECT COUNT() FROM ordinary.t1") == "310\n"
assert node3.query("SELECT COUNT() FROM ordinary.t1") == "310\n"
assert_eq_with_retry(node1, "SELECT COUNT() FROM ordinary.t1", "310")
assert_eq_with_retry(node2, "SELECT COUNT() FROM ordinary.t1", "310")
assert_eq_with_retry(node3, "SELECT COUNT() FROM ordinary.t1", "310")
def dump_zk(node, zk_path, replica_path):
@ -188,22 +206,47 @@ def dump_zk(node, zk_path, replica_path):
print("Parts")
print(node.query("SELECT name FROM system.zookeeper WHERE path = '{}/parts' FORMAT Vertical".format(replica_path)))
def restart_replica_for_sure(node, table_name, zk_replica_path):
fake_zk = None
try:
node.query("DETACH TABLE {}".format(table_name))
fake_zk = get_fake_zk(node.name)
if fake_zk.exists(zk_replica_path + "/is_active") is not None:
fake_zk.delete(zk_replica_path + "/is_active")
node.query("ATTACH TABLE {}".format(table_name))
except Exception as ex:
print("Exception", ex)
raise ex
finally:
if fake_zk:
fake_zk.stop()
fake_zk.close()
# in extremely rare case it can take more than 5 minutes in debug build with sanitizer
@pytest.mark.timeout(600)
def test_blocade_leader_twice(started_cluster):
for i in range(100):
wait_nodes()
try:
for i, node in enumerate([node1, node2, node3]):
node.query("CREATE DATABASE IF NOT EXISTS ordinary ENGINE=Ordinary")
node.query("CREATE TABLE ordinary.t2 (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t2', '{}') ORDER BY tuple()".format(i + 1))
node.query("CREATE TABLE IF NOT EXISTS ordinary.t2 (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t2', '{}') ORDER BY tuple()".format(i + 1))
break
except Exception as ex:
print("Got exception from node", smaller_exception(ex))
time.sleep(0.1)
node2.query("INSERT INTO ordinary.t2 SELECT number FROM numbers(10)")
node1.query("SYSTEM SYNC REPLICA ordinary.t2", timeout=10)
node3.query("SYSTEM SYNC REPLICA ordinary.t2", timeout=10)
assert node1.query("SELECT COUNT() FROM ordinary.t2") == "10\n"
assert node2.query("SELECT COUNT() FROM ordinary.t2") == "10\n"
assert node3.query("SELECT COUNT() FROM ordinary.t2") == "10\n"
assert_eq_with_retry(node1, "SELECT COUNT() FROM ordinary.t2", "10")
assert_eq_with_retry(node2, "SELECT COUNT() FROM ordinary.t2", "10")
assert_eq_with_retry(node3, "SELECT COUNT() FROM ordinary.t2", "10")
with PartitionManager() as pm:
pm.partition_instances(node2, node1)
@ -211,7 +254,7 @@ def test_blocade_leader_twice(started_cluster):
for i in range(100):
try:
node2.query("SYSTEM RESTART REPLICA ordinary.t2")
restart_replica_for_sure(node2, "ordinary.t2", "/clickhouse/t2/replicas/2")
node2.query("INSERT INTO ordinary.t2 SELECT rand() FROM numbers(100)")
break
except Exception as ex:
@ -228,7 +271,8 @@ def test_blocade_leader_twice(started_cluster):
for i in range(100):
try:
node3.query("SYSTEM RESTART REPLICA ordinary.t2")
restart_replica_for_sure(node3, "ordinary.t2", "/clickhouse/t2/replicas/3")
node3.query("SYSTEM SYNC REPLICA ordinary.t2", timeout=10)
node3.query("INSERT INTO ordinary.t2 SELECT rand() FROM numbers(100)")
break
except Exception as ex:
@ -243,6 +287,10 @@ def test_blocade_leader_twice(started_cluster):
dump_zk(node, '/clickhouse/t2', '/clickhouse/t2/replicas/{}'.format(num + 1))
assert False, "Cannot reconnect for node3"
node2.query("SYSTEM SYNC REPLICA ordinary.t2", timeout=10)
assert_eq_with_retry(node2, "SELECT COUNT() FROM ordinary.t2", "210")
assert_eq_with_retry(node3, "SELECT COUNT() FROM ordinary.t2", "210")
# Total network partition
pm.partition_instances(node3, node2)
@ -261,11 +309,10 @@ def test_blocade_leader_twice(started_cluster):
except Exception as ex:
time.sleep(0.5)
for n, node in enumerate([node1, node2, node3]):
for i in range(100):
try:
node.query("SYSTEM RESTART REPLICA ordinary.t2")
restart_replica_for_sure(node, "ordinary.t2", "/clickhouse/t2/replicas/{}".format(n + 1))
break
except Exception as ex:
try:
@ -293,13 +340,15 @@ def test_blocade_leader_twice(started_cluster):
dump_zk(node, '/clickhouse/t2', '/clickhouse/t2/replicas/{}'.format(num + 1))
assert False, "Cannot reconnect for node{}".format(n + 1)
for n, node in enumerate([node1, node2, node3]):
for i in range(100):
all_done = True
for n, node in enumerate([node1, node2, node3]):
try:
node.query("SYSTEM RESTART REPLICA ordinary.t2")
restart_replica_for_sure(node, "ordinary.t2", "/clickhouse/t2/replicas/{}".format(n + 1))
node.query("SYSTEM SYNC REPLICA ordinary.t2", timeout=10)
break
except Exception as ex:
all_done = False
try:
node.query("ATTACH TABLE ordinary.t2")
except Exception as attach_ex:
@ -307,15 +356,18 @@ def test_blocade_leader_twice(started_cluster):
print("Got exception node{}".format(n + 1), smaller_exception(ex))
time.sleep(0.5)
if all_done:
break
else:
for num, node in enumerate([node1, node2, node3]):
dump_zk(node, '/clickhouse/t2', '/clickhouse/t2/replicas/{}'.format(num + 1))
assert False, "Cannot reconnect for node{}".format(n + 1)
assert False, "Cannot reconnect in i {} retries".format(i)
assert node1.query("SELECT COUNT() FROM ordinary.t2") == "510\n"
assert_eq_with_retry(node1, "SELECT COUNT() FROM ordinary.t2", "510")
if node2.query("SELECT COUNT() FROM ordinary.t2") != "510\n":
for num, node in enumerate([node1, node2, node3]):
dump_zk(node, '/clickhouse/t2', '/clickhouse/t2/replicas/{}'.format(num + 1))
assert node2.query("SELECT COUNT() FROM ordinary.t2") == "510\n"
assert node3.query("SELECT COUNT() FROM ordinary.t2") == "510\n"
assert_eq_with_retry(node2, "SELECT COUNT() FROM ordinary.t2", "510")
assert_eq_with_retry(node3, "SELECT COUNT() FROM ordinary.t2", "510")

View File

@ -2,6 +2,7 @@
<test_keeper_server>
<tcp_port>9181</tcp_port>
<server_id>1</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>

View File

@ -2,6 +2,7 @@
<test_keeper_server>
<tcp_port>9181</tcp_port>
<server_id>2</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>

View File

@ -2,6 +2,7 @@
<test_keeper_server>
<tcp_port>9181</tcp_port>
<server_id>3</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>

View File

@ -6,6 +6,7 @@ import os
import time
from multiprocessing.dummy import Pool
from helpers.network import PartitionManager
from helpers.test_tools import assert_eq_with_retry
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', main_configs=['configs/enable_test_keeper1.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True)
@ -234,6 +235,6 @@ def test_simple_replicated_table(started_cluster):
node1.query("SYSTEM SYNC REPLICA t", timeout=10)
node3.query("SYSTEM SYNC REPLICA t", timeout=10)
assert node1.query("SELECT COUNT() FROM t") == "10\n"
assert node2.query("SELECT COUNT() FROM t") == "10\n"
assert node3.query("SELECT COUNT() FROM t") == "10\n"
assert_eq_with_retry(node1, "SELECT COUNT() FROM t", "10")
assert_eq_with_retry(node2, "SELECT COUNT() FROM t", "10")
assert_eq_with_retry(node3, "SELECT COUNT() FROM t", "10")

View File

@ -0,0 +1 @@
#!/usr/bin/env python3

View File

@ -0,0 +1,21 @@
<yandex>
<test_keeper_server>
<tcp_port>9181</tcp_port>
<server_id>1</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>
<session_timeout_ms>10000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>localhost</hostname>
<port>44444</port>
</server>
</raft_configuration>
</test_keeper_server>
</yandex>

View File

@ -0,0 +1,12 @@
<yandex>
<shutdown_wait_unfinished>3</shutdown_wait_unfinished>
<logger>
<level>trace</level>
<log>/var/log/clickhouse-server/log.log</log>
<errorlog>/var/log/clickhouse-server/log.err.log</errorlog>
<size>1000M</size>
<count>10</count>
<stderr>/var/log/clickhouse-server/stderr.log</stderr>
<stdout>/var/log/clickhouse-server/stdout.log</stdout>
</logger>
</yandex>

View File

@ -0,0 +1,8 @@
<yandex>
<zookeeper>
<node index="1">
<host>node</host>
<port>9181</port>
</node>
</zookeeper>
</yandex>

View File

@ -0,0 +1,124 @@
#!/usr/bin/env python3
import pytest
from helpers.cluster import ClickHouseCluster
import random
import string
import os
import time
from kazoo.client import KazooClient, KazooState
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', main_configs=['configs/enable_test_keeper.xml', 'configs/logs_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True)
def random_string(length):
return ''.join(random.choices(string.ascii_lowercase + string.digits, k=length))
def create_random_path(prefix="", depth=1):
if depth == 0:
return prefix
return create_random_path(os.path.join(prefix, random_string(3)), depth - 1)
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def get_connection_zk(nodename, timeout=30.0):
_fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout)
def reset_listener(state):
nonlocal _fake_zk_instance
print("Fake zk callback called for state", state)
if state != KazooState.CONNECTED:
_fake_zk_instance._reset()
_fake_zk_instance.add_listener(reset_listener)
_fake_zk_instance.start()
return _fake_zk_instance
def test_state_after_restart(started_cluster):
try:
node_zk = None
node_zk2 = None
node_zk = get_connection_zk("node")
node_zk.create("/test_state_after_restart", b"somevalue")
strs = []
for i in range(100):
strs.append(random_string(123).encode())
node_zk.create("/test_state_after_restart/node" + str(i), strs[i])
for i in range(100):
if i % 7 == 0:
node_zk.delete("/test_state_after_restart/node" + str(i))
node.restart_clickhouse(kill=True)
node_zk2 = get_connection_zk("node")
assert node_zk2.get("/test_state_after_restart")[0] == b"somevalue"
for i in range(100):
if i % 7 == 0:
assert node_zk2.exists("/test_state_after_restart/node" + str(i)) is None
else:
assert len(node_zk2.get("/test_state_after_restart/node" + str(i))[0]) == 123
assert node_zk2.get("/test_state_after_restart/node" + str(i))[0] == strs[i]
finally:
try:
if node_zk is not None:
node_zk.stop()
node_zk.close()
if node_zk2 is not None:
node_zk2.stop()
node_zk2.close()
except:
pass
# http://zookeeper-user.578899.n2.nabble.com/Why-are-ephemeral-nodes-written-to-disk-tp7583403p7583418.html
def test_ephemeral_after_restart(started_cluster):
try:
node_zk = None
node_zk2 = None
node_zk = get_connection_zk("node")
node_zk.create("/test_ephemeral_after_restart", b"somevalue")
strs = []
for i in range(100):
strs.append(random_string(123).encode())
node_zk.create("/test_ephemeral_after_restart/node" + str(i), strs[i], ephemeral=True)
for i in range(100):
if i % 7 == 0:
node_zk.delete("/test_ephemeral_after_restart/node" + str(i))
node.restart_clickhouse(kill=True)
node_zk2 = get_connection_zk("node")
assert node_zk2.get("/test_ephemeral_after_restart")[0] == b"somevalue"
for i in range(100):
if i % 7 == 0:
assert node_zk2.exists("/test_ephemeral_after_restart/node" + str(i)) is None
else:
assert len(node_zk2.get("/test_ephemeral_after_restart/node" + str(i))[0]) == 123
assert node_zk2.get("/test_ephemeral_after_restart/node" + str(i))[0] == strs[i]
finally:
try:
if node_zk is not None:
node_zk.stop()
node_zk.close()
if node_zk2 is not None:
node_zk2.stop()
node_zk2.close()
except:
pass

View File

@ -0,0 +1 @@
#!/usr/bin/env python3

View File

@ -0,0 +1,39 @@
<yandex>
<test_keeper_server>
<tcp_port>9181</tcp_port>
<server_id>1</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>
<session_timeout_ms>10000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>node1</hostname>
<port>44444</port>
<can_become_leader>true</can_become_leader>
<priority>3</priority>
</server>
<server>
<id>2</id>
<hostname>node2</hostname>
<port>44444</port>
<can_become_leader>true</can_become_leader>
<start_as_follower>true</start_as_follower>
<priority>2</priority>
</server>
<server>
<id>3</id>
<hostname>node3</hostname>
<port>44444</port>
<can_become_leader>true</can_become_leader>
<start_as_follower>true</start_as_follower>
<priority>1</priority>
</server>
</raft_configuration>
</test_keeper_server>
</yandex>

View File

@ -0,0 +1,39 @@
<yandex>
<test_keeper_server>
<tcp_port>9181</tcp_port>
<server_id>2</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>
<session_timeout_ms>10000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>node1</hostname>
<port>44444</port>
<can_become_leader>true</can_become_leader>
<priority>3</priority>
</server>
<server>
<id>2</id>
<hostname>node2</hostname>
<port>44444</port>
<can_become_leader>true</can_become_leader>
<start_as_follower>true</start_as_follower>
<priority>2</priority>
</server>
<server>
<id>3</id>
<hostname>node3</hostname>
<port>44444</port>
<can_become_leader>true</can_become_leader>
<start_as_follower>true</start_as_follower>
<priority>1</priority>
</server>
</raft_configuration>
</test_keeper_server>
</yandex>

View File

@ -0,0 +1,39 @@
<yandex>
<test_keeper_server>
<tcp_port>9181</tcp_port>
<server_id>3</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>
<session_timeout_ms>10000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>node1</hostname>
<port>44444</port>
<can_become_leader>true</can_become_leader>
<priority>3</priority>
</server>
<server>
<id>2</id>
<hostname>node2</hostname>
<port>44444</port>
<can_become_leader>true</can_become_leader>
<start_as_follower>true</start_as_follower>
<priority>2</priority>
</server>
<server>
<id>3</id>
<hostname>node3</hostname>
<port>44444</port>
<can_become_leader>true</can_become_leader>
<start_as_follower>true</start_as_follower>
<priority>1</priority>
</server>
</raft_configuration>
</test_keeper_server>
</yandex>

View File

@ -0,0 +1,12 @@
<yandex>
<shutdown_wait_unfinished>3</shutdown_wait_unfinished>
<logger>
<level>trace</level>
<log>/var/log/clickhouse-server/log.log</log>
<errorlog>/var/log/clickhouse-server/log.err.log</errorlog>
<size>1000M</size>
<count>10</count>
<stderr>/var/log/clickhouse-server/stderr.log</stderr>
<stdout>/var/log/clickhouse-server/stdout.log</stdout>
</logger>
</yandex>

View File

@ -0,0 +1,16 @@
<yandex>
<zookeeper>
<node index="1">
<host>node1</host>
<port>9181</port>
</node>
<node index="2">
<host>node2</host>
<port>9181</port>
</node>
<node index="3">
<host>node3</host>
<port>9181</port>
</node>
</zookeeper>
</yandex>

View File

@ -0,0 +1,98 @@
#!/usr/bin/env python3
import pytest
from helpers.cluster import ClickHouseCluster
import random
import string
import os
import time
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', main_configs=['configs/enable_test_keeper1.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True)
node2 = cluster.add_instance('node2', main_configs=['configs/enable_test_keeper2.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True)
node3 = cluster.add_instance('node3', main_configs=['configs/enable_test_keeper3.xml', 'configs/log_conf.xml', 'configs/use_test_keeper.xml'], stay_alive=True)
from kazoo.client import KazooClient, KazooState
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def get_fake_zk(nodename, timeout=30.0):
_fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout)
def reset_listener(state):
nonlocal _fake_zk_instance
print("Fake zk callback called for state", state)
if state != KazooState.CONNECTED:
_fake_zk_instance._reset()
_fake_zk_instance.add_listener(reset_listener)
_fake_zk_instance.start()
return _fake_zk_instance
def stop_zk(zk):
try:
if zk:
zk.stop()
zk.close()
except:
pass
def test_restart_multinode(started_cluster):
try:
node1_zk = node2_zk = node3_zk = None
node1_zk = get_fake_zk("node1")
node2_zk = get_fake_zk("node2")
node3_zk = get_fake_zk("node3")
for i in range(100):
node1_zk.create("/test_read_write_multinode_node" + str(i), ("somedata" + str(i)).encode())
for i in range(100):
if i % 10 == 0:
node1_zk.delete("/test_read_write_multinode_node" + str(i))
node2_zk.sync("/test_read_write_multinode_node0")
node3_zk.sync("/test_read_write_multinode_node0")
for i in range(100):
if i % 10 != 0:
assert node2_zk.get("/test_read_write_multinode_node" + str(i))[0] == ("somedata" + str(i)).encode()
assert node3_zk.get("/test_read_write_multinode_node" + str(i))[0] == ("somedata" + str(i)).encode()
else:
assert node2_zk.exists("/test_read_write_multinode_node" + str(i)) is None
assert node3_zk.exists("/test_read_write_multinode_node" + str(i)) is None
finally:
for zk in [node1_zk, node2_zk, node3_zk]:
stop_zk(zk)
node1.restart_clickhouse(kill=True)
node2.restart_clickhouse(kill=True)
node3.restart_clickhouse(kill=True)
for i in range(100):
try:
node1_zk = get_fake_zk("node1")
node2_zk = get_fake_zk("node2")
node3_zk = get_fake_zk("node3")
for i in range(100):
if i % 10 != 0:
assert node1_zk.get("/test_read_write_multinode_node" + str(i))[0] == ("somedata" + str(i)).encode()
assert node2_zk.get("/test_read_write_multinode_node" + str(i))[0] == ("somedata" + str(i)).encode()
assert node3_zk.get("/test_read_write_multinode_node" + str(i))[0] == ("somedata" + str(i)).encode()
else:
assert node1_zk.exists("/test_read_write_multinode_node" + str(i)) is None
assert node2_zk.exists("/test_read_write_multinode_node" + str(i)) is None
assert node3_zk.exists("/test_read_write_multinode_node" + str(i)) is None
break
except Exception as ex:
print("Got exception as ex", ex)
finally:
for zk in [node1_zk, node2_zk, node3_zk]:
stop_zk(zk)

View File

@ -1,18 +1,19 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
SCHEMADIR=$CURDIR/format_schemas
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
set -eo pipefail
# Run the client.
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
$CLICKHOUSE_CLIENT --multiquery <<EOF
DROP TABLE IF EXISTS array_3dim_protobuf_00825;
CREATE TABLE array_3dim_protobuf_00825
(
`a_b_c` Array(Array(Array(Int32)))
a_b_c Array(Array(Array(Int32)))
) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO array_3dim_protobuf_00825 VALUES ([[], [[]], [[1]], [[2,3],[4]]]), ([[[5, 6, 7]], [[8, 9, 10]]]);
@ -21,15 +22,16 @@ SELECT * FROM array_3dim_protobuf_00825;
EOF
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_array_3dim.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_array_3dim:ABC'" > "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_array_3dim:ABC'" > "$BINARY_FILE_PATH"
# Check the output in the protobuf format
echo
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_array_3dim:ABC" --input "$BINARY_FILE_PATH"
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_array_3dim:ABC" --input "$BINARY_FILE_PATH"
# Check the input in the protobuf format (now the table contains the same data twice).
echo
$CLICKHOUSE_CLIENT --query "INSERT INTO array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_array_3dim:ABC'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "INSERT INTO array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_array_3dim:ABC'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM array_3dim_protobuf_00825"
rm "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "DROP TABLE array_3dim_protobuf_00825"

View File

@ -3,18 +3,21 @@
# https://github.com/ClickHouse/ClickHouse/issues/9069
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
SCHEMADIR=$CURDIR/format_schemas
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
set -eo pipefail
# Run the client.
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
$CLICKHOUSE_CLIENT --multiquery <<EOF
DROP TABLE IF EXISTS array_of_arrays_protobuf_00825;
CREATE TABLE array_of_arrays_protobuf_00825
(
`a` String,
`b` Nested (
`c` Array(Float64)
a String,
b Nested (
c Array(Float64)
)
) ENGINE = MergeTree ORDER BY tuple();
@ -24,15 +27,16 @@ SELECT * FROM array_of_arrays_protobuf_00825;
EOF
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_array_of_arrays.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_array_of_arrays:AA'" > "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_array_of_arrays:AA'" > "$BINARY_FILE_PATH"
# Check the output in the protobuf format
echo
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_array_of_arrays:AA" --input "$BINARY_FILE_PATH"
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_array_of_arrays:AA" --input "$BINARY_FILE_PATH"
# Check the input in the protobuf format (now the table contains the same data twice).
echo
$CLICKHOUSE_CLIENT --query "INSERT INTO array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_array_of_arrays:AA'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "INSERT INTO array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_array_of_arrays:AA'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM array_of_arrays_protobuf_00825"
rm "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "DROP TABLE array_of_arrays_protobuf_00825"

View File

@ -3,13 +3,14 @@
# https://github.com/ClickHouse/ClickHouse/issues/7438
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
SCHEMADIR=$CURDIR/format_schemas
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
set -eo pipefail
# Run the client.
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
$CLICKHOUSE_CLIENT --multiquery <<EOF
DROP TABLE IF EXISTS enum_mapping_protobuf_00825;
CREATE TABLE enum_mapping_protobuf_00825
@ -23,15 +24,16 @@ SELECT * FROM enum_mapping_protobuf_00825;
EOF
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_enum_mapping.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_enum_mapping:Message'" > "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_enum_mapping:Message'" > "$BINARY_FILE_PATH"
# Check the output in the protobuf format
echo
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_enum_mapping:Message" --input "$BINARY_FILE_PATH"
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_enum_mapping:Message" --input "$BINARY_FILE_PATH"
# Check the input in the protobuf format (now the table contains the same data twice).
echo
$CLICKHOUSE_CLIENT --query "INSERT INTO enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_enum_mapping:Message'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "INSERT INTO enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_enum_mapping:Message'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825"
rm "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "DROP TABLE enum_mapping_protobuf_00825"

View File

@ -1,5 +0,0 @@
echo -ne '\xf3\x01\x0a\x24\x61\x37\x35\x32\x32\x31\x35\x38\x2d\x33\x64\x34\x31\x2d\x34\x62\x37\x37\x2d\x61\x64\x36\x39\x2d\x36\x63\x35\x39\x38\x65\x65\x35\x35\x63\x34\x39\x12\x04\x49\x76\x61\x6e\x1a\x06\x50\x65\x74\x72\x6f\x76\x20\x01\x28\xaf\x1f\x32\x03\x70\x6e\x67\x3a\x0c\x2b\x37\x34\x39\x35\x31\x32\x33\x34\x35\x36\x37\x40\x01\x4d\xfc\xd0\x30\x5c\x50\x26\x58\x09\x62\x09\x59\x65\x73\x74\x65\x72\x64\x61\x79\x62\x07\x46\x6c\x6f\x77\x65\x72\x73\x6a\x04\xff\x01\x00\x00\x72\x06\x4d\x6f\x73\x63\x6f\x77\x7a\x08\x4b\x03\x5f\x42\x72\x7d\x16\x42\x81\x01\x1f\x85\xeb\x51\xb8\x1e\x09\x40\x89\x01\x33\x33\x33\x33\x33\xc3\x6a\x40\x95\x01\xcd\xcc\xcc\x3d\x9d\x01\x9a\x99\xb9\x40\xa0\x01\x80\xc4\xd7\x8d\x7f\xaa\x01\x0c\x0a\x05\x6d\x65\x74\x65\x72\x15\x00\x00\x80\x3f\xaa\x01\x11\x0a\x0a\x63\x65\x6e\x74\x69\x6d\x65\x74\x65\x72\x15\x0a\xd7\x23\x3c\xaa\x01\x10\x0a\x09\x6b\x69\x6c\x6f\x6d\x65\x74\x65\x72\x15\x00\x00\x7a\x44\xb2\x01\x10\x0a\x0e\xa2\x06\x0b\x0a\x09\x08\xf4\x03\x12\x04\xf5\x03\xf6\x03\x7e\x0a\x24\x63\x36\x39\x34\x61\x64\x38\x61\x2d\x66\x37\x31\x34\x2d\x34\x65\x61\x33\x2d\x39\x30\x37\x64\x2d\x66\x64\x35\x34\x66\x62\x32\x35\x64\x39\x62\x35\x12\x07\x4e\x61\x74\x61\x6c\x69\x61\x1a\x08\x53\x6f\x6b\x6f\x6c\x6f\x76\x61\x28\xa6\x3f\x32\x03\x6a\x70\x67\x50\x1a\x58\x0b\x6a\x04\x64\xc8\x01\x32\x72\x08\x50\x6c\x79\x6d\x6f\x75\x74\x68\x7a\x08\x6a\x9d\x49\x42\x46\x8c\x84\xc0\x81\x01\x6e\x86\x1b\xf0\xf9\x21\x09\x40\x95\x01\x42\x60\xe5\x3b\x9d\x01\xcd\xcc\xac\x40\xa0\x01\xff\xff\xa9\xce\x93\x8c\x09\xc0\x01\x0a\x24\x61\x37\x64\x61\x31\x61\x61\x36\x2d\x66\x34\x32\x35\x2d\x34\x37\x38\x39\x2d\x38\x39\x34\x37\x2d\x62\x30\x33\x34\x37\x38\x36\x65\x64\x33\x37\x34\x12\x06\x56\x61\x73\x69\x6c\x79\x1a\x07\x53\x69\x64\x6f\x72\x6f\x76\x20\x01\x28\xfb\x48\x32\x03\x62\x6d\x70\x3a\x0d\x2b\x34\x34\x32\x30\x31\x32\x33\x34\x35\x36\x37\x38\x40\x01\x4d\x50\xe0\x27\x5c\x50\x17\x58\x04\x62\x05\x53\x75\x6e\x6e\x79\x6a\x05\xfa\x01\xf4\x01\x0a\x72\x08\x4d\x75\x72\x6d\x61\x6e\x73\x6b\x7a\x08\xfd\xf0\x89\x42\xc8\x4c\x04\x42\x81\x01\x11\x2d\x44\x54\xfb\x21\x09\x40\x89\x01\x00\x00\x00\xe8\x76\x48\x37\x42\x95\x01\x00\x00\x48\x44\x9d\x01\xcd\xcc\x4c\xc0\xa0\x01\x80\xd4\x9f\x93\x01\xaa\x01\x0c\x0a\x05\x70\x6f\x75\x6e\x64\x15\x00\x00\x80\x41\xb2\x01\x0a\x0a\x08\xa2\x06\x05\x0a\x03\x08\xf7\x03' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'"
echo -ne '\xb3\x01\x12\x05\x46\x72\x69\x64\x61\x28\x99\xe1\xf3\xd1\x0b\x52\x08\x45\x72\x6d\x61\x6b\x6f\x76\x61\x72\x0c\x00\x00\xdc\x42\x00\x00\x52\x43\x00\x00\x94\x42\x79\x48\xce\x3d\x51\x00\x00\x00\x00\xc8\x02\x14\xc2\x05\x08\x00\x00\x80\x44\x00\x00\x80\x49\x9a\x06\x02\x4b\x42\x9a\x06\x02\x4d\x42\xa1\x06\x00\x00\x00\x00\x00\x00\xe0\x3f\xa8\x06\x2a\xa8\x06\xa8\xff\xff\xff\xff\xff\xff\xff\xff\x01\xb0\x06\x01\xbd\x06\x25\x06\x49\x40\xfa\x06\x02\x34\x30\x90\x08\xe2\x08\xe1\x08\x89\xe6\x6e\xdd\x01\x00\x00\x00\xb0\x09\xc3\x19\xd0\x0c\xb7\x02\xe2\x12\x24\x32\x30\x66\x63\x64\x39\x35\x61\x2d\x33\x33\x32\x64\x2d\x34\x31\x64\x62\x2d\x61\x39\x65\x63\x2d\x31\x36\x31\x66\x36\x34\x34\x64\x30\x35\x39\x63\xa0\x38\xbc\x05\xaa\x38\x02\xbd\x05\xb4\x01\x08\x01\x12\x06\x49\x73\x6f\x6c\x64\x65\x52\x07\x4c\x61\x76\x72\x6f\x76\x61\x72\x0c\x00\x00\x7f\x43\x00\x00\x00\x00\x00\x00\x7f\x43\xaa\x01\x03\x61\x62\x63\xc8\x02\x32\xc2\x05\x08\x00\x00\x00\x41\x00\x00\x80\x3f\x9a\x06\x04\x42\x79\x74\x65\x9a\x06\x03\x42\x69\x74\xa1\x06\x00\x00\x00\x00\x00\x00\x12\x40\xa8\x06\x1a\xa8\x06\xb0\xff\xff\xff\xff\xff\xff\xff\xff\x01\xb0\x06\x01\xbd\x06\xf9\x0f\x49\x40\xc2\x06\x01\x2c\xfa\x06\x02\x33\x32\x90\x08\x78\xe1\x08\x39\x4e\x2b\xfe\xe4\xf5\xff\xff\xb0\x09\xe8\x30\xd8\x12\x01\xe2\x12\x24\x37\x63\x66\x61\x36\x38\x35\x36\x2d\x61\x35\x34\x61\x2d\x34\x37\x38\x36\x2d\x62\x38\x65\x35\x2d\x37\x34\x35\x31\x35\x39\x64\x35\x32\x32\x37\x38\xa0\x38\xbe\x05\xc2\x3e\x05\x15\x00\x00\xb6\x42' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:AltPerson'"
echo -ne '\xa5\x02\x0a\x24\x61\x61\x30\x65\x35\x61\x30\x36\x2d\x63\x61\x62\x32\x2d\x34\x30\x33\x34\x2d\x61\x36\x61\x32\x2d\x34\x38\x65\x38\x32\x62\x39\x31\x36\x36\x34\x65\x12\x06\x4c\x65\x6f\x6e\x69\x64\x1a\x08\x4b\x69\x72\x69\x6c\x6c\x6f\x76\x22\x04\x6d\x61\x6c\x65\x2a\x0a\x31\x39\x38\x33\x2d\x30\x36\x2d\x32\x34\x3a\x0c\x2b\x37\x34\x39\x35\x30\x32\x37\x35\x38\x36\x34\x42\x01\x31\x4a\x13\x32\x30\x31\x39\x2d\x30\x32\x2d\x30\x34\x20\x30\x39\x3a\x34\x35\x3a\x30\x30\x52\x02\x33\x35\x5a\x06\x63\x61\x6e\x63\x65\x72\x62\x07\x37\x20\x72\x69\x6e\x67\x73\x62\x08\x45\x61\x73\x74\x73\x69\x64\x65\x62\x0b\x4c\x61\x73\x74\x20\x48\x75\x72\x72\x61\x68\x6a\x01\x30\x6a\x01\x30\x6a\x03\x32\x35\x35\x72\x09\x53\x61\x6e\x20\x44\x69\x65\x67\x6f\x7a\x09\x33\x32\x2e\x38\x32\x33\x39\x34\x33\x7a\x0b\x2d\x31\x31\x37\x2e\x30\x38\x31\x33\x32\x37\x82\x01\x09\x33\x2e\x31\x34\x31\x35\x39\x32\x37\x8a\x01\x08\x31\x35\x30\x30\x30\x30\x30\x30\x92\x01\x06\x31\x38\x36\x2e\x37\x35\x9a\x01\x04\x2d\x32\x2e\x31\xa2\x01\x0b\x32\x30\x36\x35\x39\x38\x32\x39\x33\x33\x31\xaa\x01\x18\x0a\x06\x6d\x69\x6e\x75\x74\x65\x0a\x04\x68\x6f\x75\x72\x12\x02\x36\x30\x12\x04\x33\x36\x30\x30\xb2\x01\x08\x0a\x06\x12\x04\x31\x38\x30\x30' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:StrPerson'"
echo -ne '\xdd\x01\x0a\x24\x33\x66\x61\x65\x65\x30\x36\x34\x2d\x63\x34\x66\x37\x2d\x34\x64\x33\x34\x2d\x62\x36\x66\x33\x2d\x38\x64\x38\x31\x63\x32\x62\x36\x61\x31\x35\x64\x12\x04\x4e\x69\x63\x6b\x1a\x0a\x4b\x6f\x6c\x65\x73\x6e\x69\x6b\x6f\x76\x20\x01\x28\xda\x52\x32\x03\x62\x6d\x70\x3a\x0c\x34\x31\x32\x2d\x36\x38\x37\x2d\x35\x30\x30\x37\x40\x01\x4d\x2f\x27\xf2\x5b\x50\x14\x58\x09\x62\x06\x48\x61\x76\x61\x6e\x61\x68\x80\x01\x68\x00\x68\x80\x01\x72\x0a\x50\x69\x74\x74\x73\x62\x75\x72\x67\x68\x7a\x08\x9b\x11\x22\x42\x1f\xe6\x9f\xc2\x81\x01\x28\x2d\x44\x54\xfb\x21\x09\x40\x89\x01\x00\x00\x00\xe8\x76\x48\x27\x42\x95\x01\x00\x00\x43\x44\x9d\x01\x66\x66\x92\x41\xa0\x01\xce\xdf\xb8\xba\x01\xab\x01\x0d\xcd\xcc\xe2\x41\x0d\xcd\xcc\x4c\x3e\x0d\x00\x00\x80\x3f\x12\x05\x6f\x75\x6e\x63\x65\x12\x05\x63\x61\x72\x61\x74\x12\x04\x67\x72\x61\x6d\xac\x01\xb3\x01\x0b\xa2\x06\x05\x0b\x08\x96\x4a\x0c\x0c\xb4\x01' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_syntax2:Syntax2Person'"
echo -ne '\x04\x08\x02\x10\x04\x00\x04\x08\x03\x10\x09' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_squares_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:NumberAndSquare'"

View File

@ -1,18 +0,0 @@
a7da1aa6-f425-4789-8947-b034786ed374 Vasily Sidorov male 1995-07-28 bmp +442012345678 1 2018-12-30 00:00:00 23 leo ['Sunny'] [250,244,10] Murmansk [68.970680,33.074982] 3.14159265358979 100000000000.00 800 -3.2 154400000 ['pound'] [16] 503 []
c694ad8a-f714-4ea3-907d-fd54fb25d9b5 Natalia Sokolova female 1992-03-08 jpg \N 0 \N 26 pisces [] [100,200,50] Plymouth [50.403724,-4.142123] 3.14159 \N 0.007 5.4 -20000000000000 [] [] \N []
aa0e5a06-cab2-4034-a6a2-48e82b91664e Leonid Kirillov male 1983-06-24 \N +74950275864\0 1 2019-02-04 09:45:00 35 cancer ['7 rings','Eastside','Last Hurrah'] [0,0,255] San Diego [32.823943,-117.081327] 3.1415927 15000000.00 186.75 -2.1 20659829331 ['minute','hour'] [60,3600] \N [1800]
20fcd95a-332d-41db-a9ec-161f644d059c Frida Ermakova female 1978-12-12 \N 3124555929\0\0\0 0 2013-03-11 16:30:00 40 sagittarius [] [110,210,74] [42.000000,-88.000000] 3.1410000324249268 311.00 0.5 10.0 8010000009 ['KB','MB'] [1024,1048576] 700 [701]
a7522158-3d41-4b77-ad69-6c598ee55c49 Ivan Petrov male 1980-12-29 png +74951234567\0 1 2019-01-05 18:45:00 38 capricorn ['Yesterday','Flowers'] [255,0,0] Moscow [55.753216,37.622504] 3.14 214.10 0.1 5.8 17060000000 ['meter','centimeter','kilometer'] [1,0.01,1000] 500 [501,502]
3faee064-c4f7-4d34-b6f3-8d81c2b6a15d Nick Kolesnikov male 1998-12-26 bmp 412-687-5007\0 1 2018-11-19 05:59:59 20 capricorn ['Havana'] [128,0,128] Pittsburgh [40.517192,-79.949456] 3.1415926535898 50000000000.00 780 18.3 195500007 ['ounce','carat','gram'] [28.35,0.2,1] 9494 []
7cfa6856-a54a-4786-b8e5-745159d52278 Isolde Lavrova female 1987-02-09 \N \N 1 \N 32 aquarius [] [255,0,255] [26.000000,-80.000000] 3.1415998935699463 \N 4.5 25.0 -11111111111111 ['Byte','Bit'] [8,1] 702 []
0 0
2 4
3 9
a7da1aa6-f425-4789-8947-b034786ed374 Vasily Sidorov male 1995-07-28 bmp +442012345678 1 2018-12-30 00:00:00 23 leo ['Sunny'] [250,244,10] Murmansk [68.970680,33.074982] 3.14159265358979 100000000000.00 800 -3.2 154400000 ['pound'] [16] 503 []
c694ad8a-f714-4ea3-907d-fd54fb25d9b5 Natalia Sokolova female 1992-03-08 jpg \N 0 \N 26 pisces [] [100,200,50] Plymouth [50.403724,-4.142123] 3.14159 \N 0.007 5.4 -20000000000000 [] [] \N []
a7522158-3d41-4b77-ad69-6c598ee55c49 Ivan Petrov male 1980-12-29 png +74951234567\0 1 2019-01-05 18:45:00 38 capricorn ['Yesterday','Flowers'] [255,0,0] Moscow [55.753216,37.622504] 3.14 214.10 0.1 5.8 17060000000 ['meter','centimeter','kilometer'] [1,0.01,1000] 500 [501,502]
3faee064-c4f7-4d34-b6f3-8d81c2b6a15d Nick Kolesnikov male 1998-12-26 bmp 412-687-5007\0 1 2018-11-19 05:59:59 20 capricorn ['Havana'] [128,0,128] Pittsburgh [40.517192,-79.949456] 3.1415926535898 50000000000.00 780 18.3 195500007 ['ounce','carat','gram'] [28.35,0.2,1] 9494 []
2 4
3 9
ok
ok

View File

@ -1,77 +0,0 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
set -eo pipefail
# Run the client.
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
DROP TABLE IF EXISTS in_persons_00825;
DROP TABLE IF EXISTS in_squares_00825;
CREATE TABLE in_persons_00825 (uuid UUID,
name String,
surname String,
gender Enum8('male'=1, 'female'=0),
birthDate Date,
photo Nullable(String),
phoneNumber Nullable(FixedString(13)),
isOnline UInt8,
visitTime Nullable(DateTime),
age UInt8,
zodiacSign Enum16('aries'=321, 'taurus'=420, 'gemini'=521, 'cancer'=621, 'leo'=723, 'virgo'=823,
'libra'=923, 'scorpius'=1023, 'sagittarius'=1122, 'capricorn'=1222, 'aquarius'=120,
'pisces'=219),
songs Array(String),
color Array(UInt8),
hometown LowCardinality(String),
location Array(Decimal32(6)),
pi Nullable(Float64),
lotteryWin Nullable(Decimal64(2)),
someRatio Float32,
temperature Decimal32(1),
randomBigNumber Int64,
measureUnits Nested (unit String, coef Float32),
nestiness_a_b_c_d Nullable(UInt32),
`nestiness_a_B.c_E` Array(UInt32)
) ENGINE = MergeTree ORDER BY tuple();
CREATE TABLE in_squares_00825 (number UInt32, square UInt32) ENGINE = MergeTree ORDER BY tuple();
EOF
# To generate the file 00825_protobuf_format_input.insh use the following commands:
# ninja ProtobufDelimitedMessagesSerializer
# build/utils/test-data-generator/ProtobufDelimitedMessagesSerializer
# shellcheck source=./00825_protobuf_format_input.insh
source "$CURDIR"/00825_protobuf_format_input.insh
$CLICKHOUSE_CLIENT --query "SELECT * FROM in_persons_00825 ORDER BY uuid;"
$CLICKHOUSE_CLIENT --query "SELECT * FROM in_squares_00825 ORDER BY number;"
$CLICKHOUSE_CLIENT --query "TRUNCATE TABLE in_persons_00825;"
$CLICKHOUSE_CLIENT --query "TRUNCATE TABLE in_squares_00825;"
# shellcheck source=./00825_protobuf_format_input_single.insh
source "$CURDIR"/00825_protobuf_format_input_single.insh
$CLICKHOUSE_CLIENT --query "SELECT * FROM in_persons_00825 ORDER BY uuid;"
$CLICKHOUSE_CLIENT --query "SELECT * FROM in_squares_00825 ORDER BY number;"
# Try to input malformed data.
set +eo pipefail
echo -ne '\xe0\x80\x3f\x0b' \
| $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'" 2>&1 \
| grep -qF "Protobuf messages are corrupted" && echo "ok" || echo "fail"
set -eo pipefail
# Try to input malformed data for ProtobufSingle
set +eo pipefail
echo -ne '\xff\xff\x3f\x0b' \
| $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'" 2>&1 \
| grep -qF "Protobuf messages are corrupted" && echo "ok" || echo "fail"
set -eo pipefail
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS in_persons_00825;"
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS in_squares_00825;"

View File

@ -1,12 +0,0 @@
echo -ne '\x0a\x24\x61\x37\x35\x32\x32\x31\x35\x38\x2d\x33\x64\x34\x31\x2d\x34\x62\x37\x37\x2d\x61\x64\x36\x39\x2d\x36\x63\x35\x39\x38\x65\x65\x35\x35\x63\x34\x39\x12\x04\x49\x76\x61\x6e\x1a\x06\x50\x65\x74\x72\x6f\x76\x20\x01\x28\xaf\x1f\x32\x03\x70\x6e\x67\x3a\x0c\x2b\x37\x34\x39\x35\x31\x32\x33\x34\x35\x36\x37\x40\x01\x4d\xfc\xd0\x30\x5c\x50\x26\x58\x09\x62\x09\x59\x65\x73\x74\x65\x72\x64\x61\x79\x62\x07\x46\x6c\x6f\x77\x65\x72\x73\x6a\x04\xff\x01\x00\x00\x72\x06\x4d\x6f\x73\x63\x6f\x77\x7a\x08\x4b\x03\x5f\x42\x72\x7d\x16\x42\x81\x01\x1f\x85\xeb\x51\xb8\x1e\x09\x40\x89\x01\x33\x33\x33\x33\x33\xc3\x6a\x40\x95\x01\xcd\xcc\xcc\x3d\x9d\x01\x9a\x99\xb9\x40\xa0\x01\x80\xc4\xd7\x8d\x7f\xaa\x01\x0c\x0a\x05\x6d\x65\x74\x65\x72\x15\x00\x00\x80\x3f\xaa\x01\x11\x0a\x0a\x63\x65\x6e\x74\x69\x6d\x65\x74\x65\x72\x15\x0a\xd7\x23\x3c\xaa\x01\x10\x0a\x09\x6b\x69\x6c\x6f\x6d\x65\x74\x65\x72\x15\x00\x00\x7a\x44\xb2\x01\x10\x0a\x0e\xa2\x06\x0b\x0a\x09\x08\xf4\x03\x12\x04\xf5\x03\xf6\x03' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'"
echo -ne '\x0a\x24\x63\x36\x39\x34\x61\x64\x38\x61\x2d\x66\x37\x31\x34\x2d\x34\x65\x61\x33\x2d\x39\x30\x37\x64\x2d\x66\x64\x35\x34\x66\x62\x32\x35\x64\x39\x62\x35\x12\x07\x4e\x61\x74\x61\x6c\x69\x61\x1a\x08\x53\x6f\x6b\x6f\x6c\x6f\x76\x61\x28\xa6\x3f\x32\x03\x6a\x70\x67\x50\x1a\x58\x0b\x6a\x04\x64\xc8\x01\x32\x72\x08\x50\x6c\x79\x6d\x6f\x75\x74\x68\x7a\x08\x6a\x9d\x49\x42\x46\x8c\x84\xc0\x81\x01\x6e\x86\x1b\xf0\xf9\x21\x09\x40\x95\x01\x42\x60\xe5\x3b\x9d\x01\xcd\xcc\xac\x40\xa0\x01\xff\xff\xa9\xce\x93\x8c\x09' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'"
echo -ne '\x0a\x24\x61\x37\x64\x61\x31\x61\x61\x36\x2d\x66\x34\x32\x35\x2d\x34\x37\x38\x39\x2d\x38\x39\x34\x37\x2d\x62\x30\x33\x34\x37\x38\x36\x65\x64\x33\x37\x34\x12\x06\x56\x61\x73\x69\x6c\x79\x1a\x07\x53\x69\x64\x6f\x72\x6f\x76\x20\x01\x28\xfb\x48\x32\x03\x62\x6d\x70\x3a\x0d\x2b\x34\x34\x32\x30\x31\x32\x33\x34\x35\x36\x37\x38\x40\x01\x4d\x50\xe0\x27\x5c\x50\x17\x58\x04\x62\x05\x53\x75\x6e\x6e\x79\x6a\x05\xfa\x01\xf4\x01\x0a\x72\x08\x4d\x75\x72\x6d\x61\x6e\x73\x6b\x7a\x08\xfd\xf0\x89\x42\xc8\x4c\x04\x42\x81\x01\x11\x2d\x44\x54\xfb\x21\x09\x40\x89\x01\x00\x00\x00\xe8\x76\x48\x37\x42\x95\x01\x00\x00\x48\x44\x9d\x01\xcd\xcc\x4c\xc0\xa0\x01\x80\xd4\x9f\x93\x01\xaa\x01\x0c\x0a\x05\x70\x6f\x75\x6e\x64\x15\x00\x00\x80\x41\xb2\x01\x0a\x0a\x08\xa2\x06\x05\x0a\x03\x08\xf7\x03' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'"
echo -ne '\x0a\x24\x33\x66\x61\x65\x65\x30\x36\x34\x2d\x63\x34\x66\x37\x2d\x34\x64\x33\x34\x2d\x62\x36\x66\x33\x2d\x38\x64\x38\x31\x63\x32\x62\x36\x61\x31\x35\x64\x12\x04\x4e\x69\x63\x6b\x1a\x0a\x4b\x6f\x6c\x65\x73\x6e\x69\x6b\x6f\x76\x20\x01\x28\xda\x52\x32\x03\x62\x6d\x70\x3a\x0c\x34\x31\x32\x2d\x36\x38\x37\x2d\x35\x30\x30\x37\x40\x01\x4d\x2f\x27\xf2\x5b\x50\x14\x58\x09\x62\x06\x48\x61\x76\x61\x6e\x61\x68\x80\x01\x68\x00\x68\x80\x01\x72\x0a\x50\x69\x74\x74\x73\x62\x75\x72\x67\x68\x7a\x08\x9b\x11\x22\x42\x1f\xe6\x9f\xc2\x81\x01\x28\x2d\x44\x54\xfb\x21\x09\x40\x89\x01\x00\x00\x00\xe8\x76\x48\x27\x42\x95\x01\x00\x00\x43\x44\x9d\x01\x66\x66\x92\x41\xa0\x01\xce\xdf\xb8\xba\x01\xab\x01\x0d\xcd\xcc\xe2\x41\x0d\xcd\xcc\x4c\x3e\x0d\x00\x00\x80\x3f\x12\x05\x6f\x75\x6e\x63\x65\x12\x05\x63\x61\x72\x61\x74\x12\x04\x67\x72\x61\x6d\xac\x01\xb3\x01\x0b\xa2\x06\x05\x0b\x08\x96\x4a\x0c\x0c\xb4\x01' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format_syntax2:Syntax2Person'"
echo -ne '\x08\x02\x10\x04' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_squares_00825 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:NumberAndSquare'"
echo -ne '\x08\x03\x10\x09' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_squares_00825 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:NumberAndSquare'"
### Actually empty Protobuf message is a valid message (with all values default).
### It will work in Kafka but clickhouse-client forbids that:
### Code: 108. DB::Exception: No data to insert
## echo -ne '' | $CLICKHOUSE_CLIENT --query="INSERT INTO in_squares_00825 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:NumberAndSquare'"

View File

@ -1,31 +1,30 @@
#!/usr/bin/env bash
# https://github.com/ClickHouse/ClickHouse/issues/6497
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
SCHEMADIR=$CURDIR/format_schemas
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
set -eo pipefail
# Run the client.
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
$CLICKHOUSE_CLIENT --multiquery <<EOF
SET allow_experimental_map_type = 1;
DROP TABLE IF EXISTS map_00825;
DROP TABLE IF EXISTS map_protobuf_00825;
CREATE TABLE map_00825
CREATE TABLE map_protobuf_00825
(
a Map(String, UInt32)
) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO map_00825 VALUES ({'x':5, 'y':7}), ({'z':11}), ({'temp':0}), ({'':0});
INSERT INTO map_protobuf_00825 VALUES ({'x':5, 'y':7}), ({'z':11}), ({'temp':0}), ({'':0});
SELECT * FROM map_00825;
SELECT * FROM map_protobuf_00825;
EOF
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_map.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM map_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_map:Message'" > "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM map_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_map:Message'" > "$BINARY_FILE_PATH"
# Check the output in the protobuf format
echo
@ -34,7 +33,8 @@ hexdump -C $BINARY_FILE_PATH
# Check the input in the protobuf format (now the table contains the same data twice).
echo
$CLICKHOUSE_CLIENT --query "INSERT INTO map_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_map:Message'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM map_00825"
$CLICKHOUSE_CLIENT --query "INSERT INTO map_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_map:Message'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM map_protobuf_00825"
rm "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "DROP TABLE map_protobuf_00825"

View File

@ -0,0 +1,52 @@
[[(1),(2)],[(3),(4),(5)]]
[[(6)]]
[[]]
[]
Binary representation:
00000000 18 0a 08 12 02 18 01 12 02 18 02 0a 0c 12 02 18 |................|
00000010 03 12 02 18 04 12 02 18 05 06 0a 04 12 02 18 06 |................|
00000020 02 0a 00 00 |....|
00000024
MESSAGE #1 AT 0x00000001
x {
y {
z: 1
}
y {
z: 2
}
}
x {
y {
z: 3
}
y {
z: 4
}
y {
z: 5
}
}
MESSAGE #2 AT 0x0000001A
x {
y {
z: 6
}
}
MESSAGE #3 AT 0x00000021
x {
}
MESSAGE #4 AT 0x00000024
Binary representation is as expected
[[(1),(2)],[(3),(4),(5)]]
[[(6)]]
[[]]
[]
[[(1),(2)],[(3),(4),(5)]]
[[(6)]]
[[]]
[]

View File

@ -0,0 +1,36 @@
#!/usr/bin/env bash
# https://github.com/ClickHouse/ClickHouse/issues/11117
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
SCHEMADIR=$CURDIR/format_schemas
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
set -eo pipefail
# Run the client.
$CLICKHOUSE_CLIENT --multiquery <<EOF
DROP TABLE IF EXISTS nested_in_nested_protobuf_00825;
CREATE TABLE nested_in_nested_protobuf_00825 (x Nested (y Nested (z Int64))) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO nested_in_nested_protobuf_00825 VALUES ([[(1),(2)],[(3),(4),(5)]]), ([[(6)]]), ([[]]), ([]);
SELECT * FROM nested_in_nested_protobuf_00825;
EOF
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_nested_in_nested.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM nested_in_nested_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_nested_in_nested:MessageType'" > "$BINARY_FILE_PATH"
# Check the output in the protobuf format
echo
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_nested_in_nested:MessageType" --input "$BINARY_FILE_PATH"
# Check the input in the protobuf format (now the table contains the same data twice).
echo
$CLICKHOUSE_CLIENT --query "INSERT INTO nested_in_nested_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_nested_in_nested:MessageType'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM nested_in_nested_protobuf_00825"
rm "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "DROP TABLE nested_in_nested_protobuf_00825"

View File

@ -3,13 +3,14 @@
# https://github.com/ClickHouse/ClickHouse/issues/6497
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
SCHEMADIR=$CURDIR/format_schemas
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
set -eo pipefail
# Run the client.
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
$CLICKHOUSE_CLIENT --multiquery <<EOF
DROP TABLE IF EXISTS nested_optional_protobuf_00825;
CREATE TABLE nested_optional_protobuf_00825
@ -27,15 +28,16 @@ SELECT * FROM nested_optional_protobuf_00825;
EOF
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_nested_optional.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_nested_optional:Message'" > "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_nested_optional:Message'" > "$BINARY_FILE_PATH"
# Check the output in the protobuf format
echo
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_nested_optional:Message" --input "$BINARY_FILE_PATH"
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_nested_optional:Message" --input "$BINARY_FILE_PATH"
# Check the input in the protobuf format (now the table contains the same data twice).
echo
$CLICKHOUSE_CLIENT --query "INSERT INTO nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_nested_optional:Message'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "INSERT INTO nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_nested_optional:Message'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM nested_optional_protobuf_00825"
rm "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "DROP TABLE nested_optional_protobuf_00825"

View File

@ -0,0 +1,13 @@
1000 1K
2000 2K
3000 3K
Binary representation:
00000000 08 e8 07 12 02 31 4b |.....1K|
00000007
x: 1000
str: "1K"
Roundtrip:
1000 1K

View File

@ -0,0 +1,52 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
SCHEMADIR=$CURDIR/format_schemas
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
set -eo pipefail
# Run the client.
$CLICKHOUSE_CLIENT --multiquery <<EOF
DROP TABLE IF EXISTS no_length_delimiter_protobuf_00825;
DROP TABLE IF EXISTS roundtrip_no_length_delimiter_protobuf_00825;
CREATE TABLE no_length_delimiter_protobuf_00825
(
x Int32,
str String
) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO no_length_delimiter_protobuf_00825 VALUES (1000, '1K'), (2000, '2K'), (3000, '3K');
SELECT * FROM no_length_delimiter_protobuf_00825;
EOF
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_no_length_delimiter.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM no_length_delimiter_protobuf_00825 LIMIT 1 FORMAT ProtobufSingle SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_no_length_delimiter:Message'" > "$BINARY_FILE_PATH"
# Check the output in the ProtobufSingle format
echo
echo "Binary representation:"
hexdump -C $BINARY_FILE_PATH
echo
(cd $SCHEMADIR && protoc --decode Message 00825_protobuf_format_no_length_delimiter.proto) < $BINARY_FILE_PATH
# Check the input in the ProtobufSingle format.
echo
echo "Roundtrip:"
$CLICKHOUSE_CLIENT --query "CREATE TABLE roundtrip_no_length_delimiter_protobuf_00825 AS no_length_delimiter_protobuf_00825"
$CLICKHOUSE_CLIENT --query "INSERT INTO roundtrip_no_length_delimiter_protobuf_00825 FORMAT ProtobufSingle SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_no_length_delimiter:Message'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM roundtrip_no_length_delimiter_protobuf_00825"
rm "$BINARY_FILE_PATH"
# The ProtobufSingle format can't be used to write multiple rows because this format doesn't have any row delimiter.
$CLICKHOUSE_CLIENT --multiquery --testmode > /dev/null <<EOF
SELECT * FROM no_length_delimiter_protobuf_00825 FORMAT ProtobufSingle SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_no_length_delimiter:Message'; -- { clientError 546 }
EOF
$CLICKHOUSE_CLIENT --multiquery <<EOF
DROP TABLE no_length_delimiter_protobuf_00825;
DROP TABLE roundtrip_no_length_delimiter_protobuf_00825;
EOF

View File

@ -1,79 +0,0 @@
#!/usr/bin/env bash
# To generate reference file for this test use the following commands:
# ninja ProtobufDelimitedMessagesSerializer
# build/utils/test-data-generator/ProtobufDelimitedMessagesSerializer
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
set -e -o pipefail
# Run the client.
$CLICKHOUSE_CLIENT -mnT <<EOF
DROP TABLE IF EXISTS out_persons_00825;
DROP TABLE IF EXISTS out_squares_00825;
CREATE TABLE out_persons_00825 (uuid UUID,
name String,
surname String,
gender Enum8('male'=1, 'female'=0),
birthDate Date,
photo Nullable(String),
phoneNumber Nullable(FixedString(13)),
isOnline UInt8,
visitTime Nullable(DateTime),
age UInt8,
zodiacSign Enum16('aries'=321, 'taurus'=420, 'gemini'=521, 'cancer'=621, 'leo'=723, 'virgo'=823,
'libra'=923, 'scorpius'=1023, 'sagittarius'=1122, 'capricorn'=1222, 'aquarius'=120,
'pisces'=219),
songs Array(String),
color Array(UInt8),
hometown LowCardinality(String),
location Array(Decimal32(6)),
pi Nullable(Float64),
lotteryWin Nullable(Decimal64(2)),
someRatio Float32,
temperature Decimal32(1),
randomBigNumber Int64,
measureUnits Nested(unit String, coef Float32),
nestiness_a_b_c_d Nullable(UInt32),
\`nestiness_a_B.c_E\` Array(UInt32)
) ENGINE = MergeTree ORDER BY tuple();
CREATE TABLE out_squares_00825 (number UInt32, square UInt64) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO out_persons_00825 VALUES (toUUID('a7522158-3d41-4b77-ad69-6c598ee55c49'), 'Ivan', 'Petrov', 'male', toDate('1980-12-29'), 'png', '+74951234567', 1, toDateTime('2019-01-05 18:45:00'), 38, 'capricorn', ['Yesterday', 'Flowers'], [255, 0, 0], 'Moscow', [55.753215, 37.622504], 3.14, 214.10, 0.1, 5.8, 17060000000, ['meter', 'centimeter', 'kilometer'], [1, 0.01, 1000], 500, [501, 502]);
INSERT INTO out_persons_00825 VALUES (toUUID('c694ad8a-f714-4ea3-907d-fd54fb25d9b5'), 'Natalia', 'Sokolova', 'female', toDate('1992-03-08'), 'jpg', NULL, 0, NULL, 26, 'pisces', [], [100, 200, 50], 'Plymouth', [50.403724, -4.142123], 3.14159, NULL, 0.007, 5.4, -20000000000000, [], [], NULL, []);
INSERT INTO out_persons_00825 VALUES (toUUID('a7da1aa6-f425-4789-8947-b034786ed374'), 'Vasily', 'Sidorov', 'male', toDate('1995-07-28'), 'bmp', '+442012345678', 1, toDateTime('2018-12-30 00:00:00'), 23, 'leo', ['Sunny'], [250, 244, 10], 'Murmansk', [68.970682, 33.074981], 3.14159265358979, 100000000000, 800, -3.2, 154400000, ['pound'], [16], 503, []);
INSERT INTO out_squares_00825 VALUES (2, 4), (0, 0), (3, 9);
SELECT * FROM out_persons_00825 ORDER BY name FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person';
SELECT 'ALTERNATIVE->';
SELECT * FROM out_persons_00825 ORDER BY name FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:AltPerson';
SELECT 'STRINGS->';
SELECT * FROM out_persons_00825 ORDER BY name FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:StrPerson';
SELECT 'SYNTAX2->';
SELECT * FROM out_persons_00825 ORDER BY name FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_syntax2:Syntax2Person';
SELECT 'SQUARES->';
SELECT * FROM out_squares_00825 ORDER BY number FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:NumberAndSquare';
SELECT '\n\n** ProtobufSingle **\n\n';
SELECT * FROM out_persons_00825 ORDER BY name LIMIT 1 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person';
SELECT 'ALTERNATIVE->';
SELECT * FROM out_persons_00825 ORDER BY name LIMIT 1 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:AltPerson';
SELECT 'STRINGS->';
SELECT * FROM out_persons_00825 ORDER BY name LIMIT 1 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:StrPerson';
SELECT 'SYNTAX2->';
SELECT * FROM out_persons_00825 ORDER BY name LIMIT 1 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format_syntax2:Syntax2Person';
SELECT 'SQUARES->';
SELECT * FROM out_squares_00825 ORDER BY number LIMIT 1 FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:NumberAndSquare';
-- Code: 546, e.displayText() = DB::Exception: The ProtobufSingle format can't be used to write multiple rows because this format doesn't have any row delimiter.
SELECT * FROM out_persons_00825 ORDER BY name FORMAT ProtobufSingle SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'; -- { clientError 546 }
DROP TABLE IF EXISTS out_persons_00825;
DROP TABLE IF EXISTS out_squares_00825;
EOF

View File

@ -0,0 +1,569 @@
a7522158-3d41-4b77-ad69-6c598ee55c49 Ivan Petrov male 1980-12-29 png +74951234567\0 1 2019-01-05 18:45:00 38 capricorn ['Yesterday','Flowers'] [255,0,0] Moscow [55.753215,37.622504] 3.14 214.10 0.1 5.8 17060000000 ['meter','centimeter','kilometer'] [1,0.01,1000] 500 [501,502]
c694ad8a-f714-4ea3-907d-fd54fb25d9b5 Natalia Sokolova female 1992-03-08 jpg \N 0 \N 26 pisces [] [100,200,50] Plymouth [50.403724,-4.142123] 3.14159 \N 0.007 5.4 -20000000000000 [] [] \N []
a7da1aa6-f425-4789-8947-b034786ed374 Vasily Sidorov male 1995-07-28 bmp +442012345678 1 2018-12-30 00:00:00 23 leo ['Sunny'] [250,244,10] Murmansk [68.970682,33.074981] 3.14159265358979 100000000000.00 800 -3.2 154400000 ['pound'] [16] 503 []
Schema 00825_protobuf_format_persons:Person
Binary representation:
00000000 f4 01 0a 24 61 37 35 32 32 31 35 38 2d 33 64 34 |...$a7522158-3d4|
00000010 31 2d 34 62 37 37 2d 61 64 36 39 2d 36 63 35 39 |1-4b77-ad69-6c59|
00000020 38 65 65 35 35 63 34 39 12 04 49 76 61 6e 1a 06 |8ee55c49..Ivan..|
00000030 50 65 74 72 6f 76 20 01 28 af 1f 32 03 70 6e 67 |Petrov .(..2.png|
00000040 3a 0d 2b 37 34 39 35 31 32 33 34 35 36 37 00 40 |:.+74951234567.@|
00000050 01 4d fc d0 30 5c 50 26 58 09 62 09 59 65 73 74 |.M..0\P&X.b.Yest|
00000060 65 72 64 61 79 62 07 46 6c 6f 77 65 72 73 6a 04 |erdayb.Flowersj.|
00000070 ff 01 00 00 72 06 4d 6f 73 63 6f 77 7a 08 4b 03 |....r.Moscowz.K.|
00000080 5f 42 72 7d 16 42 81 01 1f 85 eb 51 b8 1e 09 40 |_Br}.B.....Q...@|
00000090 89 01 33 33 33 33 33 c3 6a 40 95 01 cd cc cc 3d |..33333.j@.....=|
000000a0 9d 01 9a 99 b9 40 a0 01 80 c4 d7 8d 7f aa 01 0c |.....@..........|
000000b0 0a 05 6d 65 74 65 72 15 00 00 80 3f aa 01 11 0a |..meter....?....|
000000c0 0a 63 65 6e 74 69 6d 65 74 65 72 15 0a d7 23 3c |.centimeter...#<|
000000d0 aa 01 10 0a 09 6b 69 6c 6f 6d 65 74 65 72 15 00 |.....kilometer..|
000000e0 00 7a 44 b2 01 10 0a 0e a2 06 0b 0a 09 08 f4 03 |.zD.............|
000000f0 12 04 f5 03 f6 03 7e 0a 24 63 36 39 34 61 64 38 |......~.$c694ad8|
00000100 61 2d 66 37 31 34 2d 34 65 61 33 2d 39 30 37 64 |a-f714-4ea3-907d|
00000110 2d 66 64 35 34 66 62 32 35 64 39 62 35 12 07 4e |-fd54fb25d9b5..N|
00000120 61 74 61 6c 69 61 1a 08 53 6f 6b 6f 6c 6f 76 61 |atalia..Sokolova|
00000130 28 a6 3f 32 03 6a 70 67 50 1a 58 0b 6a 04 64 c8 |(.?2.jpgP.X.j.d.|
00000140 01 32 72 08 50 6c 79 6d 6f 75 74 68 7a 08 6a 9d |.2r.Plymouthz.j.|
00000150 49 42 46 8c 84 c0 81 01 6e 86 1b f0 f9 21 09 40 |IBF.....n....!.@|
00000160 95 01 42 60 e5 3b 9d 01 cd cc ac 40 a0 01 ff ff |..B`.;.....@....|
00000170 a9 ce 93 8c 09 c0 01 0a 24 61 37 64 61 31 61 61 |........$a7da1aa|
00000180 36 2d 66 34 32 35 2d 34 37 38 39 2d 38 39 34 37 |6-f425-4789-8947|
00000190 2d 62 30 33 34 37 38 36 65 64 33 37 34 12 06 56 |-b034786ed374..V|
000001a0 61 73 69 6c 79 1a 07 53 69 64 6f 72 6f 76 20 01 |asily..Sidorov .|
000001b0 28 fb 48 32 03 62 6d 70 3a 0d 2b 34 34 32 30 31 |(.H2.bmp:.+44201|
000001c0 32 33 34 35 36 37 38 40 01 4d 50 e0 27 5c 50 17 |2345678@.MP.'\P.|
000001d0 58 04 62 05 53 75 6e 6e 79 6a 05 fa 01 f4 01 0a |X.b.Sunnyj......|
000001e0 72 08 4d 75 72 6d 61 6e 73 6b 7a 08 fd f0 89 42 |r.Murmanskz....B|
000001f0 c8 4c 04 42 81 01 11 2d 44 54 fb 21 09 40 89 01 |.L.B...-DT.!.@..|
00000200 00 00 00 e8 76 48 37 42 95 01 00 00 48 44 9d 01 |....vH7B....HD..|
00000210 cd cc 4c c0 a0 01 80 d4 9f 93 01 aa 01 0c 0a 05 |..L.............|
00000220 70 6f 75 6e 64 15 00 00 80 41 b2 01 0a 0a 08 a2 |pound....A......|
00000230 06 05 0a 03 08 f7 03 |.......|
00000237
MESSAGE #1 AT 0x00000002
uuid: "a7522158-3d41-4b77-ad69-6c598ee55c49"
name: "Ivan"
surname: "Petrov"
gender: male
birthDate: 4015
photo: "png"
phoneNumber: "+74951234567\000"
isOnline: true
visitTime: 1546703100
age: 38
zodiacSign: capricorn
songs: "Yesterday"
songs: "Flowers"
color: 255
color: 0
color: 0
hometown: "Moscow"
location: 55.7532158
location: 37.6225052
pi: 3.14
lotteryWin: 214.1
someRatio: 0.1
temperature: 5.8
randomBigNumber: 17060000000
measureUnits {
unit: "meter"
coef: 1
}
measureUnits {
unit: "centimeter"
coef: 0.01
}
measureUnits {
unit: "kilometer"
coef: 1000
}
nestiness {
a {
b {
c {
d: 500
e: 501
e: 502
}
}
}
}
MESSAGE #2 AT 0x000000F7
uuid: "c694ad8a-f714-4ea3-907d-fd54fb25d9b5"
name: "Natalia"
surname: "Sokolova"
birthDate: 8102
photo: "jpg"
age: 26
zodiacSign: pisces
color: 100
color: 200
color: 50
hometown: "Plymouth"
location: 50.4037247
location: -4.14212322
pi: 3.14159
someRatio: 0.007
temperature: 5.4
randomBigNumber: -20000000000000
MESSAGE #3 AT 0x00000177
uuid: "a7da1aa6-f425-4789-8947-b034786ed374"
name: "Vasily"
surname: "Sidorov"
gender: male
birthDate: 9339
photo: "bmp"
phoneNumber: "+442012345678"
isOnline: true
visitTime: 1546117200
age: 23
zodiacSign: leo
songs: "Sunny"
color: 250
color: 244
color: 10
hometown: "Murmansk"
location: 68.9706802
location: 33.0749817
pi: 3.14159265358979
lotteryWin: 100000000000
someRatio: 800
temperature: -3.2
randomBigNumber: 154400000
measureUnits {
unit: "pound"
coef: 16
}
nestiness {
a {
b {
c {
d: 503
}
}
}
}
Binary representation is as expected
Roundtrip:
a7522158-3d41-4b77-ad69-6c598ee55c49 Ivan Petrov male 1980-12-29 png +74951234567\0 1 2019-01-05 18:45:00 38 capricorn ['Yesterday','Flowers'] [255,0,0] Moscow [55.753216,37.622504] 3.14 214.10 0.1 5.8 17060000000 ['meter','centimeter','kilometer'] [1,0.01,1000] 500 [501,502]
c694ad8a-f714-4ea3-907d-fd54fb25d9b5 Natalia Sokolova female 1992-03-08 jpg \N 0 \N 26 pisces [] [100,200,50] Plymouth [50.403724,-4.142123] 3.14159 \N 0.007 5.4 -20000000000000 [] [] \N []
a7da1aa6-f425-4789-8947-b034786ed374 Vasily Sidorov male 1995-07-28 bmp +442012345678 1 2018-12-30 00:00:00 23 leo ['Sunny'] [250,244,10] Murmansk [68.970680,33.074982] 3.14159265358979 100000000000.00 800 -3.2 154400000 ['pound'] [16] 503 []
Schema 00825_protobuf_format_persons:AltPerson
Binary representation:
00000000 c4 01 08 01 12 04 49 76 61 6e 28 87 a8 c4 9b 97 |......Ivan(.....|
00000010 02 52 06 50 65 74 72 6f 76 72 0c 00 00 7f 43 00 |.R.Petrovr....C.|
00000020 00 00 00 00 00 00 00 79 fc d0 30 5c 00 00 00 00 |.......y..0\....|
00000030 c8 02 0a c2 05 0c 00 00 80 3f 0a d7 23 3c 00 00 |.........?..#<..|
00000040 7a 44 9a 06 05 6d 65 74 65 72 9a 06 0a 63 65 6e |zD...meter...cen|
00000050 74 69 6d 65 74 65 72 9a 06 09 6b 69 6c 6f 6d 65 |timeter...kilome|
00000060 74 65 72 a1 06 00 00 00 a0 99 99 b9 3f a8 06 37 |ter.........?..7|
00000070 a8 06 25 bd 06 c3 f5 48 40 fa 06 02 33 38 90 08 |..%....H@...38..|
00000080 c6 09 e1 08 00 f1 da f8 03 00 00 00 b0 09 af 1f |................|
00000090 d0 0c d6 01 e2 12 24 61 37 35 32 32 31 35 38 2d |......$a7522158-|
000000a0 33 64 34 31 2d 34 62 37 37 2d 61 64 36 39 2d 36 |3d41-4b77-ad69-6|
000000b0 63 35 39 38 65 65 35 35 63 34 39 a0 38 f4 03 aa |c598ee55c49.8...|
000000c0 38 04 f5 03 f6 03 84 01 12 07 4e 61 74 61 6c 69 |8.........Natali|
000000d0 61 52 08 53 6f 6b 6f 6c 6f 76 61 72 0c 00 00 c8 |aR.Sokolovar....|
000000e0 42 00 00 48 43 00 00 48 42 c8 02 0a a1 06 00 00 |B..HC..HB.......|
000000f0 00 40 08 ac 7c 3f a8 06 32 a8 06 fc ff ff ff ff |.@..|?..2.......|
00000100 ff ff ff ff 01 b0 06 01 bd 06 d0 0f 49 40 fa 06 |............I@..|
00000110 02 32 36 90 08 db 01 e1 08 00 c0 1a 63 cf ed ff |.26.........c...|
00000120 ff b0 09 a6 3f e2 12 24 63 36 39 34 61 64 38 61 |....?..$c694ad8a|
00000130 2d 66 37 31 34 2d 34 65 61 33 2d 39 30 37 64 2d |-f714-4ea3-907d-|
00000140 66 64 35 34 66 62 32 35 64 39 62 35 a3 01 08 01 |fd54fb25d9b5....|
00000150 12 06 56 61 73 69 6c 79 28 ce ca f4 cf ee 0c 52 |..Vasily(......R|
00000160 07 53 69 64 6f 72 6f 76 72 0c 00 00 7a 43 00 00 |.Sidorovr...zC..|
00000170 74 43 00 00 20 41 79 50 e0 27 5c 00 00 00 00 c8 |tC.. AyP.'\.....|
00000180 02 05 c2 05 04 00 00 80 41 9a 06 05 70 6f 75 6e |........A...poun|
00000190 64 a1 06 00 00 00 00 00 00 89 40 a8 06 44 a8 06 |d.........@..D..|
000001a0 21 bd 06 db 0f 49 40 fa 06 02 32 33 90 08 d3 05 |!....I@...23....|
000001b0 e1 08 00 f5 33 09 00 00 00 00 b0 09 fb 48 d0 0c |....3........H..|
000001c0 80 d0 db c3 f4 02 e2 12 24 61 37 64 61 31 61 61 |........$a7da1aa|
000001d0 36 2d 66 34 32 35 2d 34 37 38 39 2d 38 39 34 37 |6-f425-4789-8947|
000001e0 2d 62 30 33 34 37 38 36 65 64 33 37 34 a0 38 f7 |-b034786ed374.8.|
000001f0 03 |.|
000001f1
MESSAGE #1 AT 0x00000002
isOnline: online
name: "Ivan"
phoneNumber: 74951234567
surname: "Petrov"
color: 255
color: 0
color: 0
visitTime: 1546703100
temperature: 5
measureUnits_coef: 1
measureUnits_coef: 0.01
measureUnits_coef: 1000
measureUnits_unit: "meter"
measureUnits_unit: "centimeter"
measureUnits_unit: "kilometer"
someRatio: 0.10000000149011612
location: 55
location: 37
pi: 3.14
age: "38"
zodiacSign: 1222
randomBigNumber: 17060000000
birthDate: 4015
lotteryWin: 214
uuid: "a7522158-3d41-4b77-ad69-6c598ee55c49"
nestiness_a_b_c_d: 500
nestiness_a_b_c_e: 501
nestiness_a_b_c_e: 502
MESSAGE #2 AT 0x000000C8
name: "Natalia"
surname: "Sokolova"
color: 100
color: 200
color: 50
temperature: 5
someRatio: 0.0070000002160668373
location: 50
location: -4
gender: female
pi: 3.14159
age: "26"
zodiacSign: 219
randomBigNumber: -20000000000000
birthDate: 8102
uuid: "c694ad8a-f714-4ea3-907d-fd54fb25d9b5"
MESSAGE #3 AT 0x0000014E
isOnline: online
name: "Vasily"
phoneNumber: 442012345678
surname: "Sidorov"
color: 250
color: 244
color: 10
visitTime: 1546117200
temperature: -3
measureUnits_coef: 16
measureUnits_unit: "pound"
someRatio: 800
location: 68
location: 33
pi: 3.14159274
age: "23"
zodiacSign: 723
randomBigNumber: 154400000
birthDate: 9339
lotteryWin: 100000000000
uuid: "a7da1aa6-f425-4789-8947-b034786ed374"
nestiness_a_b_c_d: 503
Binary representation is as expected
Roundtrip:
a7522158-3d41-4b77-ad69-6c598ee55c49 Ivan Petrov male 1980-12-29 \N 74951234567\0\0 1 2019-01-05 18:45:00 38 capricorn [] [255,0,0] [55.000000,37.000000] 3.140000104904175 214.00 0.1 5.0 17060000000 ['meter','centimeter','kilometer'] [1,0.01,1000] 500 [501,502]
c694ad8a-f714-4ea3-907d-fd54fb25d9b5 Natalia Sokolova female 1992-03-08 \N \N 0 \N 26 pisces [] [100,200,50] [50.000000,-4.000000] 3.141590118408203 \N 0.007 5.0 -20000000000000 [] [] \N []
a7da1aa6-f425-4789-8947-b034786ed374 Vasily Sidorov male 1995-07-28 \N 442012345678\0 1 2018-12-30 00:00:00 23 leo [] [250,244,10] [68.000000,33.000000] 3.1415927410125732 100000000000.00 800 -3.0 154400000 ['pound'] [16] 503 []
Schema 00825_protobuf_format_persons:StrPerson
Binary representation:
00000000 a7 02 0a 24 61 37 35 32 32 31 35 38 2d 33 64 34 |...$a7522158-3d4|
00000010 31 2d 34 62 37 37 2d 61 64 36 39 2d 36 63 35 39 |1-4b77-ad69-6c59|
00000020 38 65 65 35 35 63 34 39 12 04 49 76 61 6e 1a 06 |8ee55c49..Ivan..|
00000030 50 65 74 72 6f 76 22 04 6d 61 6c 65 2a 0a 31 39 |Petrov".male*.19|
00000040 38 30 2d 31 32 2d 32 39 3a 0d 2b 37 34 39 35 31 |80-12-29:.+74951|
00000050 32 33 34 35 36 37 00 42 01 31 4a 13 32 30 31 39 |234567.B.1J.2019|
00000060 2d 30 31 2d 30 35 20 31 38 3a 34 35 3a 30 30 52 |-01-05 18:45:00R|
00000070 02 33 38 5a 09 63 61 70 72 69 63 6f 72 6e 62 09 |.38Z.capricornb.|
00000080 59 65 73 74 65 72 64 61 79 62 07 46 6c 6f 77 65 |Yesterdayb.Flowe|
00000090 72 73 6a 03 32 35 35 6a 01 30 6a 01 30 72 06 4d |rsj.255j.0j.0r.M|
000000a0 6f 73 63 6f 77 7a 09 35 35 2e 37 35 33 32 31 35 |oscowz.55.753215|
000000b0 7a 09 33 37 2e 36 32 32 35 30 34 82 01 04 33 2e |z.37.622504...3.|
000000c0 31 34 8a 01 06 32 31 34 2e 31 30 92 01 03 30 2e |14...214.10...0.|
000000d0 31 9a 01 03 35 2e 38 a2 01 0b 31 37 30 36 30 30 |1...5.8...170600|
000000e0 30 30 30 30 30 aa 01 2d 0a 05 6d 65 74 65 72 0a |00000..-..meter.|
000000f0 0a 63 65 6e 74 69 6d 65 74 65 72 0a 09 6b 69 6c |.centimeter..kil|
00000100 6f 6d 65 74 65 72 12 01 31 12 04 30 2e 30 31 12 |ometer..1..0.01.|
00000110 04 31 30 30 30 b2 01 11 0a 0f 0a 03 35 30 30 12 |.1000.......500.|
00000120 03 35 30 31 12 03 35 30 32 b4 01 0a 24 63 36 39 |.501..502...$c69|
00000130 34 61 64 38 61 2d 66 37 31 34 2d 34 65 61 33 2d |4ad8a-f714-4ea3-|
00000140 39 30 37 64 2d 66 64 35 34 66 62 32 35 64 39 62 |907d-fd54fb25d9b|
00000150 35 12 07 4e 61 74 61 6c 69 61 1a 08 53 6f 6b 6f |5..Natalia..Soko|
00000160 6c 6f 76 61 22 06 66 65 6d 61 6c 65 2a 0a 31 39 |lova".female*.19|
00000170 39 32 2d 30 33 2d 30 38 42 01 30 52 02 32 36 5a |92-03-08B.0R.26Z|
00000180 06 70 69 73 63 65 73 6a 03 31 30 30 6a 03 32 30 |.piscesj.100j.20|
00000190 30 6a 02 35 30 72 08 50 6c 79 6d 6f 75 74 68 7a |0j.50r.Plymouthz|
000001a0 09 35 30 2e 34 30 33 37 32 34 7a 09 2d 34 2e 31 |.50.403724z.-4.1|
000001b0 34 32 31 32 33 82 01 07 33 2e 31 34 31 35 39 92 |42123...3.14159.|
000001c0 01 05 30 2e 30 30 37 9a 01 03 35 2e 34 a2 01 0f |..0.007...5.4...|
000001d0 2d 32 30 30 30 30 30 30 30 30 30 30 30 30 30 84 |-20000000000000.|
000001e0 02 0a 24 61 37 64 61 31 61 61 36 2d 66 34 32 35 |..$a7da1aa6-f425|
000001f0 2d 34 37 38 39 2d 38 39 34 37 2d 62 30 33 34 37 |-4789-8947-b0347|
00000200 38 36 65 64 33 37 34 12 06 56 61 73 69 6c 79 1a |86ed374..Vasily.|
00000210 07 53 69 64 6f 72 6f 76 22 04 6d 61 6c 65 2a 0a |.Sidorov".male*.|
00000220 31 39 39 35 2d 30 37 2d 32 38 3a 0d 2b 34 34 32 |1995-07-28:.+442|
00000230 30 31 32 33 34 35 36 37 38 42 01 31 4a 13 32 30 |012345678B.1J.20|
00000240 31 38 2d 31 32 2d 33 30 20 30 30 3a 30 30 3a 30 |18-12-30 00:00:0|
00000250 30 52 02 32 33 5a 03 6c 65 6f 62 05 53 75 6e 6e |0R.23Z.leob.Sunn|
00000260 79 6a 03 32 35 30 6a 03 32 34 34 6a 02 31 30 72 |yj.250j.244j.10r|
00000270 08 4d 75 72 6d 61 6e 73 6b 7a 09 36 38 2e 39 37 |.Murmanskz.68.97|
00000280 30 36 38 32 7a 09 33 33 2e 30 37 34 39 38 31 82 |0682z.33.074981.|
00000290 01 10 33 2e 31 34 31 35 39 32 36 35 33 35 38 39 |..3.141592653589|
000002a0 37 39 8a 01 0f 31 30 30 30 30 30 30 30 30 30 30 |79...10000000000|
000002b0 30 2e 30 30 92 01 03 38 30 30 9a 01 04 2d 33 2e |0.00...800...-3.|
000002c0 32 a2 01 09 31 35 34 34 30 30 30 30 30 aa 01 0b |2...154400000...|
000002d0 0a 05 70 6f 75 6e 64 12 02 31 36 b2 01 07 0a 05 |..pound..16.....|
000002e0 0a 03 35 30 33 |..503|
000002e5
MESSAGE #1 AT 0x00000002
uuid: "a7522158-3d41-4b77-ad69-6c598ee55c49"
name: "Ivan"
surname: "Petrov"
gender: "male"
birthDate: "1980-12-29"
phoneNumber: "+74951234567\000"
isOnline: "1"
visitTime: "2019-01-05 18:45:00"
age: "38"
zodiacSign: "capricorn"
songs: "Yesterday"
songs: "Flowers"
color: "255"
color: "0"
color: "0"
hometown: "Moscow"
location: "55.753215"
location: "37.622504"
pi: "3.14"
lotteryWin: "214.10"
someRatio: "0.1"
temperature: "5.8"
randomBigNumber: "17060000000"
measureUnits {
unit: "meter"
unit: "centimeter"
unit: "kilometer"
coef: "1"
coef: "0.01"
coef: "1000"
}
nestiness_a {
b_c {
d: "500"
e: "501"
e: "502"
}
}
MESSAGE #2 AT 0x0000012B
uuid: "c694ad8a-f714-4ea3-907d-fd54fb25d9b5"
name: "Natalia"
surname: "Sokolova"
gender: "female"
birthDate: "1992-03-08"
isOnline: "0"
age: "26"
zodiacSign: "pisces"
color: "100"
color: "200"
color: "50"
hometown: "Plymouth"
location: "50.403724"
location: "-4.142123"
pi: "3.14159"
someRatio: "0.007"
temperature: "5.4"
randomBigNumber: "-20000000000000"
MESSAGE #3 AT 0x000001E1
uuid: "a7da1aa6-f425-4789-8947-b034786ed374"
name: "Vasily"
surname: "Sidorov"
gender: "male"
birthDate: "1995-07-28"
phoneNumber: "+442012345678"
isOnline: "1"
visitTime: "2018-12-30 00:00:00"
age: "23"
zodiacSign: "leo"
songs: "Sunny"
color: "250"
color: "244"
color: "10"
hometown: "Murmansk"
location: "68.970682"
location: "33.074981"
pi: "3.14159265358979"
lotteryWin: "100000000000.00"
someRatio: "800"
temperature: "-3.2"
randomBigNumber: "154400000"
measureUnits {
unit: "pound"
coef: "16"
}
nestiness_a {
b_c {
d: "503"
}
}
Binary representation is as expected
Roundtrip:
a7522158-3d41-4b77-ad69-6c598ee55c49 Ivan Petrov male 1980-12-29 \N +74951234567\0 1 2019-01-05 18:45:00 38 capricorn ['Yesterday','Flowers'] [255,0,0] Moscow [55.753215,37.622504] 3.14 214.10 0.1 5.8 17060000000 ['meter','centimeter','kilometer'] [1,0.01,1000] 500 [501,502]
c694ad8a-f714-4ea3-907d-fd54fb25d9b5 Natalia Sokolova female 1992-03-08 \N \N 0 \N 26 pisces [] [100,200,50] Plymouth [50.403724,-4.142123] 3.14159 \N 0.007 5.4 -20000000000000 [] [] \N []
a7da1aa6-f425-4789-8947-b034786ed374 Vasily Sidorov male 1995-07-28 \N +442012345678 1 2018-12-30 00:00:00 23 leo ['Sunny'] [250,244,10] Murmansk [68.970682,33.074981] 3.14159265358979 100000000000.00 800 -3.2 154400000 ['pound'] [16] 503 []
Schema 00825_protobuf_format_syntax2:Syntax2Person
Binary representation:
00000000 f1 01 0a 24 61 37 35 32 32 31 35 38 2d 33 64 34 |...$a7522158-3d4|
00000010 31 2d 34 62 37 37 2d 61 64 36 39 2d 36 63 35 39 |1-4b77-ad69-6c59|
00000020 38 65 65 35 35 63 34 39 12 04 49 76 61 6e 1a 06 |8ee55c49..Ivan..|
00000030 50 65 74 72 6f 76 20 01 28 af 1f 32 03 70 6e 67 |Petrov .(..2.png|
00000040 3a 0d 2b 37 34 39 35 31 32 33 34 35 36 37 00 40 |:.+74951234567.@|
00000050 01 4d fc d0 30 5c 50 26 58 09 62 09 59 65 73 74 |.M..0\P&X.b.Yest|
00000060 65 72 64 61 79 62 07 46 6c 6f 77 65 72 73 68 ff |erdayb.Flowersh.|
00000070 01 68 00 68 00 72 06 4d 6f 73 63 6f 77 7a 08 4b |.h.h.r.Moscowz.K|
00000080 03 5f 42 72 7d 16 42 81 01 1f 85 eb 51 b8 1e 09 |._Br}.B.....Q...|
00000090 40 89 01 33 33 33 33 33 c3 6a 40 95 01 cd cc cc |@..33333.j@.....|
000000a0 3d 9d 01 9a 99 b9 40 a0 01 80 c4 d7 8d 7f ab 01 |=.....@.........|
000000b0 0d 00 00 80 3f 0d 0a d7 23 3c 0d 00 00 7a 44 12 |....?...#<...zD.|
000000c0 05 6d 65 74 65 72 12 0a 63 65 6e 74 69 6d 65 74 |.meter..centimet|
000000d0 65 72 12 09 6b 69 6c 6f 6d 65 74 65 72 ac 01 b3 |er..kilometer...|
000000e0 01 0b a2 06 0b 0b 08 f4 03 10 f5 03 10 f6 03 0c |................|
000000f0 0c b4 01 83 01 0a 24 63 36 39 34 61 64 38 61 2d |......$c694ad8a-|
00000100 66 37 31 34 2d 34 65 61 33 2d 39 30 37 64 2d 66 |f714-4ea3-907d-f|
00000110 64 35 34 66 62 32 35 64 39 62 35 12 07 4e 61 74 |d54fb25d9b5..Nat|
00000120 61 6c 69 61 1a 08 53 6f 6b 6f 6c 6f 76 61 20 00 |alia..Sokolova .|
00000130 28 a6 3f 32 03 6a 70 67 40 00 50 1a 58 0b 68 64 |(.?2.jpg@.P.X.hd|
00000140 68 c8 01 68 32 72 08 50 6c 79 6d 6f 75 74 68 7a |h..h2r.Plymouthz|
00000150 08 6a 9d 49 42 46 8c 84 c0 81 01 6e 86 1b f0 f9 |.j.IBF.....n....|
00000160 21 09 40 95 01 42 60 e5 3b 9d 01 cd cc ac 40 a0 |!.@..B`.;.....@.|
00000170 01 ff ff a9 ce 93 8c 09 c3 01 0a 24 61 37 64 61 |...........$a7da|
00000180 31 61 61 36 2d 66 34 32 35 2d 34 37 38 39 2d 38 |1aa6-f425-4789-8|
00000190 39 34 37 2d 62 30 33 34 37 38 36 65 64 33 37 34 |947-b034786ed374|
000001a0 12 06 56 61 73 69 6c 79 1a 07 53 69 64 6f 72 6f |..Vasily..Sidoro|
000001b0 76 20 01 28 fb 48 32 03 62 6d 70 3a 0d 2b 34 34 |v .(.H2.bmp:.+44|
000001c0 32 30 31 32 33 34 35 36 37 38 40 01 4d 50 e0 27 |2012345678@.MP.'|
000001d0 5c 50 17 58 04 62 05 53 75 6e 6e 79 68 fa 01 68 |\P.X.b.Sunnyh..h|
000001e0 f4 01 68 0a 72 08 4d 75 72 6d 61 6e 73 6b 7a 08 |..h.r.Murmanskz.|
000001f0 fd f0 89 42 c8 4c 04 42 81 01 11 2d 44 54 fb 21 |...B.L.B...-DT.!|
00000200 09 40 89 01 00 00 00 e8 76 48 37 42 95 01 00 00 |.@......vH7B....|
00000210 48 44 9d 01 cd cc 4c c0 a0 01 80 d4 9f 93 01 ab |HD....L.........|
00000220 01 0d 00 00 80 41 12 05 70 6f 75 6e 64 ac 01 b3 |.....A..pound...|
00000230 01 0b a2 06 05 0b 08 f7 03 0c 0c b4 01 |.............|
0000023d
MESSAGE #1 AT 0x00000002
uuid: "a7522158-3d41-4b77-ad69-6c598ee55c49"
name: "Ivan"
surname: "Petrov"
gender: male
birthDate: 4015
photo: "png"
phoneNumber: "+74951234567\000"
isOnline: true
visitTime: 1546703100
age: 38
zodiacSign: capricorn
songs: "Yesterday"
songs: "Flowers"
color: 255
color: 0
color: 0
hometown: "Moscow"
location: 55.7532158
location: 37.6225052
pi: 3.14
lotteryWin: 214.1
someRatio: 0.1
temperature: 5.8
randomBigNumber: 17060000000
MeasureUnits {
coef: 1
coef: 0.01
coef: 1000
unit: "meter"
unit: "centimeter"
unit: "kilometer"
}
Nestiness {
A {
b {
C {
d: 500
e: 501
e: 502
}
}
}
}
MESSAGE #2 AT 0x000000F5
uuid: "c694ad8a-f714-4ea3-907d-fd54fb25d9b5"
name: "Natalia"
surname: "Sokolova"
gender: female
birthDate: 8102
photo: "jpg"
isOnline: false
age: 26
zodiacSign: pisces
color: 100
color: 200
color: 50
hometown: "Plymouth"
location: 50.4037247
location: -4.14212322
pi: 3.14159
someRatio: 0.007
temperature: 5.4
randomBigNumber: -20000000000000
MESSAGE #3 AT 0x0000017A
uuid: "a7da1aa6-f425-4789-8947-b034786ed374"
name: "Vasily"
surname: "Sidorov"
gender: male
birthDate: 9339
photo: "bmp"
phoneNumber: "+442012345678"
isOnline: true
visitTime: 1546117200
age: 23
zodiacSign: leo
songs: "Sunny"
color: 250
color: 244
color: 10
hometown: "Murmansk"
location: 68.9706802
location: 33.0749817
pi: 3.14159265358979
lotteryWin: 100000000000
someRatio: 800
temperature: -3.2
randomBigNumber: 154400000
MeasureUnits {
coef: 16
unit: "pound"
}
Nestiness {
A {
b {
C {
d: 503
}
}
}
}
Binary representation is as expected
Roundtrip:
a7522158-3d41-4b77-ad69-6c598ee55c49 Ivan Petrov male 1980-12-29 png +74951234567\0 1 2019-01-05 18:45:00 38 capricorn ['Yesterday','Flowers'] [255,0,0] Moscow [55.753216,37.622504] 3.14 214.10 0.1 5.8 17060000000 ['meter','centimeter','kilometer'] [1,0.01,1000] 500 [501,502]
c694ad8a-f714-4ea3-907d-fd54fb25d9b5 Natalia Sokolova female 1992-03-08 jpg \N 0 \N 26 pisces [] [100,200,50] Plymouth [50.403724,-4.142123] 3.14159 \N 0.007 5.4 -20000000000000 [] [] \N []
a7da1aa6-f425-4789-8947-b034786ed374 Vasily Sidorov male 1995-07-28 bmp +442012345678 1 2018-12-30 00:00:00 23 leo ['Sunny'] [250,244,10] Murmansk [68.970680,33.074982] 3.14159265358979 100000000000.00 800 -3.2 154400000 ['pound'] [16] 503 []

View File

@ -0,0 +1,118 @@
#!/usr/bin/env bash
# To generate reference file for this test use the following commands:
# ninja ProtobufDelimitedMessagesSerializer
# build/utils/test-data-generator/ProtobufDelimitedMessagesSerializer
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
SCHEMADIR=$CURDIR/format_schemas
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
set -eo pipefail
# Run the client.
$CLICKHOUSE_CLIENT --multiquery <<EOF
DROP TABLE IF EXISTS persons_00825;
DROP TABLE IF EXISTS roundtrip_persons_00825;
DROP TABLE IF EXISTS alt_persons_00825;
DROP TABLE IF EXISTS str_persons_00825;
DROP TABLE IF EXISTS syntax2_persons_00825;
CREATE TABLE persons_00825 (uuid UUID,
name String,
surname String,
gender Enum8('male'=1, 'female'=0),
birthDate Date,
photo Nullable(String),
phoneNumber Nullable(FixedString(13)),
isOnline UInt8,
visitTime Nullable(DateTime),
age UInt8,
zodiacSign Enum16('aries'=321, 'taurus'=420, 'gemini'=521, 'cancer'=621, 'leo'=723, 'virgo'=823,
'libra'=923, 'scorpius'=1023, 'sagittarius'=1122, 'capricorn'=1222, 'aquarius'=120,
'pisces'=219),
songs Array(String),
color Array(UInt8),
hometown LowCardinality(String),
location Array(Decimal32(6)),
pi Nullable(Float64),
lotteryWin Nullable(Decimal64(2)),
someRatio Float32,
temperature Decimal32(1),
randomBigNumber Int64,
measureUnits Nested(unit String, coef Float32),
nestiness_a_b_c_d Nullable(UInt32),
\`nestiness_a_B.c_E\` Array(UInt32)
) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO persons_00825 VALUES (toUUID('a7522158-3d41-4b77-ad69-6c598ee55c49'), 'Ivan', 'Petrov', 'male', toDate('1980-12-29'), 'png', '+74951234567', 1, toDateTime('2019-01-05 18:45:00'), 38, 'capricorn', ['Yesterday', 'Flowers'], [255, 0, 0], 'Moscow', [55.753215, 37.622504], 3.14, 214.10, 0.1, 5.8, 17060000000, ['meter', 'centimeter', 'kilometer'], [1, 0.01, 1000], 500, [501, 502]);
INSERT INTO persons_00825 VALUES (toUUID('c694ad8a-f714-4ea3-907d-fd54fb25d9b5'), 'Natalia', 'Sokolova', 'female', toDate('1992-03-08'), 'jpg', NULL, 0, NULL, 26, 'pisces', [], [100, 200, 50], 'Plymouth', [50.403724, -4.142123], 3.14159, NULL, 0.007, 5.4, -20000000000000, [], [], NULL, []);
INSERT INTO persons_00825 VALUES (toUUID('a7da1aa6-f425-4789-8947-b034786ed374'), 'Vasily', 'Sidorov', 'male', toDate('1995-07-28'), 'bmp', '+442012345678', 1, toDateTime('2018-12-30 00:00:00'), 23, 'leo', ['Sunny'], [250, 244, 10], 'Murmansk', [68.970682, 33.074981], 3.14159265358979, 100000000000, 800, -3.2, 154400000, ['pound'], [16], 503, []);
SELECT * FROM persons_00825 ORDER BY name;
EOF
# Use schema 00825_protobuf_format_persons:Person
echo
echo "Schema 00825_protobuf_format_persons:Person"
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_persons.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM persons_00825 ORDER BY name FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_persons:Person'" > $BINARY_FILE_PATH
echo
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_persons:Person" --input "$BINARY_FILE_PATH"
echo
echo "Roundtrip:"
$CLICKHOUSE_CLIENT --query "CREATE TABLE roundtrip_persons_00825 AS persons_00825"
$CLICKHOUSE_CLIENT --query "INSERT INTO roundtrip_persons_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_persons:Person'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM roundtrip_persons_00825 ORDER BY name"
rm "$BINARY_FILE_PATH"
# Use schema 00825_protobuf_format_persons:AltPerson
echo
echo "Schema 00825_protobuf_format_persons:AltPerson"
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_persons.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM persons_00825 ORDER BY name FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_persons:AltPerson'" > $BINARY_FILE_PATH
echo
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_persons:AltPerson" --input "$BINARY_FILE_PATH"
echo
echo "Roundtrip:"
$CLICKHOUSE_CLIENT --query "CREATE TABLE alt_persons_00825 AS persons_00825"
$CLICKHOUSE_CLIENT --query "INSERT INTO alt_persons_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_persons:AltPerson'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM alt_persons_00825 ORDER BY name"
rm "$BINARY_FILE_PATH"
# Use schema 00825_protobuf_format_persons:StrPerson
echo
echo "Schema 00825_protobuf_format_persons:StrPerson"
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_persons.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM persons_00825 ORDER BY name FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_persons:StrPerson'" > $BINARY_FILE_PATH
echo
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_persons:StrPerson" --input "$BINARY_FILE_PATH"
echo
echo "Roundtrip:"
$CLICKHOUSE_CLIENT --query "CREATE TABLE str_persons_00825 AS persons_00825"
$CLICKHOUSE_CLIENT --query "INSERT INTO str_persons_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_persons:StrPerson'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM str_persons_00825 ORDER BY name"
rm "$BINARY_FILE_PATH"
# Use schema 00825_protobuf_format_syntax2:Syntax2Person
echo
echo "Schema 00825_protobuf_format_syntax2:Syntax2Person"
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_persons.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM persons_00825 ORDER BY name FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_persons_syntax2:Syntax2Person'" > $BINARY_FILE_PATH
echo
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_persons_syntax2:Syntax2Person" --input "$BINARY_FILE_PATH"
echo
echo "Roundtrip:"
$CLICKHOUSE_CLIENT --query "CREATE TABLE syntax2_persons_00825 AS persons_00825"
$CLICKHOUSE_CLIENT --query "INSERT INTO syntax2_persons_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_persons_syntax2:Syntax2Person'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM syntax2_persons_00825 ORDER BY name"
rm "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --multiquery <<EOF
DROP TABLE persons_00825;
DROP TABLE roundtrip_persons_00825;
DROP TABLE alt_persons_00825;
DROP TABLE str_persons_00825;
DROP TABLE syntax2_persons_00825;
EOF

View File

@ -0,0 +1,24 @@
2 4
0 0
3 9
Binary representation:
00000000 04 08 02 10 04 00 04 08 03 10 09 |...........|
0000000b
MESSAGE #1 AT 0x00000001
number: 2
square: 4
MESSAGE #2 AT 0x00000006
MESSAGE #3 AT 0x00000007
number: 3
square: 9
Binary representation is as expected
2 4
0 0
3 9
2 4
0 0
3 9

View File

@ -0,0 +1,34 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
SCHEMADIR=$CURDIR/format_schemas
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
set -eo pipefail
# Run the client.
$CLICKHOUSE_CLIENT --multiquery <<EOF
DROP TABLE IF EXISTS squares_protobuf_00825;
CREATE TABLE squares_protobuf_00825 (number UInt32, square UInt64) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO squares_protobuf_00825 VALUES (2, 4), (0, 0), (3, 9);
SELECT * FROM squares_protobuf_00825;
EOF
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_nested_optional.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM squares_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_squares:NumberAndSquare'" > "$BINARY_FILE_PATH"
# Check the output in the protobuf format
echo
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_squares:NumberAndSquare" --input "$BINARY_FILE_PATH"
# Check the input in the protobuf format (now the table contains the same data twice).
echo
$CLICKHOUSE_CLIENT --query "INSERT INTO squares_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_squares:NumberAndSquare'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM squares_protobuf_00825"
rm "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "DROP TABLE squares_protobuf_00825"

View File

@ -1,13 +1,14 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
SCHEMADIR=$CURDIR/format_schemas
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
set -eo pipefail
# Run the client.
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
$CLICKHOUSE_CLIENT --multiquery <<EOF
DROP TABLE IF EXISTS table_default_protobuf_00825;
CREATE TABLE table_default_protobuf_00825
@ -24,15 +25,16 @@ SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z;
EOF
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_table_default.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_table_default:Message'" > "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_table_default:Message'" > "$BINARY_FILE_PATH"
# Check the output in the protobuf format
echo
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_table_default:Message" --input "$BINARY_FILE_PATH"
$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_table_default:Message" --input "$BINARY_FILE_PATH"
# Check the input in the protobuf format (now the table contains the same data twice).
echo
$CLICKHOUSE_CLIENT --query "INSERT INTO table_default_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_table_default:Message'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "INSERT INTO table_default_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_table_default:Message'" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z"
rm "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "DROP TABLE table_default_protobuf_00825"

View File

@ -1,8 +1,8 @@
2 worl [123,1,1] 49 \N 50 4950 Nullable(UInt64) 50
2 worl [123,1,1] 49 \N 50 4950 Nullable(UInt64) 50
2 worl 1 1 0 4950 99 [123,1,1] 49 \N 50 4950 Nullable(UInt64) 50
2 worl 1 1 0 4950 99 [123,1,1] 49 \N 50 4950 Nullable(UInt64) 50
arraySort(used_aggregate_functions)
['avg','count','groupBitAnd','sum','uniq']
['avg','count','groupBitAnd','max','sum','uniq']
arraySort(used_aggregate_function_combinators)
['Array','If','OrDefault','OrNull']
@ -11,7 +11,7 @@ arraySort(used_table_functions)
['numbers']
arraySort(used_functions)
['CAST','addDays','array','arrayFlatten','modulo','plus','substring','toDate','toDayOfYear','toTypeName','toWeek']
['CAST','CRC32','addDays','array','arrayFlatten','modulo','plus','pow','round','substring','tanh','toDate','toDayOfYear','toTypeName','toWeek']
arraySort(used_data_type_families)
['Array','Int32','Nullable','String']
@ -20,5 +20,5 @@ used_database_engines
['Atomic']
arraySort(used_data_type_families) used_storages
['DateTime','Int64'] ['Memory']
['Int64','datetime'] ['Memory']

View File

@ -2,6 +2,8 @@ SET database_atomic_wait_for_drop_and_detach_synchronously=1;
SELECT uniqArray([1, 1, 2]),
SUBSTRING('Hello, world', 7, 5),
POW(1, 2), ROUND(TANh(1)), CrC32(''),
SUM(number), MAX(number),
flatten([[[BIT_AND(123)]], [[mod(3, 2)], [CAST('1' AS INTEGER)]]]),
week(toDate('2000-12-05')),
CAST(arrayJoin([NULL, NULL]) AS Nullable(TEXT)),
@ -49,7 +51,7 @@ WHERE current_database = currentDatabase() AND type == 'QueryFinish' AND (query
ORDER BY query_start_time DESC LIMIT 1 FORMAT TabSeparatedWithNames;
SELECT '';
CREATE OR REPLACE TABLE test_query_log_factories_info1.memory_table (id BIGINT, date DateTime) ENGINE=Memory();
CREATE OR REPLACE TABLE test_query_log_factories_info1.memory_table (id BIGINT, date DATETIME) ENGINE=Memory();
SYSTEM FLUSH LOGS;
SELECT arraySort(used_data_type_families), used_storages

View File

@ -0,0 +1 @@
yyy

View File

@ -0,0 +1,9 @@
drop table if exists join_tbl;
create table join_tbl (`id` String, `name` String) engine Join(any, left, id);
insert into join_tbl values ('xxx', 'yyy');
select joinGet('join_tbl', 'name', toLowCardinality('xxx'));
drop table if exists join_tbl;

View File

@ -0,0 +1,2 @@
A
\N

View File

@ -0,0 +1,5 @@
drop table if exists test_num;
create table test_enum (c Nullable(Enum16('A' = 1, 'B' = 2))) engine Log;
insert into test_enum values (1), (NULL);
select * from test_enum;
drop table if exists test_num;

View File

@ -0,0 +1,3 @@
{1:'Ready',2:'Steady',3:'Go'}
{1:'Ready',2:'Steady',3:'Go'}
{1:'Ready',2:'Steady',3:'Go'}

View File

@ -0,0 +1,3 @@
SELECT CAST((['1', '2', '3'], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map;
SELECT CAST((['1', '2', '3'], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map;
SELECT CAST((['1', '2', '3'], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map;

View File

@ -0,0 +1,11 @@
syntax = "proto3";
message MessageType {
message XType {
message YType {
int32 z=3;
};
repeated YType y=2;
};
repeated XType x=1;
};

View File

@ -0,0 +1,6 @@
syntax = "proto3";
message Message {
int32 x = 1;
string str = 2;
};

Some files were not shown because too many files have changed in this diff Show More