Mirror of https://github.com/ClickHouse/ClickHouse.git
Merge branch 'master' into to-start-of-interval-hour-align

Commit: 7fcae60466
diff --git a/.gitmodules b/.gitmodules
@@ -93,7 +93,7 @@
 	url = https://github.com/ClickHouse-Extras/libunwind.git
 [submodule "contrib/simdjson"]
 	path = contrib/simdjson
-	url = https://github.com/ClickHouse-Extras/simdjson.git
+	url = https://github.com/simdjson/simdjson.git
 [submodule "contrib/rapidjson"]
 	path = contrib/rapidjson
 	url = https://github.com/ClickHouse-Extras/rapidjson
@@ -1105,11 +1105,11 @@ public:
     }

     template <typename DateOrTime>
-    inline LUTIndex addMonthsIndex(DateOrTime v, Int64 delta) const
+    inline LUTIndex NO_SANITIZE_UNDEFINED addMonthsIndex(DateOrTime v, Int64 delta) const
     {
         const Values & values = lut[toLUTIndex(v)];

-        Int64 month = static_cast<Int64>(values.month) + delta;
+        Int64 month = values.month + delta;

         if (month > 0)
         {
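For orientation, a minimal standalone sketch (assumptions: a hypothetical `addMonths` helper, 1-based months, plain year/month arithmetic with no LUT) of what the month arithmetic above does. With a huge `delta` the sum `month + delta` can overflow `Int64`, which is the kind of undefined behavior the newly added `NO_SANITIZE_UNDEFINED` annotation opts this function out of sanitizing:

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative helper, not the ClickHouse function: shift the 1-based month
// to 0-based, floor-divide by 12 to move the year, then shift back.
static void addMonths(int64_t year, int64_t month, int64_t delta,
                      int64_t & out_year, int64_t & out_month)
{
    int64_t m = month + delta;                 // may leave the 1..12 range
    int64_t zero_based = m - 1;
    int64_t year_shift = zero_based >= 0 ? zero_based / 12
                                         : (zero_based - 11) / 12;  // floor division
    out_year = year + year_shift;
    out_month = zero_based - year_shift * 12 + 1;
}

int main()
{
    int64_t y = 0, m = 0;
    addMonths(2021, 1, -1, y, m);  // January 2021 minus one month
    std::printf("%lld-%02lld\n", (long long) y, (long long) m);  // prints 2020-12
}
```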
diff --git a/contrib/NuRaft b/contrib/NuRaft
@@ -1 +1 @@
-Subproject commit 3d3683e77753cfe015a05fae95ddf418e19f59e1
+Subproject commit 70468326ad5d72e9497944838484c591dae054ea
diff --git a/contrib/replxx b/contrib/replxx
@@ -1 +1 @@
-Subproject commit cdb6e3f2ce4464225daf9c8beeae7db98d590bdc
+Subproject commit 2b24f14594d7606792b92544bb112a6322ba34d7
diff --git a/contrib/simdjson b/contrib/simdjson
@@ -1 +1 @@
-Subproject commit 3190d66a49059092a1753dc35595923debfc1698
+Subproject commit 95b4870e20be5f97d9dcf63b23b1c6f520c366c1
@@ -18,6 +18,7 @@ RUN apt-get update \
         clickhouse-client=$version \
         clickhouse-common-static=$version \
         locales \
+        tzdata \
     && rm -rf /var/lib/apt/lists/* /var/cache/debconf \
     && apt-get clean

@@ -32,6 +32,7 @@ RUN groupadd -r clickhouse --gid=101 \
         clickhouse-server=$version \
         locales \
         wget \
+        tzdata \
     && rm -rf \
         /var/lib/apt/lists/* \
         /var/cache/debconf \
@@ -21,7 +21,9 @@ RUN addgroup -S -g 101 clickhouse \
    && chown clickhouse:clickhouse /var/lib/clickhouse \
    && chown root:clickhouse /var/log/clickhouse-server \
    && chmod +x /entrypoint.sh \
-   && apk add --no-cache su-exec bash \
+   && apk add --no-cache su-exec bash tzdata \
+   && cp /usr/share/zoneinfo/UTC /etc/localtime \
+   && echo "UTC" > /etc/timezone \
    && chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client

 # we need to allow "others" access to clickhouse folder, because docker container
@@ -46,9 +46,11 @@ DATA_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --
 TMP_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=tmp_path || true)"
 USER_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=user_files_path || true)"
 LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.log || true)"
-LOG_DIR="$(dirname "$LOG_PATH" || true)"
+LOG_DIR=""
+if [ -n "$LOG_PATH" ]; then LOG_DIR="$(dirname "$LOG_PATH")"; fi
 ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.errorlog || true)"
-ERROR_LOG_DIR="$(dirname "$ERROR_LOG_PATH" || true)"
+ERROR_LOG_DIR=""
+if [ -n "$ERROR_LOG_PATH" ]; then ERROR_LOG_DIR="$(dirname "$ERROR_LOG_PATH")"; fi
 FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=format_schema_path || true)"

 CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
@@ -292,6 +292,7 @@ function run_tests
             01318_decrypt # Depends on OpenSSL
             01663_aes_msan # Depends on OpenSSL
             01667_aes_args_check # Depends on OpenSSL
+            01776_decrypt_aead_size_check # Depends on OpenSSL
             01281_unsucceeded_insert_select_queries_counter
             01292_create_user
             01294_lazy_database_concurrent
@@ -266,14 +266,13 @@ for query_index in queries_to_run:

        try:
            # Will also detect too long queries during warmup stage
-            res = c.execute(q, query_id = prewarm_id, settings = {'max_execution_time': 10})
+            res = c.execute(q, query_id = prewarm_id, settings = {'max_execution_time': args.max_query_seconds})
        except clickhouse_driver.errors.Error as e:
            # Add query id to the exception to make debugging easier.
            e.args = (prewarm_id, *e.args)
            e.message = prewarm_id + ': ' + e.message
            raise
-
        print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
    except KeyboardInterrupt:
        raise
@ -320,7 +319,7 @@ for query_index in queries_to_run:
|
|||||||
|
|
||||||
for conn_index, c in enumerate(this_query_connections):
|
for conn_index, c in enumerate(this_query_connections):
|
||||||
try:
|
try:
|
||||||
res = c.execute(q, query_id = run_id)
|
res = c.execute(q, query_id = run_id, settings = {'max_execution_time': args.max_query_seconds})
|
||||||
except clickhouse_driver.errors.Error as e:
|
except clickhouse_driver.errors.Error as e:
|
||||||
# Add query id to the exception to make debugging easier.
|
# Add query id to the exception to make debugging easier.
|
||||||
e.args = (run_id, *e.args)
|
e.args = (run_id, *e.args)
|
||||||
@@ -2,7 +2,6 @@
 FROM ubuntu:20.04

 RUN apt-get update --yes && env DEBIAN_FRONTEND=noninteractive apt-get install wget unzip git openjdk-14-jdk maven python3 --yes --no-install-recommends

 RUN wget https://github.com/sqlancer/sqlancer/archive/master.zip -O /sqlancer.zip
 RUN mkdir /sqlancer && \
     cd /sqlancer && \
@@ -3,7 +3,7 @@ toc_priority: 8
 toc_title: PostgreSQL
 ---

-# PosgtreSQL {#postgresql}
+# PostgreSQL {#postgresql}

 The PostgreSQL engine allows you to perform `SELECT` queries on data that is stored on a remote PostgreSQL server.

@@ -19,7 +19,7 @@ $ echo '{"foo":"bar"}' | curl 'http://localhost:8123/?query=INSERT%20INTO%20test
 Using [CLI interface](../../interfaces/cli.md):

 ``` bash
-$ echo '{"foo":"bar"}' | clickhouse-client ---query="INSERT INTO test FORMAT JSONEachRow"
+$ echo '{"foo":"bar"}' | clickhouse-client --query="INSERT INTO test FORMAT JSONEachRow"
 ```

 Instead of inserting data manually, you might consider to use one of [client libraries](../../interfaces/index.md) instead.
@@ -50,7 +50,7 @@ The supported formats are:
 | [Parquet](#data-format-parquet) | ✔ | ✔ |
 | [Arrow](#data-format-arrow) | ✔ | ✔ |
 | [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
-| [ORC](#data-format-orc) | ✔ | ✗ |
+| [ORC](#data-format-orc) | ✔ | ✔ |
 | [RowBinary](#rowbinary) | ✔ | ✔ |
 | [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
 | [Native](#native) | ✔ | ✔ |
@ -1284,32 +1284,33 @@ To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-e
|
|||||||
|
|
||||||
## ORC {#data-format-orc}
|
## ORC {#data-format-orc}
|
||||||
|
|
||||||
[Apache ORC](https://orc.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. You can only insert data in this format to ClickHouse.
|
[Apache ORC](https://orc.apache.org/) is a columnar storage format widespread in the [Hadoop](https://hadoop.apache.org/) ecosystem.
|
||||||
|
|
||||||
### Data Types Matching {#data_types-matching-3}
|
### Data Types Matching {#data_types-matching-3}
|
||||||
|
|
||||||
The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` queries.
|
The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
|
||||||
|
|
||||||
| ORC data type (`INSERT`) | ClickHouse data type |
|
| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
|
||||||
|--------------------------|-----------------------------------------------------|
|
|--------------------------|-----------------------------------------------------|--------------------------|
|
||||||
| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) |
|
| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) | `UINT8` |
|
||||||
| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) |
|
| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | `INT8` |
|
||||||
| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) |
|
| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | `UINT16` |
|
||||||
| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) |
|
| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | `INT16` |
|
||||||
| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) |
|
| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | `UINT32` |
|
||||||
| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) |
|
| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | `INT32` |
|
||||||
| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) |
|
| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | `UINT64` |
|
||||||
| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) |
|
| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | `INT64` |
|
||||||
| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) |
|
| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` |
|
||||||
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) |
|
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` |
|
||||||
| `DATE32` | [Date](../sql-reference/data-types/date.md) |
|
| `DATE32` | [Date](../sql-reference/data-types/date.md) | `DATE32` |
|
||||||
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) |
|
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` |
|
||||||
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) |
|
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
|
||||||
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) |
|
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
|
||||||
|
| `-` | [Array](../sql-reference/data-types/array.md) | `LIST` |
|
||||||
|
|
||||||
ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the ORC `DECIMAL` type as the ClickHouse `Decimal128` type.
|
ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the ORC `DECIMAL` type as the ClickHouse `Decimal128` type.
|
||||||
|
|
||||||
Unsupported ORC data types: `DATE32`, `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
|
Unsupported ORC data types: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
|
||||||
|
|
||||||
The data types of ClickHouse table columns don’t have to match the corresponding ORC data fields. When inserting data, ClickHouse interprets data types according to the table above and then [casts](../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) the data to the data type set for the ClickHouse table column.
|
The data types of ClickHouse table columns don’t have to match the corresponding ORC data fields. When inserting data, ClickHouse interprets data types according to the table above and then [casts](../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) the data to the data type set for the ClickHouse table column.
|
||||||
|
|
||||||
@ -1321,6 +1322,14 @@ You can insert ORC data from a file into ClickHouse table by the following comma
|
|||||||
$ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC"
|
$ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Selecting Data {#selecting-data-2}
|
||||||
|
|
||||||
|
You can select data from a ClickHouse table and save them into some file in the ORC format by the following command:
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
$ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.orc}
|
||||||
|
```
|
||||||
|
|
||||||
To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-engines/integrations/hdfs.md).
|
To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-engines/integrations/hdfs.md).
|
||||||
|
|
||||||
## LineAsString {#lineasstring}
|
## LineAsString {#lineasstring}
|
||||||
@@ -9,7 +9,7 @@ Columns:
 -   `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — the number of times this error has been happened.
 -   `last_error_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — time when the last error happened.
 -   `last_error_message` ([String](../../sql-reference/data-types/string.md)) — message for the last error.
--   `last_error_stacktrace` ([String](../../sql-reference/data-types/string.md)) — stacktrace for the last error.
+-   `last_error_trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — A [stack trace](https://en.wikipedia.org/wiki/Stack_trace) which represents a list of physical addresses where the called methods are stored.
 -   `remote` ([UInt8](../../sql-reference/data-types/int-uint.md)) — remote exception (i.e. received during one of the distributed query).

 **Example**
@ -25,3 +25,12 @@ LIMIT 1
|
|||||||
│ CANNOT_OPEN_FILE │ 76 │ 1 │
|
│ CANNOT_OPEN_FILE │ 76 │ 1 │
|
||||||
└──────────────────┴──────┴───────┘
|
└──────────────────┴──────┴───────┘
|
||||||
```
|
```
|
||||||
|
|
||||||
|
``` sql
|
||||||
|
WITH arrayMap(x -> demangle(addressToSymbol(x)), last_error_trace) AS all
|
||||||
|
SELECT name, arrayStringConcat(all, '\n') AS res
|
||||||
|
FROM system.errors
|
||||||
|
LIMIT 1
|
||||||
|
SETTINGS allow_introspection_functions=1\G
|
||||||
|
```
|
||||||
|
|
||||||
|
@@ -320,8 +320,6 @@ Similar to `cache`, but stores data on SSD and index in RAM.
         <write_buffer_size>1048576</write_buffer_size>
         <!-- Path where cache file will be stored. -->
         <path>/var/lib/clickhouse/clickhouse_dictionaries/test_dict</path>
-        <!-- Max number on stored keys in the cache. Rounded up to a power of two. -->
-        <max_stored_keys>1048576</max_stored_keys>
     </ssd_cache>
 </layout>
 ```
@@ -329,8 +327,8 @@ Similar to `cache`, but stores data on SSD and index in RAM.
 or

 ``` sql
-LAYOUT(CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576
-    PATH /var/lib/clickhouse/clickhouse_dictionaries/test_dict MAX_STORED_KEYS 1048576))
+LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576
+    PATH /var/lib/clickhouse/clickhouse_dictionaries/test_dict))
 ```

 ### complex_key_ssd_cache {#complex-key-ssd-cache}
@@ -23,7 +23,9 @@ ClickHouse supports the standard grammar for defining windows and window functio
 | `GROUPS` frame | not supported |
 | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported |
 | `rank()`, `dense_rank()`, `row_number()` | supported |
-| `lag/lead(value, offset)` | not supported, replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`|
+| `lag/lead(value, offset)` | Not supported. Workarounds: |
+| | 1) replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`|
+| | 2) use `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |

 ## References

@@ -49,7 +49,7 @@ ClickHouse может принимать (`INSERT`) и отдавать (`SELECT
 | [Parquet](#data-format-parquet) | ✔ | ✔ |
 | [Arrow](#data-format-arrow) | ✔ | ✔ |
 | [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
-| [ORC](#data-format-orc) | ✔ | ✗ |
+| [ORC](#data-format-orc) | ✔ | ✔ |
 | [RowBinary](#rowbinary) | ✔ | ✔ |
 | [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
 | [Native](#native) | ✔ | ✔ |
|
|||||||
|
|
||||||
## ORC {#data-format-orc}
|
## ORC {#data-format-orc}
|
||||||
|
|
||||||
[Apache ORC](https://orc.apache.org/) - это column-oriented формат данных, распространённый в экосистеме Hadoop. Вы можете только вставлять данные этого формата в ClickHouse.
|
[Apache ORC](https://orc.apache.org/) — это столбцовый формат данных, распространенный в экосистеме [Hadoop](https://hadoop.apache.org/).
|
||||||
|
|
||||||
### Соответствие типов данных {#sootvetstvie-tipov-dannykh-1}
|
### Соответствие типов данных {#sootvetstvie-tipov-dannykh-1}
|
||||||
|
|
||||||
Таблица показывает поддержанные типы данных и их соответствие [типам данных](../sql-reference/data-types/index.md) ClickHouse для запросов `INSERT`.
|
Таблица ниже содержит поддерживаемые типы данных и их соответствие [типам данных](../sql-reference/data-types/index.md) ClickHouse для запросов `INSERT` и `SELECT`.
|
||||||
|
|
||||||
| Тип данных ORC (`INSERT`) | Тип данных ClickHouse |
|
| Тип данных ORC (`INSERT`) | Тип данных ClickHouse | Тип данных ORC (`SELECT`) |
|
||||||
|---------------------------|-----------------------------------------------------|
|
|---------------------------|-----------------------------------------------------|---------------------------|
|
||||||
| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) |
|
| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) | `UINT8` |
|
||||||
| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) |
|
| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | `INT8` |
|
||||||
| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) |
|
| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | `UINT16` |
|
||||||
| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) |
|
| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | `INT16` |
|
||||||
| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) |
|
| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | `UINT32` |
|
||||||
| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) |
|
| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | `INT32` |
|
||||||
| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) |
|
| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | `UINT64` |
|
||||||
| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) |
|
| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | `INT64` |
|
||||||
| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) |
|
| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` |
|
||||||
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) |
|
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` |
|
||||||
| `DATE32` | [Date](../sql-reference/data-types/date.md) |
|
| `DATE32` | [Date](../sql-reference/data-types/date.md) | `DATE32` |
|
||||||
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) |
|
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` |
|
||||||
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) |
|
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
|
||||||
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) |
|
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
|
||||||
|
| `-` | [Array](../sql-reference/data-types/array.md) | `LIST` |
|
||||||
|
|
||||||
ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При обработке запроса `INSERT`, ClickHouse обрабатывает тип данных Parquet `DECIMAL` как `Decimal128`.
|
ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При обработке запроса `INSERT`, ClickHouse обрабатывает тип данных ORC `DECIMAL` как `Decimal128`.
|
||||||
|
|
||||||
Неподдержанные типы данных ORC: `DATE32`, `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
|
Неподдерживаемые типы данных ORC: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
|
||||||
|
|
||||||
Типы данных столбцов в таблицах ClickHouse могут отличаться от типов данных для соответствующих полей ORC. При вставке данных, ClickHouse интерпретирует типы данных ORC согласно таблице соответствия, а затем [приводит](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) данные к типу, установленному для столбца таблицы ClickHouse.
|
Типы данных столбцов в таблицах ClickHouse могут отличаться от типов данных для соответствующих полей ORC. При вставке данных ClickHouse интерпретирует типы данных ORC согласно таблице соответствия, а затем [приводит](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) данные к типу, установленному для столбца таблицы ClickHouse.
|
||||||
|
|
||||||
### Вставка данных {#vstavka-dannykh-1}
|
### Вставка данных {#vstavka-dannykh-1}
|
||||||
|
|
||||||
Данные ORC можно вставить в таблицу ClickHouse командой:
|
Чтобы вставить в ClickHouse данные из файла в формате ORC, используйте команду следующего вида:
|
||||||
|
|
||||||
``` bash
|
``` bash
|
||||||
$ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC"
|
$ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC"
|
||||||
```
|
```
|
||||||
|
|
||||||
Для обмена данных с Hadoop можно использовать [движок таблиц HDFS](../engines/table-engines/integrations/hdfs.md).
|
### Вывод данных {#vyvod-dannykh-1}
|
||||||
|
|
||||||
|
Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата ORC, используйте команду следующего вида:
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
$ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.orc}
|
||||||
|
```
|
||||||
|
|
||||||
|
Для обмена данных с экосистемой Hadoop вы можете использовать [движок таблиц HDFS](../engines/table-engines/integrations/hdfs.md).
|
||||||
|
|
||||||
## LineAsString {#lineasstring}
|
## LineAsString {#lineasstring}
|
||||||
|
|
||||||
|
@@ -318,8 +318,6 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000))
         <write_buffer_size>1048576</write_buffer_size>
         <!-- Path where cache file will be stored. -->
         <path>/var/lib/clickhouse/clickhouse_dictionaries/test_dict</path>
-        <!-- Max number on stored keys in the cache. Rounded up to a power of two. -->
-        <max_stored_keys>1048576</max_stored_keys>
     </ssd_cache>
 </layout>
 ```
@@ -327,8 +325,8 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000))
 или

 ``` sql
-LAYOUT(CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576
-    PATH /var/lib/clickhouse/clickhouse_dictionaries/test_dict MAX_STORED_KEYS 1048576))
+LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576
+    PATH /var/lib/clickhouse/clickhouse_dictionaries/test_dict))
 ```

 ### complex_key_ssd_cache {#complex-key-ssd-cache}
@@ -672,7 +672,7 @@ neighbor(column, offset[, default_value])
 Функция может получить доступ к значению в столбце соседней строки только внутри обрабатываемого в данный момент блока данных.

 Порядок строк, используемый при вычислении функции `neighbor`, может отличаться от порядка строк, возвращаемых пользователю.
-Чтобы этого не случилось, вы можете сделать подзапрос с [ORDER BY](../../sql-reference/statements/select/order-by.md) и вызвать функцию изне подзапроса.
+Чтобы этого не случилось, вы можете сделать подзапрос с [ORDER BY](../../sql-reference/statements/select/order-by.md) и вызвать функцию извне подзапроса.

 **Аргументы**

@@ -26,7 +26,7 @@ numpy==1.19.2
 Pygments==2.5.2
 pymdown-extensions==8.0
 python-slugify==4.0.1
-PyYAML==5.3.1
+PyYAML==5.4.1
 repackage==0.7.3
 requests==2.24.0
 singledispatch==3.4.0.3
@@ -8,10 +8,10 @@
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
 #include <IO/copyData.h>
+#include <IO/TimeoutSetter.h>
 #include <DataStreams/NativeBlockInputStream.h>
 #include <DataStreams/NativeBlockOutputStream.h>
 #include <Client/Connection.h>
-#include <Client/TimeoutSetter.h>
 #include <Common/ClickHouseRevision.h>
 #include <Common/Exception.h>
 #include <Common/NetException.h>
@@ -16,7 +16,6 @@ SRCS(
     HedgedConnections.cpp
     HedgedConnectionsFactory.cpp
     MultiplexedConnections.cpp
-    TimeoutSetter.cpp

 )

@@ -560,7 +560,7 @@ namespace DB
 {
 namespace ErrorCodes
 {
-#define M(VALUE, NAME) extern const Value NAME = VALUE;
+#define M(VALUE, NAME) extern const ErrorCode NAME = VALUE;
     APPLY_FOR_ERROR_CODES(M)
 #undef M

|
|||||||
|
|
||||||
ErrorCode end() { return END + 1; }
|
ErrorCode end() { return END + 1; }
|
||||||
|
|
||||||
void increment(ErrorCode error_code, bool remote, const std::string & message, const std::string & stacktrace)
|
void increment(ErrorCode error_code, bool remote, const std::string & message, const FramePointers & trace)
|
||||||
{
|
{
|
||||||
if (error_code >= end())
|
if (error_code >= end())
|
||||||
{
|
{
|
||||||
@ -596,10 +596,10 @@ namespace ErrorCodes
|
|||||||
error_code = end() - 1;
|
error_code = end() - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
values[error_code].increment(remote, message, stacktrace);
|
values[error_code].increment(remote, message, trace);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ErrorPairHolder::increment(bool remote, const std::string & message, const std::string & stacktrace)
|
void ErrorPairHolder::increment(bool remote, const std::string & message, const FramePointers & trace)
|
||||||
{
|
{
|
||||||
const auto now = std::chrono::system_clock::now();
|
const auto now = std::chrono::system_clock::now();
|
||||||
|
|
||||||
@ -609,7 +609,7 @@ namespace ErrorCodes
|
|||||||
|
|
||||||
++error.count;
|
++error.count;
|
||||||
error.message = message;
|
error.message = message;
|
||||||
error.stacktrace = stacktrace;
|
error.trace = trace;
|
||||||
error.error_time_ms = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count();
|
error.error_time_ms = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count();
|
||||||
}
|
}
|
||||||
ErrorPair ErrorPairHolder::get()
|
ErrorPair ErrorPairHolder::get()
|
||||||
|
@@ -1,11 +1,12 @@
 #pragma once

-#include <stddef.h>
+#include <cstddef>
 #include <cstdint>
 #include <utility>
 #include <mutex>
-#include <common/types.h>
 #include <string_view>
+#include <vector>
+#include <common/types.h>

 /** Allows to count number of simultaneously happening error codes.
  * See also Exception.cpp for incrementing part.
|
|||||||
/// ErrorCode identifier (index in array).
|
/// ErrorCode identifier (index in array).
|
||||||
using ErrorCode = int;
|
using ErrorCode = int;
|
||||||
using Value = size_t;
|
using Value = size_t;
|
||||||
|
using FramePointers = std::vector<void *>;
|
||||||
|
|
||||||
/// Get name of error_code by identifier.
|
/// Get name of error_code by identifier.
|
||||||
/// Returns statically allocated string.
|
/// Returns statically allocated string.
|
||||||
@ -33,7 +35,7 @@ namespace ErrorCodes
|
|||||||
/// Message for the last error.
|
/// Message for the last error.
|
||||||
std::string message;
|
std::string message;
|
||||||
/// Stacktrace for the last error.
|
/// Stacktrace for the last error.
|
||||||
std::string stacktrace;
|
FramePointers trace;
|
||||||
};
|
};
|
||||||
struct ErrorPair
|
struct ErrorPair
|
||||||
{
|
{
|
||||||
@ -46,7 +48,7 @@ namespace ErrorCodes
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
ErrorPair get();
|
ErrorPair get();
|
||||||
void increment(bool remote, const std::string & message, const std::string & stacktrace);
|
void increment(bool remote, const std::string & message, const FramePointers & trace);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ErrorPair value;
|
ErrorPair value;
|
||||||
@ -60,7 +62,7 @@ namespace ErrorCodes
|
|||||||
ErrorCode end();
|
ErrorCode end();
|
||||||
|
|
||||||
/// Add value for specified error_code.
|
/// Add value for specified error_code.
|
||||||
void increment(ErrorCode error_code, bool remote, const std::string & message, const std::string & stacktrace);
|
void increment(ErrorCode error_code, bool remote, const std::string & message, const FramePointers & trace);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -36,7 +36,7 @@ namespace ErrorCodes

 /// - Aborts the process if error code is LOGICAL_ERROR.
 /// - Increments error codes statistics.
-void handle_error_code([[maybe_unused]] const std::string & msg, const std::string & stacktrace, int code, bool remote)
+void handle_error_code([[maybe_unused]] const std::string & msg, int code, bool remote, const Exception::FramePointers & trace)
 {
     // In debug builds and builds with sanitizers, treat LOGICAL_ERROR as an assertion failure.
     // Log the message before we fail.
|
|||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
ErrorCodes::increment(code, remote, msg, stacktrace);
|
|
||||||
|
ErrorCodes::increment(code, remote, msg, trace);
|
||||||
}
|
}
|
||||||
|
|
||||||
Exception::Exception(const std::string & msg, int code, bool remote_)
|
Exception::Exception(const std::string & msg, int code, bool remote_)
|
||||||
: Poco::Exception(msg, code)
|
: Poco::Exception(msg, code)
|
||||||
, remote(remote_)
|
, remote(remote_)
|
||||||
{
|
{
|
||||||
handle_error_code(msg, getStackTraceString(), code, remote);
|
handle_error_code(msg, code, remote, getStackFramePointers());
|
||||||
}
|
}
|
||||||
|
|
||||||
Exception::Exception(const std::string & msg, const Exception & nested, int code)
|
Exception::Exception(const std::string & msg, const Exception & nested, int code)
|
||||||
: Poco::Exception(msg, nested, code)
|
: Poco::Exception(msg, nested, code)
|
||||||
{
|
{
|
||||||
handle_error_code(msg, getStackTraceString(), code, remote);
|
handle_error_code(msg, code, remote, getStackFramePointers());
|
||||||
}
|
}
|
||||||
|
|
||||||
Exception::Exception(CreateFromPocoTag, const Poco::Exception & exc)
|
Exception::Exception(CreateFromPocoTag, const Poco::Exception & exc)
|
||||||
@ -101,6 +102,31 @@ std::string Exception::getStackTraceString() const
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Exception::FramePointers Exception::getStackFramePointers() const
|
||||||
|
{
|
||||||
|
FramePointers frame_pointers;
|
||||||
|
#ifdef STD_EXCEPTION_HAS_STACK_TRACE
|
||||||
|
{
|
||||||
|
frame_pointers.resize(get_stack_trace_size());
|
||||||
|
for (size_t i = 0; i < frame_pointers.size(); ++i)
|
||||||
|
{
|
||||||
|
frame_pointers[i] = get_stack_trace_frames()[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
{
|
||||||
|
size_t stack_trace_size = trace.getSize();
|
||||||
|
size_t stack_trace_offset = trace.getOffset();
|
||||||
|
frame_pointers.reserve(stack_trace_size - stack_trace_offset);
|
||||||
|
for (size_t i = stack_trace_offset; i < stack_trace_size; ++i)
|
||||||
|
{
|
||||||
|
frame_pointers.push_back(trace.getFramePointers()[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return frame_pointers;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void throwFromErrno(const std::string & s, int code, int the_errno)
|
void throwFromErrno(const std::string & s, int code, int the_errno)
|
||||||
{
|
{
|
||||||
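For context, the frame pointers collected by the new `getStackFramePointers` are raw return addresses; symbolizing them happens later (on the SQL side this is what the `addressToSymbol`/`demangle` example for `system.errors` earlier in this commit does). A standalone sketch of the same capture step using the glibc `backtrace()` API instead of ClickHouse's internal `StackTrace` (an illustrative assumption, not the real implementation):

```cpp
#include <execinfo.h>  // glibc backtrace()
#include <cstdio>
#include <vector>

// Capture raw return addresses of the current call stack. These void*
// values are what a FramePointers (std::vector<void *>) holds; turning
// them into symbol names is a separate, later step.
static std::vector<void *> captureFramePointers()
{
    std::vector<void *> frames(64);
    int filled = backtrace(frames.data(), static_cast<int>(frames.size()));
    frames.resize(filled > 0 ? static_cast<size_t>(filled) : 0);
    return frames;
}

int main()
{
    for (void * frame : captureFramePointers())
        std::printf("%p\n", frame);
}
```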
@@ -24,6 +24,8 @@ namespace DB
 class Exception : public Poco::Exception
 {
 public:
+    using FramePointers = std::vector<void *>;
+
     Exception() = default;
     Exception(const std::string & msg, int code, bool remote_ = false);
     Exception(const std::string & msg, const Exception & nested, int code);
@@ -66,6 +68,8 @@ public:
     bool isRemoteException() const { return remote; }

     std::string getStackTraceString() const;
+    /// Used for system.errors
+    FramePointers getStackFramePointers() const;

 private:
 #ifndef STD_EXCEPTION_HAS_STACK_TRACE
@@ -271,13 +271,13 @@ private:
 };

 template <typename Key, typename Mapped>
-struct DefaultCellDisposer
+struct DefaultLRUHashMapCellDisposer
 {
     void operator()(const Key &, const Mapped &) const {}
 };

-template <typename Key, typename Value, typename Disposer = DefaultCellDisposer<Key, Value>, typename Hash = DefaultHash<Key>>
+template <typename Key, typename Value, typename Disposer = DefaultLRUHashMapCellDisposer<Key, Value>, typename Hash = DefaultHash<Key>>
 using LRUHashMap = LRUHashMapImpl<Key, Value, Disposer, Hash, false>;

-template <typename Key, typename Value, typename Disposer = DefaultCellDisposer<Key, Value>, typename Hash = DefaultHash<Key>>
+template <typename Key, typename Value, typename Disposer = DefaultLRUHashMapCellDisposer<Key, Value>, typename Hash = DefaultHash<Key>>
 using LRUHashMapWithSavedHash = LRUHashMapImpl<Key, Value, Disposer, Hash, true>;
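To make the disposer's role concrete: it is a callback the LRU map invokes for a cell being evicted, and the default one is a no-op; the rename above only trades a too-generic name for a specific one, with no behavior change. A hedged, self-contained toy follows (the real `LRUHashMap` is ClickHouse-internal and considerably more elaborate; `TinyLRU` and `NoopDisposer` are hypothetical names used for illustration):

```cpp
#include <cstddef>
#include <cstdio>
#include <list>
#include <string>
#include <unordered_map>

// Default disposer: do nothing when a cell is evicted (mirrors the no-op
// DefaultLRUHashMapCellDisposer above).
template <typename Key, typename Mapped>
struct NoopDisposer { void operator()(const Key &, const Mapped &) const {} };

// Toy LRU map (assumes distinct keys) showing where the disposer hook fires.
template <typename Key, typename Mapped, typename Disposer = NoopDisposer<Key, Mapped>>
class TinyLRU
{
public:
    explicit TinyLRU(size_t max_size_) : max_size(max_size_) {}

    void put(const Key & key, const Mapped & value)
    {
        order.push_front(key);
        map[key] = value;
        if (map.size() > max_size)
        {
            Key victim = order.back();
            order.pop_back();
            disposer(victim, map[victim]); // let the disposer release resources tied to the evicted cell
            map.erase(victim);
        }
    }

private:
    size_t max_size;
    std::list<Key> order; // most recently inserted at the front
    std::unordered_map<Key, Mapped> map;
    Disposer disposer;
};

int main()
{
    TinyLRU<int, std::string> cache(1);
    cache.put(1, "a");
    cache.put(2, "b"); // evicts key 1 via the disposer
    std::printf("done\n");
}
```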
@@ -692,6 +692,30 @@ public:
         assign(from.begin(), from.end());
     }

+    void erase(const_iterator first, const_iterator last)
+    {
+        iterator first_no_const = const_cast<iterator>(first);
+        iterator last_no_const = const_cast<iterator>(last);
+
+        size_t items_to_move = end() - last;
+
+        while (items_to_move != 0)
+        {
+            *first_no_const = *last_no_const;
+
+            ++first_no_const;
+            ++last_no_const;
+
+            --items_to_move;
+        }
+
+        this->c_end = reinterpret_cast<char *>(first_no_const);
+    }
+
+    void erase(const_iterator pos)
+    {
+        this->erase(pos, pos + 1);
+    }
+
     bool operator== (const PODArray & rhs) const
     {
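The semantics of the new `erase(first, last)` mirror `std::vector::erase`: elements after `last` are shifted left onto `first` and the array is truncated, as the tests in the next hunk exercise. A quick standalone equivalence check against `std::vector`:

```cpp
#include <cassert>
#include <vector>

int main()
{
    // Same observable behavior as PODArray::erase(first, last) above:
    // the tail is moved down and the container shrinks.
    std::vector<int> v {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
    v.erase(v.begin() + 2, v.begin() + 4);  // drop elements 2 and 3
    assert((v == std::vector<int>{0, 1, 4, 5, 6, 7, 8, 9}));
}
```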
@@ -92,3 +92,57 @@ TEST(Common, PODInsertElementSizeNotMultipleOfLeftPadding)

     EXPECT_EQ(arr1_initially_empty.size(), items_to_insert_size);
 }
+
+TEST(Common, PODErase)
+{
+    {
+        PaddedPODArray<UInt64> items {0,1,2,3,4,5,6,7,8,9};
+        PaddedPODArray<UInt64> expected;
+        expected = {0,1,2,3,4,5,6,7,8,9};
+
+        items.erase(items.begin(), items.begin());
+        EXPECT_EQ(items, expected);
+
+        items.erase(items.end(), items.end());
+        EXPECT_EQ(items, expected);
+    }
+    {
+        PaddedPODArray<UInt64> actual {0,1,2,3,4,5,6,7,8,9};
+        PaddedPODArray<UInt64> expected;
+
+        expected = {0,1,4,5,6,7,8,9};
+        actual.erase(actual.begin() + 2, actual.begin() + 4);
+        EXPECT_EQ(actual, expected);
+
+        expected = {0,1,4};
+        actual.erase(actual.begin() + 3, actual.end());
+        EXPECT_EQ(actual, expected);
+
+        expected = {};
+        actual.erase(actual.begin(), actual.end());
+        EXPECT_EQ(actual, expected);
+
+        for (size_t i = 0; i < 10; ++i)
+            actual.emplace_back(static_cast<UInt64>(i));
+
+        expected = {0,1,4,5,6,7,8,9};
+        actual.erase(actual.begin() + 2, actual.begin() + 4);
+        EXPECT_EQ(actual, expected);
+
+        expected = {0,1,4};
+        actual.erase(actual.begin() + 3, actual.end());
+        EXPECT_EQ(actual, expected);
+
+        expected = {};
+        actual.erase(actual.begin(), actual.end());
+        EXPECT_EQ(actual, expected);
+    }
+    {
+        PaddedPODArray<UInt64> actual {0,1,2,3,4,5,6,7,8,9};
+        PaddedPODArray<UInt64> expected;
+
+        expected = {1,2,3,4,5,6,7,8,9};
+        actual.erase(actual.begin());
+        EXPECT_EQ(actual, expected);
+    }
+}
@@ -51,7 +51,7 @@ bool CachedCompressedReadBuffer::nextImpl()
         {
             owned_cell->additional_bytes = codec->getAdditionalSizeAtTheEndOfBuffer();
             owned_cell->data.resize(size_decompressed + owned_cell->additional_bytes);
-            decompress(owned_cell->data.data(), size_decompressed, size_compressed_without_checksum);
+            decompressTo(owned_cell->data.data(), size_decompressed, size_compressed_without_checksum);

         }

@@ -21,7 +21,7 @@ bool CompressedReadBuffer::nextImpl()
     memory.resize(size_decompressed + additional_size_at_the_end_of_buffer);
     working_buffer = Buffer(memory.data(), &memory[size_decompressed]);

-    decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum);
+    decompress(working_buffer, size_decompressed, size_compressed_without_checksum);

     return true;
 }
@@ -48,7 +48,7 @@ size_t CompressedReadBuffer::readBig(char * to, size_t n)
         /// If the decompressed block fits entirely where it needs to be copied.
         if (size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read)
         {
-            decompress(to + bytes_read, size_decompressed, size_compressed_without_checksum);
+            decompressTo(to + bytes_read, size_decompressed, size_compressed_without_checksum);
             bytes_read += size_decompressed;
             bytes += size_decompressed;
         }
@@ -61,9 +61,9 @@ size_t CompressedReadBuffer::readBig(char * to, size_t n)

             memory.resize(size_decompressed + additional_size_at_the_end_of_buffer);
             working_buffer = Buffer(memory.data(), &memory[size_decompressed]);
-            pos = working_buffer.begin();

-            decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum);
+            decompress(working_buffer, size_decompressed, size_compressed_without_checksum);
+            pos = working_buffer.begin();

             bytes_read += read(to + bytes_read, n - bytes_read);
             break;
@@ -184,7 +184,7 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
 }


-void CompressedReadBufferBase::decompress(char * to, size_t size_decompressed, size_t size_compressed_without_checksum)
+static void readHeaderAndGetCodec(const char * compressed_buffer, size_t size_decompressed, CompressionCodecPtr & codec, bool allow_different_codecs)
 {
     ProfileEvents::increment(ProfileEvents::CompressedReadBufferBlocks);
     ProfileEvents::increment(ProfileEvents::CompressedReadBufferBytes, size_decompressed);
@@ -210,11 +210,38 @@ void CompressedReadBufferBase::decompress(char * to, size_t size_decompressed, s
                 ErrorCodes::CANNOT_DECOMPRESS);
         }
     }
+}
+
+
+void CompressedReadBufferBase::decompressTo(char * to, size_t size_decompressed, size_t size_compressed_without_checksum)
+{
+    readHeaderAndGetCodec(compressed_buffer, size_decompressed, codec, allow_different_codecs);
     codec->decompress(compressed_buffer, size_compressed_without_checksum, to);
 }


+void CompressedReadBufferBase::decompress(BufferBase::Buffer & to, size_t size_decompressed, size_t size_compressed_without_checksum)
+{
+    readHeaderAndGetCodec(compressed_buffer, size_decompressed, codec, allow_different_codecs);
+
+    if (codec->isNone())
+    {
+        /// Shortcut for NONE codec to avoid extra memcpy.
+        /// We doing it by changing the buffer `to` to point to existing uncompressed data.
+
+        UInt8 header_size = ICompressionCodec::getHeaderSize();
+        if (size_compressed_without_checksum < header_size)
+            throw Exception(ErrorCodes::CORRUPTED_DATA,
+                "Can't decompress data: the compressed data size ({}, this should include header size) is less than the header size ({})",
+                size_compressed_without_checksum, static_cast<size_t>(header_size));
+
+        to = BufferBase::Buffer(compressed_buffer + header_size, compressed_buffer + size_compressed_without_checksum);
+    }
+    else
+        codec->decompress(compressed_buffer, size_compressed_without_checksum, to.begin());
+}
+
+
 /// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'.
 CompressedReadBufferBase::CompressedReadBufferBase(ReadBuffer * in, bool allow_different_codecs_)
     : compressed_in(in), own_compressed_buffer(0), allow_different_codecs(allow_different_codecs_)
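A hedged sketch of the zero-copy idea behind the new `decompress(BufferBase::Buffer & to, ...)` overload: for the NONE codec, the bytes after the codec header are already the uncompressed payload, so the output buffer can be repointed at the source instead of copying. All names below (`Buffer`, `decompressMaybeZeroCopy`) and the memcpy stand-in are illustrative assumptions, not the real API:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>

struct Buffer { char * begin; char * end; };  // stand-in for BufferBase::Buffer

// Returns true if `out` was repointed at the source (no copy performed),
// false if data was materialized into the caller's buffer.
bool decompressMaybeZeroCopy(char * src, size_t compressed_size,
                             uint8_t header_size, bool codec_is_none, Buffer & out)
{
    if (codec_is_none)
    {
        // NONE codec: the payload after the header is already uncompressed.
        out = Buffer{src + header_size, src + compressed_size};
        return true;
    }
    // Other codecs must decompress into the destination (stubbed as memcpy).
    std::memcpy(out.begin, src + header_size, compressed_size - header_size);
    return false;
}
```

Repointing is why the new header comment warns that this overload is efficient for the NONE codec but unsuitable when the caller needs the data at a specific location.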
@@ -3,6 +3,7 @@
 #include <Common/PODArray.h>
 #include <Compression/LZ4_decompress_faster.h>
 #include <Compression/ICompressionCodec.h>
+#include <IO/BufferBase.h>


 namespace DB
@@ -37,7 +38,12 @@ protected:
     /// Returns number of compressed bytes read.
     size_t readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum, bool always_copy);

-    void decompress(char * to, size_t size_decompressed, size_t size_compressed_without_checksum);
+    /// Decompress into memory pointed by `to`
+    void decompressTo(char * to, size_t size_decompressed, size_t size_compressed_without_checksum);
+
+    /// This method can change location of `to` to avoid unnecessary copy if data is uncompressed.
+    /// It is more efficient for compression codec NONE but not suitable if you want to decompress into specific location.
+    void decompress(BufferBase::Buffer & to, size_t size_decompressed, size_t size_compressed_without_checksum);

 public:
     /// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'.
@@ -31,7 +31,7 @@ bool CompressedReadBufferFromFile::nextImpl()
     memory.resize(size_decompressed + additional_size_at_the_end_of_buffer);
     working_buffer = Buffer(memory.data(), &memory[size_decompressed]);

-    decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum);
+    decompress(working_buffer, size_decompressed, size_compressed_without_checksum);

     return true;
 }
@@ -108,7 +108,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n)
         /// If the decompressed block fits entirely where it needs to be copied.
         if (size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read)
         {
-            decompress(to + bytes_read, size_decompressed, size_compressed_without_checksum);
+            decompressTo(to + bytes_read, size_decompressed, size_compressed_without_checksum);
             bytes_read += size_decompressed;
             bytes += size_decompressed;
         }
@@ -122,9 +122,9 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n)

             memory.resize(size_decompressed + additional_size_at_the_end_of_buffer);
             working_buffer = Buffer(memory.data(), &memory[size_decompressed]);
-            pos = working_buffer.begin();

-            decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum);
+            decompress(working_buffer, size_decompressed, size_compressed_without_checksum);
+            pos = working_buffer.begin();

             bytes_read += read(to + bytes_read, n - bytes_read);
             break;
@@ -98,7 +98,7 @@ UInt32 ICompressionCodec::decompress(const char * source, UInt32 source_size, ch

     UInt8 header_size = getHeaderSize();
     if (source_size < header_size)
-        throw Exception(ErrorCodes::CORRUPTED_DATA, "Can't decompress data: the compressed data size ({}), this should include header size) is less than the header size ({})", source_size, size_t(header_size));
+        throw Exception(ErrorCodes::CORRUPTED_DATA, "Can't decompress data: the compressed data size ({}, this should include header size) is less than the header size ({})", source_size, static_cast<size_t>(header_size));

     uint8_t our_method = getMethodByte();
     uint8_t method = source[0];
@@ -31,6 +31,8 @@ struct Settings;
     M(UInt64, rotate_log_storage_interval, 10000, "How many records will be stored in one log storage file", 0) \
     M(UInt64, snapshots_to_keep, 3, "How many compressed snapshots to keep on disk", 0) \
     M(UInt64, stale_log_gap, 10000, "When node became stale and should receive snapshots from leader", 0) \
+    M(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \
+    M(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consesus with similar speed", 0) \
     M(Bool, force_sync, true, " Call fsync on each change in RAFT changelog", 0)

 DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)
@@ -30,6 +30,8 @@ NuKeeperServer::NuKeeperServer(
     , state_manager(nuraft::cs_new<NuKeeperStateManager>(server_id, "test_keeper_server", config, coordination_settings))
     , responses_queue(responses_queue_)
 {
+    if (coordination_settings->quorum_reads)
+        LOG_WARNING(&Poco::Logger::get("NuKeeperServer"), "Quorum reads enabled, NuKeeper will work slower.");
 }

 void NuKeeperServer::startup()
|
|||||||
params.reserved_log_items_ = coordination_settings->reserved_log_items;
|
params.reserved_log_items_ = coordination_settings->reserved_log_items;
|
||||||
params.snapshot_distance_ = coordination_settings->snapshot_distance;
|
params.snapshot_distance_ = coordination_settings->snapshot_distance;
|
||||||
params.stale_log_gap_ = coordination_settings->stale_log_gap;
|
params.stale_log_gap_ = coordination_settings->stale_log_gap;
|
||||||
|
params.fresh_log_gap_ = coordination_settings->fresh_log_gap;
|
||||||
params.client_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds();
|
params.client_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds();
|
||||||
params.auto_forwarding_ = coordination_settings->auto_forwarding;
|
params.auto_forwarding_ = coordination_settings->auto_forwarding;
|
||||||
params.auto_forwarding_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds() * 2;
|
params.auto_forwarding_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds() * 2;
|
||||||
@ -106,7 +109,7 @@ nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(int64_t session_id, const Coord
|
|||||||
void NuKeeperServer::putRequest(const NuKeeperStorage::RequestForSession & request_for_session)
|
void NuKeeperServer::putRequest(const NuKeeperStorage::RequestForSession & request_for_session)
|
||||||
{
|
{
|
||||||
auto [session_id, request] = request_for_session;
|
auto [session_id, request] = request_for_session;
|
||||||
if (isLeaderAlive() && request->isReadRequest())
|
if (!coordination_settings->quorum_reads && isLeaderAlive() && request->isReadRequest())
|
||||||
{
|
{
|
||||||
state_machine->processReadRequest(request_for_session);
|
state_machine->processReadRequest(request_for_session);
|
||||||
}
|
}
|
||||||
@ -185,6 +188,9 @@ nuraft::cb_func::ReturnCode NuKeeperServer::callbackFunc(nuraft::cb_func::Type t
|
|||||||
if (next_index < last_commited || next_index - last_commited <= 1)
|
if (next_index < last_commited || next_index - last_commited <= 1)
|
||||||
commited_store = true;
|
commited_store = true;
|
||||||
|
|
||||||
|
if (initialized_flag)
|
||||||
|
return nuraft::cb_func::ReturnCode::Ok;
|
||||||
|
|
||||||
auto set_initialized = [this] ()
|
auto set_initialized = [this] ()
|
||||||
{
|
{
|
||||||
std::unique_lock lock(initialized_mutex);
|
std::unique_lock lock(initialized_mutex);
|
||||||
@ -196,10 +202,27 @@ nuraft::cb_func::ReturnCode NuKeeperServer::callbackFunc(nuraft::cb_func::Type t
|
|||||||
{
|
{
|
||||||
case nuraft::cb_func::BecomeLeader:
|
case nuraft::cb_func::BecomeLeader:
|
||||||
{
|
{
|
||||||
if (commited_store) /// We become leader and store is empty, ready to serve requests
|
/// We become leader and store is empty or we already committed it
|
||||||
|
if (commited_store || initial_batch_committed)
|
||||||
set_initialized();
|
set_initialized();
|
||||||
return nuraft::cb_func::ReturnCode::Ok;
|
return nuraft::cb_func::ReturnCode::Ok;
|
||||||
}
|
}
|
||||||
|
case nuraft::cb_func::BecomeFollower:
|
||||||
|
case nuraft::cb_func::GotAppendEntryReqFromLeader:
|
||||||
|
{
|
||||||
|
if (isLeaderAlive())
|
||||||
|
{
|
||||||
|
auto leader_index = raft_instance->get_leader_committed_log_idx();
|
||||||
|
auto our_index = raft_instance->get_committed_log_idx();
|
||||||
|
/// This may happen when we start RAFT cluster from scratch.
|
||||||
|
/// Node first became leader, and after that some other node became leader.
|
||||||
|
/// BecameFresh for this node will not be called because it was already fresh
|
||||||
|
/// when it was leader.
|
||||||
|
if (leader_index < our_index + coordination_settings->fresh_log_gap)
|
||||||
|
set_initialized();
|
||||||
|
}
|
||||||
|
return nuraft::cb_func::ReturnCode::Ok;
|
||||||
|
}
|
||||||
case nuraft::cb_func::BecomeFresh:
|
case nuraft::cb_func::BecomeFresh:
|
||||||
{
|
{
|
||||||
set_initialized(); /// We are fresh follower, ready to serve requests.
|
set_initialized(); /// We are fresh follower, ready to serve requests.
|
||||||
@ -209,6 +232,7 @@ nuraft::cb_func::ReturnCode NuKeeperServer::callbackFunc(nuraft::cb_func::Type t
|
|||||||
{
|
{
|
||||||
if (isLeader()) /// We have committed our log store and we are leader, ready to serve requests.
|
if (isLeader()) /// We have committed our log store and we are leader, ready to serve requests.
|
||||||
set_initialized();
|
set_initialized();
|
||||||
|
initial_batch_committed = true;
|
||||||
return nuraft::cb_func::ReturnCode::Ok;
|
return nuraft::cb_func::ReturnCode::Ok;
|
||||||
}
|
}
|
||||||
default: /// ignore other events
|
default: /// ignore other events
|
||||||
@ -220,7 +244,7 @@ void NuKeeperServer::waitInit()
|
|||||||
{
|
{
|
||||||
std::unique_lock lock(initialized_mutex);
|
std::unique_lock lock(initialized_mutex);
|
||||||
int64_t timeout = coordination_settings->startup_timeout.totalMilliseconds();
|
int64_t timeout = coordination_settings->startup_timeout.totalMilliseconds();
|
||||||
if (!initialized_cv.wait_for(lock, std::chrono::milliseconds(timeout), [&] { return initialized_flag; }))
|
if (!initialized_cv.wait_for(lock, std::chrono::milliseconds(timeout), [&] { return initialized_flag.load(); }))
|
||||||
throw Exception(ErrorCodes::RAFT_ERROR, "Failed to wait RAFT initialization");
|
throw Exception(ErrorCodes::RAFT_ERROR, "Failed to wait RAFT initialization");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -31,8 +31,9 @@ private:
|
|||||||
ResponsesQueue & responses_queue;
|
ResponsesQueue & responses_queue;
|
||||||
|
|
||||||
std::mutex initialized_mutex;
|
std::mutex initialized_mutex;
|
||||||
bool initialized_flag = false;
|
std::atomic<bool> initialized_flag = false;
|
||||||
std::condition_variable initialized_cv;
|
std::condition_variable initialized_cv;
|
||||||
|
std::atomic<bool> initial_batch_committed = false;
|
||||||
|
|
||||||
nuraft::cb_func::ReturnCode callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * param);
|
nuraft::cb_func::ReturnCode callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * param);
|
||||||
|
|
||||||
|
@ -241,9 +241,10 @@ NuKeeperStorageSnapshot::~NuKeeperStorageSnapshot()
|
|||||||
storage->disableSnapshotMode();
|
storage->disableSnapshotMode();
|
||||||
}
|
}
|
||||||
|
|
||||||
NuKeeperSnapshotManager::NuKeeperSnapshotManager(const std::string & snapshots_path_, size_t snapshots_to_keep_)
|
NuKeeperSnapshotManager::NuKeeperSnapshotManager(const std::string & snapshots_path_, size_t snapshots_to_keep_, size_t storage_tick_time_)
|
||||||
: snapshots_path(snapshots_path_)
|
: snapshots_path(snapshots_path_)
|
||||||
, snapshots_to_keep(snapshots_to_keep_)
|
, snapshots_to_keep(snapshots_to_keep_)
|
||||||
|
, storage_tick_time(storage_tick_time_)
|
||||||
{
|
{
|
||||||
namespace fs = std::filesystem;
|
namespace fs = std::filesystem;
|
||||||
|
|
||||||
@ -325,22 +326,24 @@ nuraft::ptr<nuraft::buffer> NuKeeperSnapshotManager::serializeSnapshotToBuffer(c
|
|||||||
return writer.getBuffer();
|
return writer.getBuffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
SnapshotMetadataPtr NuKeeperSnapshotManager::deserializeSnapshotFromBuffer(NuKeeperStorage * storage, nuraft::ptr<nuraft::buffer> buffer)
|
SnapshotMetaAndStorage NuKeeperSnapshotManager::deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const
|
||||||
{
|
{
|
||||||
ReadBufferFromNuraftBuffer reader(buffer);
|
ReadBufferFromNuraftBuffer reader(buffer);
|
||||||
CompressedReadBuffer compressed_reader(reader);
|
CompressedReadBuffer compressed_reader(reader);
|
||||||
return NuKeeperStorageSnapshot::deserialize(*storage, compressed_reader);
|
auto storage = std::make_unique<NuKeeperStorage>(storage_tick_time);
|
||||||
|
auto snapshot_metadata = NuKeeperStorageSnapshot::deserialize(*storage, compressed_reader);
|
||||||
|
return std::make_pair(snapshot_metadata, std::move(storage));
|
||||||
}
|
}
|
||||||
|
|
||||||
SnapshotMetadataPtr NuKeeperSnapshotManager::restoreFromLatestSnapshot(NuKeeperStorage * storage)
|
SnapshotMetaAndStorage NuKeeperSnapshotManager::restoreFromLatestSnapshot()
|
||||||
{
|
{
|
||||||
if (existing_snapshots.empty())
|
if (existing_snapshots.empty())
|
||||||
return nullptr;
|
return {};
|
||||||
|
|
||||||
auto buffer = deserializeLatestSnapshotBufferFromDisk();
|
auto buffer = deserializeLatestSnapshotBufferFromDisk();
|
||||||
if (!buffer)
|
if (!buffer)
|
||||||
return nullptr;
|
return {};
|
||||||
return deserializeSnapshotFromBuffer(storage, buffer);
|
return deserializeSnapshotFromBuffer(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
void NuKeeperSnapshotManager::removeOutdatedSnapshotsIfNeeded()
|
void NuKeeperSnapshotManager::removeOutdatedSnapshotsIfNeeded()
|
||||||
|
@ -40,17 +40,20 @@ public:
|
|||||||
using NuKeeperStorageSnapshotPtr = std::shared_ptr<NuKeeperStorageSnapshot>;
|
using NuKeeperStorageSnapshotPtr = std::shared_ptr<NuKeeperStorageSnapshot>;
|
||||||
using CreateSnapshotCallback = std::function<void(NuKeeperStorageSnapshotPtr &&)>;
|
using CreateSnapshotCallback = std::function<void(NuKeeperStorageSnapshotPtr &&)>;
|
||||||
|
|
||||||
|
|
||||||
|
using SnapshotMetaAndStorage = std::pair<SnapshotMetadataPtr, NuKeeperStoragePtr>;
|
||||||
|
|
||||||
class NuKeeperSnapshotManager
|
class NuKeeperSnapshotManager
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
NuKeeperSnapshotManager(const std::string & snapshots_path_, size_t snapshots_to_keep_);
|
NuKeeperSnapshotManager(const std::string & snapshots_path_, size_t snapshots_to_keep_, size_t storage_tick_time_ = 500);
|
||||||
|
|
||||||
SnapshotMetadataPtr restoreFromLatestSnapshot(NuKeeperStorage * storage);
|
SnapshotMetaAndStorage restoreFromLatestSnapshot();
|
||||||
|
|
||||||
static nuraft::ptr<nuraft::buffer> serializeSnapshotToBuffer(const NuKeeperStorageSnapshot & snapshot);
|
static nuraft::ptr<nuraft::buffer> serializeSnapshotToBuffer(const NuKeeperStorageSnapshot & snapshot);
|
||||||
std::string serializeSnapshotBufferToDisk(nuraft::buffer & buffer, size_t up_to_log_idx);
|
std::string serializeSnapshotBufferToDisk(nuraft::buffer & buffer, size_t up_to_log_idx);
|
||||||
|
|
||||||
static SnapshotMetadataPtr deserializeSnapshotFromBuffer(NuKeeperStorage * storage, nuraft::ptr<nuraft::buffer> buffer);
|
SnapshotMetaAndStorage deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const;
|
||||||
|
|
||||||
nuraft::ptr<nuraft::buffer> deserializeSnapshotBufferFromDisk(size_t up_to_log_idx) const;
|
nuraft::ptr<nuraft::buffer> deserializeSnapshotBufferFromDisk(size_t up_to_log_idx) const;
|
||||||
nuraft::ptr<nuraft::buffer> deserializeLatestSnapshotBufferFromDisk();
|
nuraft::ptr<nuraft::buffer> deserializeLatestSnapshotBufferFromDisk();
|
||||||
@ -74,6 +77,7 @@ private:
|
|||||||
const std::string snapshots_path;
|
const std::string snapshots_path;
|
||||||
const size_t snapshots_to_keep;
|
const size_t snapshots_to_keep;
|
||||||
std::map<size_t, std::string> existing_snapshots;
|
std::map<size_t, std::string> existing_snapshots;
|
||||||
|
size_t storage_tick_time;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct CreateSnapshotTask
|
struct CreateSnapshotTask
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#include <IO/ReadHelpers.h>
|
#include <IO/ReadHelpers.h>
|
||||||
#include <Common/ZooKeeper/ZooKeeperIO.h>
|
#include <Common/ZooKeeper/ZooKeeperIO.h>
|
||||||
#include <Coordination/NuKeeperSnapshotManager.h>
|
#include <Coordination/NuKeeperSnapshotManager.h>
|
||||||
|
#include <future>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -37,8 +38,7 @@ NuKeeperStorage::RequestForSession parseRequest(nuraft::buffer & data)
|
|||||||
|
|
||||||
NuKeeperStateMachine::NuKeeperStateMachine(ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, const std::string & snapshots_path_, const CoordinationSettingsPtr & coordination_settings_)
|
NuKeeperStateMachine::NuKeeperStateMachine(ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, const std::string & snapshots_path_, const CoordinationSettingsPtr & coordination_settings_)
|
||||||
: coordination_settings(coordination_settings_)
|
: coordination_settings(coordination_settings_)
|
||||||
, storage(coordination_settings->dead_session_check_period_ms.totalMilliseconds())
|
, snapshot_manager(snapshots_path_, coordination_settings->snapshots_to_keep, coordination_settings->dead_session_check_period_ms.totalMicroseconds())
|
||||||
, snapshot_manager(snapshots_path_, coordination_settings->snapshots_to_keep)
|
|
||||||
, responses_queue(responses_queue_)
|
, responses_queue(responses_queue_)
|
||||||
, snapshots_queue(snapshots_queue_)
|
, snapshots_queue(snapshots_queue_)
|
||||||
, last_committed_idx(0)
|
, last_committed_idx(0)
|
||||||
@ -60,7 +60,7 @@ void NuKeeperStateMachine::init()
|
|||||||
try
|
try
|
||||||
{
|
{
|
||||||
latest_snapshot_buf = snapshot_manager.deserializeSnapshotBufferFromDisk(latest_log_index);
|
latest_snapshot_buf = snapshot_manager.deserializeSnapshotBufferFromDisk(latest_log_index);
|
||||||
latest_snapshot_meta = snapshot_manager.deserializeSnapshotFromBuffer(&storage, latest_snapshot_buf);
|
std::tie(latest_snapshot_meta, storage) = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_buf);
|
||||||
last_committed_idx = latest_snapshot_meta->get_last_log_idx();
|
last_committed_idx = latest_snapshot_meta->get_last_log_idx();
|
||||||
loaded = true;
|
loaded = true;
|
||||||
break;
|
break;
|
||||||
@ -83,6 +83,9 @@ void NuKeeperStateMachine::init()
|
|||||||
{
|
{
|
||||||
LOG_DEBUG(log, "No existing snapshots, last committed log index {}", last_committed_idx);
|
LOG_DEBUG(log, "No existing snapshots, last committed log index {}", last_committed_idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!storage)
|
||||||
|
storage = std::make_unique<NuKeeperStorage>(coordination_settings->dead_session_check_period_ms.totalMilliseconds());
|
||||||
}
|
}
|
||||||
|
|
||||||
nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, nuraft::buffer & data)
|
nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, nuraft::buffer & data)
|
||||||
@ -96,7 +99,7 @@ nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, n
|
|||||||
nuraft::buffer_serializer bs(response);
|
nuraft::buffer_serializer bs(response);
|
||||||
{
|
{
|
||||||
std::lock_guard lock(storage_lock);
|
std::lock_guard lock(storage_lock);
|
||||||
session_id = storage.getSessionID(session_timeout_ms);
|
session_id = storage->getSessionID(session_timeout_ms);
|
||||||
bs.put_i64(session_id);
|
bs.put_i64(session_id);
|
||||||
}
|
}
|
||||||
LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_timeout_ms);
|
LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_timeout_ms);
|
||||||
@ -109,7 +112,7 @@ nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, n
|
|||||||
NuKeeperStorage::ResponsesForSessions responses_for_sessions;
|
NuKeeperStorage::ResponsesForSessions responses_for_sessions;
|
||||||
{
|
{
|
||||||
std::lock_guard lock(storage_lock);
|
std::lock_guard lock(storage_lock);
|
||||||
responses_for_sessions = storage.processRequest(request_for_session.request, request_for_session.session_id, log_idx);
|
responses_for_sessions = storage->processRequest(request_for_session.request, request_for_session.session_id, log_idx);
|
||||||
for (auto & response_for_session : responses_for_sessions)
|
for (auto & response_for_session : responses_for_sessions)
|
||||||
responses_queue.push(response_for_session);
|
responses_queue.push(response_for_session);
|
||||||
}
|
}
|
||||||
@ -133,7 +136,7 @@ bool NuKeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
|
|||||||
|
|
||||||
{
|
{
|
||||||
std::lock_guard lock(storage_lock);
|
std::lock_guard lock(storage_lock);
|
||||||
snapshot_manager.deserializeSnapshotFromBuffer(&storage, latest_snapshot_ptr);
|
std::tie(latest_snapshot_meta, storage) = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_ptr);
|
||||||
}
|
}
|
||||||
last_committed_idx = s.get_last_log_idx();
|
last_committed_idx = s.get_last_log_idx();
|
||||||
return true;
|
return true;
|
||||||
@ -157,7 +160,7 @@ void NuKeeperStateMachine::create_snapshot(
|
|||||||
CreateSnapshotTask snapshot_task;
|
CreateSnapshotTask snapshot_task;
|
||||||
{
|
{
|
||||||
std::lock_guard lock(storage_lock);
|
std::lock_guard lock(storage_lock);
|
||||||
snapshot_task.snapshot = std::make_shared<NuKeeperStorageSnapshot>(&storage, snapshot_meta_copy);
|
snapshot_task.snapshot = std::make_shared<NuKeeperStorageSnapshot>(storage.get(), snapshot_meta_copy);
|
||||||
}
|
}
|
||||||
|
|
||||||
snapshot_task.create_snapshot = [this, when_done] (NuKeeperStorageSnapshotPtr && snapshot)
|
snapshot_task.create_snapshot = [this, when_done] (NuKeeperStorageSnapshotPtr && snapshot)
|
||||||
@ -179,7 +182,7 @@ void NuKeeperStateMachine::create_snapshot(
|
|||||||
{
|
{
|
||||||
/// Must do it with lock (clearing elements from list)
|
/// Must do it with lock (clearing elements from list)
|
||||||
std::lock_guard lock(storage_lock);
|
std::lock_guard lock(storage_lock);
|
||||||
storage.clearGarbageAfterSnapshot();
|
storage->clearGarbageAfterSnapshot();
|
||||||
/// Destroy snapshot with lock
|
/// Destroy snapshot with lock
|
||||||
snapshot.reset();
|
snapshot.reset();
|
||||||
LOG_TRACE(log, "Cleared garbage after snapshot");
|
LOG_TRACE(log, "Cleared garbage after snapshot");
|
||||||
@ -214,7 +217,7 @@ void NuKeeperStateMachine::save_logical_snp_obj(
|
|||||||
if (obj_id == 0)
|
if (obj_id == 0)
|
||||||
{
|
{
|
||||||
std::lock_guard lock(storage_lock);
|
std::lock_guard lock(storage_lock);
|
||||||
NuKeeperStorageSnapshot snapshot(&storage, s.get_last_log_idx());
|
NuKeeperStorageSnapshot snapshot(storage.get(), s.get_last_log_idx());
|
||||||
cloned_buffer = snapshot_manager.serializeSnapshotToBuffer(snapshot);
|
cloned_buffer = snapshot_manager.serializeSnapshotToBuffer(snapshot);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -225,7 +228,28 @@ void NuKeeperStateMachine::save_logical_snp_obj(
|
|||||||
nuraft::ptr<nuraft::buffer> snp_buf = s.serialize();
|
nuraft::ptr<nuraft::buffer> snp_buf = s.serialize();
|
||||||
cloned_meta = nuraft::snapshot::deserialize(*snp_buf);
|
cloned_meta = nuraft::snapshot::deserialize(*snp_buf);
|
||||||
|
|
||||||
auto result_path = snapshot_manager.serializeSnapshotBufferToDisk(*cloned_buffer, s.get_last_log_idx());
|
/// Sometimes NuRaft can call save and create snapshots from different threads
|
||||||
|
/// at once. To avoid race conditions we serialize snapshots through snapshots_queue
|
||||||
|
/// TODO: make something better
|
||||||
|
CreateSnapshotTask snapshot_task;
|
||||||
|
std::shared_ptr<std::promise<void>> waiter = std::make_shared<std::promise<void>>();
|
||||||
|
auto future = waiter->get_future();
|
||||||
|
snapshot_task.snapshot = nullptr;
|
||||||
|
snapshot_task.create_snapshot = [this, waiter, cloned_buffer, log_idx = s.get_last_log_idx()] (NuKeeperStorageSnapshotPtr &&)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
auto result_path = snapshot_manager.serializeSnapshotBufferToDisk(*cloned_buffer, log_idx);
|
||||||
|
LOG_DEBUG(log, "Saved snapshot {} to path {}", log_idx, result_path);
|
||||||
|
}
|
||||||
|
catch (...)
|
||||||
|
{
|
||||||
|
tryLogCurrentException(log);
|
||||||
|
}
|
||||||
|
waiter->set_value();
|
||||||
|
};
|
||||||
|
snapshots_queue.push(std::move(snapshot_task));
|
||||||
|
future.wait();
|
||||||
|
|
||||||
{
|
{
|
||||||
std::lock_guard lock(snapshots_lock);
|
std::lock_guard lock(snapshots_lock);
|
||||||
@ -233,7 +257,6 @@ void NuKeeperStateMachine::save_logical_snp_obj(
|
|||||||
latest_snapshot_meta = cloned_meta;
|
latest_snapshot_meta = cloned_meta;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_DEBUG(log, "Created snapshot {} with path {}", s.get_last_log_idx(), result_path);
|
|
||||||
|
|
||||||
obj_id++;
|
obj_id++;
|
||||||
}
|
}
|
||||||
@ -271,7 +294,7 @@ void NuKeeperStateMachine::processReadRequest(const NuKeeperStorage::RequestForS
|
|||||||
NuKeeperStorage::ResponsesForSessions responses;
|
NuKeeperStorage::ResponsesForSessions responses;
|
||||||
{
|
{
|
||||||
std::lock_guard lock(storage_lock);
|
std::lock_guard lock(storage_lock);
|
||||||
responses = storage.processRequest(request_for_session.request, request_for_session.session_id, std::nullopt);
|
responses = storage->processRequest(request_for_session.request, request_for_session.session_id, std::nullopt);
|
||||||
}
|
}
|
||||||
for (const auto & response : responses)
|
for (const auto & response : responses)
|
||||||
responses_queue.push(response);
|
responses_queue.push(response);
|
||||||
@ -280,13 +303,13 @@ void NuKeeperStateMachine::processReadRequest(const NuKeeperStorage::RequestForS
|
|||||||
std::unordered_set<int64_t> NuKeeperStateMachine::getDeadSessions()
|
std::unordered_set<int64_t> NuKeeperStateMachine::getDeadSessions()
|
||||||
{
|
{
|
||||||
std::lock_guard lock(storage_lock);
|
std::lock_guard lock(storage_lock);
|
||||||
return storage.getDeadSessions();
|
return storage->getDeadSessions();
|
||||||
}
|
}
|
||||||
|
|
||||||
void NuKeeperStateMachine::shutdownStorage()
|
void NuKeeperStateMachine::shutdownStorage()
|
||||||
{
|
{
|
||||||
std::lock_guard lock(storage_lock);
|
std::lock_guard lock(storage_lock);
|
||||||
storage.finalize();
|
storage->finalize();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -52,7 +52,7 @@ public:
|
|||||||
|
|
||||||
NuKeeperStorage & getStorage()
|
NuKeeperStorage & getStorage()
|
||||||
{
|
{
|
||||||
return storage;
|
return *storage;
|
||||||
}
|
}
|
||||||
|
|
||||||
void processReadRequest(const NuKeeperStorage::RequestForSession & request_for_session);
|
void processReadRequest(const NuKeeperStorage::RequestForSession & request_for_session);
|
||||||
@ -68,7 +68,7 @@ private:
|
|||||||
|
|
||||||
CoordinationSettingsPtr coordination_settings;
|
CoordinationSettingsPtr coordination_settings;
|
||||||
|
|
||||||
NuKeeperStorage storage;
|
NuKeeperStoragePtr storage;
|
||||||
|
|
||||||
NuKeeperSnapshotManager snapshot_manager;
|
NuKeeperSnapshotManager snapshot_manager;
|
||||||
|
|
||||||
|
@ -233,7 +233,7 @@ struct NuKeeperStorageGetRequest final : public NuKeeperStorageRequest
|
|||||||
struct NuKeeperStorageRemoveRequest final : public NuKeeperStorageRequest
|
struct NuKeeperStorageRemoveRequest final : public NuKeeperStorageRequest
|
||||||
{
|
{
|
||||||
using NuKeeperStorageRequest::NuKeeperStorageRequest;
|
using NuKeeperStorageRequest::NuKeeperStorageRequest;
|
||||||
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & ephemerals, int64_t /*zxid*/, int64_t session_id) const override
|
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & ephemerals, int64_t /*zxid*/, int64_t /*session_id*/) const override
|
||||||
{
|
{
|
||||||
Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
|
Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
|
||||||
Coordination::ZooKeeperRemoveResponse & response = dynamic_cast<Coordination::ZooKeeperRemoveResponse &>(*response_ptr);
|
Coordination::ZooKeeperRemoveResponse & response = dynamic_cast<Coordination::ZooKeeperRemoveResponse &>(*response_ptr);
|
||||||
@ -257,7 +257,12 @@ struct NuKeeperStorageRemoveRequest final : public NuKeeperStorageRequest
|
|||||||
{
|
{
|
||||||
auto prev_node = it->value;
|
auto prev_node = it->value;
|
||||||
if (prev_node.stat.ephemeralOwner != 0)
|
if (prev_node.stat.ephemeralOwner != 0)
|
||||||
ephemerals[session_id].erase(request.path);
|
{
|
||||||
|
auto ephemerals_it = ephemerals.find(prev_node.stat.ephemeralOwner);
|
||||||
|
ephemerals_it->second.erase(request.path);
|
||||||
|
if (ephemerals_it->second.empty())
|
||||||
|
ephemerals.erase(ephemerals_it);
|
||||||
|
}
|
||||||
|
|
||||||
auto child_basename = getBaseName(it->key);
|
auto child_basename = getBaseName(it->key);
|
||||||
container.updateValue(parentPath(request.path), [&child_basename] (NuKeeperStorage::Node & parent)
|
container.updateValue(parentPath(request.path), [&child_basename] (NuKeeperStorage::Node & parent)
|
||||||
@ -271,10 +276,10 @@ struct NuKeeperStorageRemoveRequest final : public NuKeeperStorageRequest
|
|||||||
|
|
||||||
container.erase(request.path);
|
container.erase(request.path);
|
||||||
|
|
||||||
undo = [prev_node, &container, &ephemerals, session_id, path = request.path, child_basename]
|
undo = [prev_node, &container, &ephemerals, path = request.path, child_basename]
|
||||||
{
|
{
|
||||||
if (prev_node.stat.ephemeralOwner != 0)
|
if (prev_node.stat.ephemeralOwner != 0)
|
||||||
ephemerals[session_id].emplace(path);
|
ephemerals[prev_node.stat.ephemeralOwner].emplace(path);
|
||||||
|
|
||||||
container.insert(path, prev_node);
|
container.insert(path, prev_node);
|
||||||
container.updateValue(parentPath(path), [&child_basename] (NuKeeperStorage::Node & parent)
|
container.updateValue(parentPath(path), [&child_basename] (NuKeeperStorage::Node & parent)
|
||||||
@ -377,7 +382,6 @@ struct NuKeeperStorageSetRequest final : public NuKeeperStorageRequest
|
|||||||
{
|
{
|
||||||
return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CHANGED);
|
return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CHANGED);
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct NuKeeperStorageListRequest final : public NuKeeperStorageRequest
|
struct NuKeeperStorageListRequest final : public NuKeeperStorageRequest
|
||||||
@ -641,6 +645,13 @@ NuKeeperStorage::ResponsesForSessions NuKeeperStorage::processRequest(const Coor
|
|||||||
for (const auto & ephemeral_path : it->second)
|
for (const auto & ephemeral_path : it->second)
|
||||||
{
|
{
|
||||||
container.erase(ephemeral_path);
|
container.erase(ephemeral_path);
|
||||||
|
container.updateValue(parentPath(ephemeral_path), [&ephemeral_path] (NuKeeperStorage::Node & parent)
|
||||||
|
{
|
||||||
|
--parent.stat.numChildren;
|
||||||
|
++parent.stat.cversion;
|
||||||
|
parent.children.erase(getBaseName(ephemeral_path));
|
||||||
|
});
|
||||||
|
|
||||||
auto responses = processWatchesImpl(ephemeral_path, watches, list_watches, Coordination::Event::DELETED);
|
auto responses = processWatchesImpl(ephemeral_path, watches, list_watches, Coordination::Event::DELETED);
|
||||||
results.insert(results.end(), responses.begin(), responses.end());
|
results.insert(results.end(), responses.begin(), responses.end());
|
||||||
}
|
}
|
||||||
|
@ -131,4 +131,6 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
using NuKeeperStoragePtr = std::unique_ptr<NuKeeperStorage>;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -132,6 +132,10 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati
|
|||||||
|
|
||||||
coordination_settings->loadFromConfig("test_keeper_server.coordination_settings", config);
|
coordination_settings->loadFromConfig("test_keeper_server.coordination_settings", config);
|
||||||
|
|
||||||
|
request_thread = ThreadFromGlobalPool([this] { requestThread(); });
|
||||||
|
responses_thread = ThreadFromGlobalPool([this] { responseThread(); });
|
||||||
|
snapshot_thread = ThreadFromGlobalPool([this] { snapshotThread(); });
|
||||||
|
|
||||||
server = std::make_unique<NuKeeperServer>(myid, coordination_settings, config, responses_queue, snapshots_queue);
|
server = std::make_unique<NuKeeperServer>(myid, coordination_settings, config, responses_queue, snapshots_queue);
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
@ -148,10 +152,8 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati
|
|||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
|
|
||||||
request_thread = ThreadFromGlobalPool([this] { requestThread(); });
|
|
||||||
responses_thread = ThreadFromGlobalPool([this] { responseThread(); });
|
|
||||||
session_cleaner_thread = ThreadFromGlobalPool([this] { sessionCleanerTask(); });
|
session_cleaner_thread = ThreadFromGlobalPool([this] { sessionCleanerTask(); });
|
||||||
snapshot_thread = ThreadFromGlobalPool([this] { snapshotThread(); });
|
|
||||||
|
|
||||||
LOG_DEBUG(log, "Dispatcher initialized");
|
LOG_DEBUG(log, "Dispatcher initialized");
|
||||||
}
|
}
|
||||||
|
@ -897,25 +897,25 @@ TEST(CoordinationTest, TestStorageSnapshotSimple)
|
|||||||
manager.serializeSnapshotBufferToDisk(*buf, 2);
|
manager.serializeSnapshotBufferToDisk(*buf, 2);
|
||||||
EXPECT_TRUE(fs::exists("./snapshots/snapshot_2.bin"));
|
EXPECT_TRUE(fs::exists("./snapshots/snapshot_2.bin"));
|
||||||
|
|
||||||
DB::NuKeeperStorage restored_storage(500);
|
|
||||||
|
|
||||||
auto debuf = manager.deserializeSnapshotBufferFromDisk(2);
|
auto debuf = manager.deserializeSnapshotBufferFromDisk(2);
|
||||||
manager.deserializeSnapshotFromBuffer(&restored_storage, debuf);
|
|
||||||
|
|
||||||
EXPECT_EQ(restored_storage.container.size(), 3);
|
auto [snapshot_meta, restored_storage] = manager.deserializeSnapshotFromBuffer(debuf);
|
||||||
EXPECT_EQ(restored_storage.container.getValue("/").children.size(), 1);
|
|
||||||
EXPECT_EQ(restored_storage.container.getValue("/hello").children.size(), 1);
|
|
||||||
EXPECT_EQ(restored_storage.container.getValue("/hello/somepath").children.size(), 0);
|
|
||||||
|
|
||||||
EXPECT_EQ(restored_storage.container.getValue("/").data, "");
|
EXPECT_EQ(restored_storage->container.size(), 3);
|
||||||
EXPECT_EQ(restored_storage.container.getValue("/hello").data, "world");
|
EXPECT_EQ(restored_storage->container.getValue("/").children.size(), 1);
|
||||||
EXPECT_EQ(restored_storage.container.getValue("/hello/somepath").data, "somedata");
|
EXPECT_EQ(restored_storage->container.getValue("/hello").children.size(), 1);
|
||||||
EXPECT_EQ(restored_storage.session_id_counter, 7);
|
EXPECT_EQ(restored_storage->container.getValue("/hello/somepath").children.size(), 0);
|
||||||
EXPECT_EQ(restored_storage.zxid, 2);
|
|
||||||
EXPECT_EQ(restored_storage.ephemerals.size(), 2);
|
EXPECT_EQ(restored_storage->container.getValue("/").data, "");
|
||||||
EXPECT_EQ(restored_storage.ephemerals[3].size(), 1);
|
EXPECT_EQ(restored_storage->container.getValue("/hello").data, "world");
|
||||||
EXPECT_EQ(restored_storage.ephemerals[1].size(), 1);
|
EXPECT_EQ(restored_storage->container.getValue("/hello/somepath").data, "somedata");
|
||||||
EXPECT_EQ(restored_storage.session_and_timeout.size(), 2);
|
EXPECT_EQ(restored_storage->session_id_counter, 7);
|
||||||
|
EXPECT_EQ(restored_storage->zxid, 2);
|
||||||
|
EXPECT_EQ(restored_storage->ephemerals.size(), 2);
|
||||||
|
EXPECT_EQ(restored_storage->ephemerals[3].size(), 1);
|
||||||
|
EXPECT_EQ(restored_storage->ephemerals[1].size(), 1);
|
||||||
|
EXPECT_EQ(restored_storage->session_and_timeout.size(), 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(CoordinationTest, TestStorageSnapshotMoreWrites)
|
TEST(CoordinationTest, TestStorageSnapshotMoreWrites)
|
||||||
@ -946,15 +946,14 @@ TEST(CoordinationTest, TestStorageSnapshotMoreWrites)
|
|||||||
manager.serializeSnapshotBufferToDisk(*buf, 50);
|
manager.serializeSnapshotBufferToDisk(*buf, 50);
|
||||||
EXPECT_TRUE(fs::exists("./snapshots/snapshot_50.bin"));
|
EXPECT_TRUE(fs::exists("./snapshots/snapshot_50.bin"));
|
||||||
|
|
||||||
DB::NuKeeperStorage restored_storage(500);
|
|
||||||
|
|
||||||
auto debuf = manager.deserializeSnapshotBufferFromDisk(50);
|
auto debuf = manager.deserializeSnapshotBufferFromDisk(50);
|
||||||
manager.deserializeSnapshotFromBuffer(&restored_storage, debuf);
|
auto [meta, restored_storage] = manager.deserializeSnapshotFromBuffer(debuf);
|
||||||
|
|
||||||
EXPECT_EQ(restored_storage.container.size(), 51);
|
EXPECT_EQ(restored_storage->container.size(), 51);
|
||||||
for (size_t i = 0; i < 50; ++i)
|
for (size_t i = 0; i < 50; ++i)
|
||||||
{
|
{
|
||||||
EXPECT_EQ(restored_storage.container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i));
|
EXPECT_EQ(restored_storage->container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -987,14 +986,13 @@ TEST(CoordinationTest, TestStorageSnapshotManySnapshots)
|
|||||||
EXPECT_TRUE(fs::exists("./snapshots/snapshot_250.bin"));
|
EXPECT_TRUE(fs::exists("./snapshots/snapshot_250.bin"));
|
||||||
|
|
||||||
|
|
||||||
DB::NuKeeperStorage restored_storage(500);
|
auto [meta, restored_storage] = manager.restoreFromLatestSnapshot();
|
||||||
manager.restoreFromLatestSnapshot(&restored_storage);
|
|
||||||
|
|
||||||
EXPECT_EQ(restored_storage.container.size(), 251);
|
EXPECT_EQ(restored_storage->container.size(), 251);
|
||||||
|
|
||||||
for (size_t i = 0; i < 250; ++i)
|
for (size_t i = 0; i < 250; ++i)
|
||||||
{
|
{
|
||||||
EXPECT_EQ(restored_storage.container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i));
|
EXPECT_EQ(restored_storage->container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1040,12 +1038,11 @@ TEST(CoordinationTest, TestStorageSnapshotMode)
|
|||||||
EXPECT_FALSE(storage.container.contains("/hello_" + std::to_string(i)));
|
EXPECT_FALSE(storage.container.contains("/hello_" + std::to_string(i)));
|
||||||
}
|
}
|
||||||
|
|
||||||
DB::NuKeeperStorage restored_storage(500);
|
auto [meta, restored_storage] = manager.restoreFromLatestSnapshot();
|
||||||
manager.restoreFromLatestSnapshot(&restored_storage);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < 50; ++i)
|
for (size_t i = 0; i < 50; ++i)
|
||||||
{
|
{
|
||||||
EXPECT_EQ(restored_storage.container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i));
|
EXPECT_EQ(restored_storage->container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -1071,8 +1068,7 @@ TEST(CoordinationTest, TestStorageSnapshotBroken)
|
|||||||
plain_buf.truncate(34);
|
plain_buf.truncate(34);
|
||||||
plain_buf.sync();
|
plain_buf.sync();
|
||||||
|
|
||||||
DB::NuKeeperStorage restored_storage(500);
|
EXPECT_THROW(manager.restoreFromLatestSnapshot(), DB::Exception);
|
||||||
EXPECT_THROW(manager.restoreFromLatestSnapshot(&restored_storage), DB::Exception);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
nuraft::ptr<nuraft::buffer> getBufferFromZKRequest(int64_t session_id, const Coordination::ZooKeeperRequestPtr & request)
|
nuraft::ptr<nuraft::buffer> getBufferFromZKRequest(int64_t session_id, const Coordination::ZooKeeperRequestPtr & request)
|
||||||
@ -1236,6 +1232,37 @@ TEST(CoordinationTest, TestStateMachineAndLogStore)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(CoordinationTest, TestEphemeralNodeRemove)
|
||||||
|
{
|
||||||
|
using namespace Coordination;
|
||||||
|
using namespace DB;
|
||||||
|
|
||||||
|
ChangelogDirTest snapshots("./snapshots");
|
||||||
|
CoordinationSettingsPtr settings = std::make_shared<CoordinationSettings>();
|
||||||
|
|
||||||
|
ResponsesQueue queue;
|
||||||
|
SnapshotsQueue snapshots_queue{1};
|
||||||
|
auto state_machine = std::make_shared<NuKeeperStateMachine>(queue, snapshots_queue, "./snapshots", settings);
|
||||||
|
state_machine->init();
|
||||||
|
|
||||||
|
std::shared_ptr<ZooKeeperCreateRequest> request_c = std::make_shared<ZooKeeperCreateRequest>();
|
||||||
|
request_c->path = "/hello";
|
||||||
|
request_c->is_ephemeral = true;
|
||||||
|
auto entry_c = getLogEntryFromZKRequest(0, 1, request_c);
|
||||||
|
state_machine->commit(1, entry_c->get_buf());
|
||||||
|
const auto & storage = state_machine->getStorage();
|
||||||
|
|
||||||
|
EXPECT_EQ(storage.ephemerals.size(), 1);
|
||||||
|
std::shared_ptr<ZooKeeperRemoveRequest> request_d = std::make_shared<ZooKeeperRemoveRequest>();
|
||||||
|
request_d->path = "/hello";
|
||||||
|
/// Delete from other session
|
||||||
|
auto entry_d = getLogEntryFromZKRequest(0, 2, request_d);
|
||||||
|
state_machine->commit(2, entry_d->get_buf());
|
||||||
|
|
||||||
|
EXPECT_EQ(storage.ephemerals.size(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int main(int argc, char ** argv)
|
int main(int argc, char ** argv)
|
||||||
{
|
{
|
||||||
Poco::AutoPtr<Poco::ConsoleChannel> channel(new Poco::ConsoleChannel(std::cerr));
|
Poco::AutoPtr<Poco::ConsoleChannel> channel(new Poco::ConsoleChannel(std::cerr));
|
||||||
|
@ -953,3 +953,26 @@ void writeFieldText(const Field & x, WriteBuffer & buf);
|
|||||||
String toString(const Field & x);
|
String toString(const Field & x);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct fmt::formatter<DB::Field>
|
||||||
|
{
|
||||||
|
constexpr auto parse(format_parse_context & ctx)
|
||||||
|
{
|
||||||
|
auto it = ctx.begin();
|
||||||
|
auto end = ctx.end();
|
||||||
|
|
||||||
|
/// Only support {}.
|
||||||
|
if (it != end && *it != '}')
|
||||||
|
throw format_error("invalid format");
|
||||||
|
|
||||||
|
return it;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename FormatContext>
|
||||||
|
auto format(const DB::Field & x, FormatContext & ctx)
|
||||||
|
{
|
||||||
|
return format_to(ctx.out(), "{}", toString(x));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
@ -6,7 +6,6 @@
|
|||||||
#include <IO/WriteHelpers.h>
|
#include <IO/WriteHelpers.h>
|
||||||
#include <IO/ReadBufferFromString.h>
|
#include <IO/ReadBufferFromString.h>
|
||||||
#include <IO/WriteBufferFromString.h>
|
#include <IO/WriteBufferFromString.h>
|
||||||
#include <sparsehash/dense_hash_map>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
|
@ -101,7 +101,7 @@ template <DictionaryKeyType dictionary_key_type>
|
|||||||
double CacheDictionary<dictionary_key_type>::getLoadFactor() const
|
double CacheDictionary<dictionary_key_type>::getLoadFactor() const
|
||||||
{
|
{
|
||||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||||
return static_cast<double>(cache_storage_ptr->getSize()) / cache_storage_ptr->getMaxSize();
|
return cache_storage_ptr->getLoadFactor();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <DictionaryKeyType dictionary_key_type>
|
template <DictionaryKeyType dictionary_key_type>
|
||||||
@ -333,9 +333,7 @@ Columns CacheDictionary<dictionary_key_type>::getColumnsImpl(
|
|||||||
FetchResult result_of_fetch_from_storage;
|
FetchResult result_of_fetch_from_storage;
|
||||||
|
|
||||||
{
|
{
|
||||||
/// Write lock on storage
|
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
|
||||||
const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
|
|
||||||
|
|
||||||
result_of_fetch_from_storage = cache_storage_ptr->fetchColumnsForKeys(keys, request);
|
result_of_fetch_from_storage = cache_storage_ptr->fetchColumnsForKeys(keys, request);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
#include <variant>
|
||||||
|
|
||||||
#include <pcg_random.hpp>
|
#include <pcg_random.hpp>
|
||||||
|
|
||||||
@ -30,28 +31,31 @@ struct CacheDictionaryStorageConfiguration
|
|||||||
const DictionaryLifetime lifetime;
|
const DictionaryLifetime lifetime;
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Keys are stored in LRUCache and column values are serialized into arena.
|
/** ICacheDictionaryStorage implementation that keeps key in hash table with fixed collision length.
|
||||||
|
* Value in hash table point to index in attributes arrays.
|
||||||
Cell in LRUCache consists of allocated size and place in arena were columns serialized data is stored.
|
*/
|
||||||
|
|
||||||
Columns are serialized by rows.
|
|
||||||
|
|
||||||
When cell is removed from LRUCache data associated with it is also removed from arena.
|
|
||||||
|
|
||||||
In case of complex key we also store key data in arena and it is removed from arena.
|
|
||||||
*/
|
|
||||||
template <DictionaryKeyType dictionary_key_type>
|
template <DictionaryKeyType dictionary_key_type>
|
||||||
class CacheDictionaryStorage final : public ICacheDictionaryStorage
|
class CacheDictionaryStorage final : public ICacheDictionaryStorage
|
||||||
{
|
{
|
||||||
|
|
||||||
|
static constexpr size_t max_collision_length = 10;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
using KeyType = std::conditional_t<dictionary_key_type == DictionaryKeyType::simple, UInt64, StringRef>;
|
using KeyType = std::conditional_t<dictionary_key_type == DictionaryKeyType::simple, UInt64, StringRef>;
|
||||||
static_assert(dictionary_key_type != DictionaryKeyType::range, "Range key type is not supported by CacheDictionaryStorage");
|
static_assert(dictionary_key_type != DictionaryKeyType::range, "Range key type is not supported by CacheDictionaryStorage");
|
||||||
|
|
||||||
explicit CacheDictionaryStorage(CacheDictionaryStorageConfiguration & configuration_)
|
explicit CacheDictionaryStorage(
|
||||||
|
const DictionaryStructure & dictionary_structure,
|
||||||
|
CacheDictionaryStorageConfiguration & configuration_)
|
||||||
: configuration(configuration_)
|
: configuration(configuration_)
|
||||||
, rnd_engine(randomSeed())
|
, rnd_engine(randomSeed())
|
||||||
, cache(configuration.max_size_in_cells, false, { arena })
|
|
||||||
{
|
{
|
||||||
|
size_t cells_size = roundUpToPowerOfTwoOrZero(std::max(configuration.max_size_in_cells, max_collision_length));
|
||||||
|
|
||||||
|
cells.resize_fill(cells_size);
|
||||||
|
size_overlap_mask = cells_size - 1;
|
||||||
|
|
||||||
|
setup(dictionary_structure);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool returnsFetchedColumnsInOrderOfRequestedKeys() const override { return true; }
|
bool returnsFetchedColumnsInOrderOfRequestedKeys() const override { return true; }
|
||||||
@ -71,9 +75,7 @@ public:
|
|||||||
const DictionaryStorageFetchRequest & fetch_request) override
|
const DictionaryStorageFetchRequest & fetch_request) override
|
||||||
{
|
{
|
||||||
if constexpr (dictionary_key_type == DictionaryKeyType::simple)
|
if constexpr (dictionary_key_type == DictionaryKeyType::simple)
|
||||||
{
|
|
||||||
return fetchColumnsForKeysImpl<SimpleKeysStorageFetchResult>(keys, fetch_request);
|
return fetchColumnsForKeysImpl<SimpleKeysStorageFetchResult>(keys, fetch_request);
|
||||||
}
|
|
||||||
else
|
else
|
||||||
throw Exception("Method fetchColumnsForKeys is not supported for complex key storage", ErrorCodes::NOT_IMPLEMENTED);
|
throw Exception("Method fetchColumnsForKeys is not supported for complex key storage", ErrorCodes::NOT_IMPLEMENTED);
|
||||||
}
|
}
|
||||||
@ -109,9 +111,7 @@ public:
|
|||||||
const DictionaryStorageFetchRequest & column_fetch_requests) override
|
const DictionaryStorageFetchRequest & column_fetch_requests) override
|
||||||
{
|
{
|
||||||
if constexpr (dictionary_key_type == DictionaryKeyType::complex)
|
if constexpr (dictionary_key_type == DictionaryKeyType::complex)
|
||||||
{
|
|
||||||
return fetchColumnsForKeysImpl<ComplexKeysStorageFetchResult>(keys, column_fetch_requests);
|
return fetchColumnsForKeysImpl<ComplexKeysStorageFetchResult>(keys, column_fetch_requests);
|
||||||
}
|
|
||||||
else
|
else
|
||||||
throw Exception("Method fetchColumnsForKeys is not supported for simple key storage", ErrorCodes::NOT_IMPLEMENTED);
|
throw Exception("Method fetchColumnsForKeys is not supported for simple key storage", ErrorCodes::NOT_IMPLEMENTED);
|
||||||
}
|
}
|
||||||
@ -140,79 +140,162 @@ public:
|
|||||||
throw Exception("Method getCachedComplexKeys is not supported for simple key storage", ErrorCodes::NOT_IMPLEMENTED);
|
throw Exception("Method getCachedComplexKeys is not supported for simple key storage", ErrorCodes::NOT_IMPLEMENTED);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t getSize() const override { return cache.size(); }
|
size_t getSize() const override { return size; }
|
||||||
|
|
||||||
size_t getMaxSize() const override { return cache.getMaxSize(); }
|
double getLoadFactor() const override { return static_cast<double>(size) / configuration.max_size_in_cells; }
|
||||||
|
|
||||||
size_t getBytesAllocated() const override { return arena.size() + cache.getSizeInBytes(); }
|
size_t getBytesAllocated() const override
|
||||||
|
{
|
||||||
|
size_t attributes_size_in_bytes = 0;
|
||||||
|
size_t attributes_size = attributes.size();
|
||||||
|
|
||||||
|
for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index)
|
||||||
|
{
|
||||||
|
getAttributeContainer(attribute_index, [&](const auto & container)
|
||||||
|
{
|
||||||
|
attributes_size_in_bytes += container.capacity() * sizeof(container[0]);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return arena.size() + sizeof(Cell) * configuration.max_size_in_cells + attributes_size_in_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
|
struct FetchedKey
|
||||||
|
{
|
||||||
|
FetchedKey(size_t element_index_, bool is_default_)
|
||||||
|
: element_index(element_index_)
|
||||||
|
, is_default(is_default_)
|
||||||
|
{}
|
||||||
|
|
||||||
|
size_t element_index;
|
||||||
|
bool is_default;
|
||||||
|
};
|
||||||
|
|
||||||
template <typename KeysStorageFetchResult>
|
template <typename KeysStorageFetchResult>
|
||||||
ALWAYS_INLINE KeysStorageFetchResult fetchColumnsForKeysImpl(
|
KeysStorageFetchResult fetchColumnsForKeysImpl(
|
||||||
const PaddedPODArray<KeyType> & keys,
|
const PaddedPODArray<KeyType> & keys,
|
||||||
const DictionaryStorageFetchRequest & fetch_request)
|
const DictionaryStorageFetchRequest & fetch_request)
|
||||||
{
|
{
|
||||||
KeysStorageFetchResult result;
|
KeysStorageFetchResult result;
|
||||||
|
|
||||||
result.fetched_columns = fetch_request.makeAttributesResultColumns();
|
result.fetched_columns = fetch_request.makeAttributesResultColumns();
|
||||||
result.key_index_to_state.resize_fill(keys.size(), {KeyState::not_found});
|
result.key_index_to_state.resize_fill(keys.size());
|
||||||
|
|
||||||
const auto now = std::chrono::system_clock::now();
|
const time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
|
||||||
|
|
||||||
size_t fetched_columns_index = 0;
|
size_t fetched_columns_index = 0;
|
||||||
|
size_t keys_size = keys.size();
|
||||||
|
|
||||||
std::chrono::seconds max_lifetime_seconds(configuration.strict_max_lifetime_seconds);
|
std::chrono::seconds max_lifetime_seconds(configuration.strict_max_lifetime_seconds);
|
||||||
|
|
||||||
size_t keys_size = keys.size();
|
PaddedPODArray<FetchedKey> fetched_keys;
|
||||||
|
fetched_keys.resize_fill(keys_size);
|
||||||
|
|
||||||
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
||||||
{
|
{
|
||||||
auto key = keys[key_index];
|
auto key = keys[key_index];
|
||||||
auto * it = cache.find(key);
|
auto [key_state, cell_index] = getKeyStateAndCellIndex(key, now);
|
||||||
|
|
||||||
if (it)
|
if (unlikely(key_state == KeyState::not_found))
|
||||||
{
|
{
|
||||||
/// Columns values for key are serialized in cache now deserialize them
|
result.key_index_to_state[key_index] = {KeyState::not_found};
|
||||||
const auto & cell = it->getMapped();
|
++result.not_found_keys_size;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
bool has_deadline = cellHasDeadline(cell);
|
auto & cell = cells[cell_index];
|
||||||
|
|
||||||
if (has_deadline && now > cell.deadline + max_lifetime_seconds)
|
result.expired_keys_size += static_cast<size_t>(key_state == KeyState::expired);
|
||||||
{
|
|
||||||
result.key_index_to_state[key_index] = {KeyState::not_found};
|
|
||||||
++result.not_found_keys_size;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
else if (has_deadline && now > cell.deadline)
|
|
||||||
{
|
|
||||||
result.key_index_to_state[key_index] = {KeyState::expired, fetched_columns_index};
|
|
||||||
++result.expired_keys_size;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
result.key_index_to_state[key_index] = {KeyState::found, fetched_columns_index};
|
|
||||||
++result.found_keys_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
++fetched_columns_index;
|
result.key_index_to_state[key_index] = {key_state, fetched_columns_index};
|
||||||
|
fetched_keys[fetched_columns_index] = FetchedKey(cell.element_index, cell.is_default);
|
||||||
|
|
||||||
if (cell.isDefault())
|
++fetched_columns_index;
|
||||||
|
|
||||||
|
result.key_index_to_state[key_index].setDefaultValue(cell.is_default);
|
||||||
|
result.default_keys_size += cell.is_default;
|
||||||
|
}
|
||||||
|
|
||||||
|
result.found_keys_size = keys_size - (result.expired_keys_size + result.not_found_keys_size);
|
||||||
|
|
||||||
|
for (size_t attribute_index = 0; attribute_index < fetch_request.attributesSize(); ++attribute_index)
|
||||||
|
{
|
||||||
|
if (!fetch_request.shouldFillResultColumnWithIndex(attribute_index))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
auto & attribute = attributes[attribute_index];
|
||||||
|
const auto & default_value_provider = fetch_request.defaultValueProviderAtIndex(attribute_index);
|
||||||
|
|
||||||
|
size_t fetched_keys_size = fetched_keys.size();
|
||||||
|
auto & fetched_column = *result.fetched_columns[attribute_index];
|
||||||
|
fetched_column.reserve(fetched_keys_size);
|
||||||
|
|
||||||
|
if (unlikely(attribute.is_complex_type))
|
||||||
|
{
|
||||||
|
auto & container = std::get<std::vector<Field>>(attribute.attribute_container);
|
||||||
|
|
||||||
|
for (size_t fetched_key_index = 0; fetched_key_index < fetched_columns_index; ++fetched_key_index)
|
||||||
{
|
{
|
||||||
result.key_index_to_state[key_index].setDefault();
|
auto fetched_key = fetched_keys[fetched_key_index];
|
||||||
++result.default_keys_size;
|
|
||||||
insertDefaultValuesIntoColumns(result.fetched_columns, fetch_request, key_index);
|
if (unlikely(fetched_key.is_default))
|
||||||
}
|
fetched_column.insert(default_value_provider.getDefaultValue(fetched_key_index));
|
||||||
else
|
else
|
||||||
{
|
fetched_column.insert(container[fetched_key.element_index]);
|
||||||
const char * place_for_serialized_columns = cell.place_for_serialized_columns;
|
|
||||||
deserializeAndInsertIntoColumns(result.fetched_columns, fetch_request, place_for_serialized_columns);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
result.key_index_to_state[key_index] = {KeyState::not_found};
|
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||||
++result.not_found_keys_size;
|
{
|
||||||
|
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||||
|
using AttributeType = typename Type::AttributeType;
|
||||||
|
using ValueType = DictionaryValueType<AttributeType>;
|
||||||
|
using ColumnType =
|
||||||
|
std::conditional_t<std::is_same_v<AttributeType, String>, ColumnString,
|
||||||
|
std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<ValueType>,
|
||||||
|
ColumnVector<AttributeType>>>;
|
||||||
|
|
||||||
|
auto & container = std::get<PaddedPODArray<ValueType>>(attribute.attribute_container);
|
||||||
|
ColumnType & column_typed = static_cast<ColumnType &>(fetched_column);
|
||||||
|
|
||||||
|
if constexpr (std::is_same_v<ColumnType, ColumnString>)
|
||||||
|
{
|
||||||
|
for (size_t fetched_key_index = 0; fetched_key_index < fetched_columns_index; ++fetched_key_index)
|
||||||
|
{
|
||||||
|
auto fetched_key = fetched_keys[fetched_key_index];
|
||||||
|
|
||||||
|
if (unlikely(fetched_key.is_default))
|
||||||
|
column_typed.insert(default_value_provider.getDefaultValue(fetched_key_index));
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto item = container[fetched_key.element_index];
|
||||||
|
column_typed.insertData(item.data, item.size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto & data = column_typed.getData();
|
||||||
|
|
||||||
|
for (size_t fetched_key_index = 0; fetched_key_index < fetched_columns_index; ++fetched_key_index)
|
||||||
|
{
|
||||||
|
auto fetched_key = fetched_keys[fetched_key_index];
|
||||||
|
|
||||||
|
if (unlikely(fetched_key.is_default))
|
||||||
|
column_typed.insert(default_value_provider.getDefaultValue(fetched_key_index));
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto item = container[fetched_key.element_index];
|
||||||
|
data.push_back(item);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -221,58 +304,108 @@ private:

     void insertColumnsForKeysImpl(const PaddedPODArray<KeyType> & keys, Columns columns)
     {
-        Arena temporary_values_pool;

-        size_t columns_to_serialize_size = columns.size();
-        PaddedPODArray<StringRef> temporary_column_data(columns_to_serialize_size);

         const auto now = std::chrono::system_clock::now();

-        size_t keys_size = keys.size();
+        Field column_value;

-        for (size_t key_index = 0; key_index < keys_size; ++key_index)
+        for (size_t key_index = 0; key_index < keys.size(); ++key_index)
         {
-            size_t allocated_size_for_columns = 0;
-            const char * block_start = nullptr;

             auto key = keys[key_index];
-            auto * it = cache.find(key);

-            for (size_t column_index = 0; column_index < columns_to_serialize_size; ++column_index)
+            size_t cell_index = getCellIndexForInsert(key);
+            auto & cell = cells[cell_index];

+            bool cell_was_default = cell.is_default;
+            cell.is_default = false;

+            bool was_inserted = cell.deadline == 0;

+            if (was_inserted)
             {
-                auto & column = columns[column_index];
-                temporary_column_data[column_index] = column->serializeValueIntoArena(key_index, temporary_values_pool, block_start);
-                allocated_size_for_columns += temporary_column_data[column_index].size;
-            }
+                if constexpr (std::is_same_v<KeyType, StringRef>)
+                    cell.key = copyStringInArena(key);
+                else
+                    cell.key = key;

-            char * place_for_serialized_columns = arena.alloc(allocated_size_for_columns);
-            memcpy(reinterpret_cast<void*>(place_for_serialized_columns), reinterpret_cast<const void*>(block_start), allocated_size_for_columns);
+                for (size_t attribute_index = 0; attribute_index < columns.size(); ++attribute_index)
+                {
+                    auto & column = columns[attribute_index];

-            if (it)
-            {
-                /// Cell exists need to free previous serialized place and update deadline
-                auto & cell = it->getMapped();
+                    getAttributeContainer(attribute_index, [&](auto & container)
+                    {
+                        container.emplace_back();
+                        cell.element_index = container.size() - 1;

-                if (cell.place_for_serialized_columns)
-                    arena.free(cell.place_for_serialized_columns, cell.allocated_size_for_columns);
+                        using ElementType = std::decay_t<decltype(container[0])>;

-                setCellDeadline(cell, now);
-                cell.allocated_size_for_columns = allocated_size_for_columns;
-                cell.place_for_serialized_columns = place_for_serialized_columns;
+                        column->get(key_index, column_value);

+                        if constexpr (std::is_same_v<ElementType, Field>)
+                            container.back() = column_value;
+                        else if constexpr (std::is_same_v<ElementType, StringRef>)
+                        {
+                            const String & string_value = column_value.get<String>();
+                            StringRef string_value_ref = StringRef {string_value.data(), string_value.size()};
+                            StringRef inserted_value = copyStringInArena(string_value_ref);
+                            container.back() = inserted_value;
+                        }
+                        else
+                            container.back() = column_value.get<NearestFieldType<ElementType>>();
+                    });
+                }

+                ++size;
             }
             else
             {
-                /// No cell exists so create and put in cache
-                Cell cell;
+                if (cell.key != key)
+                {
+                    if constexpr (std::is_same_v<KeyType, StringRef>)
+                    {
+                        char * data = const_cast<char *>(cell.key.data);
+                        arena.free(data, cell.key.size);
+                        cell.key = copyStringInArena(key);
+                    }
+                    else
+                        cell.key = key;
+                }

-                setCellDeadline(cell, now);
-                cell.allocated_size_for_columns = allocated_size_for_columns;
-                cell.place_for_serialized_columns = place_for_serialized_columns;
+                /// Put values into existing index
+                size_t index_to_use = cell.element_index;

-                insertCellInCache(key, cell);
+                for (size_t attribute_index = 0; attribute_index < columns.size(); ++attribute_index)
+                {
+                    auto & column = columns[attribute_index];

+                    getAttributeContainer(attribute_index, [&](auto & container)
+                    {
+                        using ElementType = std::decay_t<decltype(container[0])>;

+                        column->get(key_index, column_value);

+                        if constexpr (std::is_same_v<ElementType, Field>)
+                            container[index_to_use] = column_value;
+                        else if constexpr (std::is_same_v<ElementType, StringRef>)
+                        {
+                            const String & string_value = column_value.get<String>();
+                            StringRef string_ref_value = StringRef {string_value.data(), string_value.size()};
+                            StringRef inserted_value = copyStringInArena(string_ref_value);

+                            if (!cell_was_default)
+                            {
+                                StringRef previous_value = container[index_to_use];
+                                arena.free(const_cast<char *>(previous_value.data), previous_value.size);
+                            }

+                            container[index_to_use] = inserted_value;
+                        }
+                        else
+                            container[index_to_use] = column_value.get<NearestFieldType<ElementType>>();
+                    });
+                }
             }

-            temporary_values_pool.rollback(allocated_size_for_columns);
+            setCellDeadline(cell, now);
         }
     }

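Note on the pattern above: instead of serializing each row into an arena, the rewritten storage appends values to one typed container per attribute and lets the cell remember only its element_index. A minimal standalone sketch of that variant-dispatch idea (the Container alias and withContainer helper below are simplified stand-ins for the real ClickHouse types, not the actual API):

    #include <variant>
    #include <vector>
    #include <string>
    #include <cstdint>

    // Simplified stand-in for the variant of typed containers shown in the diff.
    using Container = std::variant<std::vector<uint64_t>, std::vector<std::string>>;

    struct Attribute { Container container; };

    // Dispatch a generic lambda to the concrete container type, in the spirit of
    // getAttributeContainer() above.
    template <typename Func>
    void withContainer(Attribute & attribute, Func && func)
    {
        std::visit([&](auto & container) { func(container); }, attribute.container);
    }

    int main()
    {
        Attribute values{std::vector<uint64_t>{}};
        size_t element_index = 0;

        withContainer(values, [&](auto & container)
        {
            container.emplace_back();          // new cell: append and remember the slot
            element_index = container.size() - 1;
        });

        return static_cast<int>(element_index);
    }
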
@@ -280,94 +413,224 @@ private:
     {
         const auto now = std::chrono::system_clock::now();

-        for (auto key : keys)
+        size_t keys_size = keys.size();

+        for (size_t key_index = 0; key_index < keys_size; ++key_index)
         {
-            auto * it = cache.find(key);
+            auto key = keys[key_index];

-            if (it)
+            size_t cell_index = getCellIndexForInsert(key);
+            auto & cell = cells[cell_index];

+            bool was_inserted = cell.deadline == 0;
+            bool cell_was_default = cell.is_default;

+            cell.is_default = true;

+            if (was_inserted)
             {
-                auto & cell = it->getMapped();
+                if constexpr (std::is_same_v<KeyType, StringRef>)
+                    cell.key = copyStringInArena(key);
+                else
+                    cell.key = key;

-                setCellDeadline(cell, now);
+                for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index)
+                {
+                    getAttributeContainer(attribute_index, [&](auto & container)
+                    {
+                        container.emplace_back();
+                        cell.element_index = container.size() - 1;
+                    });
+                }

-                if (cell.place_for_serialized_columns)
-                    arena.free(cell.place_for_serialized_columns, cell.allocated_size_for_columns);
+                ++size;

-                cell.allocated_size_for_columns = 0;
-                cell.place_for_serialized_columns = nullptr;
             }
             else
             {
-                Cell cell;
+                for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index)
+                {
+                    getAttributeContainer(attribute_index, [&](const auto & container)
+                    {
+                        using ElementType = std::decay_t<decltype(container[0])>;

-                setCellDeadline(cell, now);
-                cell.allocated_size_for_columns = 0;
-                cell.place_for_serialized_columns = nullptr;
+                        if constexpr (std::is_same_v<ElementType, StringRef>)
+                        {
+                            if (!cell_was_default)
+                            {
+                                StringRef previous_value = container[cell.element_index];
+                                arena.free(const_cast<char *>(previous_value.data), previous_value.size);
+                            }
+                        }
+                    });
+                }

-                insertCellInCache(key, cell);
+                if (cell.key != key)
+                {
+                    if constexpr (std::is_same_v<KeyType, StringRef>)
+                    {
+                        char * data = const_cast<char *>(cell.key.data);
+                        arena.free(data, cell.key.size);
+                        cell.key = copyStringInArena(key);
+                    }
+                    else
+                        cell.key = key;
+                }
             }

+            setCellDeadline(cell, now);
         }
     }

     PaddedPODArray<KeyType> getCachedKeysImpl() const
     {
         PaddedPODArray<KeyType> result;
-        result.reserve(cache.size());
+        result.reserve(size);

-        for (auto & node : cache)
+        for (auto & cell : cells)
         {
-            auto & cell = node.getMapped();
+            if (cell.deadline == 0)
+                continue;

-            if (cell.isDefault())
+            if (cell.is_default)
                 continue;

-            result.emplace_back(node.getKey());
+            result.emplace_back(cell.key);
         }

         return result;
     }

+    template <typename GetContainerFunc>
+    void getAttributeContainer(size_t attribute_index, GetContainerFunc && func)
+    {
+        auto & attribute = attributes[attribute_index];
+        auto & attribute_type = attribute.type;

+        if (unlikely(attribute.is_complex_type))
+        {
+            auto & container = std::get<std::vector<Field>>(attribute.attribute_container);
+            std::forward<GetContainerFunc>(func)(container);
+        }
+        else
+        {
+            auto type_call = [&](const auto & dictionary_attribute_type)
+            {
+                using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+                using AttributeType = typename Type::AttributeType;
+                using ValueType = DictionaryValueType<AttributeType>;

+                auto & container = std::get<PaddedPODArray<ValueType>>(attribute.attribute_container);
+                std::forward<GetContainerFunc>(func)(container);
+            };

+            callOnDictionaryAttributeType(attribute_type, type_call);
+        }
+    }

+    template <typename GetContainerFunc>
+    void getAttributeContainer(size_t attribute_index, GetContainerFunc && func) const
+    {
+        return const_cast<std::decay_t<decltype(*this)> *>(this)->template getAttributeContainer(attribute_index, std::forward<GetContainerFunc>(func));
+    }

+    StringRef copyStringInArena(StringRef value_to_copy)
+    {
+        size_t value_to_copy_size = value_to_copy.size;
+        char * place_for_key = arena.alloc(value_to_copy_size);
+        memcpy(reinterpret_cast<void *>(place_for_key), reinterpret_cast<const void *>(value_to_copy.data), value_to_copy_size);
+        StringRef updated_value{place_for_key, value_to_copy_size};

+        return updated_value;
+    }

+    void setup(const DictionaryStructure & dictionary_structure)
+    {
+        /// For each dictionary attribute create storage attribute
+        /// For simple attributes create PODArray, for complex vector of Fields

+        attributes.reserve(dictionary_structure.attributes.size());

+        for (const auto & dictionary_attribute : dictionary_structure.attributes)
+        {
+            auto attribute_type = dictionary_attribute.underlying_type;

+            auto type_call = [&](const auto & dictionary_attribute_type)
+            {
+                using Type = std::decay_t<decltype(dictionary_attribute_type)>;
+                using AttributeType = typename Type::AttributeType;
+                using ValueType = DictionaryValueType<AttributeType>;

+                attributes.emplace_back();
+                auto & last_attribute = attributes.back();
+                last_attribute.type = attribute_type;
+                last_attribute.is_complex_type = dictionary_attribute.is_nullable || dictionary_attribute.is_array;

+                if (dictionary_attribute.is_nullable)
+                    last_attribute.attribute_container = std::vector<Field>();
+                else
+                    last_attribute.attribute_container = PaddedPODArray<ValueType>();
+            };

+            callOnDictionaryAttributeType(attribute_type, type_call);
+        }
+    }

     using TimePoint = std::chrono::system_clock::time_point;

     struct Cell
     {
-        TimePoint deadline;
-        size_t allocated_size_for_columns;
-        char * place_for_serialized_columns;
+        KeyType key;
+        size_t element_index;
+        bool is_default;
+        time_t deadline;

-        inline bool isDefault() const { return place_for_serialized_columns == nullptr; }
-        inline void setDefault()
-        {
-            place_for_serialized_columns = nullptr;
-            allocated_size_for_columns = 0;
-        }
     };

-    void insertCellInCache(KeyType & key, const Cell & cell)
+    struct Attribute
     {
-        if constexpr (dictionary_key_type == DictionaryKeyType::complex)
-        {
-            /// Copy complex key into arena and put in cache
-            size_t key_size = key.size;
-            char * place_for_key = arena.alloc(key_size);
-            memcpy(reinterpret_cast<void *>(place_for_key), reinterpret_cast<const void *>(key.data), key_size);
-            KeyType updated_key{place_for_key, key_size};
-            key = updated_key;
-        }
+        AttributeUnderlyingType type;
+        bool is_complex_type;

-        cache.insert(key, cell);
-    }
+        std::variant<
+            PaddedPODArray<UInt8>,
+            PaddedPODArray<UInt16>,
+            PaddedPODArray<UInt32>,
+            PaddedPODArray<UInt64>,
+            PaddedPODArray<UInt128>,
+            PaddedPODArray<Int8>,
+            PaddedPODArray<Int16>,
+            PaddedPODArray<Int32>,
+            PaddedPODArray<Int64>,
+            PaddedPODArray<Decimal32>,
+            PaddedPODArray<Decimal64>,
+            PaddedPODArray<Decimal128>,
+            PaddedPODArray<Float32>,
+            PaddedPODArray<Float64>,
+            PaddedPODArray<StringRef>,
+            std::vector<Field>> attribute_container;
+    };

-    inline static bool cellHasDeadline(const Cell & cell)
-    {
-        return cell.deadline != std::chrono::system_clock::from_time_t(0);
-    }
+    CacheDictionaryStorageConfiguration configuration;

+    pcg64 rnd_engine;

+    size_t size_overlap_mask = 0;

+    size_t size = 0;

+    PaddedPODArray<Cell> cells;

+    ArenaWithFreeLists arena;

+    std::vector<Attribute> attributes;

     inline void setCellDeadline(Cell & cell, TimePoint now)
     {
         if (configuration.lifetime.min_sec == 0 && configuration.lifetime.max_sec == 0)
         {
-            cell.deadline = std::chrono::system_clock::from_time_t(0);
+            /// This maybe not obvious, but when we define is this cell is expired or expired permanently, we add strict_max_lifetime_seconds
+            /// to the expiration time. And it overflows pretty well.
+            auto deadline = std::chrono::time_point<std::chrono::system_clock>::max() - 2 * std::chrono::seconds(configuration.strict_max_lifetime_seconds);
+            cell.deadline = std::chrono::system_clock::to_time_t(deadline);
             return;
         }

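The Cell fields above encode state compactly: a zero deadline marks a never-written slot, and is_default is tracked separately so default cells are not confused with real values. A reduced sketch, assuming the same field meanings:

    #include <ctime>
    #include <cstddef>

    struct Cell
    {
        size_t element_index = 0;
        bool is_default = false;
        time_t deadline = 0;   /// 0 means the cell was never written
    };

    bool wasInserted(const Cell & cell) { return cell.deadline == 0; }
    bool isExpired(const Cell & cell, time_t now) { return now > cell.deadline; }
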
@@ -375,44 +638,75 @@ private:
         size_t max_sec_lifetime = configuration.lifetime.max_sec;

         std::uniform_int_distribution<UInt64> distribution{min_sec_lifetime, max_sec_lifetime};
-        cell.deadline = now + std::chrono::seconds(distribution(rnd_engine));
+
+        auto deadline = now + std::chrono::seconds(distribution(rnd_engine));
+        cell.deadline = std::chrono::system_clock::to_time_t(deadline);
     }

-    template <typename>
-    friend class ArenaCellDisposer;
-
-    CacheDictionaryStorageConfiguration configuration;
-
-    ArenaWithFreeLists arena;
-
-    pcg64 rnd_engine;
-
-    class ArenaCellDisposer
+    inline size_t getCellIndex(const KeyType key) const
     {
-    public:
-        ArenaWithFreeLists & arena;
+        const size_t hash = DefaultHash<KeyType>()(key);
+        const size_t index = hash & size_overlap_mask;
+        return index;
+    }

-        template <typename Key, typename Value>
-        void operator()(const Key & key, const Value & value) const
+    using KeyStateAndCellIndex = std::pair<KeyState::State, size_t>;

+    inline KeyStateAndCellIndex getKeyStateAndCellIndex(const KeyType key, const time_t now) const
+    {
+        size_t place_value = getCellIndex(key);
+        const size_t place_value_end = place_value + max_collision_length;

+        time_t max_lifetime_seconds = static_cast<time_t>(configuration.strict_max_lifetime_seconds);

+        for (; place_value < place_value_end; ++place_value)
         {
-            /// In case of complex key we keep it in arena
-            if constexpr (std::is_same_v<Key, StringRef>)
-                arena.free(const_cast<char *>(key.data), key.size);
+            const auto cell_place_value = place_value & size_overlap_mask;
+            const auto & cell = cells[cell_place_value];

-            if (value.place_for_serialized_columns)
-                arena.free(value.place_for_serialized_columns, value.allocated_size_for_columns);
+            if (cell.key != key)
+                continue;

+            if (unlikely(now > cell.deadline + max_lifetime_seconds))
+                return std::make_pair(KeyState::not_found, cell_place_value);

+            if (unlikely(now > cell.deadline))
+                return std::make_pair(KeyState::expired, cell_place_value);

+            return std::make_pair(KeyState::found, cell_place_value);
         }
-    };

-    using SimpleKeyLRUHashMap = LRUHashMap<UInt64, Cell, ArenaCellDisposer>;
-    using ComplexKeyLRUHashMap = LRUHashMapWithSavedHash<StringRef, Cell, ArenaCellDisposer>;
+        return std::make_pair(KeyState::not_found, place_value & size_overlap_mask);
+    }

-    using CacheLRUHashMap = std::conditional_t<
-        dictionary_key_type == DictionaryKeyType::simple,
-        SimpleKeyLRUHashMap,
-        ComplexKeyLRUHashMap>;
+    inline size_t getCellIndexForInsert(const KeyType & key) const
+    {
+        size_t place_value = getCellIndex(key);
+        const size_t place_value_end = place_value + max_collision_length;
+        size_t oldest_place_value = place_value;

-    CacheLRUHashMap cache;
+        time_t oldest_time = std::numeric_limits<time_t>::max();

+        for (; place_value < place_value_end; ++place_value)
+        {
+            const size_t cell_place_value = place_value & size_overlap_mask;
+            const Cell cell = cells[cell_place_value];

+            if (cell.deadline == 0)
+                return cell_place_value;

+            if (cell.key == key)
+                return cell_place_value;

+            if (cell.deadline < oldest_time)
+            {
+                oldest_time = cell.deadline;
+                oldest_place_value = cell_place_value;
+            }
+        }

+        return oldest_place_value;
+    }
 };

 }

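getCellIndex() above assumes the cells array size is a power of two, so the bitwise AND with size_overlap_mask replaces a modulo, and lookups probe at most max_collision_length consecutive slots. A small self-contained sketch of the same probing scheme (how size_overlap_mask is initialized is not shown in this diff, so the mask handling and the 0-means-empty convention here are assumptions):

    #include <cstddef>
    #include <functional>
    #include <vector>

    static constexpr size_t max_collision_length = 10;  // assumed bound, mirrors the probing limit above

    // For a power-of-two table size, (hash & mask) == (hash % size).
    size_t cellIndex(size_t hash, size_t size_overlap_mask) { return hash & size_overlap_mask; }

    // Linear probing over at most max_collision_length slots, wrapping via the mask.
    size_t findSlot(const std::vector<size_t> & keys, size_t key, size_t mask)
    {
        size_t place = cellIndex(std::hash<size_t>()(key), mask);
        for (size_t i = 0; i < max_collision_length; ++i)
        {
            size_t slot = (place + i) & mask;
            if (keys[slot] == key || keys[slot] == 0)  // found, or hit an empty slot
                return slot;
        }
        return place & mask;  // probe chain exhausted: caller evicts the oldest cell
    }
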
@@ -12,9 +12,9 @@ struct KeyState
 {
     enum State: uint8_t
     {
-        not_found = 2,
-        expired = 4,
-        found = 8,
+        not_found = 0,
+        expired = 1,
+        found = 2,
     };

     KeyState(State state_, size_t fetched_column_index_)
@@ -31,9 +31,10 @@ struct KeyState
     inline bool isNotFound() const { return state == State::not_found; }
     inline bool isDefault() const { return is_default; }
     inline void setDefault() { is_default = true; }
+    inline void setDefaultValue(bool is_default_value) { is_default = is_default_value; }
     /// Valid only if keyState is found or expired
     inline size_t getFetchedColumnIndex() const { return fetched_column_index; }
+    inline void setFetchedColumnIndex(size_t fetched_column_index_value) { fetched_column_index = fetched_column_index_value; }
 private:
     State state = not_found;
     size_t fetched_column_index = 0;
@@ -111,8 +112,8 @@ public:
     /// Return size of keys in storage
     virtual size_t getSize() const = 0;

-    /// Return maximum size of keys in storage
-    virtual size_t getMaxSize() const = 0;
+    /// Returns storage load factor
+    virtual double getLoadFactor() const = 0;

     /// Return bytes allocated in storage
     virtual size_t getBytesAllocated() const = 0;
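The enum above moves from power-of-two flags to plain sequential values, which lets the storage hand back the state together with a cell index as an ordinary pair. A compact sketch of that shape, with types reduced to the essentials:

    #include <cstdint>
    #include <cstddef>
    #include <ctime>
    #include <utility>

    enum State : uint8_t { not_found = 0, expired = 1, found = 2 };

    using KeyStateAndCellIndex = std::pair<State, size_t>;

    KeyStateAndCellIndex probe(time_t deadline, time_t now, size_t cell_index, time_t strict_max)
    {
        if (now > deadline + strict_max)
            return {not_found, cell_index};   // expired permanently
        if (now > deadline)
            return {expired, cell_index};     // stale but still readable
        return {found, cell_index};
    }
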
@@ -17,7 +17,7 @@
 #include <Common/Arena.h>
 #include <Common/ArenaWithFreeLists.h>
 #include <Common/MemorySanitizer.h>
-#include <Common/HashTable/LRUHashMap.h>
+#include <Common/HashTable/HashMap.h>
 #include <IO/AIO.h>
 #include <Dictionaries/DictionaryStructure.h>
 #include <Dictionaries/ICacheDictionaryStorage.h>
@@ -56,7 +56,6 @@ struct SSDCacheDictionaryStorageConfiguration

     const std::string file_path;
     const size_t max_partitions_count;
-    const size_t max_stored_keys;
     const size_t block_size;
     const size_t file_blocks_size;
     const size_t read_buffer_blocks_size;
@@ -127,7 +126,7 @@ public:

     /// Reset block with new block_data
     /// block_data must be filled with zeroes if it is new block
-    ALWAYS_INLINE inline void reset(char * new_block_data)
+    inline void reset(char * new_block_data)
     {
         block_data = new_block_data;
         current_block_offset = block_header_size;
@@ -135,13 +134,13 @@ public:
     }

     /// Check if it is enough place to write key in block
-    ALWAYS_INLINE inline bool enoughtPlaceToWriteKey(const SSDCacheSimpleKey & cache_key) const
+    inline bool enoughtPlaceToWriteKey(const SSDCacheSimpleKey & cache_key) const
     {
         return (current_block_offset + (sizeof(cache_key.key) + sizeof(cache_key.size) + cache_key.size)) <= block_size;
     }

     /// Check if it is enough place to write key in block
-    ALWAYS_INLINE inline bool enoughtPlaceToWriteKey(const SSDCacheComplexKey & cache_key) const
+    inline bool enoughtPlaceToWriteKey(const SSDCacheComplexKey & cache_key) const
     {
         const StringRef & key = cache_key.key;
         size_t complex_key_size = sizeof(key.size) + key.size;
@@ -152,7 +151,7 @@ public:
     /// Write key and returns offset in ssd cache block where data is written
     /// It is client responsibility to check if there is enough place in block to write key
     /// Returns true if key was written and false if there was not enough place to write key
-    ALWAYS_INLINE inline bool writeKey(const SSDCacheSimpleKey & cache_key, size_t & offset_in_block)
+    inline bool writeKey(const SSDCacheSimpleKey & cache_key, size_t & offset_in_block)
     {
         assert(cache_key.size > 0);

@@ -181,7 +180,7 @@ public:
         return true;
     }

-    ALWAYS_INLINE inline bool writeKey(const SSDCacheComplexKey & cache_key, size_t & offset_in_block)
+    inline bool writeKey(const SSDCacheComplexKey & cache_key, size_t & offset_in_block)
     {
         assert(cache_key.size > 0);

@@ -216,20 +215,20 @@ public:
         return true;
     }

-    ALWAYS_INLINE inline size_t getKeysSize() const { return keys_size; }
+    inline size_t getKeysSize() const { return keys_size; }

     /// Write keys size into block header
-    ALWAYS_INLINE inline void writeKeysSize()
+    inline void writeKeysSize()
     {
         char * keys_size_offset_data = block_data + block_header_check_sum_size;
         std::memcpy(keys_size_offset_data, &keys_size, sizeof(size_t));
     }

     /// Get check sum from block header
-    ALWAYS_INLINE inline size_t getCheckSum() const { return unalignedLoad<size_t>(block_data); }
+    inline size_t getCheckSum() const { return unalignedLoad<size_t>(block_data); }

     /// Calculate check sum in block
-    ALWAYS_INLINE inline size_t calculateCheckSum() const
+    inline size_t calculateCheckSum() const
     {
         size_t calculated_check_sum = static_cast<size_t>(CityHash_v1_0_2::CityHash64(block_data + block_header_check_sum_size, block_size - block_header_check_sum_size));

@@ -237,7 +236,7 @@ public:
     }

     /// Check if check sum from block header matched calculated check sum in block
-    ALWAYS_INLINE inline bool checkCheckSum() const
+    inline bool checkCheckSum() const
     {
         size_t calculated_check_sum = calculateCheckSum();
         size_t check_sum = getCheckSum();
@@ -246,16 +245,16 @@ public:
     }

     /// Write check sum in block header
-    ALWAYS_INLINE inline void writeCheckSum()
+    inline void writeCheckSum()
     {
         size_t check_sum = static_cast<size_t>(CityHash_v1_0_2::CityHash64(block_data + block_header_check_sum_size, block_size - block_header_check_sum_size));
         std::memcpy(block_data, &check_sum, sizeof(size_t));
     }

-    ALWAYS_INLINE inline size_t getBlockSize() const { return block_size; }
+    inline size_t getBlockSize() const { return block_size; }

     /// Returns block data
-    ALWAYS_INLINE inline char * getBlockData() const { return block_data; }
+    inline char * getBlockData() const { return block_data; }

     /// Read keys that were serialized in block
     /// It is client responsibility to ensure that simple or complex keys were written in block
@@ -337,9 +336,7 @@ inline bool operator==(const SSDCacheIndex & lhs, const SSDCacheIndex & rhs)
     return lhs.block_index == rhs.block_index && lhs.offset_in_block == rhs.offset_in_block;
 }

-/** SSDCacheMemoryBuffer initialized with block size and memory buffer blocks size.
- * Allocate block_size * memory_buffer_blocks_size bytes with page alignment.
- * Logically represents multiple memory_buffer_blocks_size blocks and current write block.
+/** Logically represents multiple memory_buffer_blocks_size SSDCacheBlocks and current write block.
  * If key cannot be written into current_write_block, current block keys size and check summ is written
  * and buffer increase index of current_write_block_index.
  * If current_write_block_index == memory_buffer_blocks_size write key will always returns true.
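The block routines above imply a fixed header layout: an 8-byte check sum first, then the keys count, with the check sum covering everything after its own field. A minimal sketch of that arithmetic (std::hash stands in here for the CityHash64 call used in the real code):

    #include <cstddef>
    #include <cstring>
    #include <functional>
    #include <string_view>

    static constexpr size_t block_header_check_sum_size = sizeof(size_t);

    // Placeholder for CityHash_v1_0_2::CityHash64 from the diff.
    size_t hashBytes(const char * data, size_t size) { return std::hash<std::string_view>()({data, size}); }

    void writeCheckSum(char * block_data, size_t block_size)
    {
        /// The check sum is computed over the block body, i.e. everything after the check sum field itself.
        size_t check_sum = hashBytes(block_data + block_header_check_sum_size, block_size - block_header_check_sum_size);
        std::memcpy(block_data, &check_sum, sizeof(size_t));
    }

    bool checkCheckSum(const char * block_data, size_t block_size)
    {
        size_t stored;
        std::memcpy(&stored, block_data, sizeof(size_t));
        return stored == hashBytes(block_data + block_header_check_sum_size, block_size - block_header_check_sum_size);
    }
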
@@ -444,7 +441,7 @@ private:
     size_t current_block_index = 0;
 };

-/// TODO: Add documentation
+/// Logically represents multiple memory_buffer_blocks_size SSDCacheBlocks on file system
 template <typename SSDCacheKeyType>
 class SSDCacheFileBuffer : private boost::noncopyable
 {
@@ -614,11 +611,13 @@ public:
     }

     template <typename FetchBlockFunc>
-    ALWAYS_INLINE void fetchBlocks(char * read_buffer, size_t read_from_file_buffer_blocks_size, const PaddedPODArray<size_t> & blocks_to_fetch, FetchBlockFunc && func) const
+    void fetchBlocks(size_t read_from_file_buffer_blocks_size, const PaddedPODArray<size_t> & blocks_to_fetch, FetchBlockFunc && func) const
     {
         if (blocks_to_fetch.empty())
             return;

+        Memory<Allocator<true>> read_buffer(read_from_file_buffer_blocks_size * block_size, 4096);

         size_t blocks_to_fetch_size = blocks_to_fetch.size();

         PaddedPODArray<iocb> requests;
@@ -631,7 +630,7 @@ public:
         {
             iocb request{};

-            char * buffer_place = read_buffer + block_size * (block_to_fetch_index % read_from_file_buffer_blocks_size);
+            char * buffer_place = read_buffer.data() + block_size * (block_to_fetch_index % read_from_file_buffer_blocks_size);

 #if defined(__FreeBSD__)
             request.aio.aio_lio_opcode = LIO_READ;
@@ -751,7 +750,7 @@ private:
     int fd = -1;
 };

-ALWAYS_INLINE inline static int preallocateDiskSpace(int fd, size_t offset, size_t len)
+inline static int preallocateDiskSpace(int fd, size_t offset, size_t len)
 {
 #if defined(__FreeBSD__)
     return posix_fallocate(fd, offset, len);
@@ -760,7 +759,7 @@ private:
 #endif
 }

-ALWAYS_INLINE inline static char * getRequestBuffer(const iocb & request)
+inline static char * getRequestBuffer(const iocb & request)
 {
     char * result = nullptr;

@@ -773,7 +772,7 @@ private:
     return result;
 }

-ALWAYS_INLINE inline static ssize_t eventResult(io_event & event)
+inline static ssize_t eventResult(io_event & event)
 {
     ssize_t bytes_written;

@@ -795,7 +794,13 @@ private:
     size_t current_blocks_size = 0;
 };

-/// TODO: Add documentation
+/** ICacheDictionaryStorage implementation that keeps column data serialized in memory index and in disk partitions.
+  * Data is first written in memory buffer.
+  * If memory buffer is full then buffer is flushed to disk partition.
+  * If memory buffer cannot be flushed to associated disk partition, then if partition
+  * can be allocated (current partition index < max_partitions_size) storage allocates new partition, if not old partitions are reused.
+  * Index maps key to partition block and offset.
+  */
 template <DictionaryKeyType dictionary_key_type>
 class SSDCacheDictionaryStorage final : public ICacheDictionaryStorage
 {
@@ -806,9 +811,7 @@ public:
     explicit SSDCacheDictionaryStorage(const SSDCacheDictionaryStorageConfiguration & configuration_)
         : configuration(configuration_)
         , file_buffer(configuration_.file_path, configuration.block_size, configuration.file_blocks_size)
-        , read_from_file_buffer(configuration_.block_size * configuration_.read_buffer_blocks_size, 4096)
         , rnd_engine(randomSeed())
-        , index(configuration.max_stored_keys, false, { complex_key_arena })
     {
         memory_buffer_partitions.emplace_back(configuration.block_size, configuration.write_buffer_blocks_size);
     }
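fetchBlocks() now allocates its page-aligned read buffer per call rather than keeping a long-lived member, so the memory only exists while blocks are actually being read. A rough standard-library equivalent of that pattern (std::aligned_alloc is a stand-in for the Memory<Allocator<true>> helper, and the AIO submission is elided):

    #include <cstdlib>
    #include <cstddef>
    #include <memory>

    // Page-aligned scratch buffer that lives only for the duration of one fetch.
    void fetchBlocks(size_t read_buffer_blocks, size_t block_size)
    {
        size_t bytes = read_buffer_blocks * block_size;
        // std::aligned_alloc requires the size to be a multiple of the alignment;
        // block sizes here are multiples of 4096, so this holds.
        char * raw = static_cast<char *>(std::aligned_alloc(4096, bytes));
        std::unique_ptr<char, decltype(&std::free)> read_buffer(raw, &std::free);

        // ... submit AIO requests that read into read_buffer.get() + offset ...
    }
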
@@ -897,14 +900,31 @@ public:

     size_t getSize() const override { return index.size(); }

-    size_t getMaxSize() const override { return index.getMaxSize(); }
+    double getLoadFactor() const override
+    {
+        size_t partitions_size = memory_buffer_partitions.size();

+        if (partitions_size == configuration.max_partitions_count)
+            return 1.0;

+        auto & current_memory_partition = memory_buffer_partitions[current_partition_index];

+        size_t full_partitions = partitions_size - 1;
+        size_t blocks_in_memory = (full_partitions * configuration.write_buffer_blocks_size) + current_memory_partition.getCurrentBlockIndex();
+        size_t blocks_on_disk = file_buffer.getCurrentBlockIndex();

+        size_t max_blocks_size = (configuration.file_blocks_size + configuration.write_buffer_blocks_size) * configuration.max_partitions_count;

+        double load_factor = static_cast<double>(blocks_in_memory + blocks_on_disk) / max_blocks_size;
+        return load_factor;
+    }

     size_t getBytesAllocated() const override
     {
         size_t memory_partitions_bytes_size = memory_buffer_partitions.size() * configuration.write_buffer_blocks_size * configuration.block_size;
         size_t file_partitions_bytes_size = memory_buffer_partitions.size() * configuration.file_blocks_size * configuration.block_size;

-        return index.getSizeInBytes() + memory_partitions_bytes_size + file_partitions_bytes_size;
+        return index.getBufferSizeInBytes() + memory_partitions_bytes_size + file_partitions_bytes_size;
     }

 private:
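The load factor above is defined over blocks rather than stored keys: blocks already written (full partitions plus the current partition's write position, plus blocks flushed to disk) divided by the maximum number of blocks all partitions could ever hold. For illustration, taking block parameters consistent with the defaults elsewhere in this change (write_buffer_blocks_size = 80, file_blocks_size = 4096, max_partitions_count = 16) and assuming 3 allocated partitions with 40 blocks written in the current one and 8192 blocks on disk:

    max_blocks_size  = (4096 + 80) * 16 = 66816
    blocks_in_memory = 2 * 80 + 40      = 200
    load_factor      = (200 + 8192) / 66816 ≈ 0.126
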
@@ -920,8 +940,7 @@ private:
         default_value
     };

-    TimePoint deadline;
+    time_t deadline;

     SSDCacheIndex index;
     size_t in_memory_partition_index;
     CellState state;
@@ -933,13 +952,12 @@ private:

     struct KeyToBlockOffset
     {
-        KeyToBlockOffset(size_t key_index_, size_t offset_in_block_, bool is_expired_)
-            : key_index(key_index_), offset_in_block(offset_in_block_), is_expired(is_expired_)
+        KeyToBlockOffset(size_t key_index_, size_t offset_in_block_)
+            : key_index(key_index_), offset_in_block(offset_in_block_)
         {}

         size_t key_index = 0;
         size_t offset_in_block = 0;
-        bool is_expired = false;
     };

     template <typename Result>
@@ -950,20 +968,24 @@ private:
         Result result;

         result.fetched_columns = fetch_request.makeAttributesResultColumns();
-        result.key_index_to_state.resize_fill(keys.size(), {KeyState::not_found});
+        result.key_index_to_state.resize_fill(keys.size());

-        const auto now = std::chrono::system_clock::now();
+        const time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());

         size_t fetched_columns_index = 0;

-        using BlockIndexToKeysMap = std::unordered_map<size_t, std::vector<KeyToBlockOffset>, DefaultHash<size_t>>;
+        using BlockIndexToKeysMap = absl::flat_hash_map<size_t, PaddedPODArray<KeyToBlockOffset>, DefaultHash<size_t>>;
         BlockIndexToKeysMap block_to_keys_map;
         absl::flat_hash_set<size_t, DefaultHash<size_t>> unique_blocks_to_request;
         PaddedPODArray<size_t> blocks_to_request;

-        std::chrono::seconds strict_max_lifetime_seconds(configuration.strict_max_lifetime_seconds);
+        time_t strict_max_lifetime_seconds = static_cast<time_t>(configuration.strict_max_lifetime_seconds);
         size_t keys_size = keys.size();

+        for (size_t attribute_size = 0; attribute_size < fetch_request.attributesSize(); ++attribute_size)
+            if (fetch_request.shouldFillResultColumnWithIndex(attribute_size))
+                result.fetched_columns[attribute_size]->reserve(keys_size);

         for (size_t key_index = 0; key_index < keys_size; ++key_index)
         {
             auto key = keys[key_index];
@@ -978,9 +1000,7 @@ private:

             const auto & cell = it->getMapped();

-            bool has_deadline = cellHasDeadline(cell);
-
-            if (has_deadline && now > cell.deadline + strict_max_lifetime_seconds)
+            if (unlikely(now > cell.deadline + strict_max_lifetime_seconds))
             {
                 ++result.not_found_keys_size;
                 continue;
@@ -989,14 +1009,14 @@ private:
             bool cell_is_expired = false;
             KeyState::State key_state = KeyState::found;

-            if (has_deadline && now > cell.deadline)
+            if (now > cell.deadline)
             {
                 cell_is_expired = true;
                 key_state = KeyState::expired;
             }

-            result.expired_keys_size += cell_is_expired;
-            result.found_keys_size += !cell_is_expired;
+            result.expired_keys_size += static_cast<size_t>(cell_is_expired);
+            result.found_keys_size += static_cast<size_t>(!cell_is_expired);

             switch (cell.state)
             {
@@ -1012,13 +1032,20 @@ private:
                 }
                 case Cell::on_disk:
                 {
-                    block_to_keys_map[cell.index.block_index].emplace_back(key_index, cell.index.offset_in_block, cell_is_expired);
+                    PaddedPODArray<KeyToBlockOffset> & keys_block = block_to_keys_map[cell.index.block_index];
+                    keys_block.emplace_back(key_index, cell.index.offset_in_block);

-                    if (!unique_blocks_to_request.contains(cell.index.block_index))
-                    {
+                    KeyState::State state = cell_is_expired ? KeyState::expired : KeyState::found;

+                    /// Fetched column index will be set later during fetch blocks
+                    result.key_index_to_state[key_index] = {state, 0};

+                    auto insert_result = unique_blocks_to_request.insert(cell.index.block_index);
+                    bool was_inserted = insert_result.second;

+                    if (was_inserted)
                         blocks_to_request.emplace_back(cell.index.block_index);
-                        unique_blocks_to_request.insert(cell.index.block_index);
-                    }

                     break;
                 }
                 case Cell::default_value:
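The on_disk branch above deduplicates blocks with the pair returned by flat_hash_set::insert, avoiding the previous contains-then-insert double lookup. The same single-lookup pattern with a standard container:

    #include <unordered_set>
    #include <vector>
    #include <cstddef>

    void requestOnce(std::unordered_set<size_t> & unique_blocks, std::vector<size_t> & blocks_to_request, size_t block_index)
    {
        // insert().second is true only the first time this block is seen.
        if (unique_blocks.insert(block_index).second)
            blocks_to_request.push_back(block_index);
    }
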
@@ -1037,7 +1064,7 @@ private:
         /// Sort blocks by offset before start async io requests
         std::sort(blocks_to_request.begin(), blocks_to_request.end());

-        file_buffer.fetchBlocks(read_from_file_buffer.m_data, configuration.read_buffer_blocks_size, blocks_to_request, [&](size_t block_index, char * block_data)
+        file_buffer.fetchBlocks(configuration.read_buffer_blocks_size, blocks_to_request, [&](size_t block_index, char * block_data)
         {
             auto & keys_in_block = block_to_keys_map[block_index];

@@ -1046,10 +1073,7 @@ private:
                 char * key_data = block_data + key_in_block.offset_in_block;
                 deserializeAndInsertIntoColumns(result.fetched_columns, fetch_request, key_data);

-                if (key_in_block.is_expired)
-                    result.key_index_to_state[key_in_block.key_index] = {KeyState::expired, fetched_columns_index};
-                else
-                    result.key_index_to_state[key_in_block.key_index] = {KeyState::found, fetched_columns_index};
+                result.key_index_to_state[key_in_block.key_index].setFetchedColumnIndex(fetched_columns_index);

                 ++fetched_columns_index;
             }
@@ -1087,7 +1111,7 @@ private:
             throw Exception("Serialized columns size is greater than allowed block size and metadata", ErrorCodes::UNSUPPORTED_METHOD);

         /// We cannot reuse place that is already allocated in file or memory cache so we erase key from index
-        index.erase(key);
+        eraseKeyFromIndex(key);

         Cell cell;
         setCellDeadline(cell, now);
@@ -1114,8 +1138,7 @@ private:

         for (auto key : keys)
         {
-            /// We cannot reuse place that is already allocated in file or memory cache so we erase key from index
-            index.erase(key);
+            eraseKeyFromIndex(key);

             Cell cell;

@@ -1135,7 +1158,7 @@ private:
                 key = updated_key;
             }

-            index.insert(key, cell);
+            index[key] = cell;
         }
     }

@@ -1188,7 +1211,7 @@ private:
                 cell.index = cache_index;
                 cell.in_memory_partition_index = current_partition_index;

-                index.insert(ssd_cache_key.key, cell);
+                index[ssd_cache_key.key] = cell;
                 break;
             }
             else
@@ -1218,7 +1241,7 @@ private:
                     if (old_key_cell.isOnDisk() &&
                         old_key_block >= block_index_in_file_before_write &&
                         old_key_block < file_read_end_block_index)
-                        index.erase(old_key);
+                        eraseKeyFromIndex(old_key);
                 }
             }
         }
@@ -1271,7 +1294,7 @@ private:
                 cell.index = cache_index;
                 cell.in_memory_partition_index = current_partition_index;

-                index.insert(ssd_cache_key.key, cell);
+                index[ssd_cache_key.key] = cell;
                 break;
             }
             else
@@ -1296,16 +1319,12 @@ private:
             }
         }

-    inline static bool cellHasDeadline(const Cell & cell)
-    {
-        return cell.deadline != std::chrono::system_clock::from_time_t(0);
-    }
-
     inline void setCellDeadline(Cell & cell, TimePoint now)
     {
         if (configuration.lifetime.min_sec == 0 && configuration.lifetime.max_sec == 0)
         {
-            cell.deadline = std::chrono::system_clock::from_time_t(0);
+            auto deadline = std::chrono::time_point<std::chrono::system_clock>::max() - 2 * std::chrono::seconds(configuration.strict_max_lifetime_seconds);
+            cell.deadline = std::chrono::system_clock::to_time_t(deadline);
             return;
         }

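Both storages now keep deadlines as time_t, and "never expires" becomes a point far in the future instead of zero; backing it off by twice strict_max_lifetime_seconds keeps later now > deadline + strict_max_lifetime_seconds checks from overflowing. A sketch of that guard, assuming the same configuration field:

    #include <chrono>
    #include <cstddef>
    #include <ctime>

    time_t neverExpiresDeadline(size_t strict_max_lifetime_seconds)
    {
        /// Leave headroom so that deadline + strict_max_lifetime_seconds cannot overflow.
        auto deadline = std::chrono::time_point<std::chrono::system_clock>::max()
            - 2 * std::chrono::seconds(strict_max_lifetime_seconds);
        return std::chrono::system_clock::to_time_t(deadline);
    }
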
@@ -1313,47 +1332,45 @@ private:
         size_t max_sec_lifetime = configuration.lifetime.max_sec;

         std::uniform_int_distribution<UInt64> distribution{min_sec_lifetime, max_sec_lifetime};
-        cell.deadline = now + std::chrono::seconds{distribution(rnd_engine)};
+        auto deadline = now + std::chrono::seconds(distribution(rnd_engine));
+        cell.deadline = std::chrono::system_clock::to_time_t(deadline);
     }

-    template <typename>
-    friend class ArenaCellKeyDisposer;
+    inline void eraseKeyFromIndex(KeyType key)
+    {
+        auto it = index.find(key);

+        if (it == nullptr)
+            return;

+        /// In case of complex key in arena key is serialized from hash table
+        KeyType key_copy = it->getKey();

+        index.erase(key);

+        if constexpr (std::is_same_v<KeyType, StringRef>)
+            complex_key_arena.free(const_cast<char *>(key_copy.data), key_copy.size);
+    }

     SSDCacheDictionaryStorageConfiguration configuration;

     SSDCacheFileBuffer<SSDCacheKeyType> file_buffer;

-    Memory<Allocator<true>> read_from_file_buffer;
-
     std::vector<SSDCacheMemoryBuffer<SSDCacheKeyType>> memory_buffer_partitions;

     pcg64 rnd_engine;

-    class ArenaCellKeyDisposer
-    {
-    public:
-        ArenaWithFreeLists & arena;
-
-        template <typename Key, typename Value>
-        void operator()(const Key & key, const Value &) const
-        {
-            /// In case of complex key we keep it in arena
-            if constexpr (std::is_same_v<Key, StringRef>)
-                arena.free(const_cast<char *>(key.data), key.size);
-        }
-    };
-
-    using SimpleKeyLRUHashMap = LRUHashMap<UInt64, Cell, ArenaCellKeyDisposer>;
-    using ComplexKeyLRUHashMap = LRUHashMapWithSavedHash<StringRef, Cell, ArenaCellKeyDisposer>;
+    using SimpleKeyHashMap = HashMap<UInt64, Cell>;
+    using ComplexKeyHashMap = HashMapWithSavedHash<StringRef, Cell>;

-    using CacheLRUHashMap = std::conditional_t<
+    using CacheMap = std::conditional_t<
         dictionary_key_type == DictionaryKeyType::simple,
-        SimpleKeyLRUHashMap,
-        ComplexKeyLRUHashMap>;
+        SimpleKeyHashMap,
+        ComplexKeyHashMap>;

     ArenaWithFreeLists complex_key_arena;

-    CacheLRUHashMap index;
+    CacheMap index;

     size_t current_partition_index = 0;

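eraseKeyFromIndex() above copies the key handle out of the hash table before erasing, because for complex keys the bytes live in complex_key_arena and the table entry is the only pointer to them. A reduced sketch of that ordering (the Map and Arena interfaces here are hypothetical, std-style stand-ins, not the ClickHouse hash table API):

    #include <cstddef>

    struct StringRefLike { const char * data; size_t size; };

    // Hypothetical erase helper mirroring the ordering in the diff:
    // 1. look up, 2. copy the key handle, 3. erase the entry, 4. free the bytes.
    template <typename Map, typename Arena>
    void eraseKeyFromIndex(Map & index, Arena & arena, StringRefLike key)
    {
        auto it = index.find(key);
        if (it == index.end())
            return;

        StringRefLike key_copy = it->first;   // keep the pointer before the entry disappears
        index.erase(it);
        arena.free(const_cast<char *>(key_copy.data), key_copy.size);
    }
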
@@ -1,154 +0,0 @@
-clickhouse-client --query="DROP TABLE IF EXISTS simple_cache_dictionary_table_source";
-clickhouse-client --query="CREATE TABLE simple_cache_dictionary_table_source (id UInt64, value1 String, value2 UInt64, value3 String, value4 Float64, value5 Decimal64(4)) ENGINE=TinyLog;"
-clickhouse-client --query="INSERT INTO simple_cache_dictionary_table_source SELECT number, concat('Value1 ', toString(number)), number, concat('Value3 ', toString(number)), toFloat64(number), cast(number, 'Decimal64(4)') FROM system.numbers LIMIT 1000000;"
-
-clickhouse-client --multiquery --query="CREATE DICTIONARY clickhouse_simple_cache_dictionary (
-    id UInt64,
-    value1 String,
-    value2 UInt64,
-    value3 String,
-    value4 Float64,
-    value5 Decimal64(4)
-)
-PRIMARY KEY id
-SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_cache_dictionary_table_source' PASSWORD '' DB 'default'))
-LIFETIME(MIN 300 MAX 300)
-LAYOUT(CACHE(SIZE_IN_CELLS 100000));"
-
-clickhouse-client --multiquery --query="CREATE DICTIONARY clickhouse_ssd_simple_cache_dictionary (
-    id UInt64,
-    value1 String,
-    value2 UInt64,
-    value3 String,
-    value4 Float64,
-    value5 Decimal64(4)
-)
-PRIMARY KEY id
-SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_cache_dictionary_table_source' PASSWORD '' DB 'default'))
-LIFETIME(MIN 300 MAX 300)
-LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576 WRITE_BUFFER_SIZE 327680 MAX_STORED_KEYS 1048576 PATH '/opt/mkita/ClickHouse/build_release/programs/ssd_cache'));"
-
-clickhouse-client --multiquery --query="CREATE DICTIONARY clickhouse_dummy_simple_cache_dictionary (
-    id UInt64,
-    value1 String,
-    value2 UInt64,
-    value3 String,
-    value4 Float64,
-    value5 Decimal64(4)
-)
-PRIMARY KEY id
-SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_cache_dictionary_table_source' PASSWORD '' DB 'default'))
-LIFETIME(MIN 300 MAX 300)
-LAYOUT(DUMMY_SIMPLE());"
-
-./clickhouse-benchmark --query="SELECT
-    dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value1', number),
-    dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value2', number),
-    dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value3', number),
-    dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value4', number),
-    dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value5', number)
-FROM system.numbers
-LIMIT 10000
-FORMAT Null"
-
-./clickhouse-benchmark --query="SELECT
-    dictGet('default.clickhouse_simple_cache_dictionary', ('value1', 'value2', 'value3', 'value4', 'value5'), number)
-FROM system.numbers
-LIMIT 10000
-FORMAT Null"
-
-./clickhouse-benchmark --query="SELECT dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value1', number) FROM system.numbers_mt LIMIT 10000 FORMAT Null"
-
-./clickhouse-benchmark --query="SELECT
-    dictGet('default.clickhouse_simple_cache_dictionary', 'value1', number),
-    dictGet('default.clickhouse_simple_cache_dictionary', 'value2', number),
-    dictGet('default.clickhouse_simple_cache_dictionary', 'value3', number),
-    dictGet('default.clickhouse_simple_cache_dictionary', 'value4', number),
-    dictGet('default.clickhouse_simple_cache_dictionary', 'value5', number)
-FROM system.numbers
-LIMIT 10000
-FORMAT Null"
-
-./clickhouse-benchmark --query="SELECT dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value1', number) FROM system.numbers_mt LIMIT 10000 FORMAT Null"
-
-SELECT
-    dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value1', number),
-    dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value2', number),
-    dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value3', number),
-    dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value4', number),
-    dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value5', number)
-FROM system.numbers
-LIMIT 10000
-FORMAT Null
-
-SELECT dictGet('default.clickhouse_simple_cache_dictionary', ('value1', 'value2', 'value3', 'value4', 'value5'), number) FROM system.numbers LIMIT 10000 FORMAT Null
-
-SELECT dictGet('default.clickhouse_ssd_simple_cache_dictionary', ('value1', 'value2', 'value3', 'value4', 'value5'), number) FROM system.numbers LIMIT 10000
-FORMAT Null
-
-SELECT
-    dictGet('default.clickhouse_simple_cache_dictionary', ('value1', 'value2', 'value3', 'value4', 'value5'), number)
-FROM system.numbers
-LIMIT 10000
-FORMAT
-Null
-
-SELECT
-    dictGet('default.clickhouse_simple_cache_dictionary', 'value1', number),
-    dictGet('default.clickhouse_simple_cache_dictionary', 'value2', number),
-    dictGet('default.clickhouse_simple_cache_dictionary', 'value3', number),
-    dictGet('default.clickhouse_simple_cache_dictionary', 'value4', number),
-    dictGet('default.clickhouse_simple_cache_dictionary', 'value5', number)
-FROM system.numbers
-LIMIT 10000
-FORMAT
-Null
-
-SELECT
-    dictGet('default.clickhouse_simple_cache_dictionary', 'value1', number),
-    dictGet('default.clickhouse_simple_cache_dictionary', 'value2', number)
-FROM system.numbers
-LIMIT 10000
-FORMAT Null
-
-SELECT
-    dictGet('clickhouse_simple_cache_dictionary', 'value1', number)
-FROM system.numbers
-LIMIT 100000
-FORMAT Null
-
-SELECT
-    dictGet('clickhouse_simple_cache_dictionary', 'value2', number)
-FROM system.numbers
-LIMIT 100000
-FORMAT Null
-
-SELECT
-    dictGet('clickhouse_simple_cache_dictionary', 'value3', number)
-FROM system.numbers
-LIMIT 100000
-FORMAT Null
-
-SELECT
-    dictGet('clickhouse_simple_cache_dictionary', 'value4', number)
-FROM system.numbers
-LIMIT 100000
-FORMAT Null
-
-SELECT
-    dictGet('clickhouse_simple_cache_dictionary', 'value5', number)
-FROM system.numbers
-LIMIT 100000
-FORMAT Null
-
-SELECT
-    dictGet('clickhouse_simple_cache_dictionary', 'value1', number),
-    dictGet('clickhouse_simple_cache_dictionary', 'value2', number),
-    dictGet('clickhouse_simple_cache_dictionary', 'value3', number),
-    dictGet('clickhouse_simple_cache_dictionary', 'value4', number),
-    dictGet('clickhouse_simple_cache_dictionary', 'value5', number)
-FROM system.numbers
-LIMIT 100000
-FORMAT Null
-
-SELECT * FROM clickhouse_simple_cache_dictionary_table;
@@ -1,6 +1,6 @@
 #include "CacheDictionary.h"
-#include "SSDCacheDictionaryStorage.h"
 #include "CacheDictionaryStorage.h"
+#include "SSDCacheDictionaryStorage.h"
 #include <Dictionaries/DictionaryFactory.h>

 namespace DB
@@ -20,13 +20,13 @@ CacheDictionaryStorageConfiguration parseCacheStorageConfiguration(
     const DictionaryLifetime & dict_lifetime,
     DictionaryKeyType dictionary_key_type)
 {
-    String dictionary_type_prefix = dictionary_key_type == DictionaryKeyType::complex ? ".complex_key_cache." : ".cache.";
+    String dictionary_type_prefix = (dictionary_key_type == DictionaryKeyType::complex) ? ".complex_key_cache." : ".cache.";
     String dictionary_configuration_prefix = layout_prefix + dictionary_type_prefix;

     const size_t size = config.getUInt64(dictionary_configuration_prefix + "size_in_cells");
     if (size == 0)
         throw Exception(ErrorCodes::TOO_SMALL_BUFFER_SIZE,
-            "({}: cache dictionary cannot have 0 cells",
+            "({}): cache dictionary cannot have 0 cells",
             full_name);

     size_t dict_lifetime_seconds = static_cast<size_t>(dict_lifetime.max_sec);
@@ -59,7 +59,6 @@ SSDCacheDictionaryStorageConfiguration parseSSDCacheStorageConfiguration(
     static constexpr size_t DEFAULT_READ_BUFFER_SIZE_BYTES = 16 * DEFAULT_SSD_BLOCK_SIZE_BYTES;
     static constexpr size_t DEFAULT_WRITE_BUFFER_SIZE_BYTES = DEFAULT_SSD_BLOCK_SIZE_BYTES;

-    static constexpr size_t DEFAULT_MAX_STORED_KEYS = 100000;
     static constexpr size_t DEFAULT_PARTITIONS_COUNT = 16;

     const size_t max_partitions_count
@@ -94,16 +93,11 @@ SSDCacheDictionaryStorageConfiguration parseSSDCacheStorageConfiguration(
     if (directory_path.at(0) != '/')
         directory_path = std::filesystem::path{config.getString("path")}.concat(directory_path).string();

-    const size_t max_stored_keys_in_partition
-        = config.getInt64(dictionary_configuration_prefix + "max_stored_keys", DEFAULT_MAX_STORED_KEYS);
-    const size_t rounded_size = roundUpToPowerOfTwoOrZero(max_stored_keys_in_partition);
-
     SSDCacheDictionaryStorageConfiguration configuration{
         strict_max_lifetime_seconds,
         dict_lifetime,
         directory_path,
         max_partitions_count,
-        rounded_size,
         block_size,
         file_size / block_size,
         read_buffer_size / block_size,
@@ -194,7 +188,8 @@ DictionaryPtr createCacheDictionaryLayout(
     const bool allow_read_expired_keys = config.getBool(layout_prefix + ".cache.allow_read_expired_keys", false);

     auto storage_configuration = parseCacheStorageConfiguration(full_name, config, layout_prefix, dict_lifetime, dictionary_key_type);
-    auto storage = std::make_shared<CacheDictionaryStorage<dictionary_key_type>>(storage_configuration);
+    std::shared_ptr<ICacheDictionaryStorage> storage = std::make_shared<CacheDictionaryStorage<dictionary_key_type>>(dict_struct, storage_configuration);

     auto update_queue_configuration = parseCacheDictionaryUpdateQueueConfiguration(full_name, config, layout_prefix, dictionary_key_type);
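
For reference, the `size_in_cells` value validated above comes from the dictionary layout definition. A minimal DDL sketch, with the dictionary and source table names purely illustrative:

CREATE DICTIONARY cache_dictionary_example
(
    id UInt64,
    value1 String
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'source_table'))
LIFETIME(MIN 0 MAX 300)
LAYOUT(CACHE(SIZE_IN_CELLS 1048576));
-- LAYOUT(CACHE(SIZE_IN_CELLS 0)) would now fail with TOO_SMALL_BUFFER_SIZE.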
@@ -538,8 +538,9 @@ private:
         [[maybe_unused]] const auto block_size = static_cast<size_t>(EVP_CIPHER_block_size(evp_cipher));
         [[maybe_unused]] const auto iv_size = static_cast<size_t>(EVP_CIPHER_iv_length(evp_cipher));
-        const auto key_size = static_cast<size_t>(EVP_CIPHER_key_length(evp_cipher));
-        const auto tag_size = 16; // https://tools.ietf.org/html/rfc5116#section-5.1
+        const size_t key_size = static_cast<size_t>(EVP_CIPHER_key_length(evp_cipher));
+        static constexpr size_t tag_size = 16; // https://tools.ietf.org/html/rfc5116#section-5.1

         auto decrypted_result_column = ColumnString::create();
         auto & decrypted_result_column_data = decrypted_result_column->getChars();
@@ -549,9 +550,17 @@ private:
             size_t resulting_size = 0;
             for (size_t r = 0; r < input_rows_count; ++r)
             {
-                resulting_size += input_column->getDataAt(r).size + 1;
+                size_t string_size = input_column->getDataAt(r).size;
+                resulting_size += string_size + 1;   /// With terminating zero.

                 if constexpr (mode == CipherMode::RFC5116_AEAD_AES_GCM)
+                {
+                    if (string_size < tag_size)
+                        throw Exception("Encrypted data is smaller than the size of additional data for AEAD mode, cannot decrypt.",
+                            ErrorCodes::BAD_ARGUMENTS);
+
                     resulting_size -= tag_size;
+                }
             }

 #if defined(MEMORY_SANITIZER)
@@ -565,6 +574,7 @@ private:
             decrypted_result_column_data.resize(resulting_size);
 #endif
         }
+
         auto * decrypted = decrypted_result_column_data.data();

         KeyHolder<mode> key_holder;
@@ -631,7 +641,7 @@ private:
                 // 1.a.2: Set AAD if present
                 if (aad_column)
                 {
-                    const auto aad_data = aad_column->getDataAt(r);
+                    StringRef aad_data = aad_column->getDataAt(r);
                     int tmp_len = 0;
                     if (aad_data.size != 0 && EVP_DecryptUpdate(evp_ctx, nullptr, &tmp_len,
                         reinterpret_cast<const unsigned char *>(aad_data.data), aad_data.size) != 1)
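
The added check rejects AEAD ciphertexts shorter than the 16-byte authentication tag before the result buffer is sized. A hedged usage sketch; the table, column, key, and IV names are all illustrative:

SELECT decrypt('aes-256-gcm', ciphertext, secret_key, iv) FROM encrypted_data;
-- A ciphertext shorter than 16 bytes now throws BAD_ARGUMENTS instead of corrupting the size computation.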
@@ -42,11 +42,11 @@ struct SimdJSONParser
         ALWAYS_INLINE bool isBool() const { return element.type() == simdjson::dom::element_type::BOOL; }
         ALWAYS_INLINE bool isNull() const { return element.type() == simdjson::dom::element_type::NULL_VALUE; }

-        ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().first; }
-        ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().first; }
-        ALWAYS_INLINE double getDouble() const { return element.get_double().first; }
-        ALWAYS_INLINE bool getBool() const { return element.get_bool().first; }
-        ALWAYS_INLINE std::string_view getString() const { return element.get_string().first; }
+        ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().value_unsafe(); }
+        ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().value_unsafe(); }
+        ALWAYS_INLINE double getDouble() const { return element.get_double().value_unsafe(); }
+        ALWAYS_INLINE bool getBool() const { return element.get_bool().value_unsafe(); }
+        ALWAYS_INLINE std::string_view getString() const { return element.get_string().value_unsafe(); }
         ALWAYS_INLINE Array getArray() const;
         ALWAYS_INLINE Object getObject() const;

@@ -75,7 +75,7 @@ struct SimdJSONParser
         ALWAYS_INLINE Iterator begin() const { return array.begin(); }
         ALWAYS_INLINE Iterator end() const { return array.end(); }
         ALWAYS_INLINE size_t size() const { return array.size(); }
-        ALWAYS_INLINE Element operator[](size_t index) const { assert(index < size()); return array.at(index).first; }
+        ALWAYS_INLINE Element operator[](size_t index) const { assert(index < size()); return array.at(index).value_unsafe(); }

     private:
         simdjson::dom::array array;
@@ -111,7 +111,7 @@ struct SimdJSONParser
         if (x.error())
             return false;

-        result = x.first;
+        result = x.value_unsafe();
         return true;
     }

@@ -137,7 +137,7 @@ struct SimdJSONParser
         if (document.error())
             return false;

-        result = document.first;
+        result = document.value_unsafe();
         return true;
     }

@@ -155,12 +155,12 @@ private:

 inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const
 {
-    return element.get_array().first;
+    return element.get_array().value_unsafe();
 }

 inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const
 {
-    return element.get_object().first;
+    return element.get_object().value_unsafe();
 }

 }
@@ -90,7 +90,70 @@ struct ExtractFirstSignificantSubdomain
             res_data += last_3_periods[1] + 1 - begin;
             res_size = last_3_periods[0] - last_3_periods[1] - 1;
         }
     }
+
+    /// The difference with execute() is due to custom TLD list can have records of any level,
+    /// not only 2-nd level (like non-custom variant), so it requires more lookups.
+    template <class Lookup>
+    static void executeCustom(const Lookup & lookup, const Pos data, const size_t size, Pos & res_data, size_t & res_size, Pos * out_domain_end = nullptr)
+    {
+        res_data = data;
+        res_size = 0;
+
+        Pos tmp;
+        size_t domain_length;
+        ExtractDomain<without_www>::execute(data, size, tmp, domain_length);
+
+        if (domain_length == 0)
+            return;
+
+        if (out_domain_end)
+            *out_domain_end = tmp + domain_length;
+
+        /// cut useless dot
+        if (tmp[domain_length - 1] == '.')
+            --domain_length;
+
+        res_data = tmp;
+        res_size = domain_length;
+
+        auto begin = tmp;
+        auto end = begin + domain_length;
+        const char * last_2_periods[2]{};
+        const char * prev = begin - 1;
+
+        auto pos = find_first_symbols<'.'>(begin, end);
+        while (pos < end)
+        {
+            if (lookup(pos + 1, end - pos - 1))
+            {
+                res_data += prev + 1 - begin;
+                res_size = end - 1 - prev;
+                return;
+            }
+
+            last_2_periods[1] = last_2_periods[0];
+            last_2_periods[0] = pos;
+            prev = pos;
+            pos = find_first_symbols<'.'>(pos + 1, end);
+        }
+
+        /// if there is domain of the first level (i.e. no dots in the hostname) -> return nothing
+        if (!last_2_periods[0])
+            return;
+
+        /// if there is domain of the second level -> always return itself
+        if (!last_2_periods[1])
+        {
+            res_size = last_2_periods[0] - begin;
+            return;
+        }
+
+        /// if there is domain of the 3+ level, and zero records in TLD list ->
+        /// fallback to domain of the second level
+        res_data += last_2_periods[1] + 1 - begin;
+        res_size = last_2_periods[0] - last_2_periods[1] - 1;
+    }
 };

 }
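
executeCustom() scans the host right to left, consulting the custom TLD list at every dot so that suffixes of any level are honored. A usage sketch, assuming a `my_tld_list` entry is declared in the server's top_level_domains_lists configuration:

SELECT cutToFirstSignificantSubdomainCustom('sub.foo.example.com', 'my_tld_list');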
@@ -17,10 +17,10 @@ namespace ErrorCodes
     extern const int ILLEGAL_TYPE_OF_ARGUMENT;
 }

-struct FirstSignificantSubdomainCustomtLookup
+struct FirstSignificantSubdomainCustomLookup
 {
     const TLDList & tld_list;
-    FirstSignificantSubdomainCustomtLookup(const std::string & tld_list_name)
+    FirstSignificantSubdomainCustomLookup(const std::string & tld_list_name)
         : tld_list(TLDListsHolder::getInstance().getTldList(tld_list_name))
     {
     }
@@ -63,7 +63,7 @@ public:
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override
     {
         const ColumnConst * column_tld_list_name = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
-        FirstSignificantSubdomainCustomtLookup tld_lookup(column_tld_list_name->getValue<String>());
+        FirstSignificantSubdomainCustomLookup tld_lookup(column_tld_list_name->getValue<String>());

         /// FIXME: convertToFullColumnIfConst() is suboptimal
         auto column = arguments[0].column->convertToFullColumnIfConst();
@@ -79,7 +79,7 @@ public:
                 ErrorCodes::ILLEGAL_COLUMN);
     }

-    static void vector(FirstSignificantSubdomainCustomtLookup & tld_lookup,
+    static void vector(FirstSignificantSubdomainCustomLookup & tld_lookup,
         const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
         ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
     {
@@ -10,7 +10,7 @@ struct CutToFirstSignificantSubdomainCustom
 {
     static size_t getReserveLengthForElement() { return 15; }

-    static void execute(FirstSignificantSubdomainCustomtLookup & tld_lookup, const Pos data, const size_t size, Pos & res_data, size_t & res_size)
+    static void execute(FirstSignificantSubdomainCustomLookup & tld_lookup, const Pos data, const size_t size, Pos & res_data, size_t & res_size)
     {
         res_data = data;
         res_size = 0;
@@ -18,7 +18,7 @@ struct CutToFirstSignificantSubdomainCustom
         Pos tmp_data;
         size_t tmp_length;
         Pos domain_end;
-        ExtractFirstSignificantSubdomain<without_www>::execute(tld_lookup, data, size, tmp_data, tmp_length, &domain_end);
+        ExtractFirstSignificantSubdomain<without_www>::executeCustom(tld_lookup, data, size, tmp_data, tmp_length, &domain_end);

         if (tmp_length == 0)
             return;
@@ -190,7 +190,7 @@ private:
     }

     static constexpr size_t MAX_ARRAY_SIZE = 1ULL << 30;
-    if (static_cast<size_t>(max_key - min_key) > MAX_ARRAY_SIZE)
+    if (static_cast<size_t>(max_key) - static_cast<size_t>(min_key) > MAX_ARRAY_SIZE)
         throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size in the result of function {}", getName());

     /* fill the result arrays */
@@ -16,6 +16,7 @@ namespace ErrorCodes
     extern const int ARGUMENT_OUT_OF_BOUND;
     extern const int ILLEGAL_COLUMN;
     extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    extern const int BAD_ARGUMENTS;
 }

 namespace
@@ -110,6 +111,9 @@ public:
                 arguments[2].column->getFloat64(i),
                 max_width);

+            if (!isFinite(width))
+                throw Exception("Value of width must not be NaN and Inf", ErrorCodes::BAD_ARGUMENTS);
+
             size_t next_size = current_offset + UnicodeBar::getWidthInBytes(width) + 1;
             dst_chars.resize(next_size);
             UnicodeBar::render(width, reinterpret_cast<char *>(&dst_chars[current_offset]));
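
The new isFinite() guard protects UnicodeBar::render from NaN or Inf widths. A sketch of the SQL-level behavior:

SELECT bar(number, 0, 9, 10) FROM system.numbers LIMIT 10;  -- normal rendering
SELECT bar(0 / 0, 0, 9, 10);                                -- NaN width now throws BAD_ARGUMENTS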
@@ -41,7 +41,8 @@ void registerFunctionThrowIf(FunctionFactory &);
 void registerFunctionVersion(FunctionFactory &);
 void registerFunctionBuildId(FunctionFactory &);
 void registerFunctionUptime(FunctionFactory &);
-void registerFunctionTimeZone(FunctionFactory &);
+void registerFunctionTimezone(FunctionFactory &);
+void registerFunctionTimezoneOf(FunctionFactory &);
 void registerFunctionRunningAccumulate(FunctionFactory &);
 void registerFunctionRunningDifference(FunctionFactory &);
 void registerFunctionRunningDifferenceStartingWithFirstValue(FunctionFactory &);
@@ -111,7 +112,8 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
     registerFunctionVersion(factory);
     registerFunctionBuildId(factory);
     registerFunctionUptime(factory);
-    registerFunctionTimeZone(factory);
+    registerFunctionTimezone(factory);
+    registerFunctionTimezoneOf(factory);
     registerFunctionRunningAccumulate(factory);
     registerFunctionRunningDifference(factory);
     registerFunctionRunningDifferenceStartingWithFirstValue(factory);
@@ -12,13 +12,13 @@ namespace

 /** Returns the server time zone.
   */
-class FunctionTimeZone : public IFunction
+class FunctionTimezone : public IFunction
 {
 public:
     static constexpr auto name = "timezone";
     static FunctionPtr create(const Context &)
     {
-        return std::make_shared<FunctionTimeZone>();
+        return std::make_shared<FunctionTimezone>();
     }

     String getName() const override
@@ -45,9 +45,10 @@ public:

 }

-void registerFunctionTimeZone(FunctionFactory & factory)
+void registerFunctionTimezone(FunctionFactory & factory)
 {
-    factory.registerFunction<FunctionTimeZone>();
+    factory.registerFunction<FunctionTimezone>();
+    factory.registerAlias("timeZone", "timezone");
 }

 }
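
After the rename, the canonical function name is lowercase and the old camel-case spelling survives as an alias:

SELECT timezone(), timeZone();  -- both spellings resolve to the same function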
118 src/Functions/timezoneOf.cpp Normal file
@@ -0,0 +1,118 @@
+#include <Functions/IFunctionImpl.h>
+#include <Functions/FunctionFactory.h>
+#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeDateTime.h>
+#include <common/DateLUTImpl.h>
+#include <Core/Field.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+
+
+namespace
+{
+
+/** timezoneOf(x) - get the name of the timezone of DateTime data type.
+  * Example: Europe/Moscow.
+  */
+class ExecutableFunctionTimezoneOf : public IExecutableFunctionImpl
+{
+public:
+    static constexpr auto name = "timezoneOf";
+    String getName() const override { return name; }
+
+    bool useDefaultImplementationForNulls() const override { return false; }
+    bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
+
+    /// Execute the function on the columns.
+    ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
+    {
+        DataTypePtr type_no_nullable = removeNullable(arguments[0].type);
+
+        return DataTypeString().createColumnConst(input_rows_count,
+            dynamic_cast<const TimezoneMixin &>(*type_no_nullable).getTimeZone().getTimeZone());
+    }
+};
+
+
+class BaseFunctionTimezoneOf : public IFunctionBaseImpl
+{
+public:
+    BaseFunctionTimezoneOf(DataTypes argument_types_, DataTypePtr return_type_)
+        : argument_types(std::move(argument_types_)), return_type(std::move(return_type_)) {}
+
+    static constexpr auto name = "timezoneOf";
+    String getName() const override { return name; }
+
+    bool isDeterministic() const override { return true; }
+    bool isDeterministicInScopeOfQuery() const override { return true; }
+
+    const DataTypes & getArgumentTypes() const override { return argument_types; }
+    const DataTypePtr & getResultType() const override { return return_type; }
+
+    ExecutableFunctionImplPtr prepare(const ColumnsWithTypeAndName &) const override
+    {
+        return std::make_unique<ExecutableFunctionTimezoneOf>();
+    }
+
+    ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const ColumnsWithTypeAndName & arguments) const override
+    {
+        DataTypePtr type_no_nullable = removeNullable(arguments[0].type);
+
+        return DataTypeString().createColumnConst(1,
+            dynamic_cast<const TimezoneMixin &>(*type_no_nullable).getTimeZone().getTimeZone());
+    }
+
+private:
+    DataTypes argument_types;
+    DataTypePtr return_type;
+};
+
+
+class FunctionTimezoneOfBuilder : public IFunctionOverloadResolverImpl
+{
+public:
+    static constexpr auto name = "timezoneOf";
+    String getName() const override { return name; }
+    static FunctionOverloadResolverImplPtr create(const Context &) { return std::make_unique<FunctionTimezoneOfBuilder>(); }
+
+    size_t getNumberOfArguments() const override { return 1; }
+
+    DataTypePtr getReturnType(const DataTypes & types) const override
+    {
+        DataTypePtr type_no_nullable = removeNullable(types[0]);
+
+        if (isDateTime(type_no_nullable) || isDateTime64(type_no_nullable))
+            return std::make_shared<DataTypeString>();
+        else
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad argument for function {}, should be DateTime or DateTime64", name);
+    }
+
+    FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
+    {
+        return std::make_unique<BaseFunctionTimezoneOf>(DataTypes{arguments[0].type}, return_type);
+    }
+
+    bool useDefaultImplementationForNulls() const override { return false; }
+    bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
+    ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t /*number_of_arguments*/) const override { return {0}; }
+};
+
+}
+
+void registerFunctionTimezoneOf(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionTimezoneOfBuilder>();
+    factory.registerAlias("timeZoneOf", "timezoneOf");
+}
+
+}
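
The new function answers from the argument's data type alone, so it folds to a constant without reading any rows. A usage sketch:

SELECT timezoneOf(now());                            -- server timezone name, e.g. 'UTC'
SELECT timeZoneOf(toDateTime(0, 'Europe/Moscow'));   -- alias; returns 'Europe/Moscow'
SELECT timezoneOf(1);                                -- throws BAD_ARGUMENTS: needs DateTime or DateTime64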
@@ -21,11 +21,11 @@ namespace
 {

 /// Just changes time zone information for data type. The calculation is free.
-class FunctionToTimeZone : public IFunction
+class FunctionToTimezone : public IFunction
 {
 public:
-    static constexpr auto name = "toTimeZone";
-    static FunctionPtr create(const Context &) { return std::make_shared<FunctionToTimeZone>(); }
+    static constexpr auto name = "toTimezone";
+    static FunctionPtr create(const Context &) { return std::make_shared<FunctionToTimezone>(); }

     String getName() const override
     {
@@ -64,7 +64,8 @@ public:

 void registerFunctionToTimeZone(FunctionFactory & factory)
 {
-    factory.registerFunction<FunctionToTimeZone>();
+    factory.registerFunction<FunctionToTimezone>();
+    factory.registerAlias("toTimeZone", "toTimezone");
 }

 }
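
toTimezone only relabels the timezone in the value's type; the stored instant does not change. Sketch:

SELECT toTimezone(now(), 'UTC') AS t, toTimeZone(now(), 'UTC') AS t_alias;  -- same function via the alias registered above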
@@ -467,6 +467,7 @@ SRCS(
     timeSlot.cpp
     timeSlots.cpp
     timezone.cpp
+    timezoneOf.cpp
     timezoneOffset.cpp
     toColumnTypeName.cpp
     toCustomWeek.cpp
@@ -506,7 +507,7 @@ SRCS(
     toStartOfTenMinutes.cpp
     toStartOfYear.cpp
     toTime.cpp
-    toTimeZone.cpp
+    toTimezone.cpp
     toTypeName.cpp
     toUnixTimestamp64Micro.cpp
     toUnixTimestamp64Milli.cpp
@@ -106,7 +106,7 @@ void BrotliWriteBuffer::finish()
     try
     {
         finishImpl();
-        out->next();
+        out->finalize();
         finished = true;
     }
     catch (...)
@@ -105,7 +105,7 @@ void LZMADeflatingWriteBuffer::finish()
     try
     {
         finishImpl();
-        out->next();
+        out->finalize();
         finished = true;
     }
     catch (...)
@@ -82,6 +82,7 @@ bool PeekableReadBuffer::peekNext()
             checkpoint.emplace(memory.data());
             checkpoint_in_own_memory = true;
         }
+
         if (currentlyReadFromOwnMemory())
         {
             /// Update buffer size
@@ -99,7 +100,6 @@ bool PeekableReadBuffer::peekNext()
             pos_offset = 0;
         }
         BufferBase::set(memory.data(), peeked_size + bytes_to_copy, pos_offset);
-
     }

     peeked_size += bytes_to_copy;
@@ -113,12 +113,21 @@ void PeekableReadBuffer::rollbackToCheckpoint(bool drop)
 {
     checkStateCorrect();

-    if (!checkpoint)
-        throw DB::Exception("There is no checkpoint", ErrorCodes::LOGICAL_ERROR);
-    else if (checkpointInOwnMemory() == currentlyReadFromOwnMemory())
+    assert(checkpoint);
+
+    if (checkpointInOwnMemory() == currentlyReadFromOwnMemory())
+    {
+        /// Both checkpoint and position are in the same buffer.
         pos = *checkpoint;
-    else /// Checkpoint is in own memory and pos is not. Switch to reading from own memory
+    }
+    else
+    {
+        /// Checkpoint is in own memory and position is not.
+        assert(checkpointInOwnMemory());
+
+        /// Switch to reading from own memory.
         BufferBase::set(memory.data(), peeked_size, *checkpoint - memory.data());
+    }

     if (drop)
         dropCheckpoint();
@@ -134,6 +143,7 @@ bool PeekableReadBuffer::nextImpl()

     checkStateCorrect();
     bool res;
+    bool checkpoint_at_end = checkpoint && *checkpoint == working_buffer.end() && currentlyReadFromOwnMemory();

     if (checkpoint)
     {
@@ -163,6 +173,13 @@ bool PeekableReadBuffer::nextImpl()
     BufferBase::set(sub_working.begin(), sub_working.size(), sub_buf.offset());
     nextimpl_working_buffer_offset = sub_buf.offset();

+    if (checkpoint_at_end)
+    {
+        checkpoint.emplace(working_buffer.begin());
+        peeked_size = 0;
+        checkpoint_in_own_memory = false;
+    }
+
     checkStateCorrect();
     return res;
 }
@@ -43,10 +43,7 @@ public:
     /// Forget checkpoint and all data between checkpoint and position
     ALWAYS_INLINE inline void dropCheckpoint()
     {
-#ifndef NDEBUG
-        if (!checkpoint)
-            throw DB::Exception("There is no checkpoint", ErrorCodes::LOGICAL_ERROR);
-#endif
+        assert(checkpoint);
         if (!currentlyReadFromOwnMemory())
         {
             /// Don't need to store unread data anymore
@@ -1,6 +1,7 @@
 #include <Poco/Net/NetException.h>

 #include <IO/ReadBufferFromPocoSocket.h>
+#include <IO/TimeoutSetter.h>
 #include <Common/Exception.h>
 #include <Common/NetException.h>
 #include <Common/Stopwatch.h>
@@ -27,23 +28,23 @@ bool ReadBufferFromPocoSocket::nextImpl()
     ssize_t bytes_read = 0;
     Stopwatch watch;

-    int flags = 0;
-    if (async_callback)
-        flags |= MSG_DONTWAIT;
-
     /// Add more details to exceptions.
     try
     {
-        bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), internal_buffer.size(), flags);
-
-        /// If async_callback is specified, and read is blocking, run async_callback and try again later.
+        /// If async_callback is specified, and read will block, run async_callback and try again later.
         /// It is expected that file descriptor may be polled externally.
         /// Note that receive timeout is not checked here. External code should check it while polling.
-        while (bytes_read < 0 && async_callback && errno == EAGAIN)
-        {
+        while (async_callback && !socket.poll(0, Poco::Net::Socket::SELECT_READ))
             async_callback(socket.impl()->sockfd(), socket.getReceiveTimeout(), socket_description);
-            bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), internal_buffer.size(), flags);
-        }
+
+        /// receiveBytes in SecureStreamSocket throws TimeoutException after max(receive_timeout, send_timeout),
+        /// but we want to get this exception exactly after receive_timeout. So, set send_timeout = receive_timeout
+        /// before receiveBytes.
+        std::unique_ptr<TimeoutSetter> timeout_setter = nullptr;
+        if (socket.secure())
+            timeout_setter = std::make_unique<TimeoutSetter>(dynamic_cast<Poco::Net::StreamSocket &>(socket), socket.getReceiveTimeout(), socket.getReceiveTimeout());
+
+        bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), internal_buffer.size());
     }
     catch (const Poco::Net::NetException & e)
     {
@@ -1,4 +1,4 @@
-#include "TimeoutSetter.h"
+#include <IO/TimeoutSetter.h>

 #include <common/logger_useful.h>
@@ -1,6 +1,7 @@
 #include <Poco/Net/NetException.h>

 #include <IO/WriteBufferFromPocoSocket.h>
+#include <IO/TimeoutSetter.h>

 #include <Common/Exception.h>
 #include <Common/NetException.h>
@@ -40,6 +41,13 @@ void WriteBufferFromPocoSocket::nextImpl()
         /// Add more details to exceptions.
         try
         {
+            /// sendBytes in SecureStreamSocket throws TimeoutException after max(receive_timeout, send_timeout),
+            /// but we want to get this exception exactly after send_timeout. So, set receive_timeout = send_timeout
+            /// before sendBytes.
+            std::unique_ptr<TimeoutSetter> timeout_setter = nullptr;
+            if (socket.secure())
+                timeout_setter = std::make_unique<TimeoutSetter>(dynamic_cast<Poco::Net::StreamSocket &>(socket), socket.getSendTimeout(), socket.getSendTimeout());
+
             res = socket.impl()->sendBytes(working_buffer.begin() + bytes_written, offset() - bytes_written);
         }
         catch (const Poco::Net::NetException & e)
@@ -120,7 +120,7 @@ WriteBufferFromS3::~WriteBufferFromS3()
     }
     catch (...)
     {
-        tryLogCurrentException(__PRETTY_FUNCTION__);
+        tryLogCurrentException(log);
     }
 }
@@ -107,7 +107,7 @@ void ZlibDeflatingWriteBuffer::finish()
     try
     {
         finishImpl();
-        out->next();
+        out->finalize();
         finished = true;
     }
     catch (...)
@@ -94,7 +94,7 @@ void ZstdDeflatingWriteBuffer::finish()
     try
     {
         finishImpl();
-        out->next();
+        out->finalize();
         finished = true;
     }
     catch (...)
|
|||||||
#include <IO/ConcatReadBuffer.h>
|
#include <IO/ConcatReadBuffer.h>
|
||||||
#include <IO/PeekableReadBuffer.h>
|
#include <IO/PeekableReadBuffer.h>
|
||||||
|
|
||||||
namespace DB::ErrorCodes
|
|
||||||
{
|
|
||||||
extern const int LOGICAL_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void readAndAssert(DB::ReadBuffer & buf, const char * str)
|
static void readAndAssert(DB::ReadBuffer & buf, const char * str)
|
||||||
{
|
{
|
||||||
size_t n = strlen(str);
|
size_t n = strlen(str);
|
||||||
@ -48,20 +43,6 @@ try
|
|||||||
readAndAssert(peekable, "01234");
|
readAndAssert(peekable, "01234");
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef ABORT_ON_LOGICAL_ERROR
|
|
||||||
bool exception = false;
|
|
||||||
try
|
|
||||||
{
|
|
||||||
peekable.rollbackToCheckpoint();
|
|
||||||
}
|
|
||||||
catch (DB::Exception & e)
|
|
||||||
{
|
|
||||||
if (e.code() != DB::ErrorCodes::LOGICAL_ERROR)
|
|
||||||
throw;
|
|
||||||
exception = true;
|
|
||||||
}
|
|
||||||
ASSERT_TRUE(exception);
|
|
||||||
#endif
|
|
||||||
assertAvailable(peekable, "56789");
|
assertAvailable(peekable, "56789");
|
||||||
|
|
||||||
readAndAssert(peekable, "56");
|
readAndAssert(peekable, "56");
|
||||||
|
@@ -50,6 +50,7 @@ SRCS(
     ReadBufferFromPocoSocket.cpp
     ReadHelpers.cpp
     SeekAvoidingReadBuffer.cpp
+    TimeoutSetter.cpp
     UseSSL.cpp
     WriteBufferFromFile.cpp
     WriteBufferFromFileBase.cpp
@@ -818,13 +818,10 @@ private:
             if (!min_id)
                 min_id = getMinIDToFinishLoading(forced_to_reload);

-            if (info->state_id >= min_id)
-                return true; /// stop
-
             if (info->loading_id < min_id)
                 startLoading(*info, forced_to_reload, *min_id);

-            /// Wait for the next event if loading wasn't completed, and stop otherwise.
+            /// Wait for the next event if loading wasn't completed, or stop otherwise.
             return (info->state_id >= min_id);
         };

@@ -850,9 +847,6 @@ private:
             if (filter && !filter(name))
                 continue;

-            if (info.state_id >= min_id)
-                continue;
-
             if (info.loading_id < min_id)
                 startLoading(info, forced_to_reload, *min_id);
@@ -260,7 +260,8 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
             renamed = true;
         }

-        database->loadStoredObjects(context, has_force_restore_data_flag, create.attach && force_attach);
+        /// We use global context here, because storages lifetime is bigger than query context lifetime
+        database->loadStoredObjects(context.getGlobalContext(), has_force_restore_data_flag, create.attach && force_attach);
     }
     catch (...)
     {
@@ -970,7 +971,8 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
     if (create.as_table_function)
     {
         const auto & factory = TableFunctionFactory::instance();
-        res = factory.get(create.as_table_function, context)->execute(create.as_table_function, context, create.table, properties.columns);
+        auto table_func = factory.get(create.as_table_function, context);
+        res = table_func->execute(create.as_table_function, context, create.table, properties.columns);
         res->renameInMemory({create.database, create.table, create.uuid});
     }
     else
@@ -393,7 +393,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
         view = nullptr;
     }

-    if (try_move_to_prewhere && storage && query.where() && !query.prewhere() && !query.final())
+    if (try_move_to_prewhere && storage && query.where() && !query.prewhere())
     {
         /// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable
         if (const auto & column_sizes = storage->getColumnSizes(); !column_sizes.empty())
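
Dropping the !query.final() condition lets the WHERE-to-PREWHERE move fire for FINAL queries as well. A hedged sketch, assuming a ReplacingMergeTree table named replacing_table:

SELECT * FROM replacing_table FINAL WHERE key = 42;
-- the key = 42 condition is now eligible to move to PREWHERE despite FINAL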
@@ -9,8 +9,6 @@
 #include <Common/ActionBlocker.h>
 #include <common/types.h>

-#include <Poco/Net/HTMLForm.h>
-
 #include <atomic>
 #include <map>
 #include <shared_mutex>
@@ -1,5 +1,6 @@
 #include <Interpreters/WindowDescription.h>

+#include <Core/Field.h>
 #include <IO/Operators.h>
 #include <Parsers/ASTFunction.h>

@@ -60,7 +61,7 @@ void WindowFrame::toString(WriteBuffer & buf) const
     }
     else
     {
-        buf << abs(begin_offset);
+        buf << applyVisitor(FieldVisitorToString(), begin_offset);
         buf << " "
             << (begin_preceding ? "PRECEDING" : "FOLLOWING");
     }
@@ -77,7 +78,7 @@ void WindowFrame::toString(WriteBuffer & buf) const
     }
     else
     {
-        buf << abs(end_offset);
+        buf << applyVisitor(FieldVisitorToString(), end_offset);
         buf << " "
             << (end_preceding ? "PRECEDING" : "FOLLOWING");
     }
@@ -121,23 +122,33 @@ void WindowFrame::checkValid() const
     if (end_type == BoundaryType::Offset
         && begin_type == BoundaryType::Offset)
     {
-        // Frame starting with following rows can't have preceding rows.
-        if (!(end_preceding && !begin_preceding))
+        // Frame start offset must be less or equal that the frame end offset.
+        bool begin_less_equal_end;
+        if (begin_preceding && end_preceding)
         {
-            // Frame start offset must be less or equal that the frame end offset.
-            const bool begin_before_end
-                = begin_offset * (begin_preceding ? -1 : 1)
-                    <= end_offset * (end_preceding ? -1 : 1);
-
-            if (!begin_before_end)
-            {
-                throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                    "Frame start offset {} {} does not precede the frame end offset {} {}",
-                    begin_offset, begin_preceding ? "PRECEDING" : "FOLLOWING",
-                    end_offset, end_preceding ? "PRECEDING" : "FOLLOWING");
-            }
-            return;
+            begin_less_equal_end = begin_offset >= end_offset;
         }
+        else if (begin_preceding && !end_preceding)
+        {
+            begin_less_equal_end = true;
+        }
+        else if (!begin_preceding && end_preceding)
+        {
+            begin_less_equal_end = false;
+        }
+        else /* if (!begin_preceding && !end_preceding) */
+        {
+            begin_less_equal_end = begin_offset <= end_offset;
+        }
+
+        if (!begin_less_equal_end)
+        {
+            throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                "Frame start offset {} {} does not precede the frame end offset {} {}",
+                begin_offset, begin_preceding ? "PRECEDING" : "FOLLOWING",
+                end_offset, end_preceding ? "PRECEDING" : "FOLLOWING");
+        }
+        return;
     }

     throw Exception(ErrorCodes::BAD_ARGUMENTS,
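
With both bounds given as offsets, validity now depends on the PRECEDING/FOLLOWING direction of each bound rather than on a sign trick that breaks once offsets are Fields. A sketch of what the new logic accepts and rejects (window functions were experimental at this point, so allow_experimental_window_functions = 1 may be needed):

SELECT count() OVER (ORDER BY number ROWS BETWEEN 2 PRECEDING AND 1 PRECEDING) FROM numbers(5);  -- valid
SELECT count() OVER (ORDER BY number ROWS BETWEEN 1 FOLLOWING AND 2 FOLLOWING) FROM numbers(5);  -- valid
SELECT count() OVER (ORDER BY number ROWS BETWEEN 1 FOLLOWING AND 2 PRECEDING) FROM numbers(5);  -- BAD_ARGUMENTS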
@@ -44,14 +44,13 @@ struct WindowFrame
     // Offset might be both preceding and following, controlled by begin_preceding,
     // but the offset value must be positive.
     BoundaryType begin_type = BoundaryType::Unbounded;
-    // This should have been a Field but I'm getting some crazy linker errors.
-    int64_t begin_offset = 0;
+    Field begin_offset = 0;
     bool begin_preceding = true;

     // Here as well, Unbounded can only be UNBOUNDED FOLLOWING, and end_preceding
     // must be false.
     BoundaryType end_type = BoundaryType::Current;
-    int64_t end_offset = 0;
+    Field end_offset = 0;
     bool end_preceding = false;

@@ -377,6 +377,11 @@ Field convertFieldToType(const Field & from_value, const IDataType & to_type, co
     else if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(&to_type))
     {
         const IDataType & nested_type = *nullable_type->getNestedType();
+
+        /// NULL remains NULL after any conversion.
+        if (WhichDataType(nested_type).isNothing())
+            return {};
+
         if (from_type_hint && from_type_hint->equals(nested_type))
             return from_value;
         return convertFieldToTypeImpl(from_value, nested_type, from_type_hint);
@@ -137,8 +137,8 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F
     if (window())
     {
         s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str <<
-            "WINDOW " << (s.hilite ? hilite_none : "");
-        window()->formatImpl(s, state, frame);
+            "WINDOW" << (s.hilite ? hilite_none : "");
+        window()->as<ASTExpressionList &>().formatImplMultiline(s, state, frame);
     }

     if (orderBy())
@@ -35,6 +35,8 @@ String ASTWindowDefinition::getID(char) const
 void ASTWindowDefinition::formatImpl(const FormatSettings & settings,
     FormatState & state, FormatStateStacked format_frame) const
 {
+    format_frame.expression_list_prepend_whitespace = false;
+
     if (partition_by)
     {
         settings.ostr << "PARTITION BY ";
@@ -70,7 +72,8 @@ void ASTWindowDefinition::formatImpl(const FormatSettings & settings,
         }
         else
         {
-            settings.ostr << abs(frame.begin_offset);
+            settings.ostr << applyVisitor(FieldVisitorToString(),
+                frame.begin_offset);
             settings.ostr << " "
                 << (!frame.begin_preceding ? "FOLLOWING" : "PRECEDING");
         }
@@ -85,7 +88,8 @@ void ASTWindowDefinition::formatImpl(const FormatSettings & settings,
         }
         else
         {
-            settings.ostr << abs(frame.end_offset);
+            settings.ostr << applyVisitor(FieldVisitorToString(),
+                frame.end_offset);
             settings.ostr << " "
                 << (!frame.end_preceding ? "FOLLOWING" : "PRECEDING");
         }
@@ -581,30 +581,20 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
     else if (parser_literal.parse(pos, ast_literal, expected))
     {
         const Field & value = ast_literal->as<ASTLiteral &>().value;
-        if (!isInt64FieldType(value.getType()))
+        if ((node->frame.type == WindowFrame::FrameType::Rows
+                || node->frame.type == WindowFrame::FrameType::Groups)
+            && !(value.getType() == Field::Types::UInt64
+                || (value.getType() == Field::Types::Int64
+                    && value.get<Int64>() >= 0)))
         {
             throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                "Only integer frame offsets are supported, '{}' is not supported.",
+                "Frame offset for '{}' frame must be a nonnegative integer, '{}' of type '{}' given.",
+                WindowFrame::toString(node->frame.type),
+                applyVisitor(FieldVisitorToString(), value),
                 Field::Types::toString(value.getType()));
         }
-        node->frame.begin_offset = value.get<Int64>();
+        node->frame.begin_offset = value;
         node->frame.begin_type = WindowFrame::BoundaryType::Offset;
-        // We can easily get a UINT64_MAX here, which doesn't even fit into
-        // int64_t. Not sure what checks we are going to need here after we
-        // support floats and dates.
-        if (node->frame.begin_offset > INT_MAX || node->frame.begin_offset < INT_MIN)
-        {
-            throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                "Frame offset must be between {} and {}, but {} is given",
-                INT_MAX, INT_MIN, node->frame.begin_offset);
-        }
-
-        if (node->frame.begin_offset < 0)
-        {
-            throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                "Frame start offset must be greater than zero, {} given",
-                node->frame.begin_offset);
-        }
     }
     else
     {
@@ -652,28 +642,20 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
     else if (parser_literal.parse(pos, ast_literal, expected))
     {
         const Field & value = ast_literal->as<ASTLiteral &>().value;
-        if (!isInt64FieldType(value.getType()))
+        if ((node->frame.type == WindowFrame::FrameType::Rows
+                || node->frame.type == WindowFrame::FrameType::Groups)
+            && !(value.getType() == Field::Types::UInt64
+                || (value.getType() == Field::Types::Int64
+                    && value.get<Int64>() >= 0)))
         {
             throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                "Only integer frame offsets are supported, '{}' is not supported.",
+                "Frame offset for '{}' frame must be a nonnegative integer, '{}' of type '{}' given.",
+                WindowFrame::toString(node->frame.type),
+                applyVisitor(FieldVisitorToString(), value),
                 Field::Types::toString(value.getType()));
         }
-        node->frame.end_offset = value.get<Int64>();
+        node->frame.end_offset = value;
         node->frame.end_type = WindowFrame::BoundaryType::Offset;
-
-        if (node->frame.end_offset > INT_MAX || node->frame.end_offset < INT_MIN)
-        {
-            throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                "Frame offset must be between {} and {}, but {} is given",
-                INT_MAX, INT_MIN, node->frame.end_offset);
-        }
-
-        if (node->frame.end_offset < 0)
-        {
-            throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                "Frame end offset must be greater than zero, {} given",
-                node->frame.end_offset);
-        }
     }
     else
     {
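
The parser now validates offsets per frame type: ROWS and GROUPS still demand a nonnegative integer, while other literals (for RANGE) are stored as a Field and checked later against the ORDER BY column. Sketch:

SELECT count() OVER (ORDER BY number ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) FROM numbers(5);    -- OK
SELECT count() OVER (ORDER BY number ROWS BETWEEN 1.5 PRECEDING AND CURRENT ROW) FROM numbers(5);  -- rejected: not a nonnegative integer
-- RANGE frames can now carry non-integer offsets, matched to the ORDER BY column type downstream.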
@@ -275,7 +275,8 @@ Token Lexer::nextTokenImpl()
                 else
                     ++pos;
             }
-            return Token(TokenType::ErrorMultilineCommentIsNotClosed, token_begin, end);
+            pos = end;
+            return Token(TokenType::ErrorMultilineCommentIsNotClosed, token_begin, pos);
         }
     }
     return Token(TokenType::Slash, token_begin, pos);
@@ -3,6 +3,7 @@
 #include <AggregateFunctions/AggregateFunctionFactory.h>
 #include <Common/Arena.h>
 #include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/getLeastSupertype.h>
 #include <Interpreters/ExpressionActions.h>
 #include <Interpreters/convertFieldToType.h>
@ -27,7 +28,8 @@ public:
|
|||||||
virtual ~IWindowFunction() = default;
|
virtual ~IWindowFunction() = default;
|
||||||
|
|
||||||
// Must insert the result for current_row.
|
// Must insert the result for current_row.
|
||||||
virtual void windowInsertResultInto(IColumn & to, const WindowTransform * transform) = 0;
|
virtual void windowInsertResultInto(const WindowTransform * transform,
|
||||||
|
size_t function_index) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Compares ORDER BY column values at given rows to find the boundaries of frame:
|
// Compares ORDER BY column values at given rows to find the boundaries of frame:
|
||||||
@ -37,7 +39,7 @@ template <typename ColumnType>
 static int compareValuesWithOffset(const IColumn * _compared_column,
     size_t compared_row, const IColumn * _reference_column,
     size_t reference_row,
-    uint64_t _offset,
+    const Field & _offset,
     bool offset_is_preceding)
 {
     // Casting the columns to the known type here makes it faster, probably
@ -46,7 +48,8 @@ static int compareValuesWithOffset(const IColumn * _compared_column,
         _compared_column);
     const auto * reference_column = assert_cast<const ColumnType *>(
         _reference_column);
-    const auto offset = static_cast<typename ColumnType::ValueType>(_offset);
+    const auto offset = _offset.get<typename ColumnType::ValueType>();
+    assert(offset >= 0);

     const auto compared_value_data = compared_column->getDataAt(compared_row);
     assert(compared_value_data.size == sizeof(typename ColumnType::ValueType));
@ -101,6 +104,53 @@ static int compareValuesWithOffset(const IColumn * _compared_column,
     }
 }

+// A specialization of compareValuesWithOffset for floats.
+template <typename ColumnType>
+static int compareValuesWithOffsetFloat(const IColumn * _compared_column,
+    size_t compared_row, const IColumn * _reference_column,
+    size_t reference_row,
+    const Field & _offset,
+    bool offset_is_preceding)
+{
+    // Casting the columns to the known type here makes it faster, probably
+    // because the getData call can be devirtualized.
+    const auto * compared_column = assert_cast<const ColumnType *>(
+        _compared_column);
+    const auto * reference_column = assert_cast<const ColumnType *>(
+        _reference_column);
+    const auto offset = _offset.get<typename ColumnType::ValueType>();
+    assert(offset >= 0);
+
+    const auto compared_value_data = compared_column->getDataAt(compared_row);
+    assert(compared_value_data.size == sizeof(typename ColumnType::ValueType));
+    auto compared_value = unalignedLoad<typename ColumnType::ValueType>(
+        compared_value_data.data);
+
+    const auto reference_value_data = reference_column->getDataAt(reference_row);
+    assert(reference_value_data.size == sizeof(typename ColumnType::ValueType));
+    auto reference_value = unalignedLoad<typename ColumnType::ValueType>(
+        reference_value_data.data);
+
+    // Floats overflow to Inf and the comparison will work normally, so we don't
+    // have to do anything.
+    if (offset_is_preceding)
+    {
+        reference_value -= offset;
+    }
+    else
+    {
+        reference_value += offset;
+    }
+
+    const auto result = compared_value < reference_value ? -1
+        : compared_value == reference_value ? 0 : 1;
+
+    // fmt::print(stderr, "compared {}, offset {}, reference {}, result {}\n",
+    //     compared_value, offset, reference_value, result);
+
+    return result;
+}
+
 // Helper macros to dispatch on type of the ORDER BY column
 #define APPLY_FOR_ONE_TYPE(FUNCTION, TYPE) \
 else if (typeid_cast<const TYPE *>(column)) \
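
Why a separate float specialization: the integer version must guard offset arithmetic against wraparound, while IEEE floats saturate to +/-Inf and the subsequent comparison stays correct. A self-contained sketch of that comparison (simplified signature, not the ClickHouse one):

#include <cassert>
#include <cstdio>

template <typename T>
int compareWithOffset(T compared, T reference, T offset, bool offset_is_preceding)
{
    assert(offset >= 0);
    // Floats overflow to +-Inf here, so the comparison below stays valid
    // without any explicit range check.
    reference += offset_is_preceding ? -offset : offset;
    return compared < reference ? -1 : compared == reference ? 0 : 1;
}

int main()
{
    // Current value 5 against reference 10 shifted 3 PRECEDING -> boundary 7.
    std::printf("%d\n", compareWithOffset(5.0, 10.0, 3.0, true));       // -1
    // Overflow saturates to +Inf instead of wrapping around.
    std::printf("%d\n", compareWithOffset(1e308, 1e308, 1e308, false)); // -1
}
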
@ -114,14 +164,20 @@ if (false) /* NOLINT */ \
 { \
     /* Do nothing, a starter condition. */ \
 } \
-APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int8>) \
 APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt8>) \
-APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int16>) \
 APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt16>) \
-APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int32>) \
 APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt32>) \
-APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int64>) \
 APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt64>) \
+\
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int8>) \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int16>) \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int32>) \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int64>) \
+APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int128>) \
+\
+APPLY_FOR_ONE_TYPE(FUNCTION##Float, ColumnVector<Float32>) \
+APPLY_FOR_ONE_TYPE(FUNCTION##Float, ColumnVector<Float64>) \
+\
 else \
 { \
     throw Exception(ErrorCodes::NOT_IMPLEMENTED, \
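
How this macro is consumed: the else-if chain over concrete column types picks one template instantiation at runtime and stores it in a plain function pointer. A compressed, compilable illustration of the same pattern (hypothetical names, dynamic_cast standing in for typeid_cast):

#include <cstdio>
#include <stdexcept>

struct IColumn { virtual ~IColumn() = default; };
template <typename T> struct ColumnVector final : IColumn { };

using Comparator = int (*)(const IColumn * compared, const IColumn * reference);

template <typename ColumnType>
int compareValuesWithOffset(const IColumn *, const IColumn *) { return 0; }

#define APPLY_FOR_ONE_TYPE(FUNCTION, TYPE) \
    else if (dynamic_cast<const TYPE *>(column)) \
        result = FUNCTION<TYPE>;

Comparator pickComparator(const IColumn * column)
{
    Comparator result = nullptr;
    if (false) { /* starter condition, keeps the chain uniform */ }
    APPLY_FOR_ONE_TYPE(compareValuesWithOffset, ColumnVector<int>)
    APPLY_FOR_ONE_TYPE(compareValuesWithOffset, ColumnVector<double>)
    else
        throw std::runtime_error("unsupported ORDER BY column type");
    return result;
}

int main()
{
    ColumnVector<double> column;
    std::puts(pickComparator(&column) ? "comparator selected" : "none");
}
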
@ -193,9 +249,28 @@ WindowTransform::WindowTransform(const Block & input_header_,
             == WindowFrame::BoundaryType::Offset))
     {
         assert(order_by_indices.size() == 1);
-        const IColumn * column = input_header.getByPosition(
-            order_by_indices[0]).column.get();
+        const auto & entry = input_header.getByPosition(order_by_indices[0]);
+        const IColumn * column = entry.column.get();
         APPLY_FOR_TYPES(compareValuesWithOffset)
+
+        // Check that the offset type matches the window type.
+        // Convert the offsets to the ORDER BY column type. We can't just check
+        // that it matches, because e.g. the int literals are always (U)Int64,
+        // but the column might be Int8 and so on.
+        if (window_description.frame.begin_type
+            == WindowFrame::BoundaryType::Offset)
+        {
+            window_description.frame.begin_offset = convertFieldToTypeOrThrow(
+                window_description.frame.begin_offset,
+                *entry.type);
+        }
+        if (window_description.frame.end_type
+            == WindowFrame::BoundaryType::Offset)
+        {
+            window_description.frame.end_offset = convertFieldToTypeOrThrow(
+                window_description.frame.end_offset,
+                *entry.type);
+        }
     }
 }

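
The conversion above exists because integer literals parse as (U)Int64 while the ORDER BY column may be narrower, e.g. Int8; converting once in the constructor surfaces out-of-range offsets immediately instead of at each row comparison. A simplified standalone analogue of what convertFieldToTypeOrThrow does for this case (illustrative, not the real function):

#include <cstdint>
#include <cstdio>
#include <limits>
#include <stdexcept>

// Narrow a 64-bit offset literal to the ORDER BY column type, throwing on overflow.
template <typename Target>
Target convertOffsetOrThrow(uint64_t literal)
{
    if (literal > static_cast<uint64_t>(std::numeric_limits<Target>::max()))
        throw std::out_of_range("offset does not fit the ORDER BY column type");
    return static_cast<Target>(literal);
}

int main()
{
    std::printf("%d\n", convertOffsetOrThrow<int8_t>(100));  // fits Int8
    try
    {
        convertOffsetOrThrow<int8_t>(1000);                  // throws
    }
    catch (const std::out_of_range & e)
    {
        std::puts(e.what());
    }
}
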
@ -391,7 +466,7 @@ void WindowTransform::advanceFrameStartRowsOffset()
 {
     // Just recalculate it each time by walking blocks.
     const auto [moved_row, offset_left] = moveRowNumber(current_row,
-        window_description.frame.begin_offset
+        window_description.frame.begin_offset.get<UInt64>()
         * (window_description.frame.begin_preceding ? -1 : 1));

     frame_start = moved_row;
@ -638,7 +713,7 @@ void WindowTransform::advanceFrameEndRowsOffset()
     // Walk the specified offset from the current row. The "+1" is needed
     // because the frame_end is a past-the-end pointer.
     const auto [moved_row, offset_left] = moveRowNumber(current_row,
-        window_description.frame.end_offset
+        window_description.frame.end_offset.get<UInt64>()
         * (window_description.frame.end_preceding ? -1 : 1)
         + 1);

@ -852,14 +927,14 @@ void WindowTransform::writeOutCurrentRow()
     for (size_t wi = 0; wi < workspaces.size(); ++wi)
     {
         auto & ws = workspaces[wi];
-        IColumn * result_column = block.output_columns[wi].get();

         if (ws.window_function_impl)
         {
-            ws.window_function_impl->windowInsertResultInto(*result_column, this);
+            ws.window_function_impl->windowInsertResultInto(this, wi);
         }
         else
         {
+            IColumn * result_column = block.output_columns[wi].get();
             const auto * a = ws.aggregate_function.get();
             auto * buf = ws.aggregate_function_state.data();
             // FIXME does it also allocate the result on the arena?
@ -1280,8 +1355,11 @@ struct WindowFunctionRank final : public WindowFunction
     DataTypePtr getReturnType() const override
     { return std::make_shared<DataTypeUInt64>(); }

-    void windowInsertResultInto(IColumn & to, const WindowTransform * transform) override
+    void windowInsertResultInto(const WindowTransform * transform,
+        size_t function_index) override
     {
+        IColumn & to = *transform->blockAt(transform->current_row)
+            .output_columns[function_index];
         assert_cast<ColumnUInt64 &>(to).getData().push_back(
             transform->peer_group_start_row_number);
     }
@ -1297,8 +1375,11 @@ struct WindowFunctionDenseRank final : public WindowFunction
     DataTypePtr getReturnType() const override
     { return std::make_shared<DataTypeUInt64>(); }

-    void windowInsertResultInto(IColumn & to, const WindowTransform * transform) override
+    void windowInsertResultInto(const WindowTransform * transform,
+        size_t function_index) override
     {
+        IColumn & to = *transform->blockAt(transform->current_row)
+            .output_columns[function_index];
         assert_cast<ColumnUInt64 &>(to).getData().push_back(
             transform->peer_group_number);
     }
@ -1314,13 +1395,123 @@ struct WindowFunctionRowNumber final : public WindowFunction
     DataTypePtr getReturnType() const override
     { return std::make_shared<DataTypeUInt64>(); }

-    void windowInsertResultInto(IColumn & to, const WindowTransform * transform) override
+    void windowInsertResultInto(const WindowTransform * transform,
+        size_t function_index) override
     {
+        IColumn & to = *transform->blockAt(transform->current_row)
+            .output_columns[function_index];
         assert_cast<ColumnUInt64 &>(to).getData().push_back(
             transform->current_row_number);
     }
 };

+// ClickHouse-specific variant of lag/lead that respects the window frame.
+template <bool is_lead>
+struct WindowFunctionLagLeadInFrame final : public WindowFunction
+{
+    WindowFunctionLagLeadInFrame(const std::string & name_,
+        const DataTypes & argument_types_, const Array & parameters_)
+        : WindowFunction(name_, argument_types_, parameters_)
+    {
+        if (!parameters.empty())
+        {
+            throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                "Function {} cannot be parameterized", name_);
+        }
+
+        if (argument_types.empty())
+        {
+            throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                "Function {} takes at least one argument", name_);
+        }
+
+        if (argument_types.size() == 1)
+        {
+            return;
+        }
+
+        if (!isInt64FieldType(argument_types[1]->getDefault().getType()))
+        {
+            throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                "Offset must be an integer, '{}' given",
+                argument_types[1]->getName());
+        }
+
+        if (argument_types.size() == 2)
+        {
+            return;
+        }
+
+        if (!getLeastSupertype({argument_types[0], argument_types[2]}))
+        {
+            throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                "The default value type '{}' is not convertible to the argument type '{}'",
+                argument_types[2]->getName(),
+                argument_types[0]->getName());
+        }
+
+        if (argument_types.size() > 3)
+        {
+            throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                "Function '{}' accepts at most 3 arguments, {} given",
+                name, argument_types.size());
+        }
+    }
+
+    DataTypePtr getReturnType() const override
+    { return argument_types[0]; }
+
+    void windowInsertResultInto(const WindowTransform * transform,
+        size_t function_index) override
+    {
+        const auto & current_block = transform->blockAt(transform->current_row);
+        IColumn & to = *current_block.output_columns[function_index];
+        const auto & workspace = transform->workspaces[function_index];
+
+        int offset = 1;
+        if (argument_types.size() > 1)
+        {
+            offset = (*current_block.input_columns[
+                workspace.argument_column_indices[1]])[
+                transform->current_row.row].get<Int64>();
+            if (offset < 0)
+            {
+                throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                    "The offset for function {} must be nonnegative, {} given",
+                    getName(), offset);
+            }
+        }
+
+        const auto [target_row, offset_left] = transform->moveRowNumber(
+            transform->current_row, offset * (is_lead ? 1 : -1));
+
+        if (offset_left != 0
+            || target_row < transform->frame_start
+            || transform->frame_end <= target_row)
+        {
+            // Offset is outside the frame.
+            if (argument_types.size() > 2)
+            {
+                // Column with default values is specified.
+                to.insertFrom(*current_block.input_columns[
+                    workspace.argument_column_indices[2]],
+                    transform->current_row.row);
+            }
+            else
+            {
+                to.insertDefault();
+            }
+        }
+        else
+        {
+            // Offset is inside the frame.
+            to.insertFrom(*transform->blockAt(target_row).input_columns[
+                workspace.argument_column_indices[0]],
+                target_row.row);
+        }
+    }
+};
+
 void registerWindowFunctions(AggregateFunctionFactory & factory)
 {
     // Why didn't I implement lag/lead yet? Because they are a mess. I imagine
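
Semantics of the new functions in one picture: lagInFrame(x, n) / leadInFrame(x, n) look n rows back or forward, but only within [frame_start, frame_end); outside it they fall back to the third argument (or the type default). A toy model of that rule (deliberately simplified, not the WindowTransform machinery), roughly what `SELECT lagInFrame(x, 1) OVER (... ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)` evaluates per row:

#include <cstdio>
#include <vector>

// Returns the value `offset` rows away, clamped to the frame [frame_start, frame_end).
int lagLeadInFrame(const std::vector<int> & rows, size_t current, long offset,
    bool is_lead, size_t frame_start, size_t frame_end, int default_value)
{
    long target = static_cast<long>(current) + (is_lead ? offset : -offset);
    if (target < static_cast<long>(frame_start) || target >= static_cast<long>(frame_end))
        return default_value;  // offset points outside the frame
    return rows[static_cast<size_t>(target)];
}

int main()
{
    std::vector<int> rows{10, 20, 30, 40};
    std::printf("%d\n", lagLeadInFrame(rows, 2, 1, false, 1, 3, -1));  // 20
    std::printf("%d\n", lagLeadInFrame(rows, 0, 1, false, 0, 4, -1));  // -1 (outside)
}
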
@ -1332,9 +1523,10 @@ void registerWindowFunctions(AggregateFunctionFactory & factory)
     // the whole partition like Postgres does, because using a linear amount
     // of additional memory is not an option when we have a lot of data. We must
     // be able to process at least the lag/lead in streaming fashion.
-    // Our best bet is probably rewriting, say `lag(value, offset)` to
-    // `any(value) over (rows between offset preceding and offset preceding)`,
-    // at the query planning stage.
+    // A partial solution for constant offsets is rewriting, say `lag(value, offset)`
+    // to `any(value) over (rows between offset preceding and offset preceding)`.
+    // We also implement non-standard functions `lag/leadInFrame`, that are
+    // analogous to `lag/lead`, but respect the frame.
     // Functions like cume_dist() do require materializing the entire
     // partition, but it's probably also simpler to implement them by rewriting
     // to a (rows between unbounded preceding and unbounded following) frame,

@ -1360,6 +1552,20 @@
         return std::make_shared<WindowFunctionRowNumber>(name, argument_types,
             parameters);
     });
+
+    factory.registerFunction("lagInFrame", [](const std::string & name,
+        const DataTypes & argument_types, const Array & parameters)
+    {
+        return std::make_shared<WindowFunctionLagLeadInFrame<false>>(
+            name, argument_types, parameters);
+    });
+
+    factory.registerFunction("leadInFrame", [](const std::string & name,
+        const DataTypes & argument_types, const Array & parameters)
+    {
+        return std::make_shared<WindowFunctionLagLeadInFrame<true>>(
+            name, argument_types, parameters);
+    });
 }

 }
@ -110,7 +110,9 @@ public:
     Status prepare() override;
     void work() override;

-private:
+    /*
+     * Implementation details.
+     */
     void advancePartitionEnd();

     bool arePeers(const RowNumber & x, const RowNumber & y) const;
@ -321,10 +323,7 @@ public:
     int (* compare_values_with_offset) (
         const IColumn * compared_column, size_t compared_row,
         const IColumn * reference_column, size_t reference_row,
-        // We can make it a Field later if we need the Decimals. Now we only
-        // have ints and datetime, and the underlying Field type for them is
-        // uint64_t anyway.
-        uint64_t offset,
+        const Field & offset,
         bool offset_is_preceding);
 };

@ -369,6 +369,11 @@ bool HTMLForm::MultipartReadBuffer::nextImpl()
     else
         boundary_hit = startsWith(line, boundary);

+    if (!line.empty())
+        /// If we don't make sure that memory is contiguous then situation may happen, when part of the line is inside internal memory
+        /// and other part is inside sub-buffer, thus we'll be unable to setup our working buffer properly.
+        in.makeContinuousMemoryFromCheckpointToPos();
+
     in.rollbackToCheckpoint(true);

     /// Rolling back to checkpoint may change underlying buffers.
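
The contiguity problem this hunk fixes, modeled in isolation: a peeked line may straddle the buffer's internal memory and the sub-buffer, so before rolling back to the checkpoint the checkpoint-to-position bytes are gathered into one continuous area. A toy model (hypothetical API, not ClickHouse's PeekableReadBuffer):

#include <cstdio>
#include <string>
#include <vector>

struct PeekableChunks
{
    std::vector<std::string> chunks;  // underlying non-contiguous storage
    std::string contiguous;           // checkpoint..pos, made contiguous

    void makeContinuousMemoryFromCheckpointToPos(size_t chunk_checkpoint, size_t chunk_pos)
    {
        contiguous.clear();
        for (size_t i = chunk_checkpoint; i <= chunk_pos; ++i)
            contiguous += chunks[i];  // now one continuous working buffer
    }
};

int main()
{
    // The peeked boundary line straddles two chunks.
    PeekableChunks buf{{"--bound", "ary\r\n"}, {}};
    buf.makeContinuousMemoryFromCheckpointToPos(0, 1);
    std::printf("%s", buf.contiguous.c_str());  // "--boundary\r\n" as one span
}
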
@ -107,6 +107,7 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe
     }
     catch (...)
     {
+        tryLogCurrentException(log);
         out.finalize();
     }
 };
@ -116,6 +117,7 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe
     if (auto [message, success] = checkAuthentication(request); success)
     {
         processQuery(request, response, used_output);
+        used_output.out->finalize();
         LOG_DEBUG(log, "Done processing query");
     }
     else
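
Taken together, the two hunks above converge on one pattern: the output is finalized on both the success path and in the catch handler, and the failure is logged instead of being silently swallowed. A generic sketch of that pattern (stand-in types, not the real handler):

#include <cstdio>
#include <stdexcept>

struct Output { void finalize() { std::puts("finalized"); } };

void handle(Output & out, bool fail)
{
    try
    {
        if (fail)
            throw std::runtime_error("query failed");
        std::puts("Done processing query");
        out.finalize();  // success path
    }
    catch (const std::exception & e)
    {
        std::printf("error: %s\n", e.what());  // analogue of tryLogCurrentException
        out.finalize();  // error path still finalizes
    }
}

int main()
{
    Output out;
    handle(out, false);
    handle(out, true);
}
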
@ -240,16 +240,10 @@ Poco::Timespan NuKeeperTCPHandler::receiveHandshake()
         throw Exception("Unexpected protocol version: " + toString(protocol_version), ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT);

     Coordination::read(last_zxid_seen, *in);
-
-    if (last_zxid_seen != 0)
-        throw Exception("Non zero last_zxid_seen is not supported", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT);
-
     Coordination::read(timeout_ms, *in);
+
+    /// TODO Stop ignoring this value
     Coordination::read(previous_session_id, *in);
-
-    if (previous_session_id != 0)
-        throw Exception("Non zero previous session id is not supported", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT);
-
     Coordination::read(passwd, *in);

     int8_t readonly;
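
For reference, the handshake fields in the order the reads above consume them; this struct is inferred from the hunk itself, not an official protocol definition:

#include <cstdint>
#include <cstdio>
#include <string>

struct ZooKeeperHandshake
{
    int32_t protocol_version = 0;
    int64_t last_zxid_seen = 0;       // no longer rejected when non-zero
    int32_t timeout_ms = 0;
    int64_t previous_session_id = 0;  // read but still ignored (see TODO above)
    std::string passwd;
    int8_t readonly = 0;
};

int main()
{
    ZooKeeperHandshake hs;
    hs.last_zxid_seen = 42;  // previously this would have been rejected
    std::printf("timeout=%d zxid=%lld\n", hs.timeout_ms,
        static_cast<long long>(hs.last_zxid_seen));
}
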
@ -8,10 +8,10 @@
 #include <Core/Protocol.h>
 #include <Core/QueryProcessingStage.h>
 #include <IO/Progress.h>
+#include <IO/TimeoutSetter.h>
 #include <DataStreams/BlockIO.h>
 #include <Interpreters/InternalTextLogsQueue.h>
 #include <Interpreters/Context.h>
-#include <Client/TimeoutSetter.h>

 #include "IServer.h"

@ -26,7 +26,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl
     HDFSBuilderWrapper builder;
     HDFSFSPtr fs;

-    explicit ReadBufferFromHDFSImpl(const std::string & hdfs_name_,
+    ReadBufferFromHDFSImpl(const std::string & hdfs_name_,
         const Poco::Util::AbstractConfiguration & config_)
         : hdfs_uri(hdfs_name_),
         builder(createHDFSBuilder(hdfs_uri, config_))
@ -1,8 +1,5 @@
 #include <Storages/IStorage.h>

-#include <sparsehash/dense_hash_map>
-#include <sparsehash/dense_hash_set>
-
 #include <Common/StringUtils/StringUtils.h>
 #include <Common/quoteString.h>
 #include <IO/Operators.h>
Some files were not shown because too many files have changed in this diff