Merge branch 'master' of github.com:ClickHouse/ClickHouse into fix_issue_22028

Commit 9761db7efb
.gitmodules (vendored, 2 changes)

@@ -93,7 +93,7 @@
url = https://github.com/ClickHouse-Extras/libunwind.git
[submodule "contrib/simdjson"]
path = contrib/simdjson
-url = https://github.com/ClickHouse-Extras/simdjson.git
+url = https://github.com/simdjson/simdjson.git
[submodule "contrib/rapidjson"]
path = contrib/rapidjson
url = https://github.com/ClickHouse-Extras/rapidjson
@@ -1069,11 +1069,11 @@ public:
}

template <typename DateOrTime>
-inline LUTIndex addMonthsIndex(DateOrTime v, Int64 delta) const
+inline LUTIndex NO_SANITIZE_UNDEFINED addMonthsIndex(DateOrTime v, Int64 delta) const
{
const Values & values = lut[toLUTIndex(v)];

-Int64 month = static_cast<Int64>(values.month) + delta;
+Int64 month = values.month + delta;

if (month > 0)
{
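Note on the hunk above: with the explicit cast dropped and the function marked NO_SANITIZE_UNDEFINED, the month arithmetic is allowed to overflow without a UBSan build aborting. A minimal standalone sketch of that idea (toy names, not the DateLUT code):

```cpp
// Standalone sketch, not the DateLUT implementation: a huge Int64 delta added to a small
// month value can overflow the signed range; the no_sanitize("undefined") attribute tells
// sanitizer builds to tolerate exactly this arithmetic instead of aborting.
#include <cstdint>
#include <iostream>

#if defined(__clang__) || defined(__GNUC__)
#    define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined")))
#else
#    define NO_SANITIZE_UNDEFINED
#endif

struct Values { uint8_t month; };   // toy stand-in for one LUT entry

NO_SANITIZE_UNDEFINED int64_t addMonthsToy(const Values & values, int64_t delta)
{
    int64_t month = values.month + delta;   // may leave [1, 12] or even wrap around
    return month;
}

int main()
{
    std::cout << addMonthsToy(Values{3}, INT64_MAX - 1) << '\n';   // overflows by design
}
```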
contrib/NuRaft (vendored, 2 changes)

@@ -1 +1 @@
-Subproject commit 3d3683e77753cfe015a05fae95ddf418e19f59e1
+Subproject commit 70468326ad5d72e9497944838484c591dae054ea
contrib/replxx (vendored, 2 changes)

@@ -1 +1 @@
-Subproject commit cdb6e3f2ce4464225daf9c8beeae7db98d590bdc
+Subproject commit 2b24f14594d7606792b92544bb112a6322ba34d7
contrib/simdjson (vendored, 2 changes)

@@ -1 +1 @@
-Subproject commit 3190d66a49059092a1753dc35595923debfc1698
+Subproject commit 95b4870e20be5f97d9dcf63b23b1c6f520c366c1
@@ -18,6 +18,7 @@ RUN apt-get update \
clickhouse-client=$version \
clickhouse-common-static=$version \
locales \
+tzdata \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf \
&& apt-get clean

@@ -32,6 +32,7 @@ RUN groupadd -r clickhouse --gid=101 \
clickhouse-server=$version \
locales \
wget \
+tzdata \
&& rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \
@@ -21,7 +21,9 @@ RUN addgroup -S -g 101 clickhouse \
&& chown clickhouse:clickhouse /var/lib/clickhouse \
&& chown root:clickhouse /var/log/clickhouse-server \
&& chmod +x /entrypoint.sh \
-&& apk add --no-cache su-exec bash \
+&& apk add --no-cache su-exec bash tzdata \
+&& cp /usr/share/zoneinfo/UTC /etc/localtime \
+&& echo "UTC" > /etc/timezone \
&& chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client

# we need to allow "others" access to clickhouse folder, because docker container
@@ -46,9 +46,11 @@ DATA_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --
TMP_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=tmp_path || true)"
USER_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=user_files_path || true)"
LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.log || true)"
-LOG_DIR="$(dirname "$LOG_PATH" || true)"
+LOG_DIR=""
+if [ -n "$LOG_PATH" ]; then LOG_DIR="$(dirname "$LOG_PATH")"; fi
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.errorlog || true)"
-ERROR_LOG_DIR="$(dirname "$ERROR_LOG_PATH" || true)"
+ERROR_LOG_DIR=""
+if [ -n "$ERROR_LOG_PATH" ]; then ERROR_LOG_DIR="$(dirname "$ERROR_LOG_PATH")"; fi
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=format_schema_path || true)"

CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
@@ -292,6 +292,7 @@ function run_tests
01318_decrypt # Depends on OpenSSL
01663_aes_msan # Depends on OpenSSL
01667_aes_args_check # Depends on OpenSSL
+01776_decrypt_aead_size_check # Depends on OpenSSL
01281_unsucceeded_insert_select_queries_counter
01292_create_user
01294_lazy_database_concurrent
@@ -266,14 +266,13 @@ for query_index in queries_to_run:

try:
# Will also detect too long queries during warmup stage
-res = c.execute(q, query_id = prewarm_id, settings = {'max_execution_time': 10})
+res = c.execute(q, query_id = prewarm_id, settings = {'max_execution_time': args.max_query_seconds})
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (prewarm_id, *e.args)
e.message = prewarm_id + ': ' + e.message
raise

print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
except KeyboardInterrupt:
raise
@@ -320,7 +319,7 @@ for query_index in queries_to_run:

for conn_index, c in enumerate(this_query_connections):
try:
-res = c.execute(q, query_id = run_id)
+res = c.execute(q, query_id = run_id, settings = {'max_execution_time': args.max_query_seconds})
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args)
@@ -2,7 +2,6 @@
FROM ubuntu:20.04

RUN apt-get update --yes && env DEBIAN_FRONTEND=noninteractive apt-get install wget unzip git openjdk-14-jdk maven python3 --yes --no-install-recommends

RUN wget https://github.com/sqlancer/sqlancer/archive/master.zip -O /sqlancer.zip
RUN mkdir /sqlancer && \
cd /sqlancer && \
@@ -26,6 +26,7 @@ def process_result(result_folder):
with open(err_path, 'r') as f:
if 'AssertionError' in f.read():
summary.append((test, "FAIL"))
+status = 'failure'
else:
summary.append((test, "OK"))

@@ -11,7 +11,7 @@ service clickhouse-server start && sleep 5

cd /sqlancer/sqlancer-master

-export TIMEOUT=60
+export TIMEOUT=300
export NUM_QUERIES=1000

( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPWhere | tee /test_output/TLPWhere.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPWhere.err
@@ -3,7 +3,7 @@ toc_priority: 8
toc_title: PostgreSQL
---

-# PosgtreSQL {#postgresql}
+# PostgreSQL {#postgresql}

The PostgreSQL engine allows you to perform `SELECT` queries on data that is stored on a remote PostgreSQL server.

@@ -19,7 +19,7 @@ $ echo '{"foo":"bar"}' | curl 'http://localhost:8123/?query=INSERT%20INTO%20test
Using [CLI interface](../../interfaces/cli.md):

``` bash
-$ echo '{"foo":"bar"}' | clickhouse-client ---query="INSERT INTO test FORMAT JSONEachRow"
+$ echo '{"foo":"bar"}' | clickhouse-client --query="INSERT INTO test FORMAT JSONEachRow"
```

Instead of inserting data manually, you might consider to use one of [client libraries](../../interfaces/index.md) instead.
@@ -50,7 +50,7 @@ The supported formats are:
| [Parquet](#data-format-parquet) | ✔ | ✔ |
| [Arrow](#data-format-arrow) | ✔ | ✔ |
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
-| [ORC](#data-format-orc) | ✔ | ✗ |
+| [ORC](#data-format-orc) | ✔ | ✔ |
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [Native](#native) | ✔ | ✔ |
@@ -1284,32 +1284,33 @@ To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-e

## ORC {#data-format-orc}

-[Apache ORC](https://orc.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. You can only insert data in this format to ClickHouse.
+[Apache ORC](https://orc.apache.org/) is a columnar storage format widespread in the [Hadoop](https://hadoop.apache.org/) ecosystem.

### Data Types Matching {#data_types-matching-3}

-The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` queries.
+The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.

-| ORC data type (`INSERT`) | ClickHouse data type |
-|--------------------------|-----------------------------------------------------|
-| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) |
-| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) |
-| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) |
-| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) |
-| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) |
-| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) |
-| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) |
-| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) |
-| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) |
-| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) |
-| `DATE32` | [Date](../sql-reference/data-types/date.md) |
-| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) |
-| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) |
-| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) |
+| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
+|--------------------------|-----------------------------------------------------|--------------------------|
+| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) | `UINT8` |
+| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | `INT8` |
+| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | `UINT16` |
+| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | `INT16` |
+| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | `UINT32` |
+| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | `INT32` |
+| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | `UINT64` |
+| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | `INT64` |
+| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` |
+| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` |
+| `DATE32` | [Date](../sql-reference/data-types/date.md) | `DATE32` |
+| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` |
+| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
+| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
+| `-` | [Array](../sql-reference/data-types/array.md) | `LIST` |

ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the ORC `DECIMAL` type as the ClickHouse `Decimal128` type.

-Unsupported ORC data types: `DATE32`, `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
+Unsupported ORC data types: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.

The data types of ClickHouse table columns don’t have to match the corresponding ORC data fields. When inserting data, ClickHouse interprets data types according to the table above and then [casts](../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) the data to the data type set for the ClickHouse table column.

@@ -1321,6 +1322,14 @@ You can insert ORC data from a file into ClickHouse table by the following comma
$ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC"
```

+### Selecting Data {#selecting-data-2}
+
+You can select data from a ClickHouse table and save them into some file in the ORC format by the following command:
+
+``` bash
+$ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.orc}
+```
+
To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-engines/integrations/hdfs.md).

## LineAsString {#lineasstring}

@@ -9,7 +9,7 @@ Columns:
- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — the number of times this error has been happened.
- `last_error_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — time when the last error happened.
- `last_error_message` ([String](../../sql-reference/data-types/string.md)) — message for the last error.
-- `last_error_stacktrace` ([String](../../sql-reference/data-types/string.md)) — stacktrace for the last error.
+- `last_error_trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — A [stack trace](https://en.wikipedia.org/wiki/Stack_trace) which represents a list of physical addresses where the called methods are stored.
- `remote` ([UInt8](../../sql-reference/data-types/int-uint.md)) — remote exception (i.e. received during one of the distributed query).

**Example**

@@ -25,3 +25,12 @@
│ CANNOT_OPEN_FILE │ 76 │ 1 │
└──────────────────┴──────┴───────┘
```

+``` sql
+WITH arrayMap(x -> demangle(addressToSymbol(x)), last_error_trace) AS all
+SELECT name, arrayStringConcat(all, '\n') AS res
+FROM system.errors
+LIMIT 1
+SETTINGS allow_introspection_functions=1\G
+```

@@ -320,8 +320,6 @@ Similar to `cache`, but stores data on SSD and index in RAM.
<write_buffer_size>1048576</write_buffer_size>
<!-- Path where cache file will be stored. -->
<path>/var/lib/clickhouse/clickhouse_dictionaries/test_dict</path>
-<!-- Max number on stored keys in the cache. Rounded up to a power of two. -->
-<max_stored_keys>1048576</max_stored_keys>
</ssd_cache>
</layout>
```

@@ -329,8 +327,8 @@ Similar to `cache`, but stores data on SSD and index in RAM.
or

``` sql
-LAYOUT(CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576
-    PATH /var/lib/clickhouse/clickhouse_dictionaries/test_dict MAX_STORED_KEYS 1048576))
+LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576
+    PATH /var/lib/clickhouse/clickhouse_dictionaries/test_dict))
```

### complex_key_ssd_cache {#complex-key-ssd-cache}

@@ -23,7 +23,9 @@ ClickHouse supports the standard grammar for defining windows and window functio
| `GROUPS` frame | not supported |
| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported |
| `rank()`, `dense_rank()`, `row_number()` | supported |
-| `lag/lead(value, offset)` | not supported, replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`|
+| `lag/lead(value, offset)` | Not supported. Workarounds: |
+| | 1) replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`|
+| | 2) use `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |

## References

@@ -49,7 +49,7 @@ ClickHouse может принимать (`INSERT`) и отдавать (`SELECT
| [Parquet](#data-format-parquet) | ✔ | ✔ |
| [Arrow](#data-format-arrow) | ✔ | ✔ |
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
-| [ORC](#data-format-orc) | ✔ | ✗ |
+| [ORC](#data-format-orc) | ✔ | ✔ |
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [Native](#native) | ✔ | ✔ |
@@ -1203,45 +1203,53 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_

## ORC {#data-format-orc}

-[Apache ORC](https://orc.apache.org/) - это column-oriented формат данных, распространённый в экосистеме Hadoop. Вы можете только вставлять данные этого формата в ClickHouse.
+[Apache ORC](https://orc.apache.org/) — это столбцовый формат данных, распространенный в экосистеме [Hadoop](https://hadoop.apache.org/).

### Соответствие типов данных {#sootvetstvie-tipov-dannykh-1}

-Таблица показывает поддержанные типы данных и их соответствие [типам данных](../sql-reference/data-types/index.md) ClickHouse для запросов `INSERT`.
+Таблица ниже содержит поддерживаемые типы данных и их соответствие [типам данных](../sql-reference/data-types/index.md) ClickHouse для запросов `INSERT` и `SELECT`.

-| Тип данных ORC (`INSERT`) | Тип данных ClickHouse |
-|---------------------------|-----------------------------------------------------|
-| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) |
-| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) |
-| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) |
-| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) |
-| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) |
-| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) |
-| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) |
-| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) |
-| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) |
-| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) |
-| `DATE32` | [Date](../sql-reference/data-types/date.md) |
-| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) |
-| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) |
-| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) |
+| Тип данных ORC (`INSERT`) | Тип данных ClickHouse | Тип данных ORC (`SELECT`) |
+|---------------------------|-----------------------------------------------------|---------------------------|
+| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) | `UINT8` |
+| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | `INT8` |
+| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | `UINT16` |
+| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | `INT16` |
+| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | `UINT32` |
+| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | `INT32` |
+| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | `UINT64` |
+| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | `INT64` |
+| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` |
+| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` |
+| `DATE32` | [Date](../sql-reference/data-types/date.md) | `DATE32` |
+| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` |
+| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
+| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
+| `-` | [Array](../sql-reference/data-types/array.md) | `LIST` |

-ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При обработке запроса `INSERT`, ClickHouse обрабатывает тип данных Parquet `DECIMAL` как `Decimal128`.
+ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При обработке запроса `INSERT`, ClickHouse обрабатывает тип данных ORC `DECIMAL` как `Decimal128`.

-Неподдержанные типы данных ORC: `DATE32`, `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
+Неподдерживаемые типы данных ORC: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.

-Типы данных столбцов в таблицах ClickHouse могут отличаться от типов данных для соответствующих полей ORC. При вставке данных, ClickHouse интерпретирует типы данных ORC согласно таблице соответствия, а затем [приводит](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) данные к типу, установленному для столбца таблицы ClickHouse.
+Типы данных столбцов в таблицах ClickHouse могут отличаться от типов данных для соответствующих полей ORC. При вставке данных ClickHouse интерпретирует типы данных ORC согласно таблице соответствия, а затем [приводит](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) данные к типу, установленному для столбца таблицы ClickHouse.

### Вставка данных {#vstavka-dannykh-1}

-Данные ORC можно вставить в таблицу ClickHouse командой:
+Чтобы вставить в ClickHouse данные из файла в формате ORC, используйте команду следующего вида:

``` bash
$ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC"
```

-Для обмена данных с Hadoop можно использовать [движок таблиц HDFS](../engines/table-engines/integrations/hdfs.md).
+### Вывод данных {#vyvod-dannykh-1}
+
+Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата ORC, используйте команду следующего вида:
+
+``` bash
+$ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.orc}
+```
+
+Для обмена данных с экосистемой Hadoop вы можете использовать [движок таблиц HDFS](../engines/table-engines/integrations/hdfs.md).

## LineAsString {#lineasstring}

@@ -318,8 +318,6 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000))
<write_buffer_size>1048576</write_buffer_size>
<!-- Path where cache file will be stored. -->
<path>/var/lib/clickhouse/clickhouse_dictionaries/test_dict</path>
-<!-- Max number on stored keys in the cache. Rounded up to a power of two. -->
-<max_stored_keys>1048576</max_stored_keys>
</ssd_cache>
</layout>
```

@@ -327,8 +325,8 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000))
или

``` sql
-LAYOUT(CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576
-    PATH /var/lib/clickhouse/clickhouse_dictionaries/test_dict MAX_STORED_KEYS 1048576))
+LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576
+    PATH /var/lib/clickhouse/clickhouse_dictionaries/test_dict))
```

### complex_key_ssd_cache {#complex-key-ssd-cache}

@@ -672,7 +672,7 @@ neighbor(column, offset[, default_value])
Функция может получить доступ к значению в столбце соседней строки только внутри обрабатываемого в данный момент блока данных.

Порядок строк, используемый при вычислении функции `neighbor`, может отличаться от порядка строк, возвращаемых пользователю.
-Чтобы этого не случилось, вы можете сделать подзапрос с [ORDER BY](../../sql-reference/statements/select/order-by.md) и вызвать функцию изне подзапроса.
+Чтобы этого не случилось, вы можете сделать подзапрос с [ORDER BY](../../sql-reference/statements/select/order-by.md) и вызвать функцию извне подзапроса.

**Аргументы**

@@ -26,7 +26,7 @@ numpy==1.19.2
Pygments==2.5.2
pymdown-extensions==8.0
python-slugify==4.0.1
-PyYAML==5.3.1
+PyYAML==5.4.1
repackage==0.7.3
requests==2.24.0
singledispatch==3.4.0.3
@@ -8,10 +8,10 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/copyData.h>
+#include <IO/TimeoutSetter.h>
#include <DataStreams/NativeBlockInputStream.h>
#include <DataStreams/NativeBlockOutputStream.h>
#include <Client/Connection.h>
-#include <Client/TimeoutSetter.h>
#include <Common/ClickHouseRevision.h>
#include <Common/Exception.h>
#include <Common/NetException.h>
@@ -16,7 +16,6 @@ SRCS(
HedgedConnections.cpp
HedgedConnectionsFactory.cpp
MultiplexedConnections.cpp
-TimeoutSetter.cpp

)

@@ -560,7 +560,7 @@ namespace DB
{
namespace ErrorCodes
{
-#define M(VALUE, NAME) extern const Value NAME = VALUE;
+#define M(VALUE, NAME) extern const ErrorCode NAME = VALUE;
APPLY_FOR_ERROR_CODES(M)
#undef M

@@ -587,7 +587,7 @@ namespace ErrorCodes

ErrorCode end() { return END + 1; }

-void increment(ErrorCode error_code, bool remote, const std::string & message, const std::string & stacktrace)
+void increment(ErrorCode error_code, bool remote, const std::string & message, const FramePointers & trace)
{
if (error_code >= end())
{
@@ -596,10 +596,10 @@ namespace ErrorCodes
error_code = end() - 1;
}

-values[error_code].increment(remote, message, stacktrace);
+values[error_code].increment(remote, message, trace);
}

-void ErrorPairHolder::increment(bool remote, const std::string & message, const std::string & stacktrace)
+void ErrorPairHolder::increment(bool remote, const std::string & message, const FramePointers & trace)
{
const auto now = std::chrono::system_clock::now();

@@ -609,7 +609,7 @@ namespace ErrorCodes

++error.count;
error.message = message;
-error.stacktrace = stacktrace;
+error.trace = trace;
error.error_time_ms = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count();
}
ErrorPair ErrorPairHolder::get()

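The `#define M(VALUE, NAME)` change above leans on the usual X-macro pattern, where one APPLY_FOR_* list is expanded several times to produce both the constants and their name table. A self-contained toy version (illustrative names only, not the real APPLY_FOR_ERROR_CODES list):

```cpp
// Toy X-macro demo: the same list expands once into integer constants and once into a
// name table, so the two can never drift apart.
#include <cstdio>

#define APPLY_FOR_TOY_ERROR_CODES(M) \
    M(0, OK)                         \
    M(1, UNSUPPORTED_METHOD)         \
    M(2, CANNOT_OPEN_FILE)

#define M(VALUE, NAME) constexpr int NAME = VALUE;
APPLY_FOR_TOY_ERROR_CODES(M)
#undef M

static const char * toy_error_names[] =
{
#define M(VALUE, NAME) #NAME,
    APPLY_FOR_TOY_ERROR_CODES(M)
#undef M
};

int main()
{
    std::printf("%d -> %s\n", CANNOT_OPEN_FILE, toy_error_names[CANNOT_OPEN_FILE]);
}
```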
@@ -1,11 +1,12 @@
#pragma once

-#include <stddef.h>
+#include <cstddef>
#include <cstdint>
#include <utility>
#include <mutex>
-#include <common/types.h>
#include <string_view>
+#include <vector>
+#include <common/types.h>

/** Allows to count number of simultaneously happening error codes.
* See also Exception.cpp for incrementing part.
@@ -19,6 +20,7 @@ namespace ErrorCodes
/// ErrorCode identifier (index in array).
using ErrorCode = int;
using Value = size_t;
+using FramePointers = std::vector<void *>;

/// Get name of error_code by identifier.
/// Returns statically allocated string.
@@ -33,7 +35,7 @@ namespace ErrorCodes
/// Message for the last error.
std::string message;
/// Stacktrace for the last error.
-std::string stacktrace;
+FramePointers trace;
};
struct ErrorPair
{
@@ -46,7 +48,7 @@ namespace ErrorCodes
{
public:
ErrorPair get();
-void increment(bool remote, const std::string & message, const std::string & stacktrace);
+void increment(bool remote, const std::string & message, const FramePointers & trace);

private:
ErrorPair value;
@@ -60,7 +62,7 @@ namespace ErrorCodes
ErrorCode end();

/// Add value for specified error_code.
-void increment(ErrorCode error_code, bool remote, const std::string & message, const std::string & stacktrace);
+void increment(ErrorCode error_code, bool remote, const std::string & message, const FramePointers & trace);
}

}

@@ -36,7 +36,7 @@ namespace ErrorCodes

/// - Aborts the process if error code is LOGICAL_ERROR.
/// - Increments error codes statistics.
-void handle_error_code([[maybe_unused]] const std::string & msg, const std::string & stacktrace, int code, bool remote)
+void handle_error_code([[maybe_unused]] const std::string & msg, int code, bool remote, const Exception::FramePointers & trace)
{
// In debug builds and builds with sanitizers, treat LOGICAL_ERROR as an assertion failure.
// Log the message before we fail.
@@ -47,20 +47,21 @@ void handle_error_code([[maybe_unused]] const std::string & msg, const std::stri
abort();
}
#endif
-ErrorCodes::increment(code, remote, msg, stacktrace);

+ErrorCodes::increment(code, remote, msg, trace);
}

Exception::Exception(const std::string & msg, int code, bool remote_)
: Poco::Exception(msg, code)
, remote(remote_)
{
-handle_error_code(msg, getStackTraceString(), code, remote);
+handle_error_code(msg, code, remote, getStackFramePointers());
}

Exception::Exception(const std::string & msg, const Exception & nested, int code)
: Poco::Exception(msg, nested, code)
{
-handle_error_code(msg, getStackTraceString(), code, remote);
+handle_error_code(msg, code, remote, getStackFramePointers());
}

Exception::Exception(CreateFromPocoTag, const Poco::Exception & exc)
@@ -101,6 +102,31 @@ std::string Exception::getStackTraceString() const
#endif
}

+Exception::FramePointers Exception::getStackFramePointers() const
+{
+FramePointers frame_pointers;
+#ifdef STD_EXCEPTION_HAS_STACK_TRACE
+{
+frame_pointers.resize(get_stack_trace_size());
+for (size_t i = 0; i < frame_pointers.size(); ++i)
+{
+frame_pointers[i] = get_stack_trace_frames()[i];
+}
+}
+#else
+{
+size_t stack_trace_size = trace.getSize();
+size_t stack_trace_offset = trace.getOffset();
+frame_pointers.reserve(stack_trace_size - stack_trace_offset);
+for (size_t i = stack_trace_offset; i < stack_trace_size; ++i)
+{
+frame_pointers.push_back(trace.getFramePointers()[i]);
+}
+}
+#endif
+return frame_pointers;
+}
+

void throwFromErrno(const std::string & s, int code, int the_errno)
{
@@ -24,6 +24,8 @@ namespace DB
class Exception : public Poco::Exception
{
public:
+using FramePointers = std::vector<void *>;
+
Exception() = default;
Exception(const std::string & msg, int code, bool remote_ = false);
Exception(const std::string & msg, const Exception & nested, int code);
@@ -66,6 +68,8 @@ public:
bool isRemoteException() const { return remote; }

std::string getStackTraceString() const;
+/// Used for system.errors
+FramePointers getStackFramePointers() const;

private:
#ifndef STD_EXCEPTION_HAS_STACK_TRACE
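The Exception/ErrorCodes changes above switch system.errors from storing a pre-formatted stacktrace string to storing raw frame pointers that are symbolized only on demand (e.g. with addressToSymbol/demangle, as in the docs hunk earlier). A rough standalone illustration of that capture-now, symbolize-later idea, assuming a glibc/Linux environment and using the backtrace API rather than the ClickHouse StackTrace class:

```cpp
// Illustration only: capture raw frame pointers cheaply when the error happens and pay for
// symbolization only when the trace is actually read.
#include <execinfo.h>
#include <cstdio>
#include <cstdlib>
#include <vector>

using FramePointers = std::vector<void *>;

FramePointers captureFrames()
{
    void * raw[64];
    int n = backtrace(raw, 64);                 // cheap stack walk, no symbol lookup
    return {raw, raw + n};
}

void printFrames(const FramePointers & frames)
{
    char ** names = backtrace_symbols(frames.data(), static_cast<int>(frames.size()));
    if (!names)
        return;
    for (size_t i = 0; i < frames.size(); ++i)
        std::printf("%s\n", names[i]);          // expensive part, done lazily
    std::free(names);
}

int main()
{
    printFrames(captureFrames());
}
```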
@@ -271,13 +271,13 @@ private:
};

template <typename Key, typename Mapped>
-struct DefaultCellDisposer
+struct DefaultLRUHashMapCellDisposer
{
void operator()(const Key &, const Mapped &) const {}
};

-template <typename Key, typename Value, typename Disposer = DefaultCellDisposer<Key, Value>, typename Hash = DefaultHash<Key>>
+template <typename Key, typename Value, typename Disposer = DefaultLRUHashMapCellDisposer<Key, Value>, typename Hash = DefaultHash<Key>>
using LRUHashMap = LRUHashMapImpl<Key, Value, Disposer, Hash, false>;

-template <typename Key, typename Value, typename Disposer = DefaultCellDisposer<Key, Value>, typename Hash = DefaultHash<Key>>
+template <typename Key, typename Value, typename Disposer = DefaultLRUHashMapCellDisposer<Key, Value>, typename Hash = DefaultHash<Key>>
using LRUHashMapWithSavedHash = LRUHashMapImpl<Key, Value, Disposer, Hash, true>;
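The rename above only avoids a name clash with another DefaultCellDisposer; the pattern itself is unchanged: a disposer functor supplied as a defaulted template parameter and invoked when an entry is evicted. A toy illustration of that pattern (hypothetical TinyCache type, not LRUHashMapImpl):

```cpp
// Hypothetical sketch of the disposer pattern: a no-op default that callers can override
// to run cleanup whenever a cell is thrown out of the cache.
#include <iostream>
#include <string>

template <typename Key, typename Mapped>
struct DefaultLRUHashMapCellDisposer
{
    void operator()(const Key &, const Mapped &) const {}   // default: do nothing on eviction
};

template <typename Key, typename Value, typename Disposer = DefaultLRUHashMapCellDisposer<Key, Value>>
struct TinyCache
{
    Disposer disposer;
    void evict(const Key & key, const Value & value) { disposer(key, value); }
};

struct LoggingDisposer
{
    void operator()(const int & key, const std::string & value) const
    {
        std::cout << "evicted " << key << " -> " << value << '\n';
    }
};

int main()
{
    TinyCache<int, std::string, LoggingDisposer> cache;
    cache.evict(1, "one");
}
```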
@@ -692,6 +692,30 @@ public:
assign(from.begin(), from.end());
}

+void erase(const_iterator first, const_iterator last)
+{
+iterator first_no_const = const_cast<iterator>(first);
+iterator last_no_const = const_cast<iterator>(last);
+
+size_t items_to_move = end() - last;
+
+while (items_to_move != 0)
+{
+*first_no_const = *last_no_const;
+
+++first_no_const;
+++last_no_const;
+
+--items_to_move;
+}
+
+this->c_end = reinterpret_cast<char *>(first_no_const);
+}
+
+void erase(const_iterator pos)
+{
+this->erase(pos, pos + 1);
+}
+
bool operator== (const PODArray & rhs) const
{
@@ -92,3 +92,57 @@ TEST(Common, PODInsertElementSizeNotMultipleOfLeftPadding)

EXPECT_EQ(arr1_initially_empty.size(), items_to_insert_size);
}

+TEST(Common, PODErase)
+{
+{
+PaddedPODArray<UInt64> items {0,1,2,3,4,5,6,7,8,9};
+PaddedPODArray<UInt64> expected;
+expected = {0,1,2,3,4,5,6,7,8,9};
+
+items.erase(items.begin(), items.begin());
+EXPECT_EQ(items, expected);
+
+items.erase(items.end(), items.end());
+EXPECT_EQ(items, expected);
+}
+{
+PaddedPODArray<UInt64> actual {0,1,2,3,4,5,6,7,8,9};
+PaddedPODArray<UInt64> expected;
+
+expected = {0,1,4,5,6,7,8,9};
+actual.erase(actual.begin() + 2, actual.begin() + 4);
+EXPECT_EQ(actual, expected);
+
+expected = {0,1,4};
+actual.erase(actual.begin() + 3, actual.end());
+EXPECT_EQ(actual, expected);
+
+expected = {};
+actual.erase(actual.begin(), actual.end());
+EXPECT_EQ(actual, expected);
+
+for (size_t i = 0; i < 10; ++i)
+actual.emplace_back(static_cast<UInt64>(i));
+
+expected = {0,1,4,5,6,7,8,9};
+actual.erase(actual.begin() + 2, actual.begin() + 4);
+EXPECT_EQ(actual, expected);
+
+expected = {0,1,4};
+actual.erase(actual.begin() + 3, actual.end());
+EXPECT_EQ(actual, expected);
+
+expected = {};
+actual.erase(actual.begin(), actual.end());
+EXPECT_EQ(actual, expected);
+}
+{
+PaddedPODArray<UInt64> actual {0,1,2,3,4,5,6,7,8,9};
+PaddedPODArray<UInt64> expected;
+
+expected = {1,2,3,4,5,6,7,8,9};
+actual.erase(actual.begin());
+EXPECT_EQ(actual, expected);
+}
+}
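For reference, the erase(first, last) contract exercised by this test is the ordinary STL one; a minimal standalone check against std::vector (assumption: PODArray's new erase is meant to mirror these semantics) looks like this:

```cpp
// Quick sanity check using std::vector as the reference implementation of erase(first, last):
// later elements shift left and the container shrinks by (last - first).
#include <cassert>
#include <vector>

int main()
{
    std::vector<unsigned long long> v{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};

    v.erase(v.begin() + 2, v.begin() + 4);                               // drops 2 and 3
    assert((v == std::vector<unsigned long long>{0, 1, 4, 5, 6, 7, 8, 9}));

    v.erase(v.begin());                                                  // single-element overload
    assert((v == std::vector<unsigned long long>{1, 4, 5, 6, 7, 8, 9}));
    return 0;
}
```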
@@ -14,7 +14,7 @@ PEERDIR(
clickhouse/base/common
clickhouse/base/pcg-random
clickhouse/base/widechar_width
-contrib/libs/libcpuid/libcpuid
+contrib/libs/libcpuid
contrib/libs/openssl
contrib/libs/poco/NetSSL_OpenSSL
contrib/libs/re2
@@ -13,7 +13,7 @@ PEERDIR(
clickhouse/base/common
clickhouse/base/pcg-random
clickhouse/base/widechar_width
-contrib/libs/libcpuid/libcpuid
+contrib/libs/libcpuid
contrib/libs/openssl
contrib/libs/poco/NetSSL_OpenSSL
contrib/libs/re2
@@ -51,7 +51,7 @@ bool CachedCompressedReadBuffer::nextImpl()
{
owned_cell->additional_bytes = codec->getAdditionalSizeAtTheEndOfBuffer();
owned_cell->data.resize(size_decompressed + owned_cell->additional_bytes);
-decompress(owned_cell->data.data(), size_decompressed, size_compressed_without_checksum);
+decompressTo(owned_cell->data.data(), size_decompressed, size_compressed_without_checksum);

}

@@ -21,7 +21,7 @@ bool CompressedReadBuffer::nextImpl()
memory.resize(size_decompressed + additional_size_at_the_end_of_buffer);
working_buffer = Buffer(memory.data(), &memory[size_decompressed]);

-decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum);
+decompress(working_buffer, size_decompressed, size_compressed_without_checksum);

return true;
}
@@ -48,7 +48,7 @@ size_t CompressedReadBuffer::readBig(char * to, size_t n)
/// If the decompressed block fits entirely where it needs to be copied.
if (size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read)
{
-decompress(to + bytes_read, size_decompressed, size_compressed_without_checksum);
+decompressTo(to + bytes_read, size_decompressed, size_compressed_without_checksum);
bytes_read += size_decompressed;
bytes += size_decompressed;
}
@@ -61,9 +61,9 @@ size_t CompressedReadBuffer::readBig(char * to, size_t n)

memory.resize(size_decompressed + additional_size_at_the_end_of_buffer);
working_buffer = Buffer(memory.data(), &memory[size_decompressed]);
-pos = working_buffer.begin();

-decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum);
+decompress(working_buffer, size_decompressed, size_compressed_without_checksum);
+pos = working_buffer.begin();

bytes_read += read(to + bytes_read, n - bytes_read);
break;
@@ -184,7 +184,7 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
}


-void CompressedReadBufferBase::decompress(char * to, size_t size_decompressed, size_t size_compressed_without_checksum)
+static void readHeaderAndGetCodec(const char * compressed_buffer, size_t size_decompressed, CompressionCodecPtr & codec, bool allow_different_codecs)
{
ProfileEvents::increment(ProfileEvents::CompressedReadBufferBlocks);
ProfileEvents::increment(ProfileEvents::CompressedReadBufferBytes, size_decompressed);
@@ -210,11 +210,38 @@ void CompressedReadBufferBase::decompress(char * to, size_t size_decompressed, s
ErrorCodes::CANNOT_DECOMPRESS);
}
}
}


+void CompressedReadBufferBase::decompressTo(char * to, size_t size_decompressed, size_t size_compressed_without_checksum)
+{
+readHeaderAndGetCodec(compressed_buffer, size_decompressed, codec, allow_different_codecs);
codec->decompress(compressed_buffer, size_compressed_without_checksum, to);
}


+void CompressedReadBufferBase::decompress(BufferBase::Buffer & to, size_t size_decompressed, size_t size_compressed_without_checksum)
+{
+readHeaderAndGetCodec(compressed_buffer, size_decompressed, codec, allow_different_codecs);
+
+if (codec->isNone())
+{
+/// Shortcut for NONE codec to avoid extra memcpy.
+/// We doing it by changing the buffer `to` to point to existing uncompressed data.
+
+UInt8 header_size = ICompressionCodec::getHeaderSize();
+if (size_compressed_without_checksum < header_size)
+throw Exception(ErrorCodes::CORRUPTED_DATA,
+"Can't decompress data: the compressed data size ({}, this should include header size) is less than the header size ({})",
+size_compressed_without_checksum, static_cast<size_t>(header_size));
+
+to = BufferBase::Buffer(compressed_buffer + header_size, compressed_buffer + size_compressed_without_checksum);
+}
+else
+codec->decompress(compressed_buffer, size_compressed_without_checksum, to.begin());
+}


/// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'.
CompressedReadBufferBase::CompressedReadBufferBase(ReadBuffer * in, bool allow_different_codecs_)
: compressed_in(in), own_compressed_buffer(0), allow_different_codecs(allow_different_codecs_)
@@ -3,6 +3,7 @@
#include <Common/PODArray.h>
#include <Compression/LZ4_decompress_faster.h>
#include <Compression/ICompressionCodec.h>
+#include <IO/BufferBase.h>


namespace DB
@@ -37,7 +38,12 @@ protected:
/// Returns number of compressed bytes read.
size_t readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum, bool always_copy);

-void decompress(char * to, size_t size_decompressed, size_t size_compressed_without_checksum);
+/// Decompress into memory pointed by `to`
+void decompressTo(char * to, size_t size_decompressed, size_t size_compressed_without_checksum);
+
+/// This method can change location of `to` to avoid unnecessary copy if data is uncompressed.
+/// It is more efficient for compression codec NONE but not suitable if you want to decompress into specific location.
+void decompress(BufferBase::Buffer & to, size_t size_decompressed, size_t size_compressed_without_checksum);

public:
/// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'.
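The header comment above describes the key trick of the Buffer-taking decompress(): for the NONE codec the output buffer is simply re-pointed at the bytes that already sit after the header, so nothing is copied. A simplified, self-contained sketch with toy types (not the ClickHouse BufferBase/ICompressionCodec API):

```cpp
// Toy sketch of the zero-copy path: if the block is stored uncompressed (NONE codec),
// make the destination buffer view the existing bytes after the header instead of copying.
#include <cstddef>
#include <cstring>
#include <iostream>
#include <string>

struct Buffer { char * begin = nullptr; char * end = nullptr; };

void decompressInto(Buffer & to, char * compressed, size_t header_size, size_t total_size, bool is_none_codec)
{
    if (is_none_codec)
        to = Buffer{compressed + header_size, compressed + total_size};            // no memcpy
    else
        std::memcpy(to.begin, compressed + header_size, total_size - header_size); // stand-in for a real codec
}

int main()
{
    char block[] = "HDRhello";                     // pretend 3-byte header + 5-byte payload
    Buffer out;
    decompressInto(out, block, 3, sizeof(block) - 1, /*is_none_codec=*/true);
    std::cout << std::string(out.begin, out.end) << '\n';   // prints "hello"
}
```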
@@ -31,7 +31,7 @@ bool CompressedReadBufferFromFile::nextImpl()
memory.resize(size_decompressed + additional_size_at_the_end_of_buffer);
working_buffer = Buffer(memory.data(), &memory[size_decompressed]);

-decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum);
+decompress(working_buffer, size_decompressed, size_compressed_without_checksum);

return true;
}
@@ -108,7 +108,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n)
/// If the decompressed block fits entirely where it needs to be copied.
if (size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read)
{
-decompress(to + bytes_read, size_decompressed, size_compressed_without_checksum);
+decompressTo(to + bytes_read, size_decompressed, size_compressed_without_checksum);
bytes_read += size_decompressed;
bytes += size_decompressed;
}
@@ -122,9 +122,9 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n)

memory.resize(size_decompressed + additional_size_at_the_end_of_buffer);
working_buffer = Buffer(memory.data(), &memory[size_decompressed]);
-pos = working_buffer.begin();

-decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum);
+decompress(working_buffer, size_decompressed, size_compressed_without_checksum);
+pos = working_buffer.begin();

bytes_read += read(to + bytes_read, n - bytes_read);
break;
@@ -98,7 +98,7 @@ UInt32 ICompressionCodec::decompress(const char * source, UInt32 source_size, ch

UInt8 header_size = getHeaderSize();
if (source_size < header_size)
-throw Exception(ErrorCodes::CORRUPTED_DATA, "Can't decompress data: the compressed data size ({}), this should include header size) is less than the header size ({})", source_size, size_t(header_size));
+throw Exception(ErrorCodes::CORRUPTED_DATA, "Can't decompress data: the compressed data size ({}, this should include header size) is less than the header size ({})", source_size, static_cast<size_t>(header_size));

uint8_t our_method = getMethodByte();
uint8_t method = source[0];
@@ -31,6 +31,8 @@ struct Settings;
M(UInt64, rotate_log_storage_interval, 10000, "How many records will be stored in one log storage file", 0) \
M(UInt64, snapshots_to_keep, 3, "How many compressed snapshots to keep on disk", 0) \
M(UInt64, stale_log_gap, 10000, "When node became stale and should receive snapshots from leader", 0) \
M(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \
+M(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consesus with similar speed", 0) \
M(Bool, force_sync, true, " Call fsync on each change in RAFT changelog", 0)

DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)
@@ -30,6 +30,8 @@ NuKeeperServer::NuKeeperServer(
, state_manager(nuraft::cs_new<NuKeeperStateManager>(server_id, "test_keeper_server", config, coordination_settings))
, responses_queue(responses_queue_)
{
+if (coordination_settings->quorum_reads)
+LOG_WARNING(&Poco::Logger::get("NuKeeperServer"), "Quorum reads enabled, NuKeeper will work slower.");
}

void NuKeeperServer::startup()
@@ -59,6 +61,7 @@ void NuKeeperServer::startup()
params.reserved_log_items_ = coordination_settings->reserved_log_items;
params.snapshot_distance_ = coordination_settings->snapshot_distance;
params.stale_log_gap_ = coordination_settings->stale_log_gap;
params.fresh_log_gap_ = coordination_settings->fresh_log_gap;
params.client_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds();
params.auto_forwarding_ = coordination_settings->auto_forwarding;
params.auto_forwarding_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds() * 2;
@@ -106,7 +109,7 @@ nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(int64_t session_id, const Coord
void NuKeeperServer::putRequest(const NuKeeperStorage::RequestForSession & request_for_session)
{
auto [session_id, request] = request_for_session;
-if (isLeaderAlive() && request->isReadRequest())
+if (!coordination_settings->quorum_reads && isLeaderAlive() && request->isReadRequest())
{
state_machine->processReadRequest(request_for_session);
}
@@ -185,6 +188,9 @@ nuraft::cb_func::ReturnCode NuKeeperServer::callbackFunc(nuraft::cb_func::Type t
if (next_index < last_commited || next_index - last_commited <= 1)
commited_store = true;

+if (initialized_flag)
+return nuraft::cb_func::ReturnCode::Ok;
+
auto set_initialized = [this] ()
{
std::unique_lock lock(initialized_mutex);
@@ -196,10 +202,27 @@ nuraft::cb_func::ReturnCode NuKeeperServer::callbackFunc(nuraft::cb_func::Type t
{
case nuraft::cb_func::BecomeLeader:
{
-if (commited_store) /// We become leader and store is empty, ready to serve requests
+/// We become leader and store is empty or we already committed it
+if (commited_store || initial_batch_committed)
set_initialized();
return nuraft::cb_func::ReturnCode::Ok;
}
+case nuraft::cb_func::BecomeFollower:
+case nuraft::cb_func::GotAppendEntryReqFromLeader:
+{
+if (isLeaderAlive())
+{
+auto leader_index = raft_instance->get_leader_committed_log_idx();
+auto our_index = raft_instance->get_committed_log_idx();
+/// This may happen when we start RAFT cluster from scratch.
+/// Node first became leader, and after that some other node became leader.
+/// BecameFresh for this node will not be called because it was already fresh
+/// when it was leader.
+if (leader_index < our_index + coordination_settings->fresh_log_gap)
+set_initialized();
+}
+return nuraft::cb_func::ReturnCode::Ok;
+}
case nuraft::cb_func::BecomeFresh:
{
set_initialized(); /// We are fresh follower, ready to serve requests.
@@ -209,6 +232,7 @@ nuraft::cb_func::ReturnCode NuKeeperServer::callbackFunc(nuraft::cb_func::Type t
{
if (isLeader()) /// We have committed our log store and we are leader, ready to serve requests.
set_initialized();
+initial_batch_committed = true;
return nuraft::cb_func::ReturnCode::Ok;
}
default: /// ignore other events
@@ -220,7 +244,7 @@ void NuKeeperServer::waitInit()
{
std::unique_lock lock(initialized_mutex);
int64_t timeout = coordination_settings->startup_timeout.totalMilliseconds();
-if (!initialized_cv.wait_for(lock, std::chrono::milliseconds(timeout), [&] { return initialized_flag; }))
+if (!initialized_cv.wait_for(lock, std::chrono::milliseconds(timeout), [&] { return initialized_flag.load(); }))
throw Exception(ErrorCodes::RAFT_ERROR, "Failed to wait RAFT initialization");
}

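waitInit() above now reads an std::atomic<bool> inside the wait predicate (initialized_flag.load()) while still waiting on the mutex/condition_variable pair. A minimal standalone sketch of that waiting shape with illustrative names (not the actual NuKeeperServer members):

```cpp
// Standalone sketch: a flag that callbacks may also read without the mutex is kept atomic,
// while the waiter uses the mutex/condition_variable with an explicit .load() in the
// predicate and a timeout, mirroring the waitInit() shape.
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>

std::mutex initialized_mutex;
std::condition_variable initialized_cv;
std::atomic<bool> initialized_flag{false};

int main()
{
    std::thread raft_callback([]
    {
        std::this_thread::sleep_for(std::chrono::milliseconds(50));
        {
            std::lock_guard lock(initialized_mutex);
            initialized_flag = true;           // set under the lock so the waiter cannot miss it
        }
        initialized_cv.notify_all();
    });

    std::unique_lock lock(initialized_mutex);
    bool ok = initialized_cv.wait_for(lock, std::chrono::seconds(5), [] { return initialized_flag.load(); });
    std::cout << (ok ? "initialized" : "timed out waiting for initialization") << '\n';

    raft_callback.join();
}
```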
@@ -31,8 +31,9 @@ private:
ResponsesQueue & responses_queue;

std::mutex initialized_mutex;
-bool initialized_flag = false;
+std::atomic<bool> initialized_flag = false;
std::condition_variable initialized_cv;
+std::atomic<bool> initial_batch_committed = false;

nuraft::cb_func::ReturnCode callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * param);

@@ -241,9 +241,10 @@ NuKeeperStorageSnapshot::~NuKeeperStorageSnapshot()
storage->disableSnapshotMode();
}

-NuKeeperSnapshotManager::NuKeeperSnapshotManager(const std::string & snapshots_path_, size_t snapshots_to_keep_)
+NuKeeperSnapshotManager::NuKeeperSnapshotManager(const std::string & snapshots_path_, size_t snapshots_to_keep_, size_t storage_tick_time_)
: snapshots_path(snapshots_path_)
, snapshots_to_keep(snapshots_to_keep_)
+, storage_tick_time(storage_tick_time_)
{
namespace fs = std::filesystem;

@@ -325,22 +326,24 @@ nuraft::ptr<nuraft::buffer> NuKeeperSnapshotManager::serializeSnapshotToBuffer(c
return writer.getBuffer();
}

-SnapshotMetadataPtr NuKeeperSnapshotManager::deserializeSnapshotFromBuffer(NuKeeperStorage * storage, nuraft::ptr<nuraft::buffer> buffer)
+SnapshotMetaAndStorage NuKeeperSnapshotManager::deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const
{
ReadBufferFromNuraftBuffer reader(buffer);
CompressedReadBuffer compressed_reader(reader);
-return NuKeeperStorageSnapshot::deserialize(*storage, compressed_reader);
+auto storage = std::make_unique<NuKeeperStorage>(storage_tick_time);
+auto snapshot_metadata = NuKeeperStorageSnapshot::deserialize(*storage, compressed_reader);
+return std::make_pair(snapshot_metadata, std::move(storage));
}

-SnapshotMetadataPtr NuKeeperSnapshotManager::restoreFromLatestSnapshot(NuKeeperStorage * storage)
+SnapshotMetaAndStorage NuKeeperSnapshotManager::restoreFromLatestSnapshot()
{
if (existing_snapshots.empty())
-return nullptr;
+return {};

auto buffer = deserializeLatestSnapshotBufferFromDisk();
if (!buffer)
-return nullptr;
-return deserializeSnapshotFromBuffer(storage, buffer);
+return {};
+return deserializeSnapshotFromBuffer(buffer);
}

void NuKeeperSnapshotManager::removeOutdatedSnapshotsIfNeeded()
@@ -40,17 +40,20 @@ public:
using NuKeeperStorageSnapshotPtr = std::shared_ptr<NuKeeperStorageSnapshot>;
using CreateSnapshotCallback = std::function<void(NuKeeperStorageSnapshotPtr &&)>;


+using SnapshotMetaAndStorage = std::pair<SnapshotMetadataPtr, NuKeeperStoragePtr>;
+
class NuKeeperSnapshotManager
{
public:
-NuKeeperSnapshotManager(const std::string & snapshots_path_, size_t snapshots_to_keep_);
+NuKeeperSnapshotManager(const std::string & snapshots_path_, size_t snapshots_to_keep_, size_t storage_tick_time_ = 500);

-SnapshotMetadataPtr restoreFromLatestSnapshot(NuKeeperStorage * storage);
+SnapshotMetaAndStorage restoreFromLatestSnapshot();

static nuraft::ptr<nuraft::buffer> serializeSnapshotToBuffer(const NuKeeperStorageSnapshot & snapshot);
std::string serializeSnapshotBufferToDisk(nuraft::buffer & buffer, size_t up_to_log_idx);

-static SnapshotMetadataPtr deserializeSnapshotFromBuffer(NuKeeperStorage * storage, nuraft::ptr<nuraft::buffer> buffer);
+SnapshotMetaAndStorage deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const;

nuraft::ptr<nuraft::buffer> deserializeSnapshotBufferFromDisk(size_t up_to_log_idx) const;
nuraft::ptr<nuraft::buffer> deserializeLatestSnapshotBufferFromDisk();
@@ -74,6 +77,7 @@ private:
const std::string snapshots_path;
const size_t snapshots_to_keep;
std::map<size_t, std::string> existing_snapshots;
+size_t storage_tick_time;
};

struct CreateSnapshotTask
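restoreFromLatestSnapshot() and deserializeSnapshotFromBuffer() now build the storage themselves and hand it back together with the snapshot metadata instead of filling a caller-owned pointer. A toy sketch of that return-a-pair shape (hypothetical Storage/SnapshotMetadata types, not the NuKeeper classes):

```cpp
// Illustrative sketch of the refactoring direction: deserialization constructs a fresh
// storage and returns it with the metadata, so the caller can swap it in wholesale.
#include <iostream>
#include <memory>
#include <string>
#include <utility>

struct SnapshotMetadata { size_t last_log_idx = 0; };
struct Storage { std::string data; };

using SnapshotMetaAndStorage = std::pair<std::shared_ptr<SnapshotMetadata>, std::unique_ptr<Storage>>;

SnapshotMetaAndStorage deserializeSnapshot(const std::string & buffer)
{
    auto meta = std::make_shared<SnapshotMetadata>();
    meta->last_log_idx = buffer.size();              // stand-in for real parsing
    auto storage = std::make_unique<Storage>();
    storage->data = buffer;
    return {meta, std::move(storage)};
}

int main()
{
    auto [meta, storage] = deserializeSnapshot("abc");
    std::cout << meta->last_log_idx << ' ' << storage->data << '\n';
}
```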
@@ -4,6 +4,7 @@
#include <IO/ReadHelpers.h>
#include <Common/ZooKeeper/ZooKeeperIO.h>
#include <Coordination/NuKeeperSnapshotManager.h>
+#include <future>

namespace DB
{
@@ -37,8 +38,7 @@ NuKeeperStorage::RequestForSession parseRequest(nuraft::buffer & data)

NuKeeperStateMachine::NuKeeperStateMachine(ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, const std::string & snapshots_path_, const CoordinationSettingsPtr & coordination_settings_)
: coordination_settings(coordination_settings_)
-, storage(coordination_settings->dead_session_check_period_ms.totalMilliseconds())
-, snapshot_manager(snapshots_path_, coordination_settings->snapshots_to_keep)
+, snapshot_manager(snapshots_path_, coordination_settings->snapshots_to_keep, coordination_settings->dead_session_check_period_ms.totalMicroseconds())
, responses_queue(responses_queue_)
, snapshots_queue(snapshots_queue_)
, last_committed_idx(0)
@@ -60,7 +60,7 @@ void NuKeeperStateMachine::init()
try
{
latest_snapshot_buf = snapshot_manager.deserializeSnapshotBufferFromDisk(latest_log_index);
-latest_snapshot_meta = snapshot_manager.deserializeSnapshotFromBuffer(&storage, latest_snapshot_buf);
+std::tie(latest_snapshot_meta, storage) = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_buf);
last_committed_idx = latest_snapshot_meta->get_last_log_idx();
loaded = true;
break;
@@ -83,6 +83,9 @@ void NuKeeperStateMachine::init()
{
LOG_DEBUG(log, "No existing snapshots, last committed log index {}", last_committed_idx);
}

+if (!storage)
+storage = std::make_unique<NuKeeperStorage>(coordination_settings->dead_session_check_period_ms.totalMilliseconds());
}

nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, nuraft::buffer & data)
@@ -96,7 +99,7 @@ nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, n
nuraft::buffer_serializer bs(response);
{
std::lock_guard lock(storage_lock);
-session_id = storage.getSessionID(session_timeout_ms);
+session_id = storage->getSessionID(session_timeout_ms);
bs.put_i64(session_id);
}
LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_timeout_ms);
@@ -109,7 +112,7 @@ nuraft::ptr<nuraft::buffer> NuKeeperStateMachine::commit(const size_t log_idx, n
NuKeeperStorage::ResponsesForSessions responses_for_sessions;
{
std::lock_guard lock(storage_lock);
-responses_for_sessions = storage.processRequest(request_for_session.request, request_for_session.session_id, log_idx);
+responses_for_sessions = storage->processRequest(request_for_session.request, request_for_session.session_id, log_idx);
for (auto & response_for_session : responses_for_sessions)
responses_queue.push(response_for_session);
}
@@ -133,7 +136,7 @@ bool NuKeeperStateMachine::apply_snapshot(nuraft::snapshot & s)

{
std::lock_guard lock(storage_lock);
-snapshot_manager.deserializeSnapshotFromBuffer(&storage, latest_snapshot_ptr);
+std::tie(latest_snapshot_meta, storage) = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_ptr);
}
last_committed_idx = s.get_last_log_idx();
return true;
@@ -157,7 +160,7 @@ void NuKeeperStateMachine::create_snapshot(
CreateSnapshotTask snapshot_task;
{
std::lock_guard lock(storage_lock);
-snapshot_task.snapshot = std::make_shared<NuKeeperStorageSnapshot>(&storage, snapshot_meta_copy);
+snapshot_task.snapshot = std::make_shared<NuKeeperStorageSnapshot>(storage.get(), snapshot_meta_copy);
}

snapshot_task.create_snapshot = [this, when_done] (NuKeeperStorageSnapshotPtr && snapshot)
@@ -179,7 +182,7 @@ void NuKeeperStateMachine::create_snapshot(
{
/// Must do it with lock (clearing elements from list)
std::lock_guard lock(storage_lock);
-storage.clearGarbageAfterSnapshot();
+storage->clearGarbageAfterSnapshot();
/// Destroy snapshot with lock
snapshot.reset();
LOG_TRACE(log, "Cleared garbage after snapshot");
@ -214,7 +217,7 @@ void NuKeeperStateMachine::save_logical_snp_obj(
|
||||
if (obj_id == 0)
|
||||
{
|
||||
std::lock_guard lock(storage_lock);
|
||||
NuKeeperStorageSnapshot snapshot(&storage, s.get_last_log_idx());
|
||||
NuKeeperStorageSnapshot snapshot(storage.get(), s.get_last_log_idx());
|
||||
cloned_buffer = snapshot_manager.serializeSnapshotToBuffer(snapshot);
|
||||
}
|
||||
else
|
||||
@ -225,7 +228,28 @@ void NuKeeperStateMachine::save_logical_snp_obj(
|
||||
nuraft::ptr<nuraft::buffer> snp_buf = s.serialize();
|
||||
cloned_meta = nuraft::snapshot::deserialize(*snp_buf);
|
||||
|
||||
auto result_path = snapshot_manager.serializeSnapshotBufferToDisk(*cloned_buffer, s.get_last_log_idx());
|
||||
/// Sometimes NuRaft can call save and create snapshots from different threads
|
||||
/// at once. To avoid race conditions we serialize snapshots through snapshots_queue
|
||||
/// TODO: make something better
|
||||
CreateSnapshotTask snapshot_task;
|
||||
std::shared_ptr<std::promise<void>> waiter = std::make_shared<std::promise<void>>();
|
||||
auto future = waiter->get_future();
|
||||
snapshot_task.snapshot = nullptr;
|
||||
snapshot_task.create_snapshot = [this, waiter, cloned_buffer, log_idx = s.get_last_log_idx()] (NuKeeperStorageSnapshotPtr &&)
|
||||
{
|
||||
try
|
||||
{
|
||||
auto result_path = snapshot_manager.serializeSnapshotBufferToDisk(*cloned_buffer, log_idx);
|
||||
LOG_DEBUG(log, "Saved snapshot {} to path {}", log_idx, result_path);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log);
|
||||
}
|
||||
waiter->set_value();
|
||||
};
|
||||
snapshots_queue.push(std::move(snapshot_task));
|
||||
future.wait();
|
||||
|
||||
{
|
||||
std::lock_guard lock(snapshots_lock);
|
||||
@ -233,7 +257,6 @@ void NuKeeperStateMachine::save_logical_snp_obj(
|
||||
latest_snapshot_meta = cloned_meta;
|
||||
}
|
||||
|
||||
LOG_DEBUG(log, "Created snapshot {} with path {}", s.get_last_log_idx(), result_path);
|
||||
|
||||
obj_id++;
|
||||
}
|
||||
@ -271,7 +294,7 @@ void NuKeeperStateMachine::processReadRequest(const NuKeeperStorage::RequestForS
|
||||
NuKeeperStorage::ResponsesForSessions responses;
|
||||
{
|
||||
std::lock_guard lock(storage_lock);
|
||||
responses = storage.processRequest(request_for_session.request, request_for_session.session_id, std::nullopt);
|
||||
responses = storage->processRequest(request_for_session.request, request_for_session.session_id, std::nullopt);
|
||||
}
|
||||
for (const auto & response : responses)
|
||||
responses_queue.push(response);
|
||||
@ -280,13 +303,13 @@ void NuKeeperStateMachine::processReadRequest(const NuKeeperStorage::RequestForS
|
||||
std::unordered_set<int64_t> NuKeeperStateMachine::getDeadSessions()
|
||||
{
|
||||
std::lock_guard lock(storage_lock);
|
||||
return storage.getDeadSessions();
|
||||
return storage->getDeadSessions();
|
||||
}
|
||||
|
||||
void NuKeeperStateMachine::shutdownStorage()
|
||||
{
|
||||
std::lock_guard lock(storage_lock);
|
||||
storage.finalize();
|
||||
storage->finalize();
|
||||
}
|
||||
|
||||
}
|
||||
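
The hunk above funnels every snapshot save through snapshots_queue and blocks the caller on a promise/future, because NuRaft may call save and create from different threads at once. Below is a minimal, self-contained sketch of that queue-plus-waiter pattern; TaskQueue and Task are illustrative stand-ins, not the actual SnapshotsQueue/CreateSnapshotTask types.

#include <condition_variable>
#include <functional>
#include <future>
#include <mutex>
#include <queue>
#include <thread>

/// Hypothetical stand-ins for SnapshotsQueue / CreateSnapshotTask.
using Task = std::function<void()>;

class TaskQueue
{
public:
    void push(Task task)
    {
        {
            std::lock_guard lock(mutex);
            tasks.push(std::move(task));
        }
        cv.notify_one();
    }

    Task pop()
    {
        std::unique_lock lock(mutex);
        cv.wait(lock, [this] { return !tasks.empty(); });
        Task task = std::move(tasks.front());
        tasks.pop();
        return task;
    }

private:
    std::mutex mutex;
    std::condition_variable cv;
    std::queue<Task> tasks;
};

int main()
{
    TaskQueue queue;

    /// Single consumer: all snapshot work runs here, one task at a time.
    std::thread snapshot_thread([&queue] { for (int i = 0; i < 2; ++i) queue.pop()(); });

    /// A caller that must wait pushes a promise and blocks on the future,
    /// as save_logical_snp_obj does in the hunk above.
    auto waiter = std::make_shared<std::promise<void>>();
    auto future = waiter->get_future();
    queue.push([waiter] { /* serialize snapshot buffer to disk */ waiter->set_value(); });
    future.wait();

    /// A caller that does not need the result just enqueues and returns.
    queue.push([] { /* create snapshot from storage */ });

    snapshot_thread.join();
}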

@ -52,7 +52,7 @@ public:

NuKeeperStorage & getStorage()
{
return storage;
return *storage;
}

void processReadRequest(const NuKeeperStorage::RequestForSession & request_for_session);
@ -68,7 +68,7 @@ private:

CoordinationSettingsPtr coordination_settings;

NuKeeperStorage storage;
NuKeeperStoragePtr storage;

NuKeeperSnapshotManager snapshot_manager;

@ -233,7 +233,7 @@ struct NuKeeperStorageGetRequest final : public NuKeeperStorageRequest
struct NuKeeperStorageRemoveRequest final : public NuKeeperStorageRequest
{
using NuKeeperStorageRequest::NuKeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & ephemerals, int64_t /*zxid*/, int64_t session_id) const override
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(NuKeeperStorage::Container & container, NuKeeperStorage::Ephemerals & ephemerals, int64_t /*zxid*/, int64_t /*session_id*/) const override
{
Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
Coordination::ZooKeeperRemoveResponse & response = dynamic_cast<Coordination::ZooKeeperRemoveResponse &>(*response_ptr);
@ -257,7 +257,12 @@ struct NuKeeperStorageRemoveRequest final : public NuKeeperStorageRequest
{
auto prev_node = it->value;
if (prev_node.stat.ephemeralOwner != 0)
ephemerals[session_id].erase(request.path);
{
auto ephemerals_it = ephemerals.find(prev_node.stat.ephemeralOwner);
ephemerals_it->second.erase(request.path);
if (ephemerals_it->second.empty())
ephemerals.erase(ephemerals_it);
}

auto child_basename = getBaseName(it->key);
container.updateValue(parentPath(request.path), [&child_basename] (NuKeeperStorage::Node & parent)
@ -271,10 +276,10 @@ struct NuKeeperStorageRemoveRequest final : public NuKeeperStorageRequest

container.erase(request.path);

undo = [prev_node, &container, &ephemerals, session_id, path = request.path, child_basename]
undo = [prev_node, &container, &ephemerals, path = request.path, child_basename]
{
if (prev_node.stat.ephemeralOwner != 0)
ephemerals[session_id].emplace(path);
ephemerals[prev_node.stat.ephemeralOwner].emplace(path);

container.insert(path, prev_node);
container.updateValue(parentPath(path), [&child_basename] (NuKeeperStorage::Node & parent)
@ -377,7 +382,6 @@ struct NuKeeperStorageSetRequest final : public NuKeeperStorageRequest
{
return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CHANGED);
}

};

struct NuKeeperStorageListRequest final : public NuKeeperStorageRequest
@ -641,6 +645,13 @@ NuKeeperStorage::ResponsesForSessions NuKeeperStorage::processRequest(const Coor
for (const auto & ephemeral_path : it->second)
{
container.erase(ephemeral_path);
container.updateValue(parentPath(ephemeral_path), [&ephemeral_path] (NuKeeperStorage::Node & parent)
{
--parent.stat.numChildren;
++parent.stat.cversion;
parent.children.erase(getBaseName(ephemeral_path));
});

auto responses = processWatchesImpl(ephemeral_path, watches, list_watches, Coordination::Event::DELETED);
results.insert(results.end(), responses.begin(), responses.end());
}

@ -131,4 +131,6 @@ public:
}
};

using NuKeeperStoragePtr = std::unique_ptr<NuKeeperStorage>;

}

@ -132,6 +132,10 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati

coordination_settings->loadFromConfig("test_keeper_server.coordination_settings", config);

request_thread = ThreadFromGlobalPool([this] { requestThread(); });
responses_thread = ThreadFromGlobalPool([this] { responseThread(); });
snapshot_thread = ThreadFromGlobalPool([this] { snapshotThread(); });

server = std::make_unique<NuKeeperServer>(myid, coordination_settings, config, responses_queue, snapshots_queue);
try
{
@ -148,10 +152,8 @@ void NuKeeperStorageDispatcher::initialize(const Poco::Util::AbstractConfigurati
throw;
}

request_thread = ThreadFromGlobalPool([this] { requestThread(); });
responses_thread = ThreadFromGlobalPool([this] { responseThread(); });

session_cleaner_thread = ThreadFromGlobalPool([this] { sessionCleanerTask(); });
snapshot_thread = ThreadFromGlobalPool([this] { snapshotThread(); });

LOG_DEBUG(log, "Dispatcher initialized");
}

@ -897,25 +897,25 @@ TEST(CoordinationTest, TestStorageSnapshotSimple)
manager.serializeSnapshotBufferToDisk(*buf, 2);
EXPECT_TRUE(fs::exists("./snapshots/snapshot_2.bin"));

DB::NuKeeperStorage restored_storage(500);

auto debuf = manager.deserializeSnapshotBufferFromDisk(2);
manager.deserializeSnapshotFromBuffer(&restored_storage, debuf);

EXPECT_EQ(restored_storage.container.size(), 3);
EXPECT_EQ(restored_storage.container.getValue("/").children.size(), 1);
EXPECT_EQ(restored_storage.container.getValue("/hello").children.size(), 1);
EXPECT_EQ(restored_storage.container.getValue("/hello/somepath").children.size(), 0);
auto [snapshot_meta, restored_storage] = manager.deserializeSnapshotFromBuffer(debuf);

EXPECT_EQ(restored_storage.container.getValue("/").data, "");
EXPECT_EQ(restored_storage.container.getValue("/hello").data, "world");
EXPECT_EQ(restored_storage.container.getValue("/hello/somepath").data, "somedata");
EXPECT_EQ(restored_storage.session_id_counter, 7);
EXPECT_EQ(restored_storage.zxid, 2);
EXPECT_EQ(restored_storage.ephemerals.size(), 2);
EXPECT_EQ(restored_storage.ephemerals[3].size(), 1);
EXPECT_EQ(restored_storage.ephemerals[1].size(), 1);
EXPECT_EQ(restored_storage.session_and_timeout.size(), 2);
EXPECT_EQ(restored_storage->container.size(), 3);
EXPECT_EQ(restored_storage->container.getValue("/").children.size(), 1);
EXPECT_EQ(restored_storage->container.getValue("/hello").children.size(), 1);
EXPECT_EQ(restored_storage->container.getValue("/hello/somepath").children.size(), 0);

EXPECT_EQ(restored_storage->container.getValue("/").data, "");
EXPECT_EQ(restored_storage->container.getValue("/hello").data, "world");
EXPECT_EQ(restored_storage->container.getValue("/hello/somepath").data, "somedata");
EXPECT_EQ(restored_storage->session_id_counter, 7);
EXPECT_EQ(restored_storage->zxid, 2);
EXPECT_EQ(restored_storage->ephemerals.size(), 2);
EXPECT_EQ(restored_storage->ephemerals[3].size(), 1);
EXPECT_EQ(restored_storage->ephemerals[1].size(), 1);
EXPECT_EQ(restored_storage->session_and_timeout.size(), 2);
}

TEST(CoordinationTest, TestStorageSnapshotMoreWrites)
@ -946,15 +946,14 @@ TEST(CoordinationTest, TestStorageSnapshotMoreWrites)
manager.serializeSnapshotBufferToDisk(*buf, 50);
EXPECT_TRUE(fs::exists("./snapshots/snapshot_50.bin"));

DB::NuKeeperStorage restored_storage(500);

auto debuf = manager.deserializeSnapshotBufferFromDisk(50);
manager.deserializeSnapshotFromBuffer(&restored_storage, debuf);
auto [meta, restored_storage] = manager.deserializeSnapshotFromBuffer(debuf);

EXPECT_EQ(restored_storage.container.size(), 51);
EXPECT_EQ(restored_storage->container.size(), 51);
for (size_t i = 0; i < 50; ++i)
{
EXPECT_EQ(restored_storage.container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i));
EXPECT_EQ(restored_storage->container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i));
}
}

@ -987,14 +986,13 @@ TEST(CoordinationTest, TestStorageSnapshotManySnapshots)
EXPECT_TRUE(fs::exists("./snapshots/snapshot_250.bin"));


DB::NuKeeperStorage restored_storage(500);
manager.restoreFromLatestSnapshot(&restored_storage);
auto [meta, restored_storage] = manager.restoreFromLatestSnapshot();

EXPECT_EQ(restored_storage.container.size(), 251);
EXPECT_EQ(restored_storage->container.size(), 251);

for (size_t i = 0; i < 250; ++i)
{
EXPECT_EQ(restored_storage.container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i));
EXPECT_EQ(restored_storage->container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i));
}
}

@ -1040,12 +1038,11 @@ TEST(CoordinationTest, TestStorageSnapshotMode)
EXPECT_FALSE(storage.container.contains("/hello_" + std::to_string(i)));
}

DB::NuKeeperStorage restored_storage(500);
manager.restoreFromLatestSnapshot(&restored_storage);
auto [meta, restored_storage] = manager.restoreFromLatestSnapshot();

for (size_t i = 0; i < 50; ++i)
{
EXPECT_EQ(restored_storage.container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i));
EXPECT_EQ(restored_storage->container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i));
}

}
@ -1071,8 +1068,7 @@ TEST(CoordinationTest, TestStorageSnapshotBroken)
plain_buf.truncate(34);
plain_buf.sync();

DB::NuKeeperStorage restored_storage(500);
EXPECT_THROW(manager.restoreFromLatestSnapshot(&restored_storage), DB::Exception);
EXPECT_THROW(manager.restoreFromLatestSnapshot(), DB::Exception);
}

nuraft::ptr<nuraft::buffer> getBufferFromZKRequest(int64_t session_id, const Coordination::ZooKeeperRequestPtr & request)
@ -1236,6 +1232,37 @@ TEST(CoordinationTest, TestStateMachineAndLogStore)
}
}

TEST(CoordinationTest, TestEphemeralNodeRemove)
{
using namespace Coordination;
using namespace DB;

ChangelogDirTest snapshots("./snapshots");
CoordinationSettingsPtr settings = std::make_shared<CoordinationSettings>();

ResponsesQueue queue;
SnapshotsQueue snapshots_queue{1};
auto state_machine = std::make_shared<NuKeeperStateMachine>(queue, snapshots_queue, "./snapshots", settings);
state_machine->init();

std::shared_ptr<ZooKeeperCreateRequest> request_c = std::make_shared<ZooKeeperCreateRequest>();
request_c->path = "/hello";
request_c->is_ephemeral = true;
auto entry_c = getLogEntryFromZKRequest(0, 1, request_c);
state_machine->commit(1, entry_c->get_buf());
const auto & storage = state_machine->getStorage();

EXPECT_EQ(storage.ephemerals.size(), 1);
std::shared_ptr<ZooKeeperRemoveRequest> request_d = std::make_shared<ZooKeeperRemoveRequest>();
request_d->path = "/hello";
/// Delete from other session
auto entry_d = getLogEntryFromZKRequest(0, 2, request_d);
state_machine->commit(2, entry_d->get_buf());

EXPECT_EQ(storage.ephemerals.size(), 0);
}


int main(int argc, char ** argv)
{
Poco::AutoPtr<Poco::ConsoleChannel> channel(new Poco::ConsoleChannel(std::cerr));

@ -953,3 +953,26 @@ void writeFieldText(const Field & x, WriteBuffer & buf);
String toString(const Field & x);

}

template <>
struct fmt::formatter<DB::Field>
{
constexpr auto parse(format_parse_context & ctx)
{
auto it = ctx.begin();
auto end = ctx.end();

/// Only support {}.
if (it != end && *it != '}')
throw format_error("invalid format");

return it;
}

template <typename FormatContext>
auto format(const DB::Field & x, FormatContext & ctx)
{
return format_to(ctx.out(), "{}", toString(x));
}
};

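The specialization above lets a DB::Field be passed straight to fmt-style format strings (for example in log messages). A minimal sketch of the same pattern for an unrelated, hypothetical Point type, assuming the fmt library is available; version differences in the fmt API may require small adjustments.

#include <string>
#include <fmt/format.h>

struct Point { double x = 0; double y = 0; };   /// hypothetical example type

/// Same idea as the DB::Field specialization: accept only "{}" and delegate
/// to an existing conversion.
template <>
struct fmt::formatter<Point>
{
    constexpr auto parse(format_parse_context & ctx)
    {
        auto it = ctx.begin();
        if (it != ctx.end() && *it != '}')
            throw format_error("invalid format");
        return it;
    }

    template <typename FormatContext>
    auto format(const Point & p, FormatContext & ctx)
    {
        return format_to(ctx.out(), "({}, {})", p.x, p.y);
    }
};

int main()
{
    Point p{1.5, -2.0};
    /// With the specialization in place the value can be formatted directly,
    /// which is what the hunk above enables for DB::Field.
    std::string s = fmt::format("point = {}", p);
    fmt::print("{}\n", s);
}
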
@ -101,7 +101,7 @@ template <DictionaryKeyType dictionary_key_type>
double CacheDictionary<dictionary_key_type>::getLoadFactor() const
{
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
return static_cast<double>(cache_storage_ptr->getSize()) / cache_storage_ptr->getMaxSize();
return cache_storage_ptr->getLoadFactor();
}

template <DictionaryKeyType dictionary_key_type>
@ -333,9 +333,7 @@ Columns CacheDictionary<dictionary_key_type>::getColumnsImpl(
FetchResult result_of_fetch_from_storage;

{
/// Write lock on storage
const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};

const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
result_of_fetch_from_storage = cache_storage_ptr->fetchColumnsForKeys(keys, request);
}

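The hunk above replaces the exclusive lock around the storage fetch with a shared (read) lock, so concurrent dictionary lookups no longer serialize behind a writer lock. A minimal sketch of that reader/writer split using only std::shared_mutex; the project's ProfilingScopedReadRWLock/ProfilingScopedWriteRWLock wrappers are replaced here by plain standard-library stand-ins.

#include <shared_mutex>
#include <thread>
#include <vector>

struct Storage
{
    mutable std::shared_mutex rw_lock;
    std::vector<int> data{1, 2, 3};

    /// Many readers may hold the shared lock at once (analogue of the read lock above).
    int read_sum() const
    {
        std::shared_lock lock(rw_lock);
        int sum = 0;
        for (int v : data)
            sum += v;
        return sum;
    }

    /// A writer takes the lock exclusively (analogue of the write lock above).
    void append(int value)
    {
        std::unique_lock lock(rw_lock);
        data.push_back(value);
    }
};

int main()
{
    Storage storage;

    std::thread writer([&] { storage.append(4); });
    std::thread reader1([&] { (void)storage.read_sum(); });
    std::thread reader2([&] { (void)storage.read_sum(); });

    writer.join();
    reader1.join();
    reader2.join();
}
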
@ -1,6 +1,7 @@
#pragma once

#include <chrono>
#include <variant>

#include <pcg_random.hpp>

@ -30,28 +31,31 @@ struct CacheDictionaryStorageConfiguration
const DictionaryLifetime lifetime;
};

/** Keys are stored in LRUCache and column values are serialized into arena.

Cell in LRUCache consists of allocated size and place in arena were columns serialized data is stored.

Columns are serialized by rows.

When cell is removed from LRUCache data associated with it is also removed from arena.

In case of complex key we also store key data in arena and it is removed from arena.
*/
/** ICacheDictionaryStorage implementation that keeps key in hash table with fixed collision length.
* Value in hash table point to index in attributes arrays.
*/
template <DictionaryKeyType dictionary_key_type>
class CacheDictionaryStorage final : public ICacheDictionaryStorage
{

static constexpr size_t max_collision_length = 10;

public:
using KeyType = std::conditional_t<dictionary_key_type == DictionaryKeyType::simple, UInt64, StringRef>;
static_assert(dictionary_key_type != DictionaryKeyType::range, "Range key type is not supported by CacheDictionaryStorage");

explicit CacheDictionaryStorage(CacheDictionaryStorageConfiguration & configuration_)
explicit CacheDictionaryStorage(
const DictionaryStructure & dictionary_structure,
CacheDictionaryStorageConfiguration & configuration_)
: configuration(configuration_)
, rnd_engine(randomSeed())
, cache(configuration.max_size_in_cells, false, { arena })
{
size_t cells_size = roundUpToPowerOfTwoOrZero(std::max(configuration.max_size_in_cells, max_collision_length));

cells.resize_fill(cells_size);
size_overlap_mask = cells_size - 1;

setup(dictionary_structure);
}

bool returnsFetchedColumnsInOrderOfRequestedKeys() const override { return true; }
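
The new constructor rounds the cell count up to a power of two and keeps cells_size - 1 as size_overlap_mask, so a bucket index can later be computed with a bitwise AND instead of a modulo. A small self-contained sketch of that arithmetic; the helper below is illustrative and not the project's roundUpToPowerOfTwoOrZero.

#include <cassert>
#include <cstddef>
#include <cstdint>

/// Illustrative helper: smallest power of two that is >= n (for n > 0).
static std::size_t round_up_to_power_of_two(std::size_t n)
{
    std::size_t result = 1;
    while (result < n)
        result <<= 1;
    return result;
}

int main()
{
    const std::size_t requested_cells = 1000;
    const std::size_t cells_size = round_up_to_power_of_two(requested_cells); /// 1024
    const std::size_t size_overlap_mask = cells_size - 1;                     /// 0x3FF

    /// For a power-of-two table size, (hash & mask) equals (hash % cells_size),
    /// which is why the storage can probe cells with a cheap AND.
    const std::uint64_t hash = 0x9E3779B97F4A7C15ull;
    assert((hash & size_overlap_mask) == (hash % cells_size));

    return 0;
}
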
@ -71,9 +75,7 @@ public:
|
||||
const DictionaryStorageFetchRequest & fetch_request) override
|
||||
{
|
||||
if constexpr (dictionary_key_type == DictionaryKeyType::simple)
|
||||
{
|
||||
return fetchColumnsForKeysImpl<SimpleKeysStorageFetchResult>(keys, fetch_request);
|
||||
}
|
||||
else
|
||||
throw Exception("Method fetchColumnsForKeys is not supported for complex key storage", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
@ -109,9 +111,7 @@ public:
|
||||
const DictionaryStorageFetchRequest & column_fetch_requests) override
|
||||
{
|
||||
if constexpr (dictionary_key_type == DictionaryKeyType::complex)
|
||||
{
|
||||
return fetchColumnsForKeysImpl<ComplexKeysStorageFetchResult>(keys, column_fetch_requests);
|
||||
}
|
||||
else
|
||||
throw Exception("Method fetchColumnsForKeys is not supported for simple key storage", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
@ -140,79 +140,162 @@ public:
|
||||
throw Exception("Method getCachedComplexKeys is not supported for simple key storage", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
size_t getSize() const override { return cache.size(); }
|
||||
size_t getSize() const override { return size; }
|
||||
|
||||
size_t getMaxSize() const override { return cache.getMaxSize(); }
|
||||
double getLoadFactor() const override { return static_cast<double>(size) / configuration.max_size_in_cells; }
|
||||
|
||||
size_t getBytesAllocated() const override { return arena.size() + cache.getSizeInBytes(); }
|
||||
size_t getBytesAllocated() const override
|
||||
{
|
||||
size_t attributes_size_in_bytes = 0;
|
||||
size_t attributes_size = attributes.size();
|
||||
|
||||
for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index)
|
||||
{
|
||||
getAttributeContainer(attribute_index, [&](const auto & container)
|
||||
{
|
||||
attributes_size_in_bytes += container.capacity() * sizeof(container[0]);
|
||||
});
|
||||
}
|
||||
|
||||
return arena.size() + sizeof(Cell) * configuration.max_size_in_cells + attributes_size_in_bytes;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
struct FetchedKey
|
||||
{
|
||||
FetchedKey(size_t element_index_, bool is_default_)
|
||||
: element_index(element_index_)
|
||||
, is_default(is_default_)
|
||||
{}
|
||||
|
||||
size_t element_index;
|
||||
bool is_default;
|
||||
};
|
||||
|
||||
template <typename KeysStorageFetchResult>
|
||||
ALWAYS_INLINE KeysStorageFetchResult fetchColumnsForKeysImpl(
|
||||
KeysStorageFetchResult fetchColumnsForKeysImpl(
|
||||
const PaddedPODArray<KeyType> & keys,
|
||||
const DictionaryStorageFetchRequest & fetch_request)
|
||||
{
|
||||
KeysStorageFetchResult result;
|
||||
|
||||
result.fetched_columns = fetch_request.makeAttributesResultColumns();
|
||||
result.key_index_to_state.resize_fill(keys.size(), {KeyState::not_found});
|
||||
result.key_index_to_state.resize_fill(keys.size());
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
const time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
|
||||
|
||||
size_t fetched_columns_index = 0;
|
||||
size_t keys_size = keys.size();
|
||||
|
||||
std::chrono::seconds max_lifetime_seconds(configuration.strict_max_lifetime_seconds);
|
||||
|
||||
size_t keys_size = keys.size();
|
||||
PaddedPODArray<FetchedKey> fetched_keys;
|
||||
fetched_keys.resize_fill(keys_size);
|
||||
|
||||
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
||||
{
|
||||
auto key = keys[key_index];
|
||||
auto * it = cache.find(key);
|
||||
auto [key_state, cell_index] = getKeyStateAndCellIndex(key, now);
|
||||
|
||||
if (it)
|
||||
if (unlikely(key_state == KeyState::not_found))
|
||||
{
|
||||
/// Columns values for key are serialized in cache now deserialize them
|
||||
const auto & cell = it->getMapped();
|
||||
result.key_index_to_state[key_index] = {KeyState::not_found};
|
||||
++result.not_found_keys_size;
|
||||
continue;
|
||||
}
|
||||
|
||||
bool has_deadline = cellHasDeadline(cell);
|
||||
auto & cell = cells[cell_index];
|
||||
|
||||
if (has_deadline && now > cell.deadline + max_lifetime_seconds)
|
||||
{
|
||||
result.key_index_to_state[key_index] = {KeyState::not_found};
|
||||
++result.not_found_keys_size;
|
||||
continue;
|
||||
}
|
||||
else if (has_deadline && now > cell.deadline)
|
||||
{
|
||||
result.key_index_to_state[key_index] = {KeyState::expired, fetched_columns_index};
|
||||
++result.expired_keys_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
result.key_index_to_state[key_index] = {KeyState::found, fetched_columns_index};
|
||||
++result.found_keys_size;
|
||||
}
|
||||
result.expired_keys_size += static_cast<size_t>(key_state == KeyState::expired);
|
||||
|
||||
++fetched_columns_index;
|
||||
result.key_index_to_state[key_index] = {key_state, fetched_columns_index};
|
||||
fetched_keys[fetched_columns_index] = FetchedKey(cell.element_index, cell.is_default);
|
||||
|
||||
if (cell.isDefault())
|
||||
++fetched_columns_index;
|
||||
|
||||
result.key_index_to_state[key_index].setDefaultValue(cell.is_default);
|
||||
result.default_keys_size += cell.is_default;
|
||||
}
|
||||
|
||||
result.found_keys_size = keys_size - (result.expired_keys_size + result.not_found_keys_size);
|
||||
|
||||
for (size_t attribute_index = 0; attribute_index < fetch_request.attributesSize(); ++attribute_index)
|
||||
{
|
||||
if (!fetch_request.shouldFillResultColumnWithIndex(attribute_index))
|
||||
continue;
|
||||
|
||||
auto & attribute = attributes[attribute_index];
|
||||
const auto & default_value_provider = fetch_request.defaultValueProviderAtIndex(attribute_index);
|
||||
|
||||
size_t fetched_keys_size = fetched_keys.size();
|
||||
auto & fetched_column = *result.fetched_columns[attribute_index];
|
||||
fetched_column.reserve(fetched_keys_size);
|
||||
|
||||
if (unlikely(attribute.is_complex_type))
|
||||
{
|
||||
auto & container = std::get<std::vector<Field>>(attribute.attribute_container);
|
||||
|
||||
for (size_t fetched_key_index = 0; fetched_key_index < fetched_columns_index; ++fetched_key_index)
|
||||
{
|
||||
result.key_index_to_state[key_index].setDefault();
|
||||
++result.default_keys_size;
|
||||
insertDefaultValuesIntoColumns(result.fetched_columns, fetch_request, key_index);
|
||||
}
|
||||
else
|
||||
{
|
||||
const char * place_for_serialized_columns = cell.place_for_serialized_columns;
|
||||
deserializeAndInsertIntoColumns(result.fetched_columns, fetch_request, place_for_serialized_columns);
|
||||
auto fetched_key = fetched_keys[fetched_key_index];
|
||||
|
||||
if (unlikely(fetched_key.is_default))
|
||||
fetched_column.insert(default_value_provider.getDefaultValue(fetched_key_index));
|
||||
else
|
||||
fetched_column.insert(container[fetched_key.element_index]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
result.key_index_to_state[key_index] = {KeyState::not_found};
|
||||
++result.not_found_keys_size;
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
using ColumnType =
|
||||
std::conditional_t<std::is_same_v<AttributeType, String>, ColumnString,
|
||||
std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<ValueType>,
|
||||
ColumnVector<AttributeType>>>;
|
||||
|
||||
auto & container = std::get<PaddedPODArray<ValueType>>(attribute.attribute_container);
|
||||
ColumnType & column_typed = static_cast<ColumnType &>(fetched_column);
|
||||
|
||||
if constexpr (std::is_same_v<ColumnType, ColumnString>)
|
||||
{
|
||||
for (size_t fetched_key_index = 0; fetched_key_index < fetched_columns_index; ++fetched_key_index)
|
||||
{
|
||||
auto fetched_key = fetched_keys[fetched_key_index];
|
||||
|
||||
if (unlikely(fetched_key.is_default))
|
||||
column_typed.insert(default_value_provider.getDefaultValue(fetched_key_index));
|
||||
else
|
||||
{
|
||||
auto item = container[fetched_key.element_index];
|
||||
column_typed.insertData(item.data, item.size);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & data = column_typed.getData();
|
||||
|
||||
for (size_t fetched_key_index = 0; fetched_key_index < fetched_columns_index; ++fetched_key_index)
|
||||
{
|
||||
auto fetched_key = fetched_keys[fetched_key_index];
|
||||
|
||||
if (unlikely(fetched_key.is_default))
|
||||
column_typed.insert(default_value_provider.getDefaultValue(fetched_key_index));
|
||||
else
|
||||
{
|
||||
auto item = container[fetched_key.element_index];
|
||||
data.push_back(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
}
|
||||
|
||||
@ -221,58 +304,108 @@ private:
|
||||
|
||||
void insertColumnsForKeysImpl(const PaddedPODArray<KeyType> & keys, Columns columns)
|
||||
{
|
||||
Arena temporary_values_pool;
|
||||
|
||||
size_t columns_to_serialize_size = columns.size();
|
||||
PaddedPODArray<StringRef> temporary_column_data(columns_to_serialize_size);
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
size_t keys_size = keys.size();
|
||||
Field column_value;
|
||||
|
||||
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
||||
for (size_t key_index = 0; key_index < keys.size(); ++key_index)
|
||||
{
|
||||
size_t allocated_size_for_columns = 0;
|
||||
const char * block_start = nullptr;
|
||||
|
||||
auto key = keys[key_index];
|
||||
auto * it = cache.find(key);
|
||||
|
||||
for (size_t column_index = 0; column_index < columns_to_serialize_size; ++column_index)
|
||||
size_t cell_index = getCellIndexForInsert(key);
|
||||
auto & cell = cells[cell_index];
|
||||
|
||||
bool cell_was_default = cell.is_default;
|
||||
cell.is_default = false;
|
||||
|
||||
bool was_inserted = cell.deadline == 0;
|
||||
|
||||
if (was_inserted)
|
||||
{
|
||||
auto & column = columns[column_index];
|
||||
temporary_column_data[column_index] = column->serializeValueIntoArena(key_index, temporary_values_pool, block_start);
|
||||
allocated_size_for_columns += temporary_column_data[column_index].size;
|
||||
}
|
||||
if constexpr (std::is_same_v<KeyType, StringRef>)
|
||||
cell.key = copyStringInArena(key);
|
||||
else
|
||||
cell.key = key;
|
||||
|
||||
char * place_for_serialized_columns = arena.alloc(allocated_size_for_columns);
|
||||
memcpy(reinterpret_cast<void*>(place_for_serialized_columns), reinterpret_cast<const void*>(block_start), allocated_size_for_columns);
|
||||
for (size_t attribute_index = 0; attribute_index < columns.size(); ++attribute_index)
|
||||
{
|
||||
auto & column = columns[attribute_index];
|
||||
|
||||
if (it)
|
||||
{
|
||||
/// Cell exists need to free previous serialized place and update deadline
|
||||
auto & cell = it->getMapped();
|
||||
getAttributeContainer(attribute_index, [&](auto & container)
|
||||
{
|
||||
container.emplace_back();
|
||||
cell.element_index = container.size() - 1;
|
||||
|
||||
if (cell.place_for_serialized_columns)
|
||||
arena.free(cell.place_for_serialized_columns, cell.allocated_size_for_columns);
|
||||
using ElementType = std::decay_t<decltype(container[0])>;
|
||||
|
||||
setCellDeadline(cell, now);
|
||||
cell.allocated_size_for_columns = allocated_size_for_columns;
|
||||
cell.place_for_serialized_columns = place_for_serialized_columns;
|
||||
column->get(key_index, column_value);
|
||||
|
||||
if constexpr (std::is_same_v<ElementType, Field>)
|
||||
container.back() = column_value;
|
||||
else if constexpr (std::is_same_v<ElementType, StringRef>)
|
||||
{
|
||||
const String & string_value = column_value.get<String>();
|
||||
StringRef string_value_ref = StringRef {string_value.data(), string_value.size()};
|
||||
StringRef inserted_value = copyStringInArena(string_value_ref);
|
||||
container.back() = inserted_value;
|
||||
}
|
||||
else
|
||||
container.back() = column_value.get<NearestFieldType<ElementType>>();
|
||||
});
|
||||
}
|
||||
|
||||
++size;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// No cell exists so create and put in cache
|
||||
Cell cell;
|
||||
if (cell.key != key)
|
||||
{
|
||||
if constexpr (std::is_same_v<KeyType, StringRef>)
|
||||
{
|
||||
char * data = const_cast<char *>(cell.key.data);
|
||||
arena.free(data, cell.key.size);
|
||||
cell.key = copyStringInArena(key);
|
||||
}
|
||||
else
|
||||
cell.key = key;
|
||||
}
|
||||
|
||||
setCellDeadline(cell, now);
|
||||
cell.allocated_size_for_columns = allocated_size_for_columns;
|
||||
cell.place_for_serialized_columns = place_for_serialized_columns;
|
||||
/// Put values into existing index
|
||||
size_t index_to_use = cell.element_index;
|
||||
|
||||
insertCellInCache(key, cell);
|
||||
for (size_t attribute_index = 0; attribute_index < columns.size(); ++attribute_index)
|
||||
{
|
||||
auto & column = columns[attribute_index];
|
||||
|
||||
getAttributeContainer(attribute_index, [&](auto & container)
|
||||
{
|
||||
using ElementType = std::decay_t<decltype(container[0])>;
|
||||
|
||||
column->get(key_index, column_value);
|
||||
|
||||
if constexpr (std::is_same_v<ElementType, Field>)
|
||||
container[index_to_use] = column_value;
|
||||
else if constexpr (std::is_same_v<ElementType, StringRef>)
|
||||
{
|
||||
const String & string_value = column_value.get<String>();
|
||||
StringRef string_ref_value = StringRef {string_value.data(), string_value.size()};
|
||||
StringRef inserted_value = copyStringInArena(string_ref_value);
|
||||
|
||||
if (!cell_was_default)
|
||||
{
|
||||
StringRef previous_value = container[index_to_use];
|
||||
arena.free(const_cast<char *>(previous_value.data), previous_value.size);
|
||||
}
|
||||
|
||||
container[index_to_use] = inserted_value;
|
||||
}
|
||||
else
|
||||
container[index_to_use] = column_value.get<NearestFieldType<ElementType>>();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
temporary_values_pool.rollback(allocated_size_for_columns);
|
||||
setCellDeadline(cell, now);
|
||||
}
|
||||
}
|
||||
|
||||
@ -280,94 +413,224 @@ private:
|
||||
{
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
for (auto key : keys)
|
||||
size_t keys_size = keys.size();
|
||||
|
||||
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
||||
{
|
||||
auto * it = cache.find(key);
|
||||
auto key = keys[key_index];
|
||||
|
||||
if (it)
|
||||
size_t cell_index = getCellIndexForInsert(key);
|
||||
auto & cell = cells[cell_index];
|
||||
|
||||
bool was_inserted = cell.deadline == 0;
|
||||
bool cell_was_default = cell.is_default;
|
||||
|
||||
cell.is_default = true;
|
||||
|
||||
if (was_inserted)
|
||||
{
|
||||
auto & cell = it->getMapped();
|
||||
if constexpr (std::is_same_v<KeyType, StringRef>)
|
||||
cell.key = copyStringInArena(key);
|
||||
else
|
||||
cell.key = key;
|
||||
|
||||
setCellDeadline(cell, now);
|
||||
for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index)
|
||||
{
|
||||
getAttributeContainer(attribute_index, [&](auto & container)
|
||||
{
|
||||
container.emplace_back();
|
||||
cell.element_index = container.size() - 1;
|
||||
});
|
||||
}
|
||||
|
||||
if (cell.place_for_serialized_columns)
|
||||
arena.free(cell.place_for_serialized_columns, cell.allocated_size_for_columns);
|
||||
|
||||
cell.allocated_size_for_columns = 0;
|
||||
cell.place_for_serialized_columns = nullptr;
|
||||
++size;
|
||||
}
|
||||
else
|
||||
{
|
||||
Cell cell;
|
||||
for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index)
|
||||
{
|
||||
getAttributeContainer(attribute_index, [&](const auto & container)
|
||||
{
|
||||
using ElementType = std::decay_t<decltype(container[0])>;
|
||||
|
||||
setCellDeadline(cell, now);
|
||||
cell.allocated_size_for_columns = 0;
|
||||
cell.place_for_serialized_columns = nullptr;
|
||||
if constexpr (std::is_same_v<ElementType, StringRef>)
|
||||
{
|
||||
if (!cell_was_default)
|
||||
{
|
||||
StringRef previous_value = container[cell.element_index];
|
||||
arena.free(const_cast<char *>(previous_value.data), previous_value.size);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
insertCellInCache(key, cell);
|
||||
if (cell.key != key)
|
||||
{
|
||||
if constexpr (std::is_same_v<KeyType, StringRef>)
|
||||
{
|
||||
char * data = const_cast<char *>(cell.key.data);
|
||||
arena.free(data, cell.key.size);
|
||||
cell.key = copyStringInArena(key);
|
||||
}
|
||||
else
|
||||
cell.key = key;
|
||||
}
|
||||
}
|
||||
|
||||
setCellDeadline(cell, now);
|
||||
}
|
||||
}
|
||||
|
||||
PaddedPODArray<KeyType> getCachedKeysImpl() const
|
||||
{
|
||||
PaddedPODArray<KeyType> result;
|
||||
result.reserve(cache.size());
|
||||
result.reserve(size);
|
||||
|
||||
for (auto & node : cache)
|
||||
for (auto & cell : cells)
|
||||
{
|
||||
auto & cell = node.getMapped();
|
||||
|
||||
if (cell.isDefault())
|
||||
if (cell.deadline == 0)
|
||||
continue;
|
||||
|
||||
result.emplace_back(node.getKey());
|
||||
if (cell.is_default)
|
||||
continue;
|
||||
|
||||
result.emplace_back(cell.key);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename GetContainerFunc>
|
||||
void getAttributeContainer(size_t attribute_index, GetContainerFunc && func)
|
||||
{
|
||||
auto & attribute = attributes[attribute_index];
|
||||
auto & attribute_type = attribute.type;
|
||||
|
||||
if (unlikely(attribute.is_complex_type))
|
||||
{
|
||||
auto & container = std::get<std::vector<Field>>(attribute.attribute_container);
|
||||
std::forward<GetContainerFunc>(func)(container);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
|
||||
auto & container = std::get<PaddedPODArray<ValueType>>(attribute.attribute_container);
|
||||
std::forward<GetContainerFunc>(func)(container);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute_type, type_call);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GetContainerFunc>
|
||||
void getAttributeContainer(size_t attribute_index, GetContainerFunc && func) const
|
||||
{
|
||||
return const_cast<std::decay_t<decltype(*this)> *>(this)->template getAttributeContainer(attribute_index, std::forward<GetContainerFunc>(func));
|
||||
}
|
||||
|
||||
StringRef copyStringInArena(StringRef value_to_copy)
|
||||
{
|
||||
size_t value_to_copy_size = value_to_copy.size;
|
||||
char * place_for_key = arena.alloc(value_to_copy_size);
|
||||
memcpy(reinterpret_cast<void *>(place_for_key), reinterpret_cast<const void *>(value_to_copy.data), value_to_copy_size);
|
||||
StringRef updated_value{place_for_key, value_to_copy_size};
|
||||
|
||||
return updated_value;
|
||||
}
|
||||
|
||||
void setup(const DictionaryStructure & dictionary_structure)
|
||||
{
|
||||
/// For each dictionary attribute create storage attribute
|
||||
/// For simple attributes create PODArray, for complex vector of Fields
|
||||
|
||||
attributes.reserve(dictionary_structure.attributes.size());
|
||||
|
||||
for (const auto & dictionary_attribute : dictionary_structure.attributes)
|
||||
{
|
||||
auto attribute_type = dictionary_attribute.underlying_type;
|
||||
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
|
||||
attributes.emplace_back();
|
||||
auto & last_attribute = attributes.back();
|
||||
last_attribute.type = attribute_type;
|
||||
last_attribute.is_complex_type = dictionary_attribute.is_nullable || dictionary_attribute.is_array;
|
||||
|
||||
if (dictionary_attribute.is_nullable)
|
||||
last_attribute.attribute_container = std::vector<Field>();
|
||||
else
|
||||
last_attribute.attribute_container = PaddedPODArray<ValueType>();
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute_type, type_call);
|
||||
}
|
||||
}
|
||||
|
||||
using TimePoint = std::chrono::system_clock::time_point;
|
||||
|
||||
struct Cell
|
||||
{
|
||||
TimePoint deadline;
|
||||
size_t allocated_size_for_columns;
|
||||
char * place_for_serialized_columns;
|
||||
|
||||
inline bool isDefault() const { return place_for_serialized_columns == nullptr; }
|
||||
inline void setDefault()
|
||||
{
|
||||
place_for_serialized_columns = nullptr;
|
||||
allocated_size_for_columns = 0;
|
||||
}
|
||||
KeyType key;
|
||||
size_t element_index;
|
||||
bool is_default;
|
||||
time_t deadline;
|
||||
};
|
||||
|
||||
void insertCellInCache(KeyType & key, const Cell & cell)
|
||||
struct Attribute
|
||||
{
|
||||
if constexpr (dictionary_key_type == DictionaryKeyType::complex)
|
||||
{
|
||||
/// Copy complex key into arena and put in cache
|
||||
size_t key_size = key.size;
|
||||
char * place_for_key = arena.alloc(key_size);
|
||||
memcpy(reinterpret_cast<void *>(place_for_key), reinterpret_cast<const void *>(key.data), key_size);
|
||||
KeyType updated_key{place_for_key, key_size};
|
||||
key = updated_key;
|
||||
}
|
||||
AttributeUnderlyingType type;
|
||||
bool is_complex_type;
|
||||
|
||||
cache.insert(key, cell);
|
||||
}
|
||||
std::variant<
|
||||
PaddedPODArray<UInt8>,
|
||||
PaddedPODArray<UInt16>,
|
||||
PaddedPODArray<UInt32>,
|
||||
PaddedPODArray<UInt64>,
|
||||
PaddedPODArray<UInt128>,
|
||||
PaddedPODArray<Int8>,
|
||||
PaddedPODArray<Int16>,
|
||||
PaddedPODArray<Int32>,
|
||||
PaddedPODArray<Int64>,
|
||||
PaddedPODArray<Decimal32>,
|
||||
PaddedPODArray<Decimal64>,
|
||||
PaddedPODArray<Decimal128>,
|
||||
PaddedPODArray<Float32>,
|
||||
PaddedPODArray<Float64>,
|
||||
PaddedPODArray<StringRef>,
|
||||
std::vector<Field>> attribute_container;
|
||||
};
|
||||
|
||||
inline static bool cellHasDeadline(const Cell & cell)
|
||||
{
|
||||
return cell.deadline != std::chrono::system_clock::from_time_t(0);
|
||||
}
|
||||
CacheDictionaryStorageConfiguration configuration;
|
||||
|
||||
pcg64 rnd_engine;
|
||||
|
||||
size_t size_overlap_mask = 0;
|
||||
|
||||
size_t size = 0;
|
||||
|
||||
PaddedPODArray<Cell> cells;
|
||||
|
||||
ArenaWithFreeLists arena;
|
||||
|
||||
std::vector<Attribute> attributes;
|
||||
|
||||
inline void setCellDeadline(Cell & cell, TimePoint now)
|
||||
{
|
||||
if (configuration.lifetime.min_sec == 0 && configuration.lifetime.max_sec == 0)
|
||||
{
|
||||
cell.deadline = std::chrono::system_clock::from_time_t(0);
|
||||
/// This maybe not obvious, but when we define is this cell is expired or expired permanently, we add strict_max_lifetime_seconds
|
||||
/// to the expiration time. And it overflows pretty well.
|
||||
auto deadline = std::chrono::time_point<std::chrono::system_clock>::max() - 2 * std::chrono::seconds(configuration.strict_max_lifetime_seconds);
|
||||
cell.deadline = std::chrono::system_clock::to_time_t(deadline);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -375,44 +638,75 @@ private:
|
||||
size_t max_sec_lifetime = configuration.lifetime.max_sec;
|
||||
|
||||
std::uniform_int_distribution<UInt64> distribution{min_sec_lifetime, max_sec_lifetime};
|
||||
cell.deadline = now + std::chrono::seconds(distribution(rnd_engine));
|
||||
|
||||
auto deadline = now + std::chrono::seconds(distribution(rnd_engine));
|
||||
cell.deadline = std::chrono::system_clock::to_time_t(deadline);
|
||||
}
|
||||
|
||||
template <typename>
|
||||
friend class ArenaCellDisposer;
|
||||
|
||||
CacheDictionaryStorageConfiguration configuration;
|
||||
|
||||
ArenaWithFreeLists arena;
|
||||
|
||||
pcg64 rnd_engine;
|
||||
|
||||
class ArenaCellDisposer
|
||||
inline size_t getCellIndex(const KeyType key) const
|
||||
{
|
||||
public:
|
||||
ArenaWithFreeLists & arena;
|
||||
const size_t hash = DefaultHash<KeyType>()(key);
|
||||
const size_t index = hash & size_overlap_mask;
|
||||
return index;
|
||||
}
|
||||
|
||||
template <typename Key, typename Value>
|
||||
void operator()(const Key & key, const Value & value) const
|
||||
using KeyStateAndCellIndex = std::pair<KeyState::State, size_t>;
|
||||
|
||||
inline KeyStateAndCellIndex getKeyStateAndCellIndex(const KeyType key, const time_t now) const
|
||||
{
|
||||
size_t place_value = getCellIndex(key);
|
||||
const size_t place_value_end = place_value + max_collision_length;
|
||||
|
||||
time_t max_lifetime_seconds = static_cast<time_t>(configuration.strict_max_lifetime_seconds);
|
||||
|
||||
for (; place_value < place_value_end; ++place_value)
|
||||
{
|
||||
/// In case of complex key we keep it in arena
|
||||
if constexpr (std::is_same_v<Key, StringRef>)
|
||||
arena.free(const_cast<char *>(key.data), key.size);
|
||||
const auto cell_place_value = place_value & size_overlap_mask;
|
||||
const auto & cell = cells[cell_place_value];
|
||||
|
||||
if (value.place_for_serialized_columns)
|
||||
arena.free(value.place_for_serialized_columns, value.allocated_size_for_columns);
|
||||
if (cell.key != key)
|
||||
continue;
|
||||
|
||||
if (unlikely(now > cell.deadline + max_lifetime_seconds))
|
||||
return std::make_pair(KeyState::not_found, cell_place_value);
|
||||
|
||||
if (unlikely(now > cell.deadline))
|
||||
return std::make_pair(KeyState::expired, cell_place_value);
|
||||
|
||||
return std::make_pair(KeyState::found, cell_place_value);
|
||||
}
|
||||
};
|
||||
|
||||
using SimpleKeyLRUHashMap = LRUHashMap<UInt64, Cell, ArenaCellDisposer>;
|
||||
using ComplexKeyLRUHashMap = LRUHashMapWithSavedHash<StringRef, Cell, ArenaCellDisposer>;
|
||||
return std::make_pair(KeyState::not_found, place_value & size_overlap_mask);
|
||||
}
|
||||
|
||||
using CacheLRUHashMap = std::conditional_t<
|
||||
dictionary_key_type == DictionaryKeyType::simple,
|
||||
SimpleKeyLRUHashMap,
|
||||
ComplexKeyLRUHashMap>;
|
||||
inline size_t getCellIndexForInsert(const KeyType & key) const
|
||||
{
|
||||
size_t place_value = getCellIndex(key);
|
||||
const size_t place_value_end = place_value + max_collision_length;
|
||||
size_t oldest_place_value = place_value;
|
||||
|
||||
CacheLRUHashMap cache;
|
||||
time_t oldest_time = std::numeric_limits<time_t>::max();
|
||||
|
||||
for (; place_value < place_value_end; ++place_value)
|
||||
{
|
||||
const size_t cell_place_value = place_value & size_overlap_mask;
|
||||
const Cell cell = cells[cell_place_value];
|
||||
|
||||
if (cell.deadline == 0)
|
||||
return cell_place_value;
|
||||
|
||||
if (cell.key == key)
|
||||
return cell_place_value;
|
||||
|
||||
if (cell.deadline < oldest_time)
|
||||
{
|
||||
oldest_time = cell.deadline;
|
||||
oldest_place_value = cell_place_value;
|
||||
}
|
||||
}
|
||||
|
||||
return oldest_place_value;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -12,9 +12,9 @@ struct KeyState
{
enum State: uint8_t
{
not_found = 2,
expired = 4,
found = 8,
not_found = 0,
expired = 1,
found = 2,
};

KeyState(State state_, size_t fetched_column_index_)
@ -31,9 +31,10 @@ struct KeyState
inline bool isNotFound() const { return state == State::not_found; }
inline bool isDefault() const { return is_default; }
inline void setDefault() { is_default = true; }
inline void setDefaultValue(bool is_default_value) { is_default = is_default_value; }
/// Valid only if keyState is found or expired
inline size_t getFetchedColumnIndex() const { return fetched_column_index; }

inline void setFetchedColumnIndex(size_t fetched_column_index_value) { fetched_column_index = fetched_column_index_value; }
private:
State state = not_found;
size_t fetched_column_index = 0;
@ -111,8 +112,8 @@ public:
/// Return size of keys in storage
virtual size_t getSize() const = 0;

/// Return maximum size of keys in storage
virtual size_t getMaxSize() const = 0;
/// Returns storage load factor
virtual double getLoadFactor() const = 0;

/// Return bytes allocated in storage
virtual size_t getBytesAllocated() const = 0;

@ -17,7 +17,7 @@
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/ArenaWithFreeLists.h>
|
||||
#include <Common/MemorySanitizer.h>
|
||||
#include <Common/HashTable/LRUHashMap.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <IO/AIO.h>
|
||||
#include <Dictionaries/DictionaryStructure.h>
|
||||
#include <Dictionaries/ICacheDictionaryStorage.h>
|
||||
@ -56,7 +56,6 @@ struct SSDCacheDictionaryStorageConfiguration
|
||||
|
||||
const std::string file_path;
|
||||
const size_t max_partitions_count;
|
||||
const size_t max_stored_keys;
|
||||
const size_t block_size;
|
||||
const size_t file_blocks_size;
|
||||
const size_t read_buffer_blocks_size;
|
||||
@ -127,7 +126,7 @@ public:
|
||||
|
||||
/// Reset block with new block_data
|
||||
/// block_data must be filled with zeroes if it is new block
|
||||
ALWAYS_INLINE inline void reset(char * new_block_data)
|
||||
inline void reset(char * new_block_data)
|
||||
{
|
||||
block_data = new_block_data;
|
||||
current_block_offset = block_header_size;
|
||||
@ -135,13 +134,13 @@ public:
|
||||
}
|
||||
|
||||
/// Check if it is enough place to write key in block
|
||||
ALWAYS_INLINE inline bool enoughtPlaceToWriteKey(const SSDCacheSimpleKey & cache_key) const
|
||||
inline bool enoughtPlaceToWriteKey(const SSDCacheSimpleKey & cache_key) const
|
||||
{
|
||||
return (current_block_offset + (sizeof(cache_key.key) + sizeof(cache_key.size) + cache_key.size)) <= block_size;
|
||||
}
|
||||
|
||||
/// Check if it is enough place to write key in block
|
||||
ALWAYS_INLINE inline bool enoughtPlaceToWriteKey(const SSDCacheComplexKey & cache_key) const
|
||||
inline bool enoughtPlaceToWriteKey(const SSDCacheComplexKey & cache_key) const
|
||||
{
|
||||
const StringRef & key = cache_key.key;
|
||||
size_t complex_key_size = sizeof(key.size) + key.size;
|
||||
@ -152,7 +151,7 @@ public:
|
||||
/// Write key and returns offset in ssd cache block where data is written
|
||||
/// It is client responsibility to check if there is enough place in block to write key
|
||||
/// Returns true if key was written and false if there was not enough place to write key
|
||||
ALWAYS_INLINE inline bool writeKey(const SSDCacheSimpleKey & cache_key, size_t & offset_in_block)
|
||||
inline bool writeKey(const SSDCacheSimpleKey & cache_key, size_t & offset_in_block)
|
||||
{
|
||||
assert(cache_key.size > 0);
|
||||
|
||||
@ -181,7 +180,7 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE inline bool writeKey(const SSDCacheComplexKey & cache_key, size_t & offset_in_block)
|
||||
inline bool writeKey(const SSDCacheComplexKey & cache_key, size_t & offset_in_block)
|
||||
{
|
||||
assert(cache_key.size > 0);
|
||||
|
||||
@ -216,20 +215,20 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE inline size_t getKeysSize() const { return keys_size; }
|
||||
inline size_t getKeysSize() const { return keys_size; }
|
||||
|
||||
/// Write keys size into block header
|
||||
ALWAYS_INLINE inline void writeKeysSize()
|
||||
inline void writeKeysSize()
|
||||
{
|
||||
char * keys_size_offset_data = block_data + block_header_check_sum_size;
|
||||
std::memcpy(keys_size_offset_data, &keys_size, sizeof(size_t));
|
||||
}
|
||||
|
||||
/// Get check sum from block header
|
||||
ALWAYS_INLINE inline size_t getCheckSum() const { return unalignedLoad<size_t>(block_data); }
|
||||
inline size_t getCheckSum() const { return unalignedLoad<size_t>(block_data); }
|
||||
|
||||
/// Calculate check sum in block
|
||||
ALWAYS_INLINE inline size_t calculateCheckSum() const
|
||||
inline size_t calculateCheckSum() const
|
||||
{
|
||||
size_t calculated_check_sum = static_cast<size_t>(CityHash_v1_0_2::CityHash64(block_data + block_header_check_sum_size, block_size - block_header_check_sum_size));
|
||||
|
||||
@ -237,7 +236,7 @@ public:
|
||||
}
|
||||
|
||||
/// Check if check sum from block header matched calculated check sum in block
|
||||
ALWAYS_INLINE inline bool checkCheckSum() const
|
||||
inline bool checkCheckSum() const
|
||||
{
|
||||
size_t calculated_check_sum = calculateCheckSum();
|
||||
size_t check_sum = getCheckSum();
|
||||
@ -246,16 +245,16 @@ public:
|
||||
}
|
||||
|
||||
/// Write check sum in block header
|
||||
ALWAYS_INLINE inline void writeCheckSum()
|
||||
inline void writeCheckSum()
|
||||
{
|
||||
size_t check_sum = static_cast<size_t>(CityHash_v1_0_2::CityHash64(block_data + block_header_check_sum_size, block_size - block_header_check_sum_size));
|
||||
std::memcpy(block_data, &check_sum, sizeof(size_t));
|
||||
}
|
||||
|
||||
ALWAYS_INLINE inline size_t getBlockSize() const { return block_size; }
|
||||
inline size_t getBlockSize() const { return block_size; }
|
||||
|
||||
/// Returns block data
|
||||
ALWAYS_INLINE inline char * getBlockData() const { return block_data; }
|
||||
inline char * getBlockData() const { return block_data; }
|
||||
|
||||
/// Read keys that were serialized in block
|
||||
/// It is client responsibility to ensure that simple or complex keys were written in block
|
||||
@ -337,9 +336,7 @@ inline bool operator==(const SSDCacheIndex & lhs, const SSDCacheIndex & rhs)
|
||||
return lhs.block_index == rhs.block_index && lhs.offset_in_block == rhs.offset_in_block;
|
||||
}
|
||||
|
||||
/** SSDCacheMemoryBuffer initialized with block size and memory buffer blocks size.
|
||||
* Allocate block_size * memory_buffer_blocks_size bytes with page alignment.
|
||||
* Logically represents multiple memory_buffer_blocks_size blocks and current write block.
|
||||
/** Logically represents multiple memory_buffer_blocks_size SSDCacheBlocks and current write block.
|
||||
* If key cannot be written into current_write_block, current block keys size and check summ is written
|
||||
* and buffer increase index of current_write_block_index.
|
||||
* If current_write_block_index == memory_buffer_blocks_size write key will always returns true.
|
||||
@ -444,7 +441,7 @@ private:
|
||||
size_t current_block_index = 0;
|
||||
};
|
||||
|
||||
/// TODO: Add documentation
|
||||
/// Logically represents multiple memory_buffer_blocks_size SSDCacheBlocks on file system
|
||||
template <typename SSDCacheKeyType>
|
||||
class SSDCacheFileBuffer : private boost::noncopyable
|
||||
{
|
||||
@ -614,11 +611,13 @@ public:
|
||||
}
|
||||
|
||||
template <typename FetchBlockFunc>
|
||||
ALWAYS_INLINE void fetchBlocks(char * read_buffer, size_t read_from_file_buffer_blocks_size, const PaddedPODArray<size_t> & blocks_to_fetch, FetchBlockFunc && func) const
|
||||
void fetchBlocks(size_t read_from_file_buffer_blocks_size, const PaddedPODArray<size_t> & blocks_to_fetch, FetchBlockFunc && func) const
|
||||
{
|
||||
if (blocks_to_fetch.empty())
|
||||
return;
|
||||
|
||||
Memory<Allocator<true>> read_buffer(read_from_file_buffer_blocks_size * block_size, 4096);
|
||||
|
||||
size_t blocks_to_fetch_size = blocks_to_fetch.size();
|
||||
|
||||
PaddedPODArray<iocb> requests;
|
||||
@ -631,7 +630,7 @@ public:
|
||||
{
|
||||
iocb request{};
|
||||
|
||||
char * buffer_place = read_buffer + block_size * (block_to_fetch_index % read_from_file_buffer_blocks_size);
|
||||
char * buffer_place = read_buffer.data() + block_size * (block_to_fetch_index % read_from_file_buffer_blocks_size);
|
||||
|
||||
#if defined(__FreeBSD__)
|
||||
request.aio.aio_lio_opcode = LIO_READ;
|
||||
@ -751,7 +750,7 @@ private:
|
||||
int fd = -1;
|
||||
};
|
||||
|
||||
ALWAYS_INLINE inline static int preallocateDiskSpace(int fd, size_t offset, size_t len)
|
||||
inline static int preallocateDiskSpace(int fd, size_t offset, size_t len)
|
||||
{
|
||||
#if defined(__FreeBSD__)
|
||||
return posix_fallocate(fd, offset, len);
|
||||
@ -760,7 +759,7 @@ private:
|
||||
#endif
|
||||
}
|
||||
|
||||
ALWAYS_INLINE inline static char * getRequestBuffer(const iocb & request)
|
||||
inline static char * getRequestBuffer(const iocb & request)
|
||||
{
|
||||
char * result = nullptr;
|
||||
|
||||
@ -773,7 +772,7 @@ private:
|
||||
return result;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE inline static ssize_t eventResult(io_event & event)
|
||||
inline static ssize_t eventResult(io_event & event)
|
||||
{
|
||||
ssize_t bytes_written;
|
||||
|
||||
@ -795,7 +794,13 @@ private:
|
||||
size_t current_blocks_size = 0;
|
||||
};
|
||||
|
||||
/// TODO: Add documentation
|
||||
/** ICacheDictionaryStorage implementation that keeps column data serialized in memory index and in disk partitions.
|
||||
* Data is first written in memory buffer.
|
||||
* If memory buffer is full then buffer is flushed to disk partition.
|
||||
* If memory buffer cannot be flushed to associated disk partition, then if partition
|
||||
* can be allocated (current partition index < max_partitions_size) storage allocates new partition, if not old partitions are reused.
|
||||
* Index maps key to partition block and offset.
|
||||
*/
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
class SSDCacheDictionaryStorage final : public ICacheDictionaryStorage
|
||||
{
|
||||
@ -806,9 +811,7 @@ public:
|
||||
explicit SSDCacheDictionaryStorage(const SSDCacheDictionaryStorageConfiguration & configuration_)
|
||||
: configuration(configuration_)
|
||||
, file_buffer(configuration_.file_path, configuration.block_size, configuration.file_blocks_size)
|
||||
, read_from_file_buffer(configuration_.block_size * configuration_.read_buffer_blocks_size, 4096)
|
||||
, rnd_engine(randomSeed())
|
||||
, index(configuration.max_stored_keys, false, { complex_key_arena })
|
||||
{
|
||||
memory_buffer_partitions.emplace_back(configuration.block_size, configuration.write_buffer_blocks_size);
|
||||
}
|
||||
@ -897,14 +900,31 @@ public:
|
||||
|
||||
size_t getSize() const override { return index.size(); }
|
||||
|
||||
size_t getMaxSize() const override {return index.getMaxSize(); }
|
||||
double getLoadFactor() const override
|
||||
{
|
||||
size_t partitions_size = memory_buffer_partitions.size();
|
||||
|
||||
if (partitions_size == configuration.max_partitions_count)
|
||||
return 1.0;
|
||||
|
||||
auto & current_memory_partition = memory_buffer_partitions[current_partition_index];
|
||||
|
||||
size_t full_partitions = partitions_size - 1;
|
||||
size_t blocks_in_memory = (full_partitions * configuration.write_buffer_blocks_size) + current_memory_partition.getCurrentBlockIndex();
|
||||
size_t blocks_on_disk = file_buffer.getCurrentBlockIndex();
|
||||
|
||||
size_t max_blocks_size = (configuration.file_blocks_size + configuration.write_buffer_blocks_size) * configuration.max_partitions_count;
|
||||
|
||||
double load_factor = static_cast<double>(blocks_in_memory + blocks_on_disk) / max_blocks_size;
|
||||
return load_factor;
|
||||
}
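A standalone sketch of the load-factor formula above, not ClickHouse code; the configuration values are assumed to be plain integers. Full partitions contribute all of their write-buffer blocks, the current partition contributes only the blocks written so far, and the denominator is the total block capacity across all partitions, in memory and on disk.

#include <cassert>
#include <cstddef>

/// Same arithmetic as getLoadFactor(), extracted into a free function.
double computeLoadFactor(
    size_t partitions_size,
    size_t max_partitions_count,
    size_t write_buffer_blocks_size,
    size_t file_blocks_size,
    size_t current_partition_written_blocks,
    size_t file_written_blocks)
{
    if (partitions_size == max_partitions_count)
        return 1.0;

    size_t full_partitions = partitions_size - 1;
    size_t blocks_in_memory = full_partitions * write_buffer_blocks_size + current_partition_written_blocks;
    size_t max_blocks = (file_blocks_size + write_buffer_blocks_size) * max_partitions_count;

    return static_cast<double>(blocks_in_memory + file_written_blocks) / max_blocks;
}

int main()
{
    /// One partition, half of the write buffer used, nothing flushed to disk yet.
    double load_factor = computeLoadFactor(1, 16, 1024, 4096, 512, 0);
    assert(load_factor > 0.0 && load_factor < 1.0);
}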
|
||||
|
||||
size_t getBytesAllocated() const override
|
||||
{
|
||||
size_t memory_partitions_bytes_size = memory_buffer_partitions.size() * configuration.write_buffer_blocks_size * configuration.block_size;
|
||||
size_t file_partitions_bytes_size = memory_buffer_partitions.size() * configuration.file_blocks_size * configuration.block_size;
|
||||
|
||||
return index.getSizeInBytes() + memory_partitions_bytes_size + file_partitions_bytes_size;
|
||||
return index.getBufferSizeInBytes() + memory_partitions_bytes_size + file_partitions_bytes_size;
|
||||
}
|
||||
|
||||
private:
|
||||
@ -920,8 +940,7 @@ private:
|
||||
default_value
|
||||
};
|
||||
|
||||
TimePoint deadline;
|
||||
|
||||
time_t deadline;
|
||||
SSDCacheIndex index;
|
||||
size_t in_memory_partition_index;
|
||||
CellState state;
|
||||
@ -933,13 +952,12 @@ private:
|
||||
|
||||
struct KeyToBlockOffset
|
||||
{
|
||||
KeyToBlockOffset(size_t key_index_, size_t offset_in_block_, bool is_expired_)
|
||||
: key_index(key_index_), offset_in_block(offset_in_block_), is_expired(is_expired_)
|
||||
KeyToBlockOffset(size_t key_index_, size_t offset_in_block_)
|
||||
: key_index(key_index_), offset_in_block(offset_in_block_)
|
||||
{}
|
||||
|
||||
size_t key_index = 0;
|
||||
size_t offset_in_block = 0;
|
||||
bool is_expired = false;
|
||||
};
|
||||
|
||||
template <typename Result>
|
||||
@ -950,20 +968,24 @@ private:
|
||||
Result result;
|
||||
|
||||
result.fetched_columns = fetch_request.makeAttributesResultColumns();
|
||||
result.key_index_to_state.resize_fill(keys.size(), {KeyState::not_found});
|
||||
result.key_index_to_state.resize_fill(keys.size());
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
const time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
|
||||
|
||||
size_t fetched_columns_index = 0;
|
||||
|
||||
using BlockIndexToKeysMap = std::unordered_map<size_t, std::vector<KeyToBlockOffset>, DefaultHash<size_t>>;
|
||||
using BlockIndexToKeysMap = absl::flat_hash_map<size_t, PaddedPODArray<KeyToBlockOffset>, DefaultHash<size_t>>;
|
||||
BlockIndexToKeysMap block_to_keys_map;
|
||||
absl::flat_hash_set<size_t, DefaultHash<size_t>> unique_blocks_to_request;
|
||||
PaddedPODArray<size_t> blocks_to_request;
|
||||
|
||||
std::chrono::seconds strict_max_lifetime_seconds(configuration.strict_max_lifetime_seconds);
|
||||
time_t strict_max_lifetime_seconds = static_cast<time_t>(configuration.strict_max_lifetime_seconds);
|
||||
size_t keys_size = keys.size();
|
||||
|
||||
for (size_t attribute_size = 0; attribute_size < fetch_request.attributesSize(); ++attribute_size)
|
||||
if (fetch_request.shouldFillResultColumnWithIndex(attribute_size))
|
||||
result.fetched_columns[attribute_size]->reserve(keys_size);
|
||||
|
||||
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
||||
{
|
||||
auto key = keys[key_index];
|
||||
@ -978,9 +1000,7 @@ private:
|
||||
|
||||
const auto & cell = it->getMapped();
|
||||
|
||||
bool has_deadline = cellHasDeadline(cell);
|
||||
|
||||
if (has_deadline && now > cell.deadline + strict_max_lifetime_seconds)
|
||||
if (unlikely(now > cell.deadline + strict_max_lifetime_seconds))
|
||||
{
|
||||
++result.not_found_keys_size;
|
||||
continue;
|
||||
@ -989,14 +1009,14 @@ private:
|
||||
bool cell_is_expired = false;
|
||||
KeyState::State key_state = KeyState::found;
|
||||
|
||||
if (has_deadline && now > cell.deadline)
|
||||
if (now > cell.deadline)
|
||||
{
|
||||
cell_is_expired = true;
|
||||
key_state = KeyState::expired;
|
||||
}
|
||||
|
||||
result.expired_keys_size += cell_is_expired;
|
||||
result.found_keys_size += !cell_is_expired;
|
||||
result.expired_keys_size += static_cast<size_t>(cell_is_expired);
|
||||
result.found_keys_size += static_cast<size_t>(!cell_is_expired);
|
||||
|
||||
switch (cell.state)
|
||||
{
|
||||
@ -1012,13 +1032,20 @@ private:
|
||||
}
|
||||
case Cell::on_disk:
|
||||
{
|
||||
block_to_keys_map[cell.index.block_index].emplace_back(key_index, cell.index.offset_in_block, cell_is_expired);
|
||||
PaddedPODArray<KeyToBlockOffset> & keys_block = block_to_keys_map[cell.index.block_index];
|
||||
keys_block.emplace_back(key_index, cell.index.offset_in_block);
|
||||
|
||||
if (!unique_blocks_to_request.contains(cell.index.block_index))
|
||||
{
|
||||
KeyState::State state = cell_is_expired ? KeyState::expired : KeyState::found;
|
||||
|
||||
/// Fetched column index will be set later during fetch blocks
|
||||
result.key_index_to_state[key_index] = {state, 0};
|
||||
|
||||
auto insert_result = unique_blocks_to_request.insert(cell.index.block_index);
|
||||
bool was_inserted = insert_result.second;
|
||||
|
||||
if (was_inserted)
|
||||
blocks_to_request.emplace_back(cell.index.block_index);
|
||||
unique_blocks_to_request.insert(cell.index.block_index);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case Cell::default_value:
|
||||
@ -1037,7 +1064,7 @@ private:
|
||||
/// Sort blocks by offset before start async io requests
|
||||
std::sort(blocks_to_request.begin(), blocks_to_request.end());
|
||||
|
||||
file_buffer.fetchBlocks(read_from_file_buffer.m_data, configuration.read_buffer_blocks_size, blocks_to_request, [&](size_t block_index, char * block_data)
|
||||
file_buffer.fetchBlocks(configuration.read_buffer_blocks_size, blocks_to_request, [&](size_t block_index, char * block_data)
|
||||
{
|
||||
auto & keys_in_block = block_to_keys_map[block_index];
|
||||
|
||||
@ -1046,10 +1073,7 @@ private:
|
||||
char * key_data = block_data + key_in_block.offset_in_block;
|
||||
deserializeAndInsertIntoColumns(result.fetched_columns, fetch_request, key_data);
|
||||
|
||||
if (key_in_block.is_expired)
|
||||
result.key_index_to_state[key_in_block.key_index] = {KeyState::expired, fetched_columns_index};
|
||||
else
|
||||
result.key_index_to_state[key_in_block.key_index] = {KeyState::found, fetched_columns_index};
|
||||
result.key_index_to_state[key_in_block.key_index].setFetchedColumnIndex(fetched_columns_index);
|
||||
|
||||
++fetched_columns_index;
|
||||
}
|
||||
@ -1087,7 +1111,7 @@ private:
|
||||
throw Exception("Serialized columns size is greater than allowed block size and metadata", ErrorCodes::UNSUPPORTED_METHOD);
|
||||
|
||||
/// We cannot reuse place that is already allocated in file or memory cache so we erase key from index
|
||||
index.erase(key);
|
||||
eraseKeyFromIndex(key);
|
||||
|
||||
Cell cell;
|
||||
setCellDeadline(cell, now);
|
||||
@ -1114,8 +1138,7 @@ private:
|
||||
|
||||
for (auto key : keys)
|
||||
{
|
||||
/// We cannot reuse place that is already allocated in file or memory cache so we erase key from index
|
||||
index.erase(key);
|
||||
eraseKeyFromIndex(key);
|
||||
|
||||
Cell cell;
|
||||
|
||||
@ -1135,7 +1158,7 @@ private:
|
||||
key = updated_key;
|
||||
}
|
||||
|
||||
index.insert(key, cell);
|
||||
index[key] = cell;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1188,7 +1211,7 @@ private:
|
||||
cell.index = cache_index;
|
||||
cell.in_memory_partition_index = current_partition_index;
|
||||
|
||||
index.insert(ssd_cache_key.key, cell);
|
||||
index[ssd_cache_key.key] = cell;
|
||||
break;
|
||||
}
|
||||
else
|
||||
@ -1218,7 +1241,7 @@ private:
|
||||
if (old_key_cell.isOnDisk() &&
|
||||
old_key_block >= block_index_in_file_before_write &&
|
||||
old_key_block < file_read_end_block_index)
|
||||
index.erase(old_key);
|
||||
eraseKeyFromIndex(old_key);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1271,7 +1294,7 @@ private:
|
||||
cell.index = cache_index;
|
||||
cell.in_memory_partition_index = current_partition_index;
|
||||
|
||||
index.insert(ssd_cache_key.key, cell);
|
||||
index[ssd_cache_key.key] = cell;
|
||||
break;
|
||||
}
|
||||
else
|
||||
@ -1296,16 +1319,12 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
inline static bool cellHasDeadline(const Cell & cell)
|
||||
{
|
||||
return cell.deadline != std::chrono::system_clock::from_time_t(0);
|
||||
}
|
||||
|
||||
inline void setCellDeadline(Cell & cell, TimePoint now)
|
||||
{
|
||||
if (configuration.lifetime.min_sec == 0 && configuration.lifetime.max_sec == 0)
|
||||
{
|
||||
cell.deadline = std::chrono::system_clock::from_time_t(0);
|
||||
auto deadline = std::chrono::time_point<std::chrono::system_clock>::max() - 2 * std::chrono::seconds(configuration.strict_max_lifetime_seconds);
|
||||
cell.deadline = std::chrono::system_clock::to_time_t(deadline);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1313,47 +1332,45 @@ private:
|
||||
size_t max_sec_lifetime = configuration.lifetime.max_sec;
|
||||
|
||||
std::uniform_int_distribution<UInt64> distribution{min_sec_lifetime, max_sec_lifetime};
|
||||
cell.deadline = now + std::chrono::seconds{distribution(rnd_engine)};
|
||||
auto deadline = now + std::chrono::seconds(distribution(rnd_engine));
|
||||
cell.deadline = std::chrono::system_clock::to_time_t(deadline);
|
||||
}
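A minimal sketch of the deadline selection above, using std::mt19937_64 instead of pcg64 so the example is self-contained; the lifetime bounds and strict max lifetime are assumed to be plain integers. When both lifetime bounds are zero the cell effectively never expires, otherwise the deadline is drawn uniformly between the minimum and maximum lifetime from now.

#include <chrono>
#include <cstdint>
#include <ctime>
#include <random>

time_t chooseDeadline(std::mt19937_64 & rng, uint64_t min_sec, uint64_t max_sec, uint64_t strict_max_lifetime_seconds)
{
    using Clock = std::chrono::system_clock;

    if (min_sec == 0 && max_sec == 0)
    {
        /// "Never expires": far enough in the future that even the strict max lifetime check cannot trigger.
        auto never = Clock::time_point::max() - 2 * std::chrono::seconds(strict_max_lifetime_seconds);
        return Clock::to_time_t(never);
    }

    std::uniform_int_distribution<uint64_t> distribution{min_sec, max_sec};
    auto deadline = Clock::now() + std::chrono::seconds(distribution(rng));
    return Clock::to_time_t(deadline);
}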
|
||||
|
||||
template <typename>
|
||||
friend class ArenaCellKeyDisposer;
|
||||
inline void eraseKeyFromIndex(KeyType key)
|
||||
{
|
||||
auto it = index.find(key);
|
||||
|
||||
if (it == nullptr)
|
||||
return;
|
||||
|
||||
/// In case of complex key in arena key is serialized from hash table
|
||||
KeyType key_copy = it->getKey();
|
||||
|
||||
index.erase(key);
|
||||
|
||||
if constexpr (std::is_same_v<KeyType, StringRef>)
|
||||
complex_key_arena.free(const_cast<char *>(key_copy.data), key_copy.size);
|
||||
}
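The erase helper above has to free the serialized key bytes because, for complex keys, the hash map stores a StringRef that points into an arena. A minimal sketch of the same pattern, with std::unordered_map and new[]/delete[] standing in for the saved-hash map and the arena: the stored key view must be captured before erase() and its bytes released afterwards.

#include <cstring>
#include <string_view>
#include <unordered_map>

struct Cell { int payload = 0; };

using Index = std::unordered_map<std::string_view, Cell>;

void insertKey(Index & index, std::string_view key, Cell cell)
{
    /// Copy the key bytes into externally owned storage, as an arena would.
    char * copy = new char[key.size()];
    std::memcpy(copy, key.data(), key.size());
    index[std::string_view(copy, key.size())] = cell;
}

void eraseKey(Index & index, std::string_view key)
{
    auto it = index.find(key);
    if (it == index.end())
        return;

    /// The map's stored view points at the externally allocated copy.
    std::string_view stored_key = it->first;
    index.erase(it);
    delete[] stored_key.data();   /// the real code frees the arena-owned bytes here
}

int main()
{
    Index index;
    insertKey(index, "complex-key", Cell{42});
    eraseKey(index, "complex-key");
}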
|
||||
|
||||
SSDCacheDictionaryStorageConfiguration configuration;
|
||||
|
||||
SSDCacheFileBuffer<SSDCacheKeyType> file_buffer;
|
||||
|
||||
Memory<Allocator<true>> read_from_file_buffer;
|
||||
|
||||
std::vector<SSDCacheMemoryBuffer<SSDCacheKeyType>> memory_buffer_partitions;
|
||||
|
||||
pcg64 rnd_engine;
|
||||
|
||||
class ArenaCellKeyDisposer
|
||||
{
|
||||
public:
|
||||
ArenaWithFreeLists & arena;
|
||||
using SimpleKeyHashMap = HashMap<UInt64, Cell>;
|
||||
using ComplexKeyHashMap = HashMapWithSavedHash<StringRef, Cell>;
|
||||
|
||||
template <typename Key, typename Value>
|
||||
void operator()(const Key & key, const Value &) const
|
||||
{
|
||||
/// In case of complex key we keep it in arena
|
||||
if constexpr (std::is_same_v<Key, StringRef>)
|
||||
arena.free(const_cast<char *>(key.data), key.size);
|
||||
}
|
||||
};
|
||||
|
||||
using SimpleKeyLRUHashMap = LRUHashMap<UInt64, Cell, ArenaCellKeyDisposer>;
|
||||
using ComplexKeyLRUHashMap = LRUHashMapWithSavedHash<StringRef, Cell, ArenaCellKeyDisposer>;
|
||||
|
||||
using CacheLRUHashMap = std::conditional_t<
|
||||
using CacheMap = std::conditional_t<
|
||||
dictionary_key_type == DictionaryKeyType::simple,
|
||||
SimpleKeyLRUHashMap,
|
||||
ComplexKeyLRUHashMap>;
|
||||
SimpleKeyHashMap,
|
||||
ComplexKeyHashMap>;
|
||||
|
||||
ArenaWithFreeLists complex_key_arena;
|
||||
|
||||
CacheLRUHashMap index;
|
||||
CacheMap index;
|
||||
|
||||
size_t current_partition_index = 0;
|
||||
|
||||
|
@ -1,154 +0,0 @@
|
||||
clickhouse-client --query="DROP TABLE IF EXISTS simple_cache_dictionary_table_source";
|
||||
clickhouse-client --query="CREATE TABLE simple_cache_dictionary_table_source (id UInt64, value1 String, value2 UInt64, value3 String, value4 Float64, value5 Decimal64(4)) ENGINE=TinyLog;"
|
||||
clickhouse-client --query="INSERT INTO simple_cache_dictionary_table_source SELECT number, concat('Value1 ', toString(number)), number, concat('Value3 ', toString(number)), toFloat64(number), cast(number, 'Decimal64(4)') FROM system.numbers LIMIT 1000000;"
|
||||
|
||||
clickhouse-client --multiquery --query="CREATE DICTIONARY clickhouse_simple_cache_dictionary (
|
||||
id UInt64,
|
||||
value1 String,
|
||||
value2 UInt64,
|
||||
value3 String,
|
||||
value4 Float64,
|
||||
value5 Decimal64(4)
|
||||
)
|
||||
PRIMARY KEY id
|
||||
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_cache_dictionary_table_source' PASSWORD '' DB 'default'))
|
||||
LIFETIME(MIN 300 MAX 300)
|
||||
LAYOUT(CACHE(SIZE_IN_CELLS 100000));"
|
||||
|
||||
clickhouse-client --multiquery --query="CREATE DICTIONARY clickhouse_ssd_simple_cache_dictionary (
|
||||
id UInt64,
|
||||
value1 String,
|
||||
value2 UInt64,
|
||||
value3 String,
|
||||
value4 Float64,
|
||||
value5 Decimal64(4)
|
||||
)
|
||||
PRIMARY KEY id
|
||||
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_cache_dictionary_table_source' PASSWORD '' DB 'default'))
|
||||
LIFETIME(MIN 300 MAX 300)
|
||||
LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576 WRITE_BUFFER_SIZE 327680 MAX_STORED_KEYS 1048576 PATH '/opt/mkita/ClickHouse/build_release/programs/ssd_cache'));"
|
||||
|
||||
clickhouse-client --multiquery --query="CREATE DICTIONARY clickhouse_dummy_simple_cache_dictionary (
|
||||
id UInt64,
|
||||
value1 String,
|
||||
value2 UInt64,
|
||||
value3 String,
|
||||
value4 Float64,
|
||||
value5 Decimal64(4)
|
||||
)
|
||||
PRIMARY KEY id
|
||||
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_cache_dictionary_table_source' PASSWORD '' DB 'default'))
|
||||
LIFETIME(MIN 300 MAX 300)
|
||||
LAYOUT(DUMMY_SIMPLE());"
|
||||
|
||||
./clickhouse-benchmark --query="SELECT
|
||||
dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value1', number),
|
||||
dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value2', number),
|
||||
dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value3', number),
|
||||
dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value4', number),
|
||||
dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value5', number)
|
||||
FROM system.numbers
|
||||
LIMIT 10000
|
||||
FORMAT Null"
|
||||
|
||||
./clickhouse-benchmark --query="SELECT
|
||||
dictGet('default.clickhouse_simple_cache_dictionary', ('value1', 'value2', 'value3', 'value4', 'value5'), number)
|
||||
FROM system.numbers
|
||||
LIMIT 10000
|
||||
FORMAT Null"
|
||||
|
||||
./clickhouse-benchmark --query="SELECT dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value1', number) FROM system.numbers_mt LIMIT 10000 FORMAT Null"
|
||||
|
||||
./clickhouse-benchmark --query="SELECT
|
||||
dictGet('default.clickhouse_simple_cache_dictionary', 'value1', number),
|
||||
dictGet('default.clickhouse_simple_cache_dictionary', 'value2', number),
|
||||
dictGet('default.clickhouse_simple_cache_dictionary', 'value3', number),
|
||||
dictGet('default.clickhouse_simple_cache_dictionary', 'value4', number),
|
||||
dictGet('default.clickhouse_simple_cache_dictionary', 'value5', number)
|
||||
FROM system.numbers
|
||||
LIMIT 10000
|
||||
FORMAT Null"
|
||||
|
||||
./clickhouse-benchmark --query="SELECT dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value1', number) FROM system.numbers_mt LIMIT 10000 FORMAT Null"
|
||||
|
||||
SELECT
|
||||
dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value1', number),
|
||||
dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value2', number),
|
||||
dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value3', number),
|
||||
dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value4', number),
|
||||
dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value5', number)
|
||||
FROM system.numbers
|
||||
LIMIT 10000
|
||||
FORMAT Null
|
||||
|
||||
SELECT dictGet('default.clickhouse_simple_cache_dictionary', ('value1', 'value2', 'value3', 'value4', 'value5'), number) FROM system.numbers LIMIT 10000 FORMAT Null
|
||||
|
||||
SELECT dictGet('default.clickhouse_ssd_simple_cache_dictionary', ('value1', 'value2', 'value3', 'value4', 'value5'), number) FROM system.numbers LIMIT 10000
|
||||
FORMAT Null
|
||||
|
||||
SELECT
|
||||
dictGet('default.clickhouse_simple_cache_dictionary', ('value1', 'value2', 'value3', 'value4', 'value5'), number)
|
||||
FROM system.numbers
|
||||
LIMIT 10000
|
||||
FORMAT
|
||||
Null
|
||||
|
||||
SELECT
|
||||
dictGet('default.clickhouse_simple_cache_dictionary', 'value1', number),
|
||||
dictGet('default.clickhouse_simple_cache_dictionary', 'value2', number),
|
||||
dictGet('default.clickhouse_simple_cache_dictionary', 'value3', number),
|
||||
dictGet('default.clickhouse_simple_cache_dictionary', 'value4', number),
|
||||
dictGet('default.clickhouse_simple_cache_dictionary', 'value5', number)
|
||||
FROM system.numbers
|
||||
LIMIT 10000
|
||||
FORMAT
|
||||
Null
|
||||
|
||||
SELECT
|
||||
dictGet('default.clickhouse_simple_cache_dictionary', 'value1', number),
|
||||
dictGet('default.clickhouse_simple_cache_dictionary', 'value2', number)
|
||||
FROM system.numbers
|
||||
LIMIT 10000
|
||||
FORMAT Null
|
||||
|
||||
SELECT
|
||||
dictGet('clickhouse_simple_cache_dictionary', 'value1', number)
|
||||
FROM system.numbers
|
||||
LIMIT 100000
|
||||
FORMAT Null
|
||||
|
||||
SELECT
|
||||
dictGet('clickhouse_simple_cache_dictionary', 'value2', number)
|
||||
FROM system.numbers
|
||||
LIMIT 100000
|
||||
FORMAT Null
|
||||
|
||||
SELECT
|
||||
dictGet('clickhouse_simple_cache_dictionary', 'value3', number)
|
||||
FROM system.numbers
|
||||
LIMIT 100000
|
||||
FORMAT Null
|
||||
|
||||
SELECT
|
||||
dictGet('clickhouse_simple_cache_dictionary', 'value4', number)
|
||||
FROM system.numbers
|
||||
LIMIT 100000
|
||||
FORMAT Null
|
||||
|
||||
SELECT
|
||||
dictGet('clickhouse_simple_cache_dictionary', 'value5', number)
|
||||
FROM system.numbers
|
||||
LIMIT 100000
|
||||
FORMAT Null
|
||||
|
||||
SELECT
|
||||
dictGet('clickhouse_simple_cache_dictionary', 'value1', number),
|
||||
dictGet('clickhouse_simple_cache_dictionary', 'value2', number),
|
||||
dictGet('clickhouse_simple_cache_dictionary', 'value3', number),
|
||||
dictGet('clickhouse_simple_cache_dictionary', 'value4', number),
|
||||
dictGet('clickhouse_simple_cache_dictionary', 'value5', number)
|
||||
FROM system.numbers
|
||||
LIMIT 100000
|
||||
FORMAT Null
|
||||
|
||||
SELECT * FROM clickhouse_simple_cache_dictionary_table;
|
@ -1,6 +1,6 @@
|
||||
#include "CacheDictionary.h"
|
||||
#include "SSDCacheDictionaryStorage.h"
|
||||
#include "CacheDictionaryStorage.h"
|
||||
#include "SSDCacheDictionaryStorage.h"
|
||||
#include <Dictionaries/DictionaryFactory.h>
|
||||
|
||||
namespace DB
|
||||
@ -20,13 +20,13 @@ CacheDictionaryStorageConfiguration parseCacheStorageConfiguration(
|
||||
const DictionaryLifetime & dict_lifetime,
|
||||
DictionaryKeyType dictionary_key_type)
|
||||
{
|
||||
String dictionary_type_prefix = dictionary_key_type == DictionaryKeyType::complex ? ".complex_key_cache." : ".cache.";
|
||||
String dictionary_type_prefix = (dictionary_key_type == DictionaryKeyType::complex) ? ".complex_key_cache." : ".cache.";
|
||||
String dictionary_configuration_prefix = layout_prefix + dictionary_type_prefix;
|
||||
|
||||
const size_t size = config.getUInt64(dictionary_configuration_prefix + "size_in_cells");
|
||||
if (size == 0)
|
||||
throw Exception(ErrorCodes::TOO_SMALL_BUFFER_SIZE,
|
||||
"({}: cache dictionary cannot have 0 cells",
|
||||
"({}): cache dictionary cannot have 0 cells",
|
||||
full_name);
|
||||
|
||||
size_t dict_lifetime_seconds = static_cast<size_t>(dict_lifetime.max_sec);
|
||||
@ -59,7 +59,6 @@ SSDCacheDictionaryStorageConfiguration parseSSDCacheStorageConfiguration(
|
||||
static constexpr size_t DEFAULT_READ_BUFFER_SIZE_BYTES = 16 * DEFAULT_SSD_BLOCK_SIZE_BYTES;
|
||||
static constexpr size_t DEFAULT_WRITE_BUFFER_SIZE_BYTES = DEFAULT_SSD_BLOCK_SIZE_BYTES;
|
||||
|
||||
static constexpr size_t DEFAULT_MAX_STORED_KEYS = 100000;
|
||||
static constexpr size_t DEFAULT_PARTITIONS_COUNT = 16;
|
||||
|
||||
const size_t max_partitions_count
|
||||
@ -94,16 +93,11 @@ SSDCacheDictionaryStorageConfiguration parseSSDCacheStorageConfiguration(
|
||||
if (directory_path.at(0) != '/')
|
||||
directory_path = std::filesystem::path{config.getString("path")}.concat(directory_path).string();
|
||||
|
||||
const size_t max_stored_keys_in_partition
|
||||
= config.getInt64(dictionary_configuration_prefix + "max_stored_keys", DEFAULT_MAX_STORED_KEYS);
|
||||
const size_t rounded_size = roundUpToPowerOfTwoOrZero(max_stored_keys_in_partition);
|
||||
|
||||
SSDCacheDictionaryStorageConfiguration configuration{
|
||||
strict_max_lifetime_seconds,
|
||||
dict_lifetime,
|
||||
directory_path,
|
||||
max_partitions_count,
|
||||
rounded_size,
|
||||
block_size,
|
||||
file_size / block_size,
|
||||
read_buffer_size / block_size,
|
||||
@ -194,7 +188,8 @@ DictionaryPtr createCacheDictionaryLayout(
|
||||
const bool allow_read_expired_keys = config.getBool(layout_prefix + ".cache.allow_read_expired_keys", false);
|
||||
|
||||
auto storage_configuration = parseCacheStorageConfiguration(full_name, config, layout_prefix, dict_lifetime, dictionary_key_type);
|
||||
auto storage = std::make_shared<CacheDictionaryStorage<dictionary_key_type>>(storage_configuration);
|
||||
|
||||
std::shared_ptr<ICacheDictionaryStorage> storage = std::make_shared<CacheDictionaryStorage<dictionary_key_type>>(dict_struct, storage_configuration);
|
||||
|
||||
auto update_queue_configuration = parseCacheDictionaryUpdateQueueConfiguration(full_name, config, layout_prefix, dictionary_key_type);
|
||||
|
||||
|
@ -209,7 +209,13 @@ void DiskCacheWrapper::clearDirectory(const String & path)
|
||||
void DiskCacheWrapper::moveDirectory(const String & from_path, const String & to_path)
|
||||
{
|
||||
if (cache_disk->exists(from_path))
|
||||
{
|
||||
/// Destination directory may not be empty if a previous directory move attempt failed.
|
||||
if (cache_disk->exists(to_path) && cache_disk->isDirectory(to_path))
|
||||
cache_disk->clearDirectory(to_path);
|
||||
|
||||
cache_disk->moveDirectory(from_path, to_path);
|
||||
}
|
||||
DiskDecorator::moveDirectory(from_path, to_path);
|
||||
}
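A sketch of the same recovery logic with std::filesystem rather than the IDisk interface (assumed stand-ins, not the ClickHouse API): if a previous move left a possibly partial destination directory behind, clear it before retrying so the rename does not fail on a non-empty target.

#include <filesystem>

namespace fs = std::filesystem;

void moveDirectorySafely(const fs::path & from, const fs::path & to)
{
    if (fs::exists(from))
    {
        /// Destination may remain from a previous failed move attempt.
        if (fs::exists(to) && fs::is_directory(to))
            fs::remove_all(to);

        fs::rename(from, to);
    }
}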
|
||||
|
||||
|
@ -538,8 +538,9 @@ private:
|
||||
|
||||
[[maybe_unused]] const auto block_size = static_cast<size_t>(EVP_CIPHER_block_size(evp_cipher));
|
||||
[[maybe_unused]] const auto iv_size = static_cast<size_t>(EVP_CIPHER_iv_length(evp_cipher));
|
||||
const auto key_size = static_cast<size_t>(EVP_CIPHER_key_length(evp_cipher));
|
||||
const auto tag_size = 16; // https://tools.ietf.org/html/rfc5116#section-5.1
|
||||
|
||||
const size_t key_size = static_cast<size_t>(EVP_CIPHER_key_length(evp_cipher));
|
||||
static constexpr size_t tag_size = 16; // https://tools.ietf.org/html/rfc5116#section-5.1
|
||||
|
||||
auto decrypted_result_column = ColumnString::create();
|
||||
auto & decrypted_result_column_data = decrypted_result_column->getChars();
|
||||
@ -549,9 +550,17 @@ private:
|
||||
size_t resulting_size = 0;
|
||||
for (size_t r = 0; r < input_rows_count; ++r)
|
||||
{
|
||||
resulting_size += input_column->getDataAt(r).size + 1;
|
||||
size_t string_size = input_column->getDataAt(r).size;
|
||||
resulting_size += string_size + 1; /// With terminating zero.
|
||||
|
||||
if constexpr (mode == CipherMode::RFC5116_AEAD_AES_GCM)
|
||||
{
|
||||
if (string_size < tag_size)
|
||||
throw Exception("Encrypted data is smaller than the size of additional data for AEAD mode, cannot decrypt.",
|
||||
ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
resulting_size -= tag_size;
|
||||
}
|
||||
}
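The size pre-calculation above relies on the RFC 5116 AEAD layout: the stored value is the ciphertext plus a 16-byte authentication tag, so the plaintext can be at most input_size - 16 bytes and anything shorter than the tag cannot be valid. A minimal sketch of that arithmetic in isolation:

#include <cstddef>
#include <stdexcept>

constexpr size_t aead_tag_size = 16;   /// per RFC 5116 for AES-GCM

size_t decryptedSizeUpperBound(size_t input_size, bool aead_mode)
{
    if (!aead_mode)
        return input_size;

    if (input_size < aead_tag_size)
        throw std::invalid_argument("Encrypted data is smaller than the AEAD tag, cannot decrypt");

    return input_size - aead_tag_size;
}

int main()
{
    return decryptedSizeUpperBound(48, /* aead_mode = */ true) == 32 ? 0 : 1;
}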
|
||||
|
||||
#if defined(MEMORY_SANITIZER)
|
||||
@ -565,6 +574,7 @@ private:
|
||||
decrypted_result_column_data.resize(resulting_size);
|
||||
#endif
|
||||
}
|
||||
|
||||
auto * decrypted = decrypted_result_column_data.data();
|
||||
|
||||
KeyHolder<mode> key_holder;
|
||||
@ -631,7 +641,7 @@ private:
|
||||
// 1.a.2: Set AAD if present
|
||||
if (aad_column)
|
||||
{
|
||||
const auto aad_data = aad_column->getDataAt(r);
|
||||
StringRef aad_data = aad_column->getDataAt(r);
|
||||
int tmp_len = 0;
|
||||
if (aad_data.size != 0 && EVP_DecryptUpdate(evp_ctx, nullptr, &tmp_len,
|
||||
reinterpret_cast<const unsigned char *>(aad_data.data), aad_data.size) != 1)
|
||||
|
@ -42,11 +42,11 @@ struct SimdJSONParser
|
||||
ALWAYS_INLINE bool isBool() const { return element.type() == simdjson::dom::element_type::BOOL; }
|
||||
ALWAYS_INLINE bool isNull() const { return element.type() == simdjson::dom::element_type::NULL_VALUE; }
|
||||
|
||||
ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().first; }
|
||||
ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().first; }
|
||||
ALWAYS_INLINE double getDouble() const { return element.get_double().first; }
|
||||
ALWAYS_INLINE bool getBool() const { return element.get_bool().first; }
|
||||
ALWAYS_INLINE std::string_view getString() const { return element.get_string().first; }
|
||||
ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().value_unsafe(); }
|
||||
ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().value_unsafe(); }
|
||||
ALWAYS_INLINE double getDouble() const { return element.get_double().value_unsafe(); }
|
||||
ALWAYS_INLINE bool getBool() const { return element.get_bool().value_unsafe(); }
|
||||
ALWAYS_INLINE std::string_view getString() const { return element.get_string().value_unsafe(); }
|
||||
ALWAYS_INLINE Array getArray() const;
|
||||
ALWAYS_INLINE Object getObject() const;
|
||||
|
||||
@ -75,7 +75,7 @@ struct SimdJSONParser
|
||||
ALWAYS_INLINE Iterator begin() const { return array.begin(); }
|
||||
ALWAYS_INLINE Iterator end() const { return array.end(); }
|
||||
ALWAYS_INLINE size_t size() const { return array.size(); }
|
||||
ALWAYS_INLINE Element operator[](size_t index) const { assert(index < size()); return array.at(index).first; }
|
||||
ALWAYS_INLINE Element operator[](size_t index) const { assert(index < size()); return array.at(index).value_unsafe(); }
|
||||
|
||||
private:
|
||||
simdjson::dom::array array;
|
||||
@ -111,7 +111,7 @@ struct SimdJSONParser
|
||||
if (x.error())
|
||||
return false;
|
||||
|
||||
result = x.first;
|
||||
result = x.value_unsafe();
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -137,7 +137,7 @@ struct SimdJSONParser
|
||||
if (document.error())
|
||||
return false;
|
||||
|
||||
result = document.first;
|
||||
result = document.value_unsafe();
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -155,12 +155,12 @@ private:
|
||||
|
||||
inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const
|
||||
{
|
||||
return element.get_array().first;
|
||||
return element.get_array().value_unsafe();
|
||||
}
|
||||
|
||||
inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const
|
||||
{
|
||||
return element.get_object().first;
|
||||
return element.get_object().value_unsafe();
|
||||
}
|
||||
|
||||
}
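The switch from .first to .value_unsafe() assumes the element type was already verified (isInt64() and friends are called before the getters), so the second error check can be skipped. A small sketch of both access styles, assuming a recent simdjson DOM API with exceptions enabled:

#include <simdjson.h>
#include <iostream>

int main()
{
    using namespace simdjson;

    dom::parser parser;
    dom::element doc = parser.parse(R"({"x": 42})"_padded);
    dom::element x = doc["x"];

    /// Checked access: inspect error() before touching the value.
    auto as_int = x.get_int64();
    if (!as_int.error())
        std::cout << as_int.value_unsafe() << '\n';

    /// Unchecked access: valid only because the type is verified first,
    /// which is the contract the wrappers above rely on.
    if (x.type() == dom::element_type::INT64)
        std::cout << x.get_int64().value_unsafe() << '\n';
}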
|
||||
|
@ -49,8 +49,11 @@ public:
|
||||
{}
|
||||
|
||||
template <typename ... Args>
|
||||
inline auto execute(const DateTime64 & t, Args && ... args) const
|
||||
inline auto NO_SANITIZE_UNDEFINED execute(const DateTime64 & t, Args && ... args) const
|
||||
{
|
||||
/// Type conversion from float to integer may be required.
/// We are OK with an implementation-specific result for out-of-range and denormal conversions.
|
||||
|
||||
if constexpr (TransformHasExecuteOverload_v<DateTime64, decltype(scale_multiplier), Args...>)
|
||||
{
|
||||
return wrapped_transform.execute(t, scale_multiplier, std::forward<Args>(args)...);
|
||||
|
@ -90,7 +90,70 @@ struct ExtractFirstSignificantSubdomain
|
||||
res_data += last_3_periods[1] + 1 - begin;
|
||||
res_size = last_3_periods[0] - last_3_periods[1] - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The difference from execute() is that a custom TLD list can have records of any level,
/// not only the 2nd level (as in the non-custom variant), so it requires more lookups.
|
||||
template <class Lookup>
|
||||
static void executeCustom(const Lookup & lookup, const Pos data, const size_t size, Pos & res_data, size_t & res_size, Pos * out_domain_end = nullptr)
|
||||
{
|
||||
res_data = data;
|
||||
res_size = 0;
|
||||
|
||||
Pos tmp;
|
||||
size_t domain_length;
|
||||
ExtractDomain<without_www>::execute(data, size, tmp, domain_length);
|
||||
|
||||
if (domain_length == 0)
|
||||
return;
|
||||
|
||||
if (out_domain_end)
|
||||
*out_domain_end = tmp + domain_length;
|
||||
|
||||
/// cut useless dot
|
||||
if (tmp[domain_length - 1] == '.')
|
||||
--domain_length;
|
||||
|
||||
res_data = tmp;
|
||||
res_size = domain_length;
|
||||
|
||||
auto begin = tmp;
|
||||
auto end = begin + domain_length;
|
||||
const char * last_2_periods[2]{};
|
||||
const char * prev = begin - 1;
|
||||
|
||||
auto pos = find_first_symbols<'.'>(begin, end);
|
||||
while (pos < end)
|
||||
{
|
||||
if (lookup(pos + 1, end - pos - 1))
|
||||
{
|
||||
res_data += prev + 1 - begin;
|
||||
res_size = end - 1 - prev;
|
||||
return;
|
||||
}
|
||||
|
||||
last_2_periods[1] = last_2_periods[0];
|
||||
last_2_periods[0] = pos;
|
||||
prev = pos;
|
||||
pos = find_first_symbols<'.'>(pos + 1, end);
|
||||
}
|
||||
|
||||
/// if there is domain of the first level (i.e. no dots in the hostname) -> return nothing
|
||||
if (!last_2_periods[0])
|
||||
return;
|
||||
|
||||
/// if there is domain of the second level -> always return itself
|
||||
if (!last_2_periods[1])
|
||||
{
|
||||
res_size = last_2_periods[0] - begin;
|
||||
return;
|
||||
}
|
||||
|
||||
/// if there is domain of the 3+ level, and zero records in TLD list ->
|
||||
/// fallback to domain of the second level
|
||||
res_data += last_2_periods[1] + 1 - begin;
|
||||
res_size = last_2_periods[0] - last_2_periods[1] - 1;
|
||||
}
|
||||
};
|
||||
|
||||
}
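A simplified, standalone sketch of the extra lookups a custom TLD list requires (a hypothetical helper, not the ClickHouse function): because the list may contain suffixes of any level ("co.uk", "s3.amazonaws.com", ...), the suffix after every dot has to be probed, not only the last one, and the caller falls back to the second-level domain when nothing matches.

#include <cstddef>
#include <optional>
#include <string_view>
#include <unordered_set>

struct TldMatch
{
    size_t label_begin;   /// start of the label right before the matched suffix
    size_t suffix_begin;  /// start of the matched public suffix
};

std::optional<TldMatch> findCustomTld(std::string_view host, const std::unordered_set<std::string_view> & tld_list)
{
    size_t prev_dot = std::string_view::npos;

    for (size_t pos = host.find('.'); pos != std::string_view::npos; pos = host.find('.', pos + 1))
    {
        if (tld_list.count(host.substr(pos + 1)))
        {
            size_t label_begin = (prev_dot == std::string_view::npos) ? 0 : prev_dot + 1;
            return TldMatch{label_begin, pos + 1};
        }
        prev_dot = pos;
    }

    return std::nullopt;   /// no custom record matched; fall back to the second-level domain
}

int main()
{
    std::unordered_set<std::string_view> tlds{"co.uk"};
    std::string_view host = "news.example.co.uk";
    auto match = findCustomTld(host, tlds);
    /// The significant label is "example", the matched suffix is "co.uk".
    std::string_view label = host.substr(match->label_begin, match->suffix_begin - match->label_begin - 1);
    return (match && label == "example") ? 0 : 1;
}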
|
||||
|
@ -17,10 +17,10 @@ namespace ErrorCodes
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
struct FirstSignificantSubdomainCustomtLookup
|
||||
struct FirstSignificantSubdomainCustomLookup
|
||||
{
|
||||
const TLDList & tld_list;
|
||||
FirstSignificantSubdomainCustomtLookup(const std::string & tld_list_name)
|
||||
FirstSignificantSubdomainCustomLookup(const std::string & tld_list_name)
|
||||
: tld_list(TLDListsHolder::getInstance().getTldList(tld_list_name))
|
||||
{
|
||||
}
|
||||
@ -63,7 +63,7 @@ public:
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override
|
||||
{
|
||||
const ColumnConst * column_tld_list_name = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
|
||||
FirstSignificantSubdomainCustomtLookup tld_lookup(column_tld_list_name->getValue<String>());
|
||||
FirstSignificantSubdomainCustomLookup tld_lookup(column_tld_list_name->getValue<String>());
|
||||
|
||||
/// FIXME: convertToFullColumnIfConst() is suboptimal
|
||||
auto column = arguments[0].column->convertToFullColumnIfConst();
|
||||
@ -79,7 +79,7 @@ public:
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
static void vector(FirstSignificantSubdomainCustomtLookup & tld_lookup,
|
||||
static void vector(FirstSignificantSubdomainCustomLookup & tld_lookup,
|
||||
const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
|
||||
ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
|
@ -10,7 +10,7 @@ struct CutToFirstSignificantSubdomainCustom
|
||||
{
|
||||
static size_t getReserveLengthForElement() { return 15; }
|
||||
|
||||
static void execute(FirstSignificantSubdomainCustomtLookup & tld_lookup, const Pos data, const size_t size, Pos & res_data, size_t & res_size)
|
||||
static void execute(FirstSignificantSubdomainCustomLookup & tld_lookup, const Pos data, const size_t size, Pos & res_data, size_t & res_size)
|
||||
{
|
||||
res_data = data;
|
||||
res_size = 0;
|
||||
@ -18,7 +18,7 @@ struct CutToFirstSignificantSubdomainCustom
|
||||
Pos tmp_data;
|
||||
size_t tmp_length;
|
||||
Pos domain_end;
|
||||
ExtractFirstSignificantSubdomain<without_www>::execute(tld_lookup, data, size, tmp_data, tmp_length, &domain_end);
|
||||
ExtractFirstSignificantSubdomain<without_www>::executeCustom(tld_lookup, data, size, tmp_data, tmp_length, &domain_end);
|
||||
|
||||
if (tmp_length == 0)
|
||||
return;
|
||||
|
@ -190,7 +190,7 @@ private:
|
||||
}
|
||||
|
||||
static constexpr size_t MAX_ARRAY_SIZE = 1ULL << 30;
|
||||
if (static_cast<size_t>(max_key - min_key) > MAX_ARRAY_SIZE)
|
||||
if (static_cast<size_t>(max_key) - static_cast<size_t>(min_key) > MAX_ARRAY_SIZE)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size in the result of function {}", getName());
|
||||
|
||||
/* fill the result arrays */
|
||||
|
@ -16,6 +16,7 @@ namespace ErrorCodes
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -110,6 +111,9 @@ public:
|
||||
arguments[2].column->getFloat64(i),
|
||||
max_width);
|
||||
|
||||
if (!isFinite(width))
|
||||
throw Exception("Value of width must not be NaN and Inf", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
size_t next_size = current_offset + UnicodeBar::getWidthInBytes(width) + 1;
|
||||
dst_chars.resize(next_size);
|
||||
UnicodeBar::render(width, reinterpret_cast<char *>(&dst_chars[current_offset]));
|
||||
|
@ -41,7 +41,8 @@ void registerFunctionThrowIf(FunctionFactory &);
|
||||
void registerFunctionVersion(FunctionFactory &);
|
||||
void registerFunctionBuildId(FunctionFactory &);
|
||||
void registerFunctionUptime(FunctionFactory &);
|
||||
void registerFunctionTimeZone(FunctionFactory &);
|
||||
void registerFunctionTimezone(FunctionFactory &);
|
||||
void registerFunctionTimezoneOf(FunctionFactory &);
|
||||
void registerFunctionRunningAccumulate(FunctionFactory &);
|
||||
void registerFunctionRunningDifference(FunctionFactory &);
|
||||
void registerFunctionRunningDifferenceStartingWithFirstValue(FunctionFactory &);
|
||||
@ -111,7 +112,8 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
|
||||
registerFunctionVersion(factory);
|
||||
registerFunctionBuildId(factory);
|
||||
registerFunctionUptime(factory);
|
||||
registerFunctionTimeZone(factory);
|
||||
registerFunctionTimezone(factory);
|
||||
registerFunctionTimezoneOf(factory);
|
||||
registerFunctionRunningAccumulate(factory);
|
||||
registerFunctionRunningDifference(factory);
|
||||
registerFunctionRunningDifferenceStartingWithFirstValue(factory);
|
||||
|
@ -12,13 +12,13 @@ namespace
|
||||
|
||||
/** Returns the server time zone.
|
||||
*/
|
||||
class FunctionTimeZone : public IFunction
|
||||
class FunctionTimezone : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "timezone";
|
||||
static FunctionPtr create(const Context &)
|
||||
{
|
||||
return std::make_shared<FunctionTimeZone>();
|
||||
return std::make_shared<FunctionTimezone>();
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
@ -45,9 +45,10 @@ public:
|
||||
|
||||
}
|
||||
|
||||
void registerFunctionTimeZone(FunctionFactory & factory)
|
||||
void registerFunctionTimezone(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionTimeZone>();
|
||||
factory.registerFunction<FunctionTimezone>();
|
||||
factory.registerAlias("timeZone", "timezone");
|
||||
}
|
||||
|
||||
}
|
||||
|
118
src/Functions/timezoneOf.cpp
Normal file
@ -0,0 +1,118 @@
|
||||
#include <Functions/IFunctionImpl.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <common/DateLUTImpl.h>
|
||||
#include <Core/Field.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
|
||||
/** timezoneOf(x) - get the name of the timezone of DateTime data type.
|
||||
* Example: Europe/Moscow.
|
||||
*/
|
||||
class ExecutableFunctionTimezoneOf : public IExecutableFunctionImpl
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "timezoneOf";
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool useDefaultImplementationForNulls() const override { return false; }
|
||||
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
|
||||
|
||||
/// Execute the function on the columns.
|
||||
ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
DataTypePtr type_no_nullable = removeNullable(arguments[0].type);
|
||||
|
||||
return DataTypeString().createColumnConst(input_rows_count,
|
||||
dynamic_cast<const TimezoneMixin &>(*type_no_nullable).getTimeZone().getTimeZone());
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class BaseFunctionTimezoneOf : public IFunctionBaseImpl
|
||||
{
|
||||
public:
|
||||
BaseFunctionTimezoneOf(DataTypes argument_types_, DataTypePtr return_type_)
|
||||
: argument_types(std::move(argument_types_)), return_type(std::move(return_type_)) {}
|
||||
|
||||
static constexpr auto name = "timezoneOf";
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool isDeterministic() const override { return true; }
|
||||
bool isDeterministicInScopeOfQuery() const override { return true; }
|
||||
|
||||
const DataTypes & getArgumentTypes() const override { return argument_types; }
|
||||
const DataTypePtr & getResultType() const override { return return_type; }
|
||||
|
||||
ExecutableFunctionImplPtr prepare(const ColumnsWithTypeAndName &) const override
|
||||
{
|
||||
return std::make_unique<ExecutableFunctionTimezoneOf>();
|
||||
}
|
||||
|
||||
ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
DataTypePtr type_no_nullable = removeNullable(arguments[0].type);
|
||||
|
||||
return DataTypeString().createColumnConst(1,
|
||||
dynamic_cast<const TimezoneMixin &>(*type_no_nullable).getTimeZone().getTimeZone());
|
||||
}
|
||||
|
||||
private:
|
||||
DataTypes argument_types;
|
||||
DataTypePtr return_type;
|
||||
};
|
||||
|
||||
|
||||
class FunctionTimezoneOfBuilder : public IFunctionOverloadResolverImpl
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "timezoneOf";
|
||||
String getName() const override { return name; }
|
||||
static FunctionOverloadResolverImplPtr create(const Context &) { return std::make_unique<FunctionTimezoneOfBuilder>(); }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 1; }
|
||||
|
||||
DataTypePtr getReturnType(const DataTypes & types) const override
|
||||
{
|
||||
DataTypePtr type_no_nullable = removeNullable(types[0]);
|
||||
|
||||
if (isDateTime(type_no_nullable) || isDateTime64(type_no_nullable))
|
||||
return std::make_shared<DataTypeString>();
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad argument for function {}, should be DateTime or DateTime64", name);
|
||||
}
|
||||
|
||||
FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
|
||||
{
|
||||
return std::make_unique<BaseFunctionTimezoneOf>(DataTypes{arguments[0].type}, return_type);
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForNulls() const override { return false; }
|
||||
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
|
||||
ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t /*number_of_arguments*/) const override { return {0}; }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
void registerFunctionTimezoneOf(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionTimezoneOfBuilder>();
|
||||
factory.registerAlias("timeZoneOf", "timezoneOf");
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -21,11 +21,11 @@ namespace
|
||||
{
|
||||
|
||||
/// Just changes time zone information for data type. The calculation is free.
|
||||
class FunctionToTimeZone : public IFunction
|
||||
class FunctionToTimezone : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "toTimeZone";
|
||||
static FunctionPtr create(const Context &) { return std::make_shared<FunctionToTimeZone>(); }
|
||||
static constexpr auto name = "toTimezone";
|
||||
static FunctionPtr create(const Context &) { return std::make_shared<FunctionToTimezone>(); }
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
@ -64,7 +64,8 @@ public:
|
||||
|
||||
void registerFunctionToTimeZone(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionToTimeZone>();
|
||||
factory.registerFunction<FunctionToTimezone>();
|
||||
factory.registerAlias("toTimeZone", "toTimezone");
|
||||
}
|
||||
|
||||
}
|
@ -467,6 +467,7 @@ SRCS(
|
||||
timeSlot.cpp
|
||||
timeSlots.cpp
|
||||
timezone.cpp
|
||||
timezoneOf.cpp
|
||||
timezoneOffset.cpp
|
||||
toColumnTypeName.cpp
|
||||
toCustomWeek.cpp
|
||||
@ -506,7 +507,7 @@ SRCS(
|
||||
toStartOfTenMinutes.cpp
|
||||
toStartOfYear.cpp
|
||||
toTime.cpp
|
||||
toTimeZone.cpp
|
||||
toTimezone.cpp
|
||||
toTypeName.cpp
|
||||
toUnixTimestamp64Micro.cpp
|
||||
toUnixTimestamp64Milli.cpp
|
||||
|
@ -106,7 +106,7 @@ void BrotliWriteBuffer::finish()
|
||||
try
|
||||
{
|
||||
finishImpl();
|
||||
out->next();
|
||||
out->finalize();
|
||||
finished = true;
|
||||
}
|
||||
catch (...)
|
||||
|
@ -105,7 +105,7 @@ void LZMADeflatingWriteBuffer::finish()
|
||||
try
|
||||
{
|
||||
finishImpl();
|
||||
out->next();
|
||||
out->finalize();
|
||||
finished = true;
|
||||
}
|
||||
catch (...)
|
||||
|
@ -82,6 +82,7 @@ bool PeekableReadBuffer::peekNext()
|
||||
checkpoint.emplace(memory.data());
|
||||
checkpoint_in_own_memory = true;
|
||||
}
|
||||
|
||||
if (currentlyReadFromOwnMemory())
|
||||
{
|
||||
/// Update buffer size
|
||||
@ -99,7 +100,6 @@ bool PeekableReadBuffer::peekNext()
|
||||
pos_offset = 0;
|
||||
}
|
||||
BufferBase::set(memory.data(), peeked_size + bytes_to_copy, pos_offset);
|
||||
|
||||
}
|
||||
|
||||
peeked_size += bytes_to_copy;
|
||||
@ -113,12 +113,21 @@ void PeekableReadBuffer::rollbackToCheckpoint(bool drop)
|
||||
{
|
||||
checkStateCorrect();
|
||||
|
||||
if (!checkpoint)
|
||||
throw DB::Exception("There is no checkpoint", ErrorCodes::LOGICAL_ERROR);
|
||||
else if (checkpointInOwnMemory() == currentlyReadFromOwnMemory())
|
||||
assert(checkpoint);
|
||||
|
||||
if (checkpointInOwnMemory() == currentlyReadFromOwnMemory())
|
||||
{
|
||||
/// Both checkpoint and position are in the same buffer.
|
||||
pos = *checkpoint;
|
||||
else /// Checkpoint is in own memory and pos is not. Switch to reading from own memory
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Checkpoint is in own memory and position is not.
|
||||
assert(checkpointInOwnMemory());
|
||||
|
||||
/// Switch to reading from own memory.
|
||||
BufferBase::set(memory.data(), peeked_size, *checkpoint - memory.data());
|
||||
}
|
||||
|
||||
if (drop)
|
||||
dropCheckpoint();
|
||||
@ -134,6 +143,7 @@ bool PeekableReadBuffer::nextImpl()
|
||||
|
||||
checkStateCorrect();
|
||||
bool res;
|
||||
bool checkpoint_at_end = checkpoint && *checkpoint == working_buffer.end() && currentlyReadFromOwnMemory();
|
||||
|
||||
if (checkpoint)
|
||||
{
|
||||
@ -163,6 +173,13 @@ bool PeekableReadBuffer::nextImpl()
|
||||
BufferBase::set(sub_working.begin(), sub_working.size(), sub_buf.offset());
|
||||
nextimpl_working_buffer_offset = sub_buf.offset();
|
||||
|
||||
if (checkpoint_at_end)
|
||||
{
|
||||
checkpoint.emplace(working_buffer.begin());
|
||||
peeked_size = 0;
|
||||
checkpoint_in_own_memory = false;
|
||||
}
|
||||
|
||||
checkStateCorrect();
|
||||
return res;
|
||||
}
|
||||
|
@ -43,10 +43,7 @@ public:
|
||||
/// Forget checkpoint and all data between checkpoint and position
|
||||
ALWAYS_INLINE inline void dropCheckpoint()
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
if (!checkpoint)
|
||||
throw DB::Exception("There is no checkpoint", ErrorCodes::LOGICAL_ERROR);
|
||||
#endif
|
||||
assert(checkpoint);
|
||||
if (!currentlyReadFromOwnMemory())
|
||||
{
|
||||
/// Don't need to store unread data anymore
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <Poco/Net/NetException.h>
|
||||
|
||||
#include <IO/ReadBufferFromPocoSocket.h>
|
||||
#include <IO/TimeoutSetter.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/NetException.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
@ -27,23 +28,23 @@ bool ReadBufferFromPocoSocket::nextImpl()
|
||||
ssize_t bytes_read = 0;
|
||||
Stopwatch watch;
|
||||
|
||||
int flags = 0;
|
||||
if (async_callback)
|
||||
flags |= MSG_DONTWAIT;
|
||||
|
||||
/// Add more details to exceptions.
|
||||
try
|
||||
{
|
||||
bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), internal_buffer.size(), flags);
|
||||
|
||||
/// If async_callback is specified, and read is blocking, run async_callback and try again later.
|
||||
/// If async_callback is specified, and read will block, run async_callback and try again later.
|
||||
/// It is expected that file descriptor may be polled externally.
|
||||
/// Note that receive timeout is not checked here. External code should check it while polling.
|
||||
while (bytes_read < 0 && async_callback && errno == EAGAIN)
|
||||
{
|
||||
while (async_callback && !socket.poll(0, Poco::Net::Socket::SELECT_READ))
|
||||
async_callback(socket.impl()->sockfd(), socket.getReceiveTimeout(), socket_description);
|
||||
bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), internal_buffer.size(), flags);
|
||||
}
|
||||
|
||||
/// receiveBytes in SecureStreamSocket throws TimeoutException after max(receive_timeout, send_timeout),
|
||||
/// but we want to get this exception exactly after receive_timeout. So, set send_timeout = receive_timeout
|
||||
/// before receiveBytes.
|
||||
std::unique_ptr<TimeoutSetter> timeout_setter = nullptr;
|
||||
if (socket.secure())
|
||||
timeout_setter = std::make_unique<TimeoutSetter>(dynamic_cast<Poco::Net::StreamSocket &>(socket), socket.getReceiveTimeout(), socket.getReceiveTimeout());
|
||||
|
||||
bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), internal_buffer.size());
|
||||
}
|
||||
catch (const Poco::Net::NetException & e)
|
||||
{
|
||||
|
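A sketch of the "poll before read" pattern used above, with plain POSIX poll() and read() as assumed stand-ins for Poco's Socket::poll and receiveBytes: while the descriptor has no data ready, yield to a caller-provided callback so it can be awaited elsewhere, then do an ordinary blocking read.

#include <poll.h>
#include <sys/types.h>
#include <unistd.h>
#include <cstddef>
#include <functional>

ssize_t readWithAsyncCallback(int fd, char * buf, size_t size, const std::function<void(int)> & async_callback)
{
    if (async_callback)
    {
        pollfd pfd{fd, POLLIN, 0};
        /// poll() with zero timeout returns 0 when nothing is ready yet.
        while (poll(&pfd, 1, /* timeout_ms = */ 0) == 0)
            async_callback(fd);   /// the descriptor is expected to be polled externally
    }

    return ::read(fd, buf, size);
}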
@ -1,4 +1,4 @@
|
||||
#include "TimeoutSetter.h"
|
||||
#include <IO/TimeoutSetter.h>
|
||||
|
||||
#include <common/logger_useful.h>
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <Poco/Net/NetException.h>
|
||||
|
||||
#include <IO/WriteBufferFromPocoSocket.h>
|
||||
#include <IO/TimeoutSetter.h>
|
||||
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/NetException.h>
|
||||
@ -40,6 +41,13 @@ void WriteBufferFromPocoSocket::nextImpl()
|
||||
/// Add more details to exceptions.
|
||||
try
|
||||
{
|
||||
/// sendBytes in SecureStreamSocket throws TimeoutException after max(receive_timeout, send_timeout),
|
||||
/// but we want to get this exception exactly after send_timeout. So, set receive_timeout = send_timeout
|
||||
/// before sendBytes.
|
||||
std::unique_ptr<TimeoutSetter> timeout_setter = nullptr;
|
||||
if (socket.secure())
|
||||
timeout_setter = std::make_unique<TimeoutSetter>(dynamic_cast<Poco::Net::StreamSocket &>(socket), socket.getSendTimeout(), socket.getSendTimeout());
|
||||
|
||||
res = socket.impl()->sendBytes(working_buffer.begin() + bytes_written, offset() - bytes_written);
|
||||
}
|
||||
catch (const Poco::Net::NetException & e)
|
||||
|
@ -120,7 +120,7 @@ WriteBufferFromS3::~WriteBufferFromS3()
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
tryLogCurrentException(log);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -107,7 +107,7 @@ void ZlibDeflatingWriteBuffer::finish()
|
||||
try
|
||||
{
|
||||
finishImpl();
|
||||
out->next();
|
||||
out->finalize();
|
||||
finished = true;
|
||||
}
|
||||
catch (...)
|
||||
|
@ -94,7 +94,7 @@ void ZstdDeflatingWriteBuffer::finish()
|
||||
try
|
||||
{
|
||||
finishImpl();
|
||||
out->next();
|
||||
out->finalize();
|
||||
finished = true;
|
||||
}
|
||||
catch (...)
|
||||
|
@ -6,11 +6,6 @@
|
||||
#include <IO/ConcatReadBuffer.h>
|
||||
#include <IO/PeekableReadBuffer.h>
|
||||
|
||||
namespace DB::ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
static void readAndAssert(DB::ReadBuffer & buf, const char * str)
|
||||
{
|
||||
size_t n = strlen(str);
|
||||
@ -48,20 +43,6 @@ try
|
||||
readAndAssert(peekable, "01234");
|
||||
}
|
||||
|
||||
#ifndef ABORT_ON_LOGICAL_ERROR
|
||||
bool exception = false;
|
||||
try
|
||||
{
|
||||
peekable.rollbackToCheckpoint();
|
||||
}
|
||||
catch (DB::Exception & e)
|
||||
{
|
||||
if (e.code() != DB::ErrorCodes::LOGICAL_ERROR)
|
||||
throw;
|
||||
exception = true;
|
||||
}
|
||||
ASSERT_TRUE(exception);
|
||||
#endif
|
||||
assertAvailable(peekable, "56789");
|
||||
|
||||
readAndAssert(peekable, "56");
|
||||
|
@ -50,6 +50,7 @@ SRCS(
|
||||
ReadBufferFromPocoSocket.cpp
|
||||
ReadHelpers.cpp
|
||||
SeekAvoidingReadBuffer.cpp
|
||||
TimeoutSetter.cpp
|
||||
UseSSL.cpp
|
||||
WriteBufferFromFile.cpp
|
||||
WriteBufferFromFileBase.cpp
|
||||
|
@ -818,13 +818,10 @@ private:
|
||||
if (!min_id)
|
||||
min_id = getMinIDToFinishLoading(forced_to_reload);
|
||||
|
||||
if (info->state_id >= min_id)
|
||||
return true; /// stop
|
||||
|
||||
if (info->loading_id < min_id)
|
||||
startLoading(*info, forced_to_reload, *min_id);
|
||||
|
||||
/// Wait for the next event if loading wasn't completed, and stop otherwise.
|
||||
/// Wait for the next event if loading wasn't completed, or stop otherwise.
|
||||
return (info->state_id >= min_id);
|
||||
};
|
||||
|
||||
@ -850,9 +847,6 @@ private:
|
||||
if (filter && !filter(name))
|
||||
continue;
|
||||
|
||||
if (info.state_id >= min_id)
|
||||
continue;
|
||||
|
||||
if (info.loading_id < min_id)
|
||||
startLoading(info, forced_to_reload, *min_id);
|
||||
|
||||
|
@ -260,7 +260,8 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
|
||||
renamed = true;
|
||||
}
|
||||
|
||||
database->loadStoredObjects(context, has_force_restore_data_flag, create.attach && force_attach);
|
||||
/// We use the global context here because the storage's lifetime is longer than the query context's lifetime.
|
||||
database->loadStoredObjects(context.getGlobalContext(), has_force_restore_data_flag, create.attach && force_attach);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -970,7 +971,8 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
|
||||
if (create.as_table_function)
|
||||
{
|
||||
const auto & factory = TableFunctionFactory::instance();
|
||||
res = factory.get(create.as_table_function, context)->execute(create.as_table_function, context, create.table, properties.columns);
|
||||
auto table_func = factory.get(create.as_table_function, context);
|
||||
res = table_func->execute(create.as_table_function, context, create.table, properties.columns);
|
||||
res->renameInMemory({create.database, create.table, create.uuid});
|
||||
}
|
||||
else
|
||||
|
@ -393,7 +393,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
|
||||
view = nullptr;
|
||||
}
|
||||
|
||||
if (try_move_to_prewhere && storage && query.where() && !query.prewhere() && !query.final())
|
||||
if (try_move_to_prewhere && storage && query.where() && !query.prewhere())
|
||||
{
|
||||
/// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable
|
||||
if (const auto & column_sizes = storage->getColumnSizes(); !column_sizes.empty())
|
||||
|
@ -9,8 +9,6 @@
|
||||
#include <Common/ActionBlocker.h>
|
||||
#include <common/types.h>
|
||||
|
||||
#include <Poco/Net/HTMLForm.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <map>
|
||||
#include <shared_mutex>
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <Interpreters/WindowDescription.h>
|
||||
|
||||
#include <Core/Field.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
|
||||
@ -60,7 +61,7 @@ void WindowFrame::toString(WriteBuffer & buf) const
|
||||
}
|
||||
else
|
||||
{
|
||||
buf << abs(begin_offset);
|
||||
buf << applyVisitor(FieldVisitorToString(), begin_offset);
|
||||
buf << " "
|
||||
<< (begin_preceding ? "PRECEDING" : "FOLLOWING");
|
||||
}
|
||||
@ -77,7 +78,7 @@ void WindowFrame::toString(WriteBuffer & buf) const
|
||||
}
|
||||
else
|
||||
{
|
||||
buf << abs(end_offset);
|
||||
buf << applyVisitor(FieldVisitorToString(), end_offset);
|
||||
buf << " "
|
||||
<< (end_preceding ? "PRECEDING" : "FOLLOWING");
|
||||
}
|
||||
@ -121,23 +122,33 @@ void WindowFrame::checkValid() const
|
||||
if (end_type == BoundaryType::Offset
|
||||
&& begin_type == BoundaryType::Offset)
|
||||
{
|
||||
// Frame starting with following rows can't have preceding rows.
|
||||
if (!(end_preceding && !begin_preceding))
|
||||
// Frame start offset must be less or equal that the frame end offset.
|
||||
bool begin_less_equal_end;
|
||||
if (begin_preceding && end_preceding)
|
||||
{
|
||||
// Frame start offset must be less or equal that the frame end offset.
|
||||
const bool begin_before_end
|
||||
= begin_offset * (begin_preceding ? -1 : 1)
|
||||
<= end_offset * (end_preceding ? -1 : 1);
|
||||
|
||||
if (!begin_before_end)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Frame start offset {} {} does not precede the frame end offset {} {}",
|
||||
begin_offset, begin_preceding ? "PRECEDING" : "FOLLOWING",
|
||||
end_offset, end_preceding ? "PRECEDING" : "FOLLOWING");
|
||||
}
|
||||
return;
|
||||
begin_less_equal_end = begin_offset >= end_offset;
|
||||
}
|
||||
else if (begin_preceding && !end_preceding)
|
||||
{
|
||||
begin_less_equal_end = true;
|
||||
}
|
||||
else if (!begin_preceding && end_preceding)
|
||||
{
|
||||
begin_less_equal_end = false;
|
||||
}
|
||||
else /* if (!begin_preceding && !end_preceding) */
|
||||
{
|
||||
begin_less_equal_end = begin_offset <= end_offset;
|
||||
}
|
||||
|
||||
if (!begin_less_equal_end)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Frame start offset {} {} does not precede the frame end offset {} {}",
|
||||
begin_offset, begin_preceding ? "PRECEDING" : "FOLLOWING",
|
||||
end_offset, end_preceding ? "PRECEDING" : "FOLLOWING");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
|
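The four-case comparison above, extracted as a standalone predicate (a sketch, not the ClickHouse code): with nonnegative offsets, a frame is valid when the start boundary does not come after the end boundary.

#include <cassert>
#include <cstdint>

bool frameOffsetsValid(uint64_t begin_offset, bool begin_preceding, uint64_t end_offset, bool end_preceding)
{
    if (begin_preceding && end_preceding)
        return begin_offset >= end_offset;   /// N PRECEDING .. M PRECEDING needs N >= M
    if (begin_preceding && !end_preceding)
        return true;                         /// a PRECEDING start never passes a FOLLOWING end
    if (!begin_preceding && end_preceding)
        return false;                        /// a FOLLOWING start cannot precede a PRECEDING end
    return begin_offset <= end_offset;       /// N FOLLOWING .. M FOLLOWING needs N <= M
}

int main()
{
    assert(frameOffsetsValid(2, true, 1, true));    /// 2 PRECEDING .. 1 PRECEDING
    assert(frameOffsetsValid(1, true, 3, false));   /// 1 PRECEDING .. 3 FOLLOWING
    assert(!frameOffsetsValid(1, false, 2, true));  /// 1 FOLLOWING .. 2 PRECEDING
    assert(frameOffsetsValid(1, false, 4, false));  /// 1 FOLLOWING .. 4 FOLLOWING
}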
@ -44,14 +44,13 @@ struct WindowFrame
|
||||
// Offset might be both preceding and following, controlled by begin_preceding,
|
||||
// but the offset value must be positive.
|
||||
BoundaryType begin_type = BoundaryType::Unbounded;
|
||||
// This should have been a Field but I'm getting some crazy linker errors.
|
||||
int64_t begin_offset = 0;
|
||||
Field begin_offset = 0;
|
||||
bool begin_preceding = true;
|
||||
|
||||
// Here as well, Unbounded can only be UNBOUNDED FOLLOWING, and end_preceding
|
||||
// must be false.
|
||||
BoundaryType end_type = BoundaryType::Current;
|
||||
int64_t end_offset = 0;
|
||||
Field end_offset = 0;
|
||||
bool end_preceding = false;
|
||||
|
||||
|
||||
|
@ -377,6 +377,11 @@ Field convertFieldToType(const Field & from_value, const IDataType & to_type, co
|
||||
else if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(&to_type))
|
||||
{
|
||||
const IDataType & nested_type = *nullable_type->getNestedType();
|
||||
|
||||
/// NULL remains NULL after any conversion.
|
||||
if (WhichDataType(nested_type).isNothing())
|
||||
return {};
|
||||
|
||||
if (from_type_hint && from_type_hint->equals(nested_type))
|
||||
return from_value;
|
||||
return convertFieldToTypeImpl(from_value, nested_type, from_type_hint);
|
||||
|
@ -290,8 +290,6 @@ std::optional<Blocks> evaluateExpressionOverConstantCondition(const ASTPtr & nod
|
||||
{
|
||||
Blocks result;
|
||||
|
||||
// TODO: `node` may be always-false literal.
|
||||
|
||||
if (const auto * fn = node->as<ASTFunction>())
|
||||
{
|
||||
const auto dnf = analyzeFunction(fn, target_expr);
|
||||
@ -350,6 +348,14 @@ std::optional<Blocks> evaluateExpressionOverConstantCondition(const ASTPtr & nod
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (const auto * literal = node->as<ASTLiteral>())
|
||||
{
|
||||
// Check if it's always true or false.
|
||||
if (literal->value.getType() == Field::Types::UInt64 && literal->value.get<UInt64>() == 0)
|
||||
return {result};
|
||||
else
|
||||
return {};
|
||||
}
|
||||
|
||||
return {result};
|
||||
}
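The literal branch above distinguishes "always false" (a UInt64 zero, meaning no blocks can match) from "no constraint" (any other literal). A tiny sketch of that decision with simplified types standing in for Field and Blocks:

#include <cstdint>
#include <optional>
#include <string>
#include <variant>
#include <vector>

using Constraint = std::vector<int>;   /// stand-in for Blocks

/// Empty optional: the condition imposes no constraint.
/// Engaged but empty vector: the condition is always false, nothing can match.
std::optional<Constraint> evaluateConstantLiteral(const std::variant<uint64_t, std::string> & literal)
{
    if (const auto * value = std::get_if<uint64_t>(&literal); value && *value == 0)
        return Constraint{};

    return std::nullopt;
}

int main()
{
    auto always_false = evaluateConstantLiteral(uint64_t{0});
    return (always_false && always_false->empty()) ? 0 : 1;
}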
|
||||
|
@ -137,8 +137,8 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F
    if (window())
    {
        s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str <<
            "WINDOW " << (s.hilite ? hilite_none : "");
        window()->formatImpl(s, state, frame);
            "WINDOW" << (s.hilite ? hilite_none : "");
        window()->as<ASTExpressionList &>().formatImplMultiline(s, state, frame);
    }

    if (orderBy())
@ -35,6 +35,8 @@ String ASTWindowDefinition::getID(char) const
void ASTWindowDefinition::formatImpl(const FormatSettings & settings,
    FormatState & state, FormatStateStacked format_frame) const
{
    format_frame.expression_list_prepend_whitespace = false;

    if (partition_by)
    {
        settings.ostr << "PARTITION BY ";
@ -70,7 +72,8 @@ void ASTWindowDefinition::formatImpl(const FormatSettings & settings,
        }
        else
        {
            settings.ostr << abs(frame.begin_offset);
            settings.ostr << applyVisitor(FieldVisitorToString(),
                frame.begin_offset);
            settings.ostr << " "
                << (!frame.begin_preceding ? "FOLLOWING" : "PRECEDING");
        }
@ -85,7 +88,8 @@ void ASTWindowDefinition::formatImpl(const FormatSettings & settings,
        }
        else
        {
            settings.ostr << abs(frame.end_offset);
            settings.ostr << applyVisitor(FieldVisitorToString(),
                frame.end_offset);
            settings.ostr << " "
                << (!frame.end_preceding ? "FOLLOWING" : "PRECEDING");
        }
@ -581,30 +581,20 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
    else if (parser_literal.parse(pos, ast_literal, expected))
    {
        const Field & value = ast_literal->as<ASTLiteral &>().value;
        if (!isInt64FieldType(value.getType()))
        if ((node->frame.type == WindowFrame::FrameType::Rows
                || node->frame.type == WindowFrame::FrameType::Groups)
            && !(value.getType() == Field::Types::UInt64
                || (value.getType() == Field::Types::Int64
                    && value.get<Int64>() >= 0)))
        {
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "Only integer frame offsets are supported, '{}' is not supported.",
                "Frame offset for '{}' frame must be a nonnegative integer, '{}' of type '{}' given.",
                WindowFrame::toString(node->frame.type),
                applyVisitor(FieldVisitorToString(), value),
                Field::Types::toString(value.getType()));
        }
        node->frame.begin_offset = value.get<Int64>();
        node->frame.begin_offset = value;
        node->frame.begin_type = WindowFrame::BoundaryType::Offset;
        // We can easily get a UINT64_MAX here, which doesn't even fit into
        // int64_t. Not sure what checks we are going to need here after we
        // support floats and dates.
        if (node->frame.begin_offset > INT_MAX || node->frame.begin_offset < INT_MIN)
        {
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "Frame offset must be between {} and {}, but {} is given",
                INT_MAX, INT_MIN, node->frame.begin_offset);
        }

        if (node->frame.begin_offset < 0)
        {
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "Frame start offset must be greater than zero, {} given",
                node->frame.begin_offset);
        }
    }
    else
    {
@ -652,28 +642,20 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
    else if (parser_literal.parse(pos, ast_literal, expected))
    {
        const Field & value = ast_literal->as<ASTLiteral &>().value;
        if (!isInt64FieldType(value.getType()))
        if ((node->frame.type == WindowFrame::FrameType::Rows
                || node->frame.type == WindowFrame::FrameType::Groups)
            && !(value.getType() == Field::Types::UInt64
                || (value.getType() == Field::Types::Int64
                    && value.get<Int64>() >= 0)))
        {
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "Only integer frame offsets are supported, '{}' is not supported.",
                "Frame offset for '{}' frame must be a nonnegative integer, '{}' of type '{}' given.",
                WindowFrame::toString(node->frame.type),
                applyVisitor(FieldVisitorToString(), value),
                Field::Types::toString(value.getType()));
        }
        node->frame.end_offset = value.get<Int64>();
        node->frame.end_offset = value;
        node->frame.end_type = WindowFrame::BoundaryType::Offset;

        if (node->frame.end_offset > INT_MAX || node->frame.end_offset < INT_MIN)
        {
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "Frame offset must be between {} and {}, but {} is given",
                INT_MAX, INT_MIN, node->frame.end_offset);
        }

        if (node->frame.end_offset < 0)
        {
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "Frame end offset must be greater than zero, {} given",
                node->frame.end_offset);
        }
    }
    else
    {
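The two parsing branches above apply the same new rule to the frame start and end offsets: for ROWS and GROUPS frames the literal must be a nonnegative integer, while other literal types are left for later validation against the ORDER BY column. A standalone sketch of that predicate (plain std::variant instead of Field, hypothetical names):

    #include <cstdint>
    #include <iostream>
    #include <variant>

    enum class FrameType { Rows, Groups, Range };

    // A literal offset as it might come out of a parser: unsigned, signed, or float.
    using Offset = std::variant<uint64_t, int64_t, double>;

    // Sketch: ROWS and GROUPS frames count rows, so the offset must be a
    // nonnegative integer; RANGE frames accept other types, which are
    // checked later against the ORDER BY column type.
    bool offsetAllowedForFrame(FrameType type, const Offset & offset)
    {
        if (type != FrameType::Rows && type != FrameType::Groups)
            return true;
        if (std::holds_alternative<uint64_t>(offset))
            return true;
        if (const auto * i = std::get_if<int64_t>(&offset))
            return *i >= 0;
        return false;
    }

    int main()
    {
        std::cout << offsetAllowedForFrame(FrameType::Rows, Offset{int64_t{2}}) << '\n'; // 1
        std::cout << offsetAllowedForFrame(FrameType::Rows, Offset{1.5}) << '\n';        // 0
        std::cout << offsetAllowedForFrame(FrameType::Range, Offset{1.5}) << '\n';       // 1
    }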
@ -275,7 +275,8 @@ Token Lexer::nextTokenImpl()
                else
                    ++pos;
            }
            return Token(TokenType::ErrorMultilineCommentIsNotClosed, token_begin, end);
            pos = end;
            return Token(TokenType::ErrorMultilineCommentIsNotClosed, token_begin, pos);
        }
    }
    return Token(TokenType::Slash, token_begin, pos);
@ -3,6 +3,7 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Common/Arena.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/getLeastSupertype.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/convertFieldToType.h>

@ -27,7 +28,8 @@ public:
    virtual ~IWindowFunction() = default;

    // Must insert the result for current_row.
    virtual void windowInsertResultInto(IColumn & to, const WindowTransform * transform) = 0;
    virtual void windowInsertResultInto(const WindowTransform * transform,
        size_t function_index) = 0;
};

// Compares ORDER BY column values at given rows to find the boundaries of frame:
@ -37,7 +39,7 @@ template <typename ColumnType>
static int compareValuesWithOffset(const IColumn * _compared_column,
    size_t compared_row, const IColumn * _reference_column,
    size_t reference_row,
    uint64_t _offset,
    const Field & _offset,
    bool offset_is_preceding)
{
    // Casting the columns to the known type here makes it faster, probably
@ -46,7 +48,8 @@ static int compareValuesWithOffset(const IColumn * _compared_column,
        _compared_column);
    const auto * reference_column = assert_cast<const ColumnType *>(
        _reference_column);
    const auto offset = static_cast<typename ColumnType::ValueType>(_offset);
    const auto offset = _offset.get<typename ColumnType::ValueType>();
    assert(offset >= 0);

    const auto compared_value_data = compared_column->getDataAt(compared_row);
    assert(compared_value_data.size == sizeof(typename ColumnType::ValueType));
@ -101,6 +104,53 @@ static int compareValuesWithOffset(const IColumn * _compared_column,
    }
}

// A specialization of compareValuesWithOffset for floats.
template <typename ColumnType>
static int compareValuesWithOffsetFloat(const IColumn * _compared_column,
    size_t compared_row, const IColumn * _reference_column,
    size_t reference_row,
    const Field & _offset,
    bool offset_is_preceding)
{
    // Casting the columns to the known type here makes it faster, probably
    // because the getData call can be devirtualized.
    const auto * compared_column = assert_cast<const ColumnType *>(
        _compared_column);
    const auto * reference_column = assert_cast<const ColumnType *>(
        _reference_column);
    const auto offset = _offset.get<typename ColumnType::ValueType>();
    assert(offset >= 0);

    const auto compared_value_data = compared_column->getDataAt(compared_row);
    assert(compared_value_data.size == sizeof(typename ColumnType::ValueType));
    auto compared_value = unalignedLoad<typename ColumnType::ValueType>(
        compared_value_data.data);

    const auto reference_value_data = reference_column->getDataAt(reference_row);
    assert(reference_value_data.size == sizeof(typename ColumnType::ValueType));
    auto reference_value = unalignedLoad<typename ColumnType::ValueType>(
        reference_value_data.data);

    // Floats overflow to Inf and the comparison will work normally, so we don't
    // have to do anything.
    if (offset_is_preceding)
    {
        reference_value -= offset;
    }
    else
    {
        reference_value += offset;
    }

    const auto result = compared_value < reference_value ? -1
        : compared_value == reference_value ? 0 : 1;

    // fmt::print(stderr, "compared {}, offset {}, reference {}, result {}\n",
    //     compared_value, offset, reference_value, result);

    return result;
}

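The float specialization above relies on the fact that float arithmetic overflows to Inf, so the shifted reference value can be compared directly without extra range checks. A self-contained sketch of the same three-way comparison on plain values (hypothetical free function, not the column-based ClickHouse code):

    #include <iostream>

    // Sketch: three-way comparison of a compared value against a reference
    // value shifted by a nonnegative offset ("N PRECEDING" subtracts,
    // "N FOLLOWING" adds).
    template <typename T>
    int compareWithOffset(T compared_value, T reference_value, T offset,
                          bool offset_is_preceding)
    {
        if (offset_is_preceding)
            reference_value -= offset;
        else
            reference_value += offset;

        return compared_value < reference_value ? -1
            : compared_value == reference_value ? 0 : 1;
    }

    int main()
    {
        // Compare 3.0 against reference 4.0 shifted back by 1.5 (-> 2.5): prints 1.
        std::cout << compareWithOffset(3.0, 4.0, 1.5, /*offset_is_preceding=*/true) << '\n';
    }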
// Helper macros to dispatch on type of the ORDER BY column
#define APPLY_FOR_ONE_TYPE(FUNCTION, TYPE) \
else if (typeid_cast<const TYPE *>(column)) \
@ -114,14 +164,20 @@ if (false) /* NOLINT */ \
{ \
    /* Do nothing, a starter condition. */ \
} \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int8>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt8>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int16>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt16>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int32>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt32>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int64>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt64>) \
\
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int8>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int16>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int32>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int64>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int128>) \
\
APPLY_FOR_ONE_TYPE(FUNCTION##Float, ColumnVector<Float32>) \
APPLY_FOR_ONE_TYPE(FUNCTION##Float, ColumnVector<Float64>) \
\
else \
{ \
    throw Exception(ErrorCodes::NOT_IMPLEMENTED, \
@ -193,9 +249,28 @@ WindowTransform::WindowTransform(const Block & input_header_,
        == WindowFrame::BoundaryType::Offset))
    {
        assert(order_by_indices.size() == 1);
        const IColumn * column = input_header.getByPosition(
            order_by_indices[0]).column.get();
        const auto & entry = input_header.getByPosition(order_by_indices[0]);
        const IColumn * column = entry.column.get();
        APPLY_FOR_TYPES(compareValuesWithOffset)

        // Check that the offset type matches the window type.
        // Convert the offsets to the ORDER BY column type. We can't just check
        // that it matches, because e.g. the int literals are always (U)Int64,
        // but the column might be Int8 and so on.
        if (window_description.frame.begin_type
            == WindowFrame::BoundaryType::Offset)
        {
            window_description.frame.begin_offset = convertFieldToTypeOrThrow(
                window_description.frame.begin_offset,
                *entry.type);
        }
        if (window_description.frame.end_type
            == WindowFrame::BoundaryType::Offset)
        {
            window_description.frame.end_offset = convertFieldToTypeOrThrow(
                window_description.frame.end_offset,
                *entry.type);
        }
    }
}

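The conversion step above exists because integer literals are parsed as (U)Int64 while the ORDER BY column may use a narrower or different type, so the offset is converted to the column type once, up front, and the per-row comparison stays simple. A minimal standalone illustration of the idea (hypothetical helper; the real code goes through Field and convertFieldToTypeOrThrow):

    #include <cstdint>
    #include <iostream>
    #include <stdexcept>

    // Sketch: narrow a parsed UInt64 literal to the ORDER BY column's value
    // type once, before any per-row comparisons, throwing if it does not fit.
    template <typename ColumnValue>
    ColumnValue convertOffsetOrThrow(uint64_t parsed_offset)
    {
        const auto converted = static_cast<ColumnValue>(parsed_offset);
        if (static_cast<uint64_t>(converted) != parsed_offset)
            throw std::out_of_range("frame offset does not fit the ORDER BY column type");
        return converted;
    }

    int main()
    {
        std::cout << static_cast<int>(convertOffsetOrThrow<int8_t>(5)) << '\n'; // 5
        try { convertOffsetOrThrow<int8_t>(1000); }
        catch (const std::exception & e) { std::cout << e.what() << '\n'; }
    }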
@ -391,7 +466,7 @@ void WindowTransform::advanceFrameStartRowsOffset()
{
    // Just recalculate it each time by walking blocks.
    const auto [moved_row, offset_left] = moveRowNumber(current_row,
        window_description.frame.begin_offset
        window_description.frame.begin_offset.get<UInt64>()
            * (window_description.frame.begin_preceding ? -1 : 1));

    frame_start = moved_row;
@ -638,7 +713,7 @@ void WindowTransform::advanceFrameEndRowsOffset()
    // Walk the specified offset from the current row. The "+1" is needed
    // because the frame_end is a past-the-end pointer.
    const auto [moved_row, offset_left] = moveRowNumber(current_row,
        window_description.frame.end_offset
        window_description.frame.end_offset.get<UInt64>()
            * (window_description.frame.end_preceding ? -1 : 1)
        + 1);

@ -852,14 +927,14 @@ void WindowTransform::writeOutCurrentRow()
    for (size_t wi = 0; wi < workspaces.size(); ++wi)
    {
        auto & ws = workspaces[wi];
        IColumn * result_column = block.output_columns[wi].get();

        if (ws.window_function_impl)
        {
            ws.window_function_impl->windowInsertResultInto(*result_column, this);
            ws.window_function_impl->windowInsertResultInto(this, wi);
        }
        else
        {
            IColumn * result_column = block.output_columns[wi].get();
            const auto * a = ws.aggregate_function.get();
            auto * buf = ws.aggregate_function_state.data();
            // FIXME does it also allocate the result on the arena?
@ -1280,8 +1355,11 @@ struct WindowFunctionRank final : public WindowFunction
    DataTypePtr getReturnType() const override
    { return std::make_shared<DataTypeUInt64>(); }

    void windowInsertResultInto(IColumn & to, const WindowTransform * transform) override
    void windowInsertResultInto(const WindowTransform * transform,
        size_t function_index) override
    {
        IColumn & to = *transform->blockAt(transform->current_row)
            .output_columns[function_index];
        assert_cast<ColumnUInt64 &>(to).getData().push_back(
            transform->peer_group_start_row_number);
    }
@ -1297,8 +1375,11 @@ struct WindowFunctionDenseRank final : public WindowFunction
    DataTypePtr getReturnType() const override
    { return std::make_shared<DataTypeUInt64>(); }

    void windowInsertResultInto(IColumn & to, const WindowTransform * transform) override
    void windowInsertResultInto(const WindowTransform * transform,
        size_t function_index) override
    {
        IColumn & to = *transform->blockAt(transform->current_row)
            .output_columns[function_index];
        assert_cast<ColumnUInt64 &>(to).getData().push_back(
            transform->peer_group_number);
    }
@ -1314,13 +1395,123 @@ struct WindowFunctionRowNumber final : public WindowFunction
    DataTypePtr getReturnType() const override
    { return std::make_shared<DataTypeUInt64>(); }

    void windowInsertResultInto(IColumn & to, const WindowTransform * transform) override
    void windowInsertResultInto(const WindowTransform * transform,
        size_t function_index) override
    {
        IColumn & to = *transform->blockAt(transform->current_row)
            .output_columns[function_index];
        assert_cast<ColumnUInt64 &>(to).getData().push_back(
            transform->current_row_number);
    }
};

// ClickHouse-specific variant of lag/lead that respects the window frame.
template <bool is_lead>
struct WindowFunctionLagLeadInFrame final : public WindowFunction
{
    WindowFunctionLagLeadInFrame(const std::string & name_,
        const DataTypes & argument_types_, const Array & parameters_)
        : WindowFunction(name_, argument_types_, parameters_)
    {
        if (!parameters.empty())
        {
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "Function {} cannot be parameterized", name_);
        }

        if (argument_types.empty())
        {
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "Function {} takes at least one argument", name_);
        }

        if (argument_types.size() == 1)
        {
            return;
        }

        if (!isInt64FieldType(argument_types[1]->getDefault().getType()))
        {
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "Offset must be an integer, '{}' given",
                argument_types[1]->getName());
        }

        if (argument_types.size() == 2)
        {
            return;
        }

        if (!getLeastSupertype({argument_types[0], argument_types[2]}))
        {
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "The default value type '{}' is not convertible to the argument type '{}'",
                argument_types[2]->getName(),
                argument_types[0]->getName());
        }

        if (argument_types.size() > 3)
        {
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "Function '{}' accepts at most 3 arguments, {} given",
                name, argument_types.size());
        }
    }

    DataTypePtr getReturnType() const override
    { return argument_types[0]; }

    void windowInsertResultInto(const WindowTransform * transform,
        size_t function_index) override
    {
        const auto & current_block = transform->blockAt(transform->current_row);
        IColumn & to = *current_block.output_columns[function_index];
        const auto & workspace = transform->workspaces[function_index];

        int offset = 1;
        if (argument_types.size() > 1)
        {
            offset = (*current_block.input_columns[
                workspace.argument_column_indices[1]])[
                    transform->current_row.row].get<Int64>();
            if (offset < 0)
            {
                throw Exception(ErrorCodes::BAD_ARGUMENTS,
                    "The offset for function {} must be nonnegative, {} given",
                    getName(), offset);
            }
        }

        const auto [target_row, offset_left] = transform->moveRowNumber(
            transform->current_row, offset * (is_lead ? 1 : -1));

        if (offset_left != 0
            || target_row < transform->frame_start
            || transform->frame_end <= target_row)
        {
            // Offset is outside the frame.
            if (argument_types.size() > 2)
            {
                // Column with default values is specified.
                to.insertFrom(*current_block.input_columns[
                    workspace.argument_column_indices[2]],
                    transform->current_row.row);
            }
            else
            {
                to.insertDefault();
            }
        }
        else
        {
            // Offset is inside the frame.
            to.insertFrom(*transform->blockAt(target_row).input_columns[
                workspace.argument_column_indices[0]],
                target_row.row);
        }
    }
};

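The lookup performed by WindowFunctionLagLeadInFrame above can be summarized on a plain vector: move `offset` rows forward for lead or backward for lag, and fall back to the default when the target row leaves the frame. A minimal standalone sketch of that behavior (hypothetical names; the real implementation walks blocks via moveRowNumber and respects the partition):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Sketch: lagInFrame/leadInFrame semantics over a materialized frame
    // [frame_start, frame_end). Rows outside the frame yield the default value.
    template <bool is_lead>
    int lagLeadInFrame(const std::vector<int> & rows, size_t current_row,
                       size_t offset, size_t frame_start, size_t frame_end,
                       int default_value)
    {
        const long long target = static_cast<long long>(current_row)
            + static_cast<long long>(offset) * (is_lead ? 1 : -1);

        if (target < static_cast<long long>(frame_start)
            || target >= static_cast<long long>(frame_end))
            return default_value;

        return rows[static_cast<size_t>(target)];
    }

    int main()
    {
        const std::vector<int> rows{10, 20, 30, 40};
        // leadInFrame(x, 1) at row 2 within frame [0, 4) -> 40
        std::cout << lagLeadInFrame<true>(rows, 2, 1, 0, 4, -1) << '\n';
        // lagInFrame(x, 3) at row 2 within frame [0, 4) -> outside, default -1
        std::cout << lagLeadInFrame<false>(rows, 2, 3, 0, 4, -1) << '\n';
    }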
void registerWindowFunctions(AggregateFunctionFactory & factory)
{
    // Why didn't I implement lag/lead yet? Because they are a mess. I imagine
@ -1332,9 +1523,10 @@ void registerWindowFunctions(AggregateFunctionFactory & factory)
    // the whole partition like Postgres does, because using a linear amount
    // of additional memory is not an option when we have a lot of data. We must
    // be able to process at least the lag/lead in streaming fashion.
    // Our best bet is probably rewriting, say `lag(value, offset)` to
    // `any(value) over (rows between offset preceding and offset preceding)`,
    // at the query planning stage.
    // A partial solution for constant offsets is rewriting, say `lag(value, offset)`
    // to `any(value) over (rows between offset preceding and offset preceding)`.
    // We also implement non-standard functions `lag/leadInFrame`, that are
    // analogous to `lag/lead`, but respect the frame.
    // Functions like cume_dist() do require materializing the entire
    // partition, but it's probably also simpler to implement them by rewriting
    // to a (rows between unbounded preceding and unbounded following) frame,
@ -1360,6 +1552,20 @@ void registerWindowFunctions(AggregateFunctionFactory & factory)
        return std::make_shared<WindowFunctionRowNumber>(name, argument_types,
            parameters);
    });

    factory.registerFunction("lagInFrame", [](const std::string & name,
        const DataTypes & argument_types, const Array & parameters)
    {
        return std::make_shared<WindowFunctionLagLeadInFrame<false>>(
            name, argument_types, parameters);
    });

    factory.registerFunction("leadInFrame", [](const std::string & name,
        const DataTypes & argument_types, const Array & parameters)
    {
        return std::make_shared<WindowFunctionLagLeadInFrame<true>>(
            name, argument_types, parameters);
    });
}

}

@ -110,7 +110,9 @@ public:
    Status prepare() override;
    void work() override;

private:
    /*
     * Implementation details.
     */
    void advancePartitionEnd();

    bool arePeers(const RowNumber & x, const RowNumber & y) const;
@ -321,10 +323,7 @@ public:
    int (* compare_values_with_offset) (
        const IColumn * compared_column, size_t compared_row,
        const IColumn * reference_column, size_t reference_row,
        // We can make it a Field later if we need the Decimals. Now we only
        // have ints and datetime, and the underlying Field type for them is
        // uint64_t anyway.
        uint64_t offset,
        const Field & offset,
        bool offset_is_preceding);
};