Merge branch 'master' of github.com:ClickHouse/ClickHouse into disk-async-read

kssenii 2021-10-27 22:47:47 +03:00
commit 9461e225bb
256 changed files with 8057 additions and 1576 deletions

.github/workflows/cancel.yml vendored Normal file

@@ -0,0 +1,13 @@
name: Cancel
on: # yamllint disable-line rule:truthy
workflow_run:
workflows: ["CIGithubActions"]
types:
- requested
jobs:
cancel:
runs-on: [self-hosted, style-checker]
steps:
- uses: styfle/cancel-workflow-action@0.9.1
with:
workflow_id: ${{ github.event.workflow.id }}


@@ -1,4 +1,4 @@
name: Ligthweight GithubActions
name: CIGithubActions
on: # yamllint disable-line rule:truthy
pull_request:
types:
@@ -11,20 +11,25 @@ on: # yamllint disable-line rule:truthy
- master
jobs:
CheckLabels:
runs-on: [self-hosted]
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Labels check
run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 run_check.py
DockerHubPush:
needs: CheckLabels
runs-on: [self-hosted]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 docker_images_check.py
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
@@ -32,7 +37,7 @@ jobs:
path: ${{ runner.temp }}/docker_images_check/changed_images.json
StyleCheck:
needs: DockerHubPush
runs-on: [self-hosted]
runs-on: [self-hosted, style-checker]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
@@ -42,12 +47,82 @@ jobs:
- name: Check out repository code
uses: actions/checkout@v2
- name: Style Check
run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 style_check.py
BuilderDebDebug:
needs: DockerHubPush
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/build_check
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NUMBER: 7
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
BuilderReport:
needs: [BuilderDebDebug]
runs-on: [self-hosted, style-checker]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Report Builder
env:
TEMP_PATH: ${{runner.temp}}/report_check
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'ClickHouse build check (actions)'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cd $GITHUB_WORKSPACE/tests/ci
python3 build_report_check.py "$CHECK_NAME"
FastTest:
needs: DockerHubPush
runs-on: [self-hosted, builder]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Fast Test
env:
TEMP_PATH: ${{runner.temp}}/fasttest
REPO_COPY: ${{runner.temp}}/fasttest/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 fast_test_check.py
FinishCheck:
needs: [StyleCheck, DockerHubPush, CheckLabels]
runs-on: [self-hosted]
needs: [StyleCheck, DockerHubPush, CheckLabels, BuilderReport, FastTest]
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Finish label
run: cd $GITHUB_WORKSPACE/tests/ci && python3 finish_check.py
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 finish_check.py


@@ -149,8 +149,6 @@ if (ENABLE_FUZZING)
set (ENABLE_JEMALLOC 0)
set (ENABLE_CHECK_HEAVY_BUILDS 1)
set (GLIBC_COMPATIBILITY OFF)
set (ENABLE_PROTOBUF ON)
set (USE_INTERNAL_PROTOBUF_LIBRARY ON)
endif()
# Global libraries

contrib/NuRaft vendored

@@ -1 +1 @@
Subproject commit 7ecb16844af6a9c283ad432d85ecc2e7d1544676
Subproject commit d10351f312c1ae1ca3fdda433693dfbef3acfece

contrib/aws vendored

@@ -1 +1 @@
Subproject commit 06aa8759d17f2032ffd5efa83969270ca9ac727b
Subproject commit 00b03604543367d7e310cb0993973fdcb723ea79


@@ -17,6 +17,16 @@ endif ()
add_subdirectory("${protobuf_SOURCE_DIR}/cmake" "${protobuf_BINARY_DIR}")
if (ENABLE_FUZZING)
# `protoc` will be built with the sanitizer and could fail during the ClickHouse build
# This is easily reproduced in the oss-fuzz build pipeline
# To avoid this we could try to build `protoc` without any sanitizer with the option `-fno-sanitize=all`, but
# in this case we would face linker errors, because libcxx would still be built with the sanitizer
# So we simply suppress all of these failures with a combination of this flag and the environment variable
# export MSAN_OPTIONS=exit_code=0
target_compile_options(protoc PRIVATE "-fsanitize-recover=all")
endif()
# We don't want to stop compilation on warnings in protobuf's headers.
# The following line overrides the value assigned by the command target_include_directories() in libprotobuf.cmake
set_property(TARGET libprotobuf PROPERTY INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${protobuf_SOURCE_DIR}/src")


@@ -12,19 +12,19 @@ printenv
rm -f CMakeCache.txt
read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
# Hope, that the most part of files will be in cache, so we just link new executables
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_C_COMPILER="$CC" -DCMAKE_CXX_COMPILER="$CXX" -DENABLE_CLICKHOUSE_ODBC_BRIDGE=OFF \
-DENABLE_LIBRARIES=0 -DENABLE_SSL=1 -DUSE_INTERNAL_SSL_LIBRARY=1 -DUSE_UNWIND=ON -DENABLE_EMBEDDED_COMPILER=0 \
-DENABLE_EXAMPLES=0 -DENABLE_UTILS=0 -DENABLE_THINLTO=0 "-DSANITIZE=$SANITIZER" \
-DENABLE_FUZZING=1 -DFUZZER='libfuzzer' -DENABLE_TCMALLOC=0 -DENABLE_JEMALLOC=0 \
-DENABLE_CHECK_HEAVY_BUILDS=1 -DGLIBC_COMPATIBILITY=OFF "${CMAKE_FLAGS[@]}" ..
# Please, add or change flags directly in cmake
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_C_COMPILER="$CC" -DCMAKE_CXX_COMPILER="$CXX" \
-DSANITIZE="$SANITIZER" -DENABLE_FUZZING=1 -DFUZZER='libfuzzer' -DENABLE_PROTOBUF=1 -DUSE_INTERNAL_PROTOBUF_LIBRARY=1 "${CMAKE_FLAGS[@]}" ..
FUZZER_TARGETS=$(find ../src -name '*_fuzzer.cpp' -execdir basename {} .cpp ';' | tr '\n' ' ')
NUM_JOBS=$(($(nproc || grep -c ^processor /proc/cpuinfo)))
mkdir -p /output/fuzzers
for FUZZER_TARGET in $FUZZER_TARGETS
do
# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
ninja $NINJA_FLAGS $FUZZER_TARGET
ninja $NINJA_FLAGS $FUZZER_TARGET -j $NUM_JOBS
# Find this binary in build directory and strip it
FUZZER_PATH=$(find ./src -name "$FUZZER_TARGET")
strip --strip-unneeded "$FUZZER_PATH"


@@ -10,7 +10,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
python3-pip \
pylint \
yamllint \
&& pip3 install codespell
&& pip3 install codespell PyGithub boto3 unidiff
COPY run.sh /
COPY process_style_check_result.py /


@@ -35,6 +35,8 @@ The [system.clusters](../../operations/system-tables/clusters.md) system table c
When creating a new replica of the database, this replica creates tables by itself. If the replica has been unavailable for a long time and has lagged behind the replication log, it checks its local metadata against the current metadata in ZooKeeper, moves extra tables with data to a separate non-replicated database (so as not to accidentally delete anything superfluous), creates the missing tables, and updates the table names if they have been renamed. The data is replicated at the `ReplicatedMergeTree` level, i.e. if the table is not replicated, the data will not be replicated (the database is responsible only for metadata).
[`ALTER TABLE ATTACH|FETCH|DROP|DROP DETACHED|DETACH PARTITION|PART`](../../sql-reference/statements/alter/partition.md) queries are allowed but not replicated. The database engine will only add/fetch/remove the partition/part to the current replica. However, if the table itself uses a Replicated table engine, then the data will be replicated after using `ATTACH`.
## Usage Example {#usage-example}
Creating a cluster with three hosts:


@@ -129,6 +129,20 @@ Features:
- Support monitor (processor, connection, query)
- Support migrate data
### Bytebase {#bytebase}
[Bytebase](https://bytebase.com) is a web-based, open source schema change and version control tool for teams. It supports various databases including ClickHouse.
Features:
- Schema review between developers and DBAs.
- Database-as-Code, version control the schema in a VCS such as GitLab, and trigger the deployment upon code commit.
- Streamlined deployment with per-environment policy.
- Full migration history.
- Schema drift detection.
- Backup and restore.
- RBAC.
## Commercial {#commercial}
### DataGrip {#datagrip}


@@ -3830,3 +3830,86 @@ Default value: `0`.
**See Also**
- [optimize_move_to_prewhere](#optimize_move_to_prewhere) setting
## async_insert {#async-insert}
Enables or disables asynchronous inserts. This makes sense only for insertion over the HTTP protocol. Note that deduplication does not work for such inserts.
If enabled, the data is combined into batches before the insertion into tables, so it is possible to do small and frequent insertions into ClickHouse (up to 15000 queries per second) without buffer tables.
The data is inserted either after the [async_insert_max_data_size](#async-insert-max-data-size) is exceeded or after [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) milliseconds since the first `INSERT` query. If the [async_insert_stale_timeout_ms](#async-insert-stale-timeout-ms) is set to a non-zero value, the data is inserted after `async_insert_stale_timeout_ms` milliseconds since the last query.
If [wait_for_async_insert](#wait-for-async-insert) is enabled, every client will wait for the data to be processed and flushed to the table. Otherwise, the query would be processed almost instantly, even if the data is not inserted.
Possible values:
- 0 — Insertions are made synchronously, one after another.
- 1 — Multiple asynchronous insertions enabled.
Default value: `0`.
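A minimal sketch of how these settings combine; the table `t` is hypothetical, and per the note above the batching applies to inserts sent over the HTTP protocol:

``` sql
-- Enable asynchronous inserts for the session and wait for the flush,
-- so the server returns OK only after the batch reaches the table.
SET async_insert = 1;
SET wait_for_async_insert = 1;
-- Buffered server-side; flushed after async_insert_busy_timeout_ms
-- or once async_insert_max_data_size is exceeded.
INSERT INTO t VALUES (1, 'a'), (2, 'b');
```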
## async_insert_threads {#async-insert-threads}
The maximum number of threads for background data parsing and insertion.
Possible values:
- Positive integer.
- 0 — Asynchronous insertions are disabled.
Default value: `16`.
## wait_for_async_insert {#wait-for-async-insert}
Enables or disables waiting for processing of asynchronous insertion. If enabled, the server will return `OK` only after the data is inserted. Otherwise, it will return `OK` even if the data has not been inserted.
Possible values:
- 0 — Server returns `OK` even if the data is not yet inserted.
- 1 — Server returns `OK` only after the data is inserted.
Default value: `1`.
## wait_for_async_insert_timeout {#wait-for-async-insert-timeout}
The timeout in seconds for waiting for processing of asynchronous insertion.
Possible values:
- Positive integer.
- 0 — Disabled.
Default value: [lock_acquire_timeout](#lock_acquire_timeout).
## async_insert_max_data_size {#async-insert-max-data-size}
The maximum size of the unparsed data in bytes collected per query before being inserted.
Possible values:
- Positive integer.
- 0 — Asynchronous insertions are disabled.
Default value: `1000000`.
## async_insert_busy_timeout_ms {#async-insert-busy-timeout-ms}
The maximum timeout in milliseconds since the first `INSERT` query before inserting collected data.
Possible values:
- Positive integer.
- 0 — Timeout disabled.
Default value: `200`.
## async_insert_stale_timeout_ms {#async-insert-stale-timeout-ms}
The maximum timeout in milliseconds since the last `INSERT` query before dumping collected data. If enabled, this setting prolongs [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) with every `INSERT` query as long as [async_insert_max_data_size](#async-insert-max-data-size) is not exceeded.
Possible values:
- Positive integer.
- 0 — Timeout disabled.
Default value: `0`.


@@ -0,0 +1,43 @@
---
toc_priority: 302
---
# entropy {#entropy}
Calculates [Shannon entropy](https://en.wikipedia.org/wiki/Entropy_(information_theory)) of a column of values.
**Syntax**
``` sql
entropy(val)
```
**Arguments**
- `val` — Column of values of any type.
**Returned value**
- Shannon entropy.
Type: [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
CREATE TABLE entropy (`vals` UInt32,`strings` String) ENGINE = Memory;
INSERT INTO entropy VALUES (1, 'A'), (1, 'A'), (1,'A'), (1,'A'), (2,'B'), (2,'B'), (2,'C'), (2,'D');
SELECT entropy(vals), entropy(strings) FROM entropy;
```
Result:
``` text
┌─entropy(vals)─┬─entropy(strings)─┐
│ 1 │ 1.75 │
└───────────────┴──────────────────┘
```


@@ -82,3 +82,76 @@ An exception is thrown when dividing by zero or when dividing a minimal negative
Returns the least common multiple of the numbers.
An exception is thrown when dividing by zero or when dividing a minimal negative number by minus one.
## max2 {#max2}
Compares two values and returns the maximum. The returned value is converted to [Float64](../../sql-reference/data-types/float.md).
**Syntax**
```sql
max2(value1, value2)
```
**Arguments**
- `value1` — First value. [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
- `value2` — Second value. [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
**Returned value**
- The maximum of two values.
Type: [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT max2(-1, 2);
```
Result:
```text
┌─max2(-1, 2)─┐
│ 2 │
└─────────────┘
```
## min2 {#min2}
Compares two values and returns the minimum. The returned value is converted to [Float64](../../sql-reference/data-types/float.md).
**Syntax**
```sql
min2(value1, value2)
```
**Arguments**
- `value1` — First value. [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
- `value2` — Second value. [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
**Returned value**
- The minimum of two values.
Type: [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT min2(-1, 2);
```
Result:
```text
┌─min2(-1, 2)─┐
│ -1 │
└─────────────┘
```


@@ -213,7 +213,7 @@ SELECT splitByNonAlpha(' 1! a, b. ');
## arrayStringConcat(arr\[, separator\]) {#arraystringconcatarr-separator}
Concatenates the strings listed in the array with the separator.separator is an optional parameter: a constant string, set to an empty string by default.
Concatenates the strings (values of type String or Nullable(String)) listed in the array with the separator. separator is an optional parameter: a constant string, set to an empty string by default.
Returns the string.
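For instance, a quick sketch of the behavior described above:

``` sql
SELECT arrayStringConcat(['a', 'b', 'c'], '-'); -- returns 'a-b-c'
SELECT arrayStringConcat(['a', 'b', 'c']);      -- returns 'abc' (empty separator by default)
```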
## alphaTokens(s) {#alphatokenss}


@@ -165,3 +165,805 @@ Result:
│ 2 │
└─────────────────┘
```
## tuplePlus {#tupleplus}
Calculates the sum of corresponding values of two tuples of the same size.
**Syntax**
```sql
tuplePlus(tuple1, tuple2)
```
Alias: `vectorSum`.
**Arguments**
- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- Tuple with the sum.
Type: [Tuple](../../sql-reference/data-types/tuple.md).
**Example**
Query:
```sql
SELECT tuplePlus((1, 2), (2, 3));
```
Result:
```text
┌─tuplePlus((1, 2), (2, 3))─┐
│ (3,5) │
└───────────────────────────┘
```
## tupleMinus {#tupleminus}
Calculates the difference of the corresponding values of two tuples of the same size.
**Syntax**
```sql
tupleMinus(tuple1, tuple2)
```
Alias: `vectorDifference`.
**Arguments**
- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- Tuple with the result of subtraction.
Type: [Tuple](../../sql-reference/data-types/tuple.md).
**Example**
Query:
```sql
SELECT tupleMinus((1, 2), (2, 3));
```
Result:
```text
┌─tupleMinus((1, 2), (2, 3))─┐
│ (-1,-1) │
└────────────────────────────┘
```
## tupleMultiply {#tuplemultiply}
Calculates the product of the corresponding values of two tuples of the same size.
**Syntax**
```sql
tupleMultiply(tuple1, tuple2)
```
**Arguments**
- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- Tuple with the multiplication.
Type: [Tuple](../../sql-reference/data-types/tuple.md).
**Example**
Query:
```sql
SELECT tupleMultiply((1, 2), (2, 3));
```
Result:
```text
┌─tupleMultiply((1, 2), (2, 3))─┐
│ (2,6) │
└───────────────────────────────┘
```
## tupleDivide {#tupledivide}
Calculates the quotient of the corresponding values of two tuples of the same size. Note that division by zero will return `inf`.
**Syntax**
```sql
tupleDivide(tuple1, tuple2)
```
**Arguments**
- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- Tuple with the result of division.
Type: [Tuple](../../sql-reference/data-types/tuple.md).
**Example**
Query:
```sql
SELECT tupleDivide((1, 2), (2, 3));
```
Result:
```text
┌─tupleDivide((1, 2), (2, 3))─┐
│ (0.5,0.6666666666666666) │
└─────────────────────────────┘
```
## tupleNegate {#tuplenegate}
Calculates the negation of the tuple values.
**Syntax**
```sql
tupleNegate(tuple)
```
**Arguments**
- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- Tuple with the result of negation.
Type: [Tuple](../../sql-reference/data-types/tuple.md).
**Example**
Query:
```sql
SELECT tupleNegate((1, 2));
```
Result:
```text
┌─tupleNegate((1, 2))─┐
│ (-1,-2) │
└─────────────────────┘
```
## tupleMultiplyByNumber {#tuplemultiplybynumber}
Returns a tuple with all values multiplied by a number.
**Syntax**
```sql
tupleMultiplyByNumber(tuple, number)
```
**Arguments**
- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
- `number` — Multiplier. [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
**Returned value**
- Tuple with multiplied values.
Type: [Tuple](../../sql-reference/data-types/tuple.md).
**Example**
Query:
```sql
SELECT tupleMultiplyByNumber((1, 2), -2.1);
```
Result:
```text
┌─tupleMultiplyByNumber((1, 2), -2.1)─┐
│ (-2.1,-4.2) │
└─────────────────────────────────────┘
```
## tupleDivideByNumber {#tupledividebynumber}
Returns a tuple with all values divided by a number. Note that division by zero will return `inf`.
**Syntax**
```sql
tupleDivideByNumber(tuple, number)
```
**Arguments**
- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
- `number` — Divider. [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
**Returned value**
- Tuple with divided values.
Type: [Tuple](../../sql-reference/data-types/tuple.md).
**Example**
Query:
```sql
SELECT tupleDivideByNumber((1, 2), 0.5);
```
Result:
```text
┌─tupleDivideByNumber((1, 2), 0.5)─┐
│ (2,4) │
└──────────────────────────────────┘
```
## dotProduct {#dotproduct}
Calculates the scalar product of two tuples of the same size.
**Syntax**
```sql
dotProduct(tuple1, tuple2)
```
Alias: `scalarProduct`.
**Arguments**
- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- Scalar product.
Type: [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
**Example**
Query:
```sql
SELECT dotProduct((1, 2), (2, 3));
```
Result:
```text
┌─dotProduct((1, 2), (2, 3))─┐
│ 8 │
└────────────────────────────┘
```
## L1Norm {#l1norm}
Calculates the sum of absolute values of a tuple.
**Syntax**
```sql
L1Norm(tuple)
```
Alias: `normL1`.
**Arguments**
- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- L1-norm or [taxicab geometry](https://en.wikipedia.org/wiki/Taxicab_geometry) distance.
Type: [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
**Example**
Query:
```sql
SELECT L1Norm((1, 2));
```
Result:
```text
┌─L1Norm((1, 2))─┐
│ 3 │
└────────────────┘
```
## L2Norm {#l2norm}
Calculates the square root of the sum of the squares of the tuple values.
**Syntax**
```sql
L2Norm(tuple)
```
Alias: `normL2`.
**Arguments**
- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- L2-norm or [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance).
Type: [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT L2Norm((1, 2));
```
Result:
```text
┌───L2Norm((1, 2))─┐
│ 2.23606797749979 │
└──────────────────┘
```
## LinfNorm {#linfnorm}
Calculates the maximum of absolute values of a tuple.
**Syntax**
```sql
LinfNorm(tuple)
```
Alias: `normLinf`.
**Arguments**
- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- Linf-norm or the maximum absolute value.
Type: [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT LinfNorm((1, -2));
```
Result:
```text
┌─LinfNorm((1, -2))─┐
│ 2 │
└───────────────────┘
```
## LpNorm {#lpnorm}
Calculates the `p`-th root of the sum of the absolute values of a tuple, each raised to the power of `p`.
**Syntax**
```sql
LpNorm(tuple, p)
```
Alias: `normLp`.
**Arguments**
- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
- `p` — The power. Possible values: real number in `[1; inf)`. [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
**Returned value**
- [Lp-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm)
Type: [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT LpNorm((1, -2), 2);
```
Result:
```text
┌─LpNorm((1, -2), 2)─┐
│ 2.23606797749979 │
└────────────────────┘
```
## L1Distance {#l1distance}
Calculates the distance between two points (the values of the tuples are the coordinates) in `L1` space (1-norm ([taxicab geometry](https://en.wikipedia.org/wiki/Taxicab_geometry) distance)).
**Syntax**
```sql
L1Distance(tuple1, tuple2)
```
Alias: `distanceL1`.
**Arguments**
- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- 1-norm distance.
Type: [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT L1Distance((1, 2), (2, 3));
```
Result:
```text
┌─L1Distance((1, 2), (2, 3))─┐
│ 2 │
└────────────────────────────┘
```
## L2Distance {#l2distance}
Calculates the distance between two points (the values of the tuples are the coordinates) in Euclidean space ([Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance)).
**Syntax**
```sql
L2Distance(tuple1, tuple2)
```
Alias: `distanceL2`.
**Arguments**
- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- 2-norm distance.
Type: [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT L2Distance((1, 2), (2, 3));
```
Result:
```text
┌─L2Distance((1, 2), (2, 3))─┐
│ 1.4142135623730951 │
└────────────────────────────┘
```
## LinfDistance {#linfdistance}
Calculates the distance between two points (the values of the tuples are the coordinates) in `L_{inf}` space ([maximum norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#Maximum_norm_(special_case_of:_infinity_norm,_uniform_norm,_or_supremum_norm))).
**Syntax**
```sql
LinfDistance(tuple1, tuple2)
```
Alias: `distanceLinf`.
**Arguments**
- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- Infinity-norm distance.
Type: [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT LinfDistance((1, 2), (2, 3));
```
Result:
```text
┌─LinfDistance((1, 2), (2, 3))─┐
│ 1 │
└──────────────────────────────┘
```
## LpDistance {#lpdistance}
Calculates the distance between two points (the values of the tuples are the coordinates) in `Lp` space ([p-norm distance](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm)).
**Syntax**
```sql
LpDistance(tuple1, tuple2, p)
```
Alias: `distanceLp`.
**Arguments**
- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md).
- `p` — The power. Possible values: real number in `[1; inf)`. [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
**Returned value**
- p-norm distance.
Type: [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT LpDistance((1, 2), (2, 3), 3);
```
Result:
```text
┌─LpDistance((1, 2), (2, 3), 3)─┐
│ 1.2599210498948732 │
└───────────────────────────────┘
```
## L1Normalize {#l1normalize}
Calculates the unit vector of a given vector (the values of the tuple are the coordinates) in `L1` space ([taxicab geometry](https://en.wikipedia.org/wiki/Taxicab_geometry)).
**Syntax**
```sql
L1Normalize(tuple)
```
Alias: `normalizeL1`.
**Arguments**
- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- Unit vector.
Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT L1Normalize((1, 2));
```
Result:
```text
┌─L1Normalize((1, 2))─────────────────────┐
│ (0.3333333333333333,0.6666666666666666) │
└─────────────────────────────────────────┘
```
## L2Normalize {#l2normalize}
Calculates the unit vector of a given vector (the values of the tuple are the coordinates) in Euclidean space (using [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance)).
**Syntax**
```sql
L2Normalize(tuple)
```
Alias: `normalizeL2`.
**Arguments**
- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- Unit vector.
Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT L2Normalize((3, 4));
```
Result:
```text
┌─L2Normalize((3, 4))─┐
│ (0.6,0.8) │
└─────────────────────┘
```
## LinfNormalize {#linfnormalize}
Calculates the unit vector of a given vector (the values of the tuple are the coordinates) in `L_{inf}` space (using [maximum norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#Maximum_norm_(special_case_of:_infinity_norm,_uniform_norm,_or_supremum_norm))).
**Syntax**
```sql
LinfNormalize(tuple)
```
Alias: `normalizeLinf`.
**Arguments**
- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- Unit vector.
Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT LinfNormalize((3, 4));
```
Result:
```text
┌─LinfNormalize((3, 4))─┐
│ (0.75,1) │
└───────────────────────┘
```
## LpNormalize {#lpnormalize}
Calculates the unit vector of a given vector (the values of the tuple are the coordinates) in `Lp` space (using [p-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm)).
**Syntax**
```sql
LpNormalize(tuple, p)
```
Alias: `normalizeLp`.
**Arguments**
- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
- `p` — The power. Possible values: real number in `[1; inf)`. [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
**Returned value**
- Unit vector.
Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT LpNormalize((3, 4), 5);
```
Result:
```text
┌─LpNormalize((3, 4), 5)──────────────────┐
│ (0.7187302630182624,0.9583070173576831) │
└─────────────────────────────────────────┘
```
## cosineDistance {#cosinedistance}
Calculates the cosine distance between two vectors (the values of the tuples are the coordinates). The smaller the returned value, the more similar the vectors.
**Syntax**
```sql
cosineDistance(tuple1, tuple2)
```
**Arguments**
- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- Cosine of the angle between the two vectors subtracted from one.
Type: [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT cosineDistance((1, 2), (2, 3));
```
Result:
```text
┌─cosineDistance((1, 2), (2, 3))─┐
│ 0.007722123286332261 │
└────────────────────────────────┘
```


@@ -17,20 +17,30 @@ ClickHouse transforms operators to their corresponding functions at the query pa
`-a` The `negate (a)` function.
For tuple negation: [tupleNegate](../../sql-reference/functions/tuple-functions.md#tuplenegate).
## Multiplication and Division Operators {#multiplication-and-division-operators}
`a * b` The `multiply (a, b)` function.
For multiplying a tuple by a number: [tupleMultiplyByNumber](../../sql-reference/functions/tuple-functions.md#tuplemultiplybynumber), for scalar product: [dotProduct](../../sql-reference/functions/tuple-functions.md#dotproduct).
`a / b` The `divide(a, b)` function.
For dividing tuple by number: [tupleDivideByNumber](../../sql-reference/functions/tuple-functions.md#tupledividebynumber).
`a % b` The `modulo(a, b)` function.
## Addition and Subtraction Operators {#addition-and-subtraction-operators}
`a + b` The `plus(a, b)` function.
For tuple addition: [tuplePlus](../../sql-reference/functions/tuple-functions.md#tupleplus).
`a - b` The `minus(a, b)` function.
For tuple subtraction: [tupleMinus](../../sql-reference/functions/tuple-functions.md#tupleminus).
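The tuple forms follow the same rewriting rule. A short sketch, assuming the operators accept tuple operands as the linked functions describe; the results match the tuple-function examples:

``` sql
SELECT (1, 2) + (2, 3) AS sum,     -- rewritten to tuplePlus((1, 2), (2, 3)) -> (3,5)
       (1, 2) * (-2.1) AS scaled;  -- rewritten to tupleMultiplyByNumber((1, 2), -2.1) -> (-2.1,-4.2)
```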
## Comparison Operators {#comparison-operators}
`a = b` The `equals(a, b)` function.
@@ -71,6 +81,53 @@ ClickHouse transforms operators to their corresponding functions at the query pa
`a GLOBAL NOT IN ...` The `globalNotIn(a, b)` function.
`a = ANY (subquery)` The `in(a, subquery)` function.
`a != ANY (subquery)` The same as `a NOT IN (SELECT singleValueOrNull(*) FROM subquery)`.
`a = ALL (subquery)` The same as `a IN (SELECT singleValueOrNull(*) FROM subquery)`.
`a != ALL (subquery)` The `notIn(a, subquery)` function.
**Examples**
Query with ALL:
``` sql
SELECT number AS a FROM numbers(10) WHERE a > ALL (SELECT number FROM numbers(3, 3));
```
Result:
``` text
┌─a─┐
│ 6 │
│ 7 │
│ 8 │
│ 9 │
└───┘
```
Query with ANY:
``` sql
SELECT number AS a FROM numbers(10) WHERE a > ANY (SELECT number FROM numbers(3, 3));
```
Result:
``` text
┌─a─┐
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
│ 9 │
└───┘
```
## Operators for Working with Dates and Times {#operators-datetime}
### EXTRACT {#operator-extract}


@@ -119,3 +119,14 @@ Performance will not decrease if:
- Data is added in real time.
- You upload data that is usually sorted by time.
It is also possible to insert data asynchronously in small but frequent inserts. The data from such insertions is combined into batches and then safely inserted into a table. To enable the asynchronous mode, switch on the [async_insert](../../operations/settings/settings.md#async-insert) setting (see the sketch below). Note that asynchronous insertions are supported only over the HTTP protocol, and deduplication is not supported for them.
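A minimal sketch, assuming a hypothetical table `t` (when inserting over HTTP, the same settings can be passed as URL parameters):

``` sql
SET async_insert = 1;          -- batch small inserts on the server side
SET wait_for_async_insert = 0; -- return OK without waiting for the flush
INSERT INTO t VALUES (1), (2); -- combined with other pending inserts into one batch
```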
**See Also**
- [async_insert](../../operations/settings/settings.md#async-insert)
- [async_insert_threads](../../operations/settings/settings.md#async-insert-threads)
- [wait_for_async_insert](../../operations/settings/settings.md#wait-for-async-insert)
- [wait_for_async_insert_timeout](../../operations/settings/settings.md#wait-for-async-insert-timeout)
- [async_insert_max_data_size](../../operations/settings/settings.md#async-insert-max-data-size)
- [async_insert_busy_timeout_ms](../../operations/settings/settings.md#async-insert-busy-timeout-ms)
- [async_insert_stale_timeout_ms](../../operations/settings/settings.md#async-insert-stale-timeout-ms)


@@ -0,0 +1,69 @@
---
toc_title: EXCEPT
---
# EXCEPT Clause {#except-clause}
The `EXCEPT` clause returns only those rows that result from the first query and not from the second. The queries must match in the number, order, and type of their columns. The result of `EXCEPT` can contain duplicate rows.
Multiple `EXCEPT` statements are executed left to right if parentheses are not specified. The `EXCEPT` operator has the same priority as the `UNION` clause and lower priority than the `INTERSECT` clause.
``` sql
SELECT column1 [, column2 ]
FROM table1
[WHERE condition]
EXCEPT
SELECT column1 [, column2 ]
FROM table2
[WHERE condition]
```
The condition could be any expression based on your requirements.
**Examples**
Query:
``` sql
SELECT number FROM numbers(1,10) EXCEPT SELECT number FROM numbers(3,6);
```
Result:
``` text
┌─number─┐
│ 1 │
│ 2 │
│ 9 │
│ 10 │
└────────┘
```
Query:
``` sql
CREATE TABLE t1(one String, two String, three String) ENGINE=Memory();
CREATE TABLE t2(four String, five String, six String) ENGINE=Memory();
INSERT INTO t1 VALUES ('q', 'm', 'b'), ('s', 'd', 'f'), ('l', 'p', 'o'), ('s', 'd', 'f'), ('s', 'd', 'f'), ('k', 't', 'd'), ('l', 'p', 'o');
INSERT INTO t2 VALUES ('q', 'm', 'b'), ('b', 'd', 'k'), ('s', 'y', 't'), ('s', 'd', 'f'), ('m', 'f', 'o'), ('k', 'k', 'd');
SELECT * FROM t1 EXCEPT SELECT * FROM t2;
```
Result:
``` text
┌─one─┬─two─┬─three─┐
│ l │ p │ o │
│ k │ t │ d │
│ l │ p │ o │
└─────┴─────┴───────┘
```
**See Also**
- [UNION](union.md#union-clause)
- [INTERSECT](intersect.md#intersect-clause)


@@ -49,6 +49,8 @@ Specifics of each optional clause are covered in separate sections, which are li
- [LIMIT clause](../../../sql-reference/statements/select/limit.md)
- [OFFSET clause](../../../sql-reference/statements/select/offset.md)
- [UNION clause](../../../sql-reference/statements/select/union.md)
- [INTERSECT clause](../../../sql-reference/statements/select/intersect.md)
- [EXCEPT clause](../../../sql-reference/statements/select/except.md)
- [INTO OUTFILE clause](../../../sql-reference/statements/select/into-outfile.md)
- [FORMAT clause](../../../sql-reference/statements/select/format.md)


@@ -0,0 +1,73 @@
---
toc_title: INTERSECT
---
# INTERSECT Clause {#intersect-clause}
The `INTERSECT` clause returns only those rows that result from both the first and the second queries. The queries must match in the number, order, and type of their columns. The result of `INTERSECT` can contain duplicate rows.
Multiple `INTERSECT` statements are executed left to right if parentheses are not specified. The `INTERSECT` operator has a higher priority than the `UNION` and `EXCEPT` clauses.
``` sql
SELECT column1 [, column2 ]
FROM table1
[WHERE condition]
INTERSECT
SELECT column1 [, column2 ]
FROM table2
[WHERE condition]
```
The condition could be any expression based on your requirements.
**Examples**
Query:
``` sql
SELECT number FROM numbers(1,10) INTERSECT SELECT number FROM numbers(3,6);
```
Result:
``` text
┌─number─┐
│ 3 │
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
└────────┘
```
Query:
``` sql
CREATE TABLE t1(one String, two String, three String) ENGINE=Memory();
CREATE TABLE t2(four String, five String, six String) ENGINE=Memory();
INSERT INTO t1 VALUES ('q', 'm', 'b'), ('s', 'd', 'f'), ('l', 'p', 'o'), ('s', 'd', 'f'), ('s', 'd', 'f'), ('k', 't', 'd'), ('l', 'p', 'o');
INSERT INTO t2 VALUES ('q', 'm', 'b'), ('b', 'd', 'k'), ('s', 'y', 't'), ('s', 'd', 'f'), ('m', 'f', 'o'), ('k', 'k', 'd');
SELECT * FROM t1 INTERSECT SELECT * FROM t2;
```
Result:
``` text
┌─one─┬─two─┬─three─┐
│ q │ m │ b │
│ s │ d │ f │
│ s │ d │ f │
│ s │ d │ f │
└─────┴─────┴───────┘
```
**See Also**
- [UNION](union.md#union-clause)
- [EXCEPT](except.md#except-clause)


@@ -3640,3 +3640,87 @@ SELECT * FROM positional_arguments ORDER BY 2,3;
**See Also**
- [optimize_move_to_prewhere](#optimize_move_to_prewhere) setting
## async_insert {#async-insert}
Enables or disables asynchronous inserts. This works only for insertion over the HTTP protocol. Note that deduplication does not work for such inserts.
If enabled, the data is combined into batches before the insertion into tables, so it is possible to do small and frequent insertions into ClickHouse (up to 15000 queries per second) without buffer tables.
The data is inserted either once the volume exceeds [async_insert_max_data_size](#async-insert-max-data-size) or [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) milliseconds after the first `INSERT` query. If [async_insert_stale_timeout_ms](#async-insert-stale-timeout-ms) is set to a non-zero value, the data is inserted `async_insert_stale_timeout_ms` milliseconds after the last query.
If [wait_for_async_insert](#wait-for-async-insert) is enabled, every client waits for the data to be flushed to the table. Otherwise, the query is processed almost instantly, even if the data is not yet inserted.
Possible values:
- 0 — Insertions are made synchronously, one after another.
- 1 — Multiple asynchronous insertions enabled.
Default value: `0`.
## async_insert_threads {#async-insert-threads}
The maximum number of threads for background data parsing and insertion.
Possible values:
- Positive integer.
- 0 — Asynchronous insertions are disabled.
Default value: `16`.
## wait_for_async_insert {#wait-for-async-insert}
Enables or disables waiting for processing of asynchronous insertion. If enabled, the client returns `OK` only after the data is inserted. Otherwise, `OK` is returned even if the insertion has not happened.
Possible values:
- 0 — The server returns `OK` even if the data insertion is not finished yet.
- 1 — The server returns `OK` only after the data insertion is finished.
Default value: `1`.
## wait_for_async_insert_timeout {#wait-for-async-insert-timeout}
The timeout in seconds for waiting for processing of asynchronous insertion.
Possible values:
- Positive integer.
- 0 — Waiting is disabled.
Default value: [lock_acquire_timeout](#lock_acquire_timeout).
## async_insert_max_data_size {#async-insert-max-data-size}
The maximum size of the unparsed data in bytes collected per query before being inserted.
Possible values:
- Positive integer.
- 0 — Asynchronous insertions are disabled.
Default value: `1000000`.
## async_insert_busy_timeout_ms {#async-insert-busy-timeout-ms}
The maximum timeout in milliseconds since the first `INSERT` query before inserting collected data.
Possible values:
- Positive integer.
- 0 — Timeout disabled.
Default value: `200`.
## async_insert_stale_timeout_ms {#async-insert-stale-timeout-ms}
The maximum timeout in milliseconds since the last `INSERT` query before dumping collected data. If set to a non-zero value, this setting prolongs [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) with every `INSERT` query as long as [async_insert_max_data_size](#async-insert-max-data-size) is not exceeded.
Possible values:
- Positive integer.
- 0 — Timeout disabled.
Default value: `0`.


@@ -0,0 +1,43 @@
---
toc_priority: 302
---
# entropy {#entropy}
Calculates the [Shannon entropy](https://ru.wikipedia.org/wiki/%D0%98%D0%BD%D1%84%D0%BE%D1%80%D0%BC%D0%B0%D1%86%D0%B8%D0%BE%D0%BD%D0%BD%D0%B0%D1%8F_%D1%8D%D0%BD%D1%82%D1%80%D0%BE%D0%BF%D0%B8%D1%8F) of a column of values.
**Syntax**
``` sql
entropy(val)
```
**Arguments**
- `val` — Column of values of any type.
**Returned value**
- Shannon entropy.
Type: [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
CREATE TABLE entropy (`vals` UInt32,`strings` String) ENGINE = Memory;
INSERT INTO entropy VALUES (1, 'A'), (1, 'A'), (1,'A'), (1,'A'), (2,'B'), (2,'B'), (2,'C'), (2,'D');
SELECT entropy(vals), entropy(strings) FROM entropy;
```
Result:
``` text
┌─entropy(vals)─┬─entropy(strings)─┐
│ 1 │ 1.75 │
└───────────────┴──────────────────┘
```


@@ -83,3 +83,78 @@ SELECT toTypeName(0), toTypeName(0 + 0), toTypeName(0 + 0 + 0), toTypeName(0 + 0
Calculates the least common multiple of the numbers.
An exception is thrown when dividing by zero or when dividing a minimal negative number by minus one.
## max2 {#max2}
Compares two values and returns the maximum. The returned value is converted to [Float64](../../sql-reference/data-types/float.md).
**Syntax**
```sql
max2(value1, value2)
```
**Arguments**
- `value1` — First value. [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
- `value2` — Second value. [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
**Returned value**
- The maximum of two values.
Type: [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT max2(-1, 2);
```
Result:
```text
┌─max2(-1, 2)─┐
│ 2 │
└─────────────┘
```
## min2 {#min2}
Compares two values and returns the minimum. The returned value is converted to [Float64](../../sql-reference/data-types/float.md).
**Syntax**
```sql
min2(value1, value2)
```
**Arguments**
- `value1` — First value. [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
- `value2` — Second value. [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
**Returned value**
- The minimum of two values.
Type: [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT min2(-1, 2);
```
Result:
```text
┌─min2(-1, 2)─┐
│ -1 │
└─────────────┘
```


@@ -163,3 +163,805 @@ SELECT tupleHammingDistance(wordShingleMinHash(string), wordShingleMinHashCaseIn
│ 2 │
└─────────────────┘
```
## tuplePlus {#tupleplus}
Вычисляет сумму соответствующих значений двух кортежей одинакового размера.
**Синтаксис**
```sql
tuplePlus(tuple1, tuple2)
```
Синоним: `vectorSum`.
**Аргументы**
- `tuple1` — первый кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — второй кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
**Возвращаемое значение**
- Кортеж с суммами.
Тип: [Tuple](../../sql-reference/data-types/tuple.md).
**Пример**
Запрос:
```sql
SELECT tuplePlus((1, 2), (2, 3));
```
Результат:
```text
┌─tuplePlus((1, 2), (2, 3))─┐
│ (3,5) │
└───────────────────────────┘
```
## tupleMinus {#tupleminus}
Вычисляет разность соответствующих значений двух кортежей одинакового размера.
**Синтаксис**
```sql
tupleMinus(tuple1, tuple2)
```
Синоним: `vectorDifference`.
**Аргументы**
- `tuple1` — первый кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — второй кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
**Возвращаемое значение**
- Кортеж с разностями.
Тип: [Tuple](../../sql-reference/data-types/tuple.md).
**Пример**
Запрос:
```sql
SELECT tupleMinus((1, 2), (2, 3));
```
Результат:
```text
┌─tupleMinus((1, 2), (2, 3))─┐
│ (-1,-1) │
└────────────────────────────┘
```
## tupleMultiply {#tuplemultiply}
Вычисляет произведение соответствующих значений двух кортежей одинакового размера.
**Синтаксис**
```sql
tupleMultiply(tuple1, tuple2)
```
**Аргументы**
- `tuple1` — первый кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — второй кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
**Возвращаемое значение**
- Кортеж с произведениями.
Тип: [Tuple](../../sql-reference/data-types/tuple.md).
**Пример**
Запрос:
```sql
SELECT tupleMultiply((1, 2), (2, 3));
```
Результат:
```text
┌─tupleMultiply((1, 2), (2, 3))─┐
│ (2,6) │
└───────────────────────────────┘
```
## tupleDivide {#tupledivide}
Вычисляет частное соответствующих значений двух кортежей одинакового размера. Обратите внимание, что при делении на ноль возвращается значение `inf`.
**Синтаксис**
```sql
tupleDivide(tuple1, tuple2)
```
**Аргументы**
- `tuple1` — первый кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — второй кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
**Возвращаемое значение**
- Кортеж с частными.
Тип: [Tuple](../../sql-reference/data-types/tuple.md).
**Пример**
Запрос:
```sql
SELECT tupleDivide((1, 2), (2, 3));
```
Результат:
```text
┌─tupleDivide((1, 2), (2, 3))─┐
│ (0.5,0.6666666666666666) │
└─────────────────────────────┘
```
## tupleNegate {#tuplenegate}
Применяет отрицание ко всем значениям кортежа.
**Синтаксис**
```sql
tupleNegate(tuple)
```
**Аргументы**
- `tuple` — кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
**Возвращаемое значение**
- Кортеж с результатом отрицания.
Тип: [Tuple](../../sql-reference/data-types/tuple.md).
**Пример**
Запрос:
```sql
SELECT tupleNegate((1, 2));
```
Результат:
```text
┌─tupleNegate((1, 2))─┐
│ (-1,-2) │
└─────────────────────┘
```
## tupleMultiplyByNumber {#tuplemultiplybynumber}
Возвращает кортеж, в котором значения всех элементов умножены на заданное число.
**Синтаксис**
```sql
tupleMultiplyByNumber(tuple, number)
```
**Аргументы**
- `tuple` — кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
- `number` — множитель. [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) или [Decimal](../../sql-reference/data-types/decimal.md).
**Возвращаемое значение**
- Кортеж с результатами умножения на число.
Тип: [Tuple](../../sql-reference/data-types/tuple.md).
**Пример**
Запрос:
```sql
SELECT tupleMultiplyByNumber((1, 2), -2.1);
```
Результат:
```text
┌─tupleMultiplyByNumber((1, 2), -2.1)─┐
│ (-2.1,-4.2) │
└─────────────────────────────────────┘
```
## tupleDivideByNumber {#tupledividebynumber}
Возвращает кортеж, в котором значения всех элементов поделены на заданное число. Обратите внимание, что при делении на ноль возвращается значение `inf`.
**Синтаксис**
```sql
tupleDivideByNumber(tuple, number)
```
**Аргументы**
- `tuple` — кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
- `number` — делитель. [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
**Возвращаемое значение**
- Кортеж с результатами деления на число.
Тип: [Tuple](../../sql-reference/data-types/tuple.md).
**Пример**
Запрос:
```sql
SELECT tupleDivideByNumber((1, 2), 0.5);
```
Результат:
```text
┌─tupleDivideByNumber((1, 2), 0.5)─┐
│ (2,4) │
└──────────────────────────────────┘
```
## dotProduct {#dotproduct}
Вычисляет скалярное произведение двух кортежей одинакового размера.
**Синтаксис**
```sql
dotProduct(tuple1, tuple2)
```
Синоним: `scalarProduct`.
**Аргументы**
- `tuple1` — первый кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — второй кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
**Возвращаемое значение**
- Скалярное произведение.
Тип: [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) или [Decimal](../../sql-reference/data-types/decimal.md).
**Пример**
Запрос:
```sql
SELECT dotProduct((1, 2), (2, 3));
```
Результат:
```text
┌─dotProduct((1, 2), (2, 3))─┐
│ 8 │
└────────────────────────────┘
```
## L1Norm {#l1norm}
Вычисляет сумму абсолютных значений кортежа.
**Синтаксис**
```sql
L1Norm(tuple)
```
Синоним: `normL1`.
**Аргументы**
- `tuple` — кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
**Возвращаемое значение**
- L1-норма или [расстояние городских кварталов](https://ru.wikipedia.org/wiki/%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%B3%D0%BE%D1%80%D0%BE%D0%B4%D1%81%D0%BA%D0%B8%D1%85_%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB%D0%BE%D0%B2).
Тип: [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) или [Decimal](../../sql-reference/data-types/decimal.md).
**Пример**
Запрос:
```sql
SELECT L1Norm((1, 2));
```
Результат:
```text
┌─L1Norm((1, 2))─┐
│ 3 │
└────────────────┘
```
## L2Norm {#l2norm}
Вычисляет квадратный корень из суммы квадратов значений кортежа.
**Синтаксис**
```sql
L2Norm(tuple)
```
Синоним: `normL2`.
**Аргументы**
- `tuple` — кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
**Возвращаемое значение**
- L2-норма или [Евклидово расстояние](https://ru.wikipedia.org/wiki/%D0%95%D0%B2%D0%BA%D0%BB%D0%B8%D0%B4%D0%BE%D0%B2%D0%B0_%D0%BC%D0%B5%D1%82%D1%80%D0%B8%D0%BA%D0%B0).
Тип: [Float](../../sql-reference/data-types/float.md).
**Пример**
Запрос:
```sql
SELECT L2Norm((1, 2));
```
Результат:
```text
┌───L2Norm((1, 2))─┐
│ 2.23606797749979 │
└──────────────────┘
```
## LinfNorm {#linfnorm}
Вычисляет максимум из абсолютных значений кортежа.
**Синтаксис**
```sql
LinfNorm(tuple)
```
Синоним: `normLinf`.
**Аргументы**
- `tuple` — кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
**Возвращаемое значение**
- Linf-норма или максимальное абсолютное значение.
Тип: [Float](../../sql-reference/data-types/float.md).
**Пример**
Запрос:
```sql
SELECT LinfNorm((1, -2));
```
Результат:
```text
┌─LinfNorm((1, -2))─┐
│ 2 │
└───────────────────┘
```
## LpNorm {#lpnorm}
Возвращает корень степени `p` из суммы абсолютных значений кортежа, возведенных в степень `p`.
**Синтаксис**
```sql
LpNorm(tuple, p)
```
Синоним: `normLp`.
**Аргументы**
- `tuple` — кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
- `p` — степень. Возможные значение: любое число из промежутка [1;inf). [UInt](../../sql-reference/data-types/int-uint.md) или [Float](../../sql-reference/data-types/float.md).
**Возвращаемое значение**
- [Lp-норма](https://ru.wikipedia.org/wiki/%D0%9D%D0%BE%D1%80%D0%BC%D0%B0_(%D0%BC%D0%B0%D1%82%D0%B5%D0%BC%D0%B0%D1%82%D0%B8%D0%BA%D0%B0)#%D0%9D%D0%B5%D0%BA%D0%BE%D1%82%D0%BE%D1%80%D1%8B%D0%B5_%D0%B2%D0%B8%D0%B4%D1%8B_%D0%BC%D0%B0%D1%82%D1%80%D0%B8%D1%87%D0%BD%D1%8B%D1%85_%D0%BD%D0%BE%D1%80%D0%BC)
Тип: [Float](../../sql-reference/data-types/float.md).
**Пример**
Запрос:
```sql
SELECT LpNorm((1, -2),2);
```
Результат:
```text
┌─LpNorm((1, -2), 2)─┐
│ 2.23606797749979 │
└────────────────────┘
```
## L1Distance {#l1distance}
Вычисляет расстояние между двумя точками (значения кортежей — координаты точек) в пространстве `L1` ([расстояние городских кварталов](https://ru.wikipedia.org/wiki/%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%B3%D0%BE%D1%80%D0%BE%D0%B4%D1%81%D0%BA%D0%B8%D1%85_%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB%D0%BE%D0%B2)).
**Синтаксис**
```sql
L1Distance(tuple1, tuple2)
```
Синоним: `distanceL1`.
**Аргументы**
- `tuple1` — первый кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — второй кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
**Возвращаемое значение**
- Расстояние в норме L1.
Тип: [Float](../../sql-reference/data-types/float.md).
**Пример**
Запрос:
```sql
SELECT L1Distance((1, 2), (2, 3));
```
Результат:
```text
┌─L1Distance((1, 2), (2, 3))─┐
│ 2 │
└────────────────────────────┘
```
## L2Distance {#l2distance}
Вычисляет расстояние между двумя точками (значения кортежей — координаты точек) в пространстве `L2` ([Евклидово расстояние](https://ru.wikipedia.org/wiki/%D0%95%D0%B2%D0%BA%D0%BB%D0%B8%D0%B4%D0%BE%D0%B2%D0%B0_%D0%BC%D0%B5%D1%82%D1%80%D0%B8%D0%BA%D0%B0)).
**Синтаксис**
```sql
L2Distance(tuple1, tuple2)
```
Синоним: `distanceL2`.
**Аргументы**
- `tuple1` — первый кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — второй кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
**Возвращаемое значение**
- Расстояние в норме L2.
Тип: [Float](../../sql-reference/data-types/float.md).
**Пример**
Запрос:
```sql
SELECT L2Distance((1, 2), (2, 3));
```
Результат:
```text
┌─L2Distance((1, 2), (2, 3))─┐
│ 1.4142135623730951 │
└────────────────────────────┘
```
## LinfDistance {#linfdistance}
Вычисляет расстояние между двумя точками (значения кортежей — координаты точек) в пространстве [`L_{inf}`](https://ru.wikipedia.org/wiki/%D0%9D%D0%BE%D1%80%D0%BC%D0%B0_(%D0%BC%D0%B0%D1%82%D0%B5%D0%BC%D0%B0%D1%82%D0%B8%D0%BA%D0%B0)#%D0%9D%D0%B5%D0%BA%D0%BE%D1%82%D0%BE%D1%80%D1%8B%D0%B5_%D0%B2%D0%B8%D0%B4%D1%8B_%D0%BC%D0%B0%D1%82%D1%80%D0%B8%D1%87%D0%BD%D1%8B%D1%85_%D0%BD%D0%BE%D1%80%D0%BC).
**Синтаксис**
```sql
LinfDistance(tuple1, tuple2)
```
Синоним: `distanceLinf`.
**Аргументы**
- `tuple1` — первый кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — второй кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
**Возвращаемые значения**
- Расстояние в норме Linf.
Тип: [Float](../../sql-reference/data-types/float.md).
**Пример**
Запрос:
```sql
SELECT LinfDistance((1, 2), (2, 3));
```
Результат:
```text
┌─LinfDistance((1, 2), (2, 3))─┐
│ 1 │
└──────────────────────────────┘
```
## LpDistance {#lpdistance}
Вычисляет расстояние между двумя точками (значения кортежей — координаты точек) в пространстве [`Lp`](https://ru.wikipedia.org/wiki/%D0%9D%D0%BE%D1%80%D0%BC%D0%B0_(%D0%BC%D0%B0%D1%82%D0%B5%D0%BC%D0%B0%D1%82%D0%B8%D0%BA%D0%B0)#%D0%9D%D0%B5%D0%BA%D0%BE%D1%82%D0%BE%D1%80%D1%8B%D0%B5_%D0%B2%D0%B8%D0%B4%D1%8B_%D0%BC%D0%B0%D1%82%D1%80%D0%B8%D1%87%D0%BD%D1%8B%D1%85_%D0%BD%D0%BE%D1%80%D0%BC).
**Синтаксис**
```sql
LpDistance(tuple1, tuple2, p)
```
Синоним: `distanceLp`.
**Аргументы**
- `tuple1` — первый кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — второй кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
- `p` — степень. Возможные значение: любое число из промежутка [1;inf). [UInt](../../sql-reference/data-types/int-uint.md) или [Float](../../sql-reference/data-types/float.md).
**Возвращаемое значение**
- Расстояние в норме Lp.
Тип: [Float](../../sql-reference/data-types/float.md).
**Пример**
Запрос:
```sql
SELECT LpDistance((1, 2), (2, 3), 3);
```
Результат:
```text
┌─LpDistance((1, 2), (2, 3), 3)─┐
│ 1.2599210498948732 │
└───────────────────────────────┘
```
## L1Normalize {#l1normalize}
Вычисляет единичный вектор для исходного вектора (значения кортежа — координаты вектора) в пространстве `L1` ([расстояние городских кварталов](https://ru.wikipedia.org/wiki/%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%B3%D0%BE%D1%80%D0%BE%D0%B4%D1%81%D0%BA%D0%B8%D1%85_%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB%D0%BE%D0%B2)).
**Синтаксис**
```sql
L1Normalize(tuple)
```
Синоним: `normalizeL1`.
**Аргументы**
- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
**Возвращаемое значение**
- Единичный вектор.
Тип: кортеж [Tuple](../../sql-reference/data-types/tuple.md) значений [Float](../../sql-reference/data-types/float.md).
**Пример**
Запрос:
```sql
SELECT L1Normalize((1, 2));
```
Результат:
```text
┌─L1Normalize((1, 2))─────────────────────┐
│ (0.3333333333333333,0.6666666666666666) │
└─────────────────────────────────────────┘
```
## L2Normalize {#l2normalize}
Вычисляет единичный вектор для исходного вектора (значения кортежа — координаты вектора) в пространстве `L2` ([Евклидово пространство](https://ru.wikipedia.org/wiki/%D0%95%D0%B2%D0%BA%D0%BB%D0%B8%D0%B4%D0%BE%D0%B2%D0%BE_%D0%BF%D1%80%D0%BE%D1%81%D1%82%D1%80%D0%B0%D0%BD%D1%81%D1%82%D0%B2%D0%BE).
**Синтаксис**
```sql
L2Normalize(tuple)
```
Синоним: `normalizeL1`.
**Аргументы**
- `tuple` — кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
**Возвращаемое значение**
- Единичный вектор.
Тип: кортеж [Tuple](../../sql-reference/data-types/tuple.md) значений [Float](../../sql-reference/data-types/float.md).
**Пример**
Запрос:
```sql
SELECT L2Normalize((3, 4));
```
Результат:
```text
┌─L2Normalize((3, 4))─┐
│ (0.6,0.8) │
└─────────────────────┘
```
## LinfNormalize {#linfnormalize}
Calculates the unit vector for a given vector (the tuple values are the coordinates of the vector) in [`L_{inf}`](https://ru.wikipedia.org/wiki/%D0%9D%D0%BE%D1%80%D0%BC%D0%B0_(%D0%BC%D0%B0%D1%82%D0%B5%D0%BC%D0%B0%D1%82%D0%B8%D0%BA%D0%B0)#%D0%9D%D0%B5%D0%BA%D0%BE%D1%82%D0%BE%D1%80%D1%8B%D0%B5_%D0%B2%D0%B8%D0%B4%D1%8B_%D0%BC%D0%B0%D1%82%D1%80%D0%B8%D1%87%D0%BD%D1%8B%D1%85_%D0%BD%D0%BE%D1%80%D0%BC) space.
**Syntax**
```sql
LinfNormalize(tuple)
```
Alias: `normalizeLinf`.
**Arguments**
- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- Unit vector.
Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md) values.
**Example**
Query:
```sql
SELECT LinfNormalize((3, 4));
```
Result:
```text
┌─LinfNormalize((3, 4))─┐
│ (0.75,1) │
└───────────────────────┘
```
## LpNormalize {#lpnormalize}
Calculates the unit vector for a given vector (the tuple values are the coordinates of the vector) in [`Lp`](https://ru.wikipedia.org/wiki/%D0%9D%D0%BE%D1%80%D0%BC%D0%B0_(%D0%BC%D0%B0%D1%82%D0%B5%D0%BC%D0%B0%D1%82%D0%B8%D0%BA%D0%B0)#%D0%9D%D0%B5%D0%BA%D0%BE%D1%82%D0%BE%D1%80%D1%8B%D0%B5_%D0%B2%D0%B8%D0%B4%D1%8B_%D0%BC%D0%B0%D1%82%D1%80%D0%B8%D1%87%D0%BD%D1%8B%D1%85_%D0%BD%D0%BE%D1%80%D0%BC) space.
**Syntax**
```sql
LpNormalize(tuple, p)
```
Alias: `normalizeLp`.
**Arguments**
- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
- `p` — The power. Possible values: any number from the interval [1; inf). [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
**Returned value**
- Unit vector.
Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md) values.
**Example**
Query:
```sql
SELECT LpNormalize((3, 4), 5);
```
Result:
```text
┌─LpNormalize((3, 4), 5)──────────────────┐
│ (0.7187302630182624,0.9583070173576831) │
└─────────────────────────────────────────┘
```
## cosineDistance {#cosinedistance}
Calculates the cosine distance between two vectors (the tuple values are the coordinates of the vectors). The smaller the returned value, the more similar the vectors are.
**Syntax**
```sql
cosineDistance(tuple1, tuple2)
```
**Arguments**
- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- The difference between 1 and the cosine of the angle between the vectors.
Type: [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT cosineDistance((1, 2), (2, 3));
```
Result:
```text
┌─cosineDistance((1, 2), (2, 3))─┐
│ 0.007722123286332261 │
└────────────────────────────────┘
```

View File

@ -18,20 +18,30 @@ toc_title: "Операторы"
`-a` - The `negate(a)` function.
For numbers in a tuple, [tupleNegate](../../sql-reference/functions/tuple-functions.md#tuplenegate) can also be used.
## Multiplication and Division Operators {#operatory-umnozheniia-i-deleniia}
`a * b` - The `multiply(a, b)` function.
To multiply a tuple by a number, [tupleMultiplyByNumber](../../sql-reference/functions/tuple-functions.md#tuplemultiplybynumber) can also be used; for a scalar product, use [dotProduct](../../sql-reference/functions/tuple-functions.md#dotproduct).
`a / b` - The `divide(a, b)` function.
To divide a tuple by a number, [tupleDivideByNumber](../../sql-reference/functions/tuple-functions.md#tupledividebynumber) can also be used.
`a % b` - The `modulo(a, b)` function.
## Addition and Subtraction Operators {#operatory-slozheniia-i-vychitaniia}
`a + b` - The `plus(a, b)` function.
For adding tuples, [tuplePlus](../../sql-reference/functions/tuple-functions.md#tupleplus) can also be used.
`a - b` - The `minus(a, b)` function.
For subtracting tuples, [tupleMinus](../../sql-reference/functions/tuple-functions.md#tupleminus) can also be used; the tuple variants are shown in the sketch below.
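A minimal illustrative sketch of these tuple counterparts (assuming a server version where the tuple functions above are available):
```sql
SELECT
    tuplePlus((1, 2), (3, 4)) AS sum_t,             -- (4, 6)
    tupleMinus((1, 2), (3, 4)) AS diff_t,           -- (-2, -2)
    tupleMultiplyByNumber((1, 2), 3) AS mul_t,      -- (3, 6)
    tupleDivideByNumber((1, 2), 2) AS div_t,        -- (0.5, 1)
    dotProduct((1, 2), (3, 4)) AS dot;              -- 1*3 + 2*4 = 11
```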
## Comparison Operators {#operatory-sravneniia}
`a = b` - The `equals(a, b)` function.
@ -72,6 +82,53 @@ toc_title: "Операторы"
`a GLOBAL NOT IN ...` - The `globalNotIn(a, b)` function.
`a = ANY (subquery)` - The `in(a, subquery)` function.
`a != ANY (subquery)` is equivalent to `a NOT IN (SELECT singleValueOrNull(*) FROM subquery)`.
`a = ALL (subquery)` is equivalent to `a IN (SELECT singleValueOrNull(*) FROM subquery)`.
`a != ALL (subquery)` - The `notIn(a, subquery)` function.
**Examples**
Query with ALL:
``` sql
SELECT number AS a FROM numbers(10) WHERE a > ALL (SELECT number FROM numbers(3, 3));
```
Result:
``` text
┌─a─┐
│ 6 │
│ 7 │
│ 8 │
│ 9 │
└───┘
```
Query with ANY:
``` sql
SELECT number AS a FROM numbers(10) WHERE a > ANY (SELECT number FROM numbers(3, 3));
```
Result:
``` text
┌─a─┐
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
│ 9 │
└───┘
```
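The equality forms follow the same pattern. A hypothetical sketch of `= ANY`, which the equivalences above map to `in(a, subquery)`:
```sql
-- Expected to return the same rows (2 and 3) as:
-- SELECT number AS a FROM numbers(5) WHERE a IN (SELECT number FROM numbers(2, 2));
SELECT number AS a FROM numbers(5) WHERE a = ANY (SELECT number FROM numbers(2, 2));
```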
## Operators for Working with Dates and Times {#operators-datetime}
### EXTRACT {#extract}

View File

@ -121,3 +121,14 @@ INSERT INTO [db.]table [(c1, c2, c3)] SELECT ...
- Data is added in real time.
- You upload data that is usually sorted by time.
It is also possible to insert data asynchronously in multiple small inserts. Data from such inserts is first collected into batches and then inserted into the table. To enable the asynchronous mode, use the [async_insert](../../operations/settings/settings.md#async-insert) setting. Note that asynchronous inserts are supported only over the HTTP protocol and that deduplication is not performed for them. A minimal sketch is shown below.
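A minimal sketch of such an insert (the `events` table name is hypothetical; as noted above, the query has to be sent over the HTTP interface for the setting to take effect):
```sql
INSERT INTO events SETTINGS async_insert = 1, wait_for_async_insert = 1 VALUES (now(), 'ping');
```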
**See Also**
- [async_insert](../../operations/settings/settings.md#async-insert)
- [async_insert_threads](../../operations/settings/settings.md#async-insert-threads)
- [wait_for_async_insert](../../operations/settings/settings.md#wait-for-async-insert)
- [wait_for_async_insert_timeout](../../operations/settings/settings.md#wait-for-async-insert-timeout)
- [async_insert_max_data_size](../../operations/settings/settings.md#async-insert-max-data-size)
- [async_insert_busy_timeout_ms](../../operations/settings/settings.md#async-insert-busy-timeout-ms)
- [async_insert_stale_timeout_ms](../../operations/settings/settings.md#async-insert-stale-timeout-ms)

View File

@ -0,0 +1,69 @@
---
toc_title: EXCEPT
---
# EXCEPT Clause {#except-clause}
`EXCEPT` returns only those rows that result from the first query without the rows of the second. The queries must match the number of columns, their order, and their types. The result of `EXCEPT` can contain duplicate rows.
If several `EXCEPT` statements are used and parentheses are not specified, `EXCEPT` is executed from left to right. `EXCEPT` has the same execution priority as `UNION` and a lower priority than `INTERSECT`; see the sketch after the examples below.
``` sql
SELECT column1 [, column2 ]
FROM table1
[WHERE condition]
EXCEPT
SELECT column1 [, column2 ]
FROM table2
[WHERE condition]
```
The conditions in the `WHERE` clauses can be anything depending on your requirements.
**Examples**
Query:
``` sql
SELECT number FROM numbers(1,10) EXCEPT SELECT number FROM numbers(3,6);
```
Result:
``` text
┌─number─┐
│ 1 │
│ 2 │
│ 9 │
│ 10 │
└────────┘
```
Query:
``` sql
CREATE TABLE t1(one String, two String, three String) ENGINE=Memory();
CREATE TABLE t2(four String, five String, six String) ENGINE=Memory();
INSERT INTO t1 VALUES ('q', 'm', 'b'), ('s', 'd', 'f'), ('l', 'p', 'o'), ('s', 'd', 'f'), ('s', 'd', 'f'), ('k', 't', 'd'), ('l', 'p', 'o');
INSERT INTO t2 VALUES ('q', 'm', 'b'), ('b', 'd', 'k'), ('s', 'y', 't'), ('s', 'd', 'f'), ('m', 'f', 'o'), ('k', 'k', 'd');
SELECT * FROM t1 EXCEPT SELECT * FROM t2;
```
Result:
``` text
┌─one─┬─two─┬─three─┐
│ l │ p │ o │
│ k │ t │ d │
│ l │ p │ o │
└─────┴─────┴───────┘
```
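To illustrate the precedence rule stated above, a hypothetical sketch: since `INTERSECT` binds tighter than `EXCEPT`, the intersection is expected to be evaluated first.
```sql
-- Expected to parse as: numbers(1,10) EXCEPT (numbers(3,6) INTERSECT numbers(5,6))
SELECT number FROM numbers(1,10)
EXCEPT
SELECT number FROM numbers(3,6)
INTERSECT
SELECT number FROM numbers(5,6);
```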
**See Also**
- [UNION](union.md#union-clause)
- [INTERSECT](intersect.md#intersect-clause)

View File

@ -45,8 +45,10 @@ SELECT [DISTINCT [ON (column1, column2, ...)]] expr_list
- [LIMIT BY Clause](limit-by.md)
- [HAVING Clause](having.md)
- [LIMIT Clause](limit.md)
[OFFSET Clause](offset.md)
- [OFFSET Clause](offset.md)
- [UNION ALL Clause](union.md)
- [INTERSECT Clause](intersect.md)
- [EXCEPT Clause](except.md)
- [INTO OUTFILE Clause](into-outfile.md)
- [FORMAT Clause](format.md)

View File

@ -0,0 +1,73 @@
---
toc_title: INTERSECT
---
# INTERSECT Clause {#intersect-clause}
`INTERSECT` returns rows that are present in the results of both the first and the second queries. The queries must match the number of columns, their order, and their types. The result of `INTERSECT` can contain duplicate rows.
If several `INTERSECT` statements are used and parentheses are not specified, the intersection is executed from left to right. `INTERSECT` has a higher execution priority than `UNION` and `EXCEPT`.
``` sql
SELECT column1 [, column2 ]
FROM table1
[WHERE condition]
INTERSECT
SELECT column1 [, column2 ]
FROM table2
[WHERE condition]
```
The conditions can be anything depending on your requirements.
**Examples**
Query:
``` sql
SELECT number FROM numbers(1,10) INTERSECT SELECT number FROM numbers(3,6);
```
Result:
``` text
┌─number─┐
│ 3 │
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
└────────┘
```
Query:
``` sql
CREATE TABLE t1(one String, two String, three String) ENGINE=Memory();
CREATE TABLE t2(four String, five String, six String) ENGINE=Memory();
INSERT INTO t1 VALUES ('q', 'm', 'b'), ('s', 'd', 'f'), ('l', 'p', 'o'), ('s', 'd', 'f'), ('s', 'd', 'f'), ('k', 't', 'd'), ('l', 'p', 'o');
INSERT INTO t2 VALUES ('q', 'm', 'b'), ('b', 'd', 'k'), ('s', 'y', 't'), ('s', 'd', 'f'), ('m', 'f', 'o'), ('k', 'k', 'd');
SELECT * FROM t1 INTERSECT SELECT * FROM t2;
```
Result:
``` text
┌─one─┬─two─┬─three─┐
│ q │ m │ b │
│ s │ d │ f │
│ s │ d │ f │
│ s │ d │ f │
└─────┴─────┴───────┘
```
**See Also**
- [UNION](union.md#union-clause)
- [EXCEPT](except.md#except-clause)

View File

@ -1 +0,0 @@
../../en/development/continuous-integration.md

View File

@ -0,0 +1,155 @@
# Continuous Integration Checks {#continuous-integration-checks}
When you submit a pull request, some automated checks are run for your code by the ClickHouse [continuous integration (CI) system](https://clickhouse.com/docs/en/development/tests/#test-automation).
This happens after a repository maintainer (someone from the ClickHouse team) has screened your code and added the "can be tested" label to your pull request.
The results of the checks are listed on the GitHub pull request page as described in the [GitHub checks documentation](https://docs.github.com/en/github/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks).
If a check is failing, you may be required to fix it. This page gives an overview of the checks you may encounter and what you can do to fix them.
If the check failure looks unrelated to your changes, it may be a transient failure or an infrastructure problem. Push an empty commit to the pull request to restart the CI checks:
```
git reset
git commit --allow-empty
git push
```
If you are not sure what to do, ask a maintainer for help.
## Merge With Master {#merge-with-master}
Verifies that the PR can be merged to master. If not, it fails with the message 'Cannot fetch mergecommit'. To fix the check, resolve the conflict as described in the [GitHub documentation](https://docs.github.com/en/github/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-on-github), or merge the master branch into your pull request branch using git.
## Docs Check {#docs-check}
Tries to build the ClickHouse documentation website. It can fail if you changed something in the documentation. The most probable reason is that some cross-link in the documentation is wrong. Go to the check report and look for `ERROR` and `WARNING` messages.
### Report Details {#report-details}
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/eabcc293eb02214caa6826b7c15f101643f67a6b/docs_check.html)
- `docs_output.txt` contains the build log. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/eabcc293eb02214caa6826b7c15f101643f67a6b/docs_check/docs_output.txt)
## Description Check {#description-check}
Checks that the description of your pull request conforms to the [PULL_REQUEST_TEMPLATE.md](https://github.com/ClickHouse/ClickHouse/blob/master/.github/PULL_REQUEST_TEMPLATE.md) template.
You have to specify a changelog category for your change (e.g., Bug Fix) and write a user-readable message describing the change for [CHANGELOG.md](../whats-new/changelog/).
## Push To DockerHub {#push-to-dockerhub}
Builds the docker images used for builds and tests, then pushes them to DockerHub.
## Marker Check {#marker-check}
This check means that the CI system has started to process the pull request. A 'pending' status means that not all checks have been started yet. After all checks have been started, the status changes to 'success'.
## Style Check {#style-check}
Performs some simple regex-based checks of code style using the `utils/check-style/check-style` binary (note that it can be run locally).
If it fails, fix the style errors following the [code style guide](./style.md).
### Report Details {#report-details}
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html)
- `output.txt` contains the check errors (invalid tabulation etc.); a blank page means no errors. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt)
## PVS Check {#pvs-check}
Checks the code with the static analysis tool [PVS-studio](https://www.viva64.com/en/pvs-studio/). Look at the report to see the exact errors. Fix them if you can; if not, ask a ClickHouse maintainer for help.
### Report Details {#report-details}
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/67d716b5cc3987801996c31a67b31bf141bc3486/pvs_check.html)
- `test_run.txt.out.log` contains the building and analyzing log file. It includes only parsing or not-found errors.
- `HTML report` contains the analysis results. For its description, visit the PVS [official site](https://www.viva64.com/en/m/0036/#ID14E9A2B2CD).
## Fast Test {#fast-test}
Normally this is the first check that is run for a PR. It builds ClickHouse and runs most of the stateless functional tests, omitting some. If it fails, further checks are not started until it is fixed. Look at the report to see which tests failed, then reproduce the failure locally as described [here](./tests.md#functional-test-locally).
### Report Details {#report-details}
[Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/67d716b5cc3987801996c31a67b31bf141bc3486/fast_test.html)
#### Status Page Files {#status-page-files}
- `runlog.out.log` is the general log that includes all other logs.
- `test_log.txt`
- `submodule_log.txt` contains messages about cloning and checking out the needed submodules.
- `stderr.log`
- `stdout.log`
- `clickhouse-server.log`
- `clone_log.txt`
- `install_log.txt`
- `clickhouse-server.err.log`
- `build_log.txt`
- `cmake_log.txt` contains messages about the C/C++ and Linux flags checks.
#### Status Page Columns {#status-page-columns}
- Test name -- contains the name of the test (without the path; e.g., all types of tests are stripped to the name).
- Test status -- one of Skipped, Success, or Fail.
- Test time, sec. -- empty for this test.
## Build Check {#build-check}
Builds ClickHouse in various configurations for use in further steps. You have to fix the builds that fail. Build logs often contain enough information to fix the error, but you might have to reproduce the failure locally. The `cmake` options can be found in the build log by grepping for `cmake`. Use these options and follow the [general build process](./build.md).
### Report Details {#report-details}
[Status page example](https://clickhouse-builds.s3.yandex.net/12550/67d716b5cc3987801996c31a67b31bf141bc3486/clickhouse_build_check/report.html)
- **Compiler**: `gcc-9` or `clang-10` (or `clang-10-xx` for other architectures, e.g., `clang-10-freebsd`).
- **Build type**: `Debug` or `RelWithDebInfo` (cmake).
- **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan).
- **Bundled**: `bundled` builds use libraries from `contrib`, while `unbundled` builds use system libraries.
- **Splitted**: `splitted` is a [split build](https://clickhouse.com/docs/en/development/build/#split-build).
- **Status**: `success` or `fail`.
- **Build log**: link to the building and file-copying log, useful when the build failed.
- **Build time**.
- **Artifacts**: build result files (`XXX` is the server version, e.g., `20.8.1.4344`).
- `clickhouse-client_XXX_all.deb`
- `clickhouse-common-static-dbg_XXX[+asan, +msan, +ubsan, +tsan]_amd64.deb`
- `clickhouse-common-staticXXX_amd64.deb`
- `clickhouse-server_XXX_all.deb`
- `clickhouse-test_XXX_all.deb`
- `clickhouse_XXX_amd64.buildinfo`
- `clickhouse_XXX_amd64.changes`
- `clickhouse`: Main built binary.
- `clickhouse-odbc-bridge`
- `unit_tests_dbms`: GoogleTest binary with ClickHouse unit tests.
- `shared_build.tgz`: build with shared libraries.
- `performance.tgz`: special package for performance tests.
## Special Build Check {#special-buildcheck}
Performs static analysis and code style checks using clang-tidy. The report is similar to the build check. Fix the errors found in the build log.
## Functional Stateless Tests {#functional-stateless-tests}
Runs [stateless functional tests](./tests.md#functional-tests) for ClickHouse binaries built in various configurations: release, debug, with sanitizers, and so on. Look at the report to see which tests failed, then reproduce the failure locally as described [here](./tests.md#functional-test-locally). Note that you have to use the correct build configuration to reproduce: a test might fail under AddressSanitizer but pass in Debug. Download the binary from the [CI build checks page](./build.md#you-dont-have-to-build-clickhouse), or build it locally.
## Functional Stateful Tests {#functional-stateful-tests}
Runs [stateful functional tests](./tests.md#functional-tests). Treat them in the same way as the stateless functional tests. The difference is that they require the `hits` and `visits` tables from the [Yandex.Metrica dataset](https://clickhouse.com/docs/en/getting-started/example-datasets/metrica/) to run.
## Integration Tests {#integration-tests}
Runs [integration tests](./tests.md#integration-tests).
## Testflows Check {#testflows-check}
Runs some tests using the Testflows test system. See [here](https://github.com/ClickHouse/ClickHouse/tree/master/tests/testflows#running-tests-locally) how to run them locally.
## Stress Test {#stress-test}
Runs stateless functional tests concurrently from several clients to detect concurrency-related errors. If it fails:
```
* Fix all other test failures first;
* Look at the report to find the server logs and check them for possible causes
of error.
```
## Split Build Smoke Test {#split-build-smoke-test}
Checks that the server built in the [split build](./build.md#split-build) configuration can start and run simple queries. If it fails:
```
* Fix other test errors first;
* Build the server in [split build](./build.md#split-build) configuration
locally and check whether it can start and run `select 1`.
```
## Compatibility Check {#compatibility-check}
Checks that the `clickhouse` binary can run on distributions with old libc versions. If it fails, ask a maintainer for help.
## AST Fuzzer {#ast-fuzzer}
Runs randomly generated queries to catch program errors. If it fails, ask a maintainer for help.
## Performance Tests {#performance-tests}
Measures changes in query performance. This is the longest check, taking just below 6 hours to run. The performance test report is described in detail [here](https://github.com/ClickHouse/ClickHouse/tree/master/docker/test/performance-comparison#how-to-read-the-report).
## Quality Assurance {#qa}
What is a Task (private network) item on the status pages?
It's a link to the Yandex internal job system. Yandex employees can see the check's start time and its more verbose status.
Where the tests are run
Somewhere in the Yandex internal infrastructure.

View File

@ -1 +0,0 @@
../../../en/operations/external-authenticators/kerberos.md

View File

@ -0,0 +1,105 @@
# Kerberos Authentication {#external-authenticators-kerberos}
Existing and properly configured ClickHouse users can be authenticated via the Kerberos authentication protocol.
Currently, Kerberos can only be used as an external authenticator for existing users, which are defined in `users.xml` or in local access control paths.
Those users may only use HTTP requests and must be able to authenticate using the GSS-SPNEGO mechanism.
For this approach, Kerberos must be configured in the system and must be enabled in the ClickHouse config.
## Enabling Kerberos in ClickHouse {#enabling-kerberos-in-clickHouse}
To enable Kerberos, one should include the `kerberos` section in `config.xml`. This section may contain additional parameters.
#### Parameters: {#parameters}
- `principal` — Canonical service principal name that will be acquired and used when accepting security contexts.
    - This parameter is optional; if omitted, the default principal will be used.
- `realm` — A realm that will be used to restrict authentication to only those requests whose initiator's realm matches it.
    - This parameter is optional; if omitted, no additional filtering by realm will be applied.
Example (goes into `config.xml`):
```xml
<yandex>
<!-- ... -->
<kerberos />
</yandex>
```
Principal specification:
```xml
<yandex>
<!-- ... -->
<kerberos>
<principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
</kerberos>
</yandex>
```
Filtering by realm:
```xml
<yandex>
<!-- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</yandex>
```
!!! warning "Note"
    You can define only one `kerberos` section. The presence of multiple `kerberos` sections will force ClickHouse to disable Kerberos authentication.
!!! warning "Note"
    The `principal` and `realm` sections cannot be specified at the same time. The presence of both will force ClickHouse to disable Kerberos authentication.
## Kerberos as an External Authenticator for Existing Users {#kerberos-as-an-external-authenticator-for-existing-users}
Kerberos can be used as a method for verifying the identity of locally defined users (users defined in `users.xml` or in local access control paths). Currently, **only** requests over the HTTP interface can be authenticated (via the GSS-SPNEGO mechanism).
The Kerberos principal name format usually follows this pattern:
- *primary/instance@REALM*
The */instance* part may occur zero or more times. **The primary part of the canonical principal name of the initiator is expected to match the authenticated user name for authentication to succeed.**
### Enabling Kerberos in `users.xml` {#enabling-kerberos-in-users-xml}
To enable Kerberos authentication for a user, specify the `kerberos` section instead of the `password` or similar sections in the user definition.
Parameters:
- `realm` — A realm that will be used to restrict authentication to only those requests whose initiator's realm matches it.
    - This parameter is optional; if omitted, no additional filtering by realm will be applied.
Example (goes into `users.xml`):
```xml
<yandex>
<!-- ... -->
<users>
<!-- ... -->
<my_user>
<!-- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</my_user>
</users>
</yandex>
```
!!! warning "Warning"
    Note that Kerberos authentication cannot be used alongside any other authentication mechanism. The presence of any other sections, such as `password`, alongside `kerberos` will force ClickHouse to shutdown.
!!! info "Reminder"
    Note that, now that user `my_user` uses `kerberos`, Kerberos must be enabled in the main `config.xml` file as described previously.
### Enabling Kerberos using SQL {#enabling-kerberos-using-sql}
When [SQL-driven Access Control and Account Management](https://clickhouse.com/docs/en/operations/access-rights/#access-control) is enabled in ClickHouse, users identified by Kerberos can also be created using SQL statements.
```sql
CREATE USER my_user IDENTIFIED WITH kerberos REALM 'EXAMPLE.COM'
```
...or, without filtering by realm:
```sql
CREATE USER my_user IDENTIFIED WITH kerberos
```

View File

@ -5,4 +5,34 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
## system.asynchronous_metric_log {#system-tables-async-log}
Contains historical values of `system.asynchronous_log` (see [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics))
Contains the historical values of `system.asynchronous_metrics`, which are saved once per minute. Enabled by default.
Columns:
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution.
- `name` ([String](../../sql-reference/data-types/string.md)) — Metric name.
- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value.
**Example**
``` sql
SELECT * FROM system.asynchronous_metric_log LIMIT 10
```
``` text
┌─event_date─┬──────────event_time─┬────event_time_microseconds─┬─name─────────────────────────────────────┬─────value─┐
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ CPUFrequencyMHz_0 │ 2120.9 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pmuzzy │ 743 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pdirty │ 26288 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.run_intervals │ 0 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.num_runs │ 0 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.retained │ 60694528 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.mapped │ 303161344 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.resident │ 260931584 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.metadata │ 12079488 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.allocated │ 133756128 │
└────────────┴─────────────────────┴────────────────────────────┴──────────────────────────────────────────┴───────────┘
```
**See Also**
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains metrics that are calculated periodically in the background.
- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains the history of metric values from the `system.metrics` and `system.events` tables, periodically flushed to disk.

View File

@ -488,8 +488,8 @@ try
}
catch (const Exception & e)
{
bool print_stack_trace = config().getBool("stacktrace", false) && e.code() != ErrorCodes::NETWORK_ERROR;
std::cerr << getExceptionMessage(e, print_stack_trace, true) << std::endl << std::endl;
bool need_print_stack_trace = config().getBool("stacktrace", false) && e.code() != ErrorCodes::NETWORK_ERROR;
std::cerr << getExceptionMessage(e, need_print_stack_trace, true) << std::endl << std::endl;
/// If exception code isn't zero, we should return non-zero return code anyway.
return e.code() ? e.code() : -1;
}

View File

@ -5,6 +5,7 @@
#include <Common/DNSResolver.h>
#include <Interpreters/DNSCacheUpdater.h>
#include <Coordination/Defines.h>
#include <Common/Config/ConfigReloader.h>
#include <filesystem>
#include <IO/UseSSL.h>
#include <Core/ServerUUID.h>
@ -331,6 +332,8 @@ int Keeper::main(const std::vector<std::string> & /*args*/)
const Settings & settings = global_context->getSettingsRef();
std::string include_from_path = config().getString("include_from", "/etc/metrika.xml");
GlobalThreadPool::initialize(config().getUInt("max_thread_pool_size", 100));
static ServerErrorHandler error_handler;
@ -402,8 +405,27 @@ int Keeper::main(const std::vector<std::string> & /*args*/)
for (auto & server : *servers)
server.start();
zkutil::EventPtr unused_event = std::make_shared<Poco::Event>();
zkutil::ZooKeeperNodeCache unused_cache([] { return nullptr; });
/// ConfigReloader requires some parameters which are redundant in our case.
auto main_config_reloader = std::make_unique<ConfigReloader>(
config_path,
include_from_path,
config().getString("path", ""),
std::move(unused_cache),
unused_event,
[&](ConfigurationPtr config, bool /* initial_loading */)
{
if (config->has("keeper_server"))
global_context->updateKeeperConfiguration(*config);
},
/* already_loaded = */ false); /// Reload it right now (initial loading)
SCOPE_EXIT({
LOG_INFO(log, "Shutting down.");
/// Stop reloading of the main config. This must be done before `global_context->shutdown()` because
/// otherwise the reloading may pass a changed config to some destroyed parts of ContextSharedPart.
main_config_reloader.reset();
global_context->shutdown();
@ -450,6 +472,7 @@ int Keeper::main(const std::vector<std::string> & /*args*/)
buildLoggers(config(), logger());
main_config_reloader->start();
LOG_INFO(log, "Ready for connections.");

View File

@ -18,7 +18,9 @@
#include <Common/Macros.h>
#include <Common/Config/ConfigProcessor.h>
#include <Common/ThreadStatus.h>
#include <Common/TLDListsHolder.h>
#include <Common/quoteString.h>
#include <Common/randomSeed.h>
#include <loggers/Loggers.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadBufferFromString.h>
@ -35,7 +37,6 @@
#include <Formats/registerFormats.h>
#include <boost/program_options/options_description.hpp>
#include <base/argsToConfig.h>
#include <Common/randomSeed.h>
#include <filesystem>
namespace fs = std::filesystem;
@ -179,7 +180,6 @@ void LocalServer::initialize(Poco::Util::Application & self)
ConfigProcessor config_processor(config_path, false, true);
config_processor.setConfigPath(fs::path(config_path).parent_path());
auto loaded_config = config_processor.loadConfig();
config_processor.savePreprocessedConfig(loaded_config, loaded_config.configuration->getString("path", "."));
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
}
@ -284,6 +284,11 @@ void LocalServer::tryInitPath()
global_context->setFlagsPath(path + "flags");
global_context->setUserFilesPath(""); // user's files are everywhere
/// top_level_domains_lists
const std::string & top_level_domains_path = config().getString("top_level_domains_path", path + "top_level_domains/");
if (!top_level_domains_path.empty())
TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", config());
}
@ -380,7 +385,6 @@ void LocalServer::setupUsers()
const auto users_config_path = config().getString("users_config", config().getString("config-file", "config.xml"));
ConfigProcessor config_processor(users_config_path);
const auto loaded_config = config_processor.loadConfig();
config_processor.savePreprocessedConfig(loaded_config, config().getString("path", DBMS_DEFAULT_PATH));
users_config = loaded_config.configuration;
}
else
@ -459,8 +463,8 @@ catch (const DB::Exception & e)
{
cleanup();
bool print_stack_trace = config().getBool("stacktrace", false);
std::cerr << getExceptionMessage(e, print_stack_trace, true) << std::endl;
bool need_print_stack_trace = config().getBool("stacktrace", false);
std::cerr << getExceptionMessage(e, need_print_stack_trace, true) << std::endl;
return e.code() ? e.code() : -1;
}
catch (...)
@ -673,6 +677,7 @@ void LocalServer::addOptions(OptionsDescription & options_description)
("no-system-tables", "do not attach system tables (better startup time)")
("path", po::value<std::string>(), "Storage path")
("top_level_domains_path", po::value<std::string>(), "Path to lists with custom TLDs")
;
}

View File

@ -863,6 +863,9 @@ if (ThreadFuzzer::instance().isEffective())
if (config->has("max_concurrent_queries"))
global_context->getProcessList().setMaxSize(config->getInt("max_concurrent_queries", 0));
if (config->has("keeper_server"))
global_context->updateKeeperConfiguration(*config);
if (!initial_loading)
{
/// We do not load ZooKeeper configuration on the first config loading
@ -957,9 +960,14 @@ if (ThreadFuzzer::instance().isEffective())
global_context->setMMappedFileCache(mmap_cache_size);
#if USE_EMBEDDED_COMPILER
/// 128 MB
constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128;
size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default);
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size);
constexpr size_t compiled_expression_cache_elements_size_default = 10000;
size_t compiled_expression_cache_elements_size = config().getUInt64("compiled_expression_cache_elements_size", compiled_expression_cache_elements_size_default);
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size);
#endif
/// Set path for format schema files

View File

@ -351,9 +351,12 @@
-->
<mmap_cache_size>1000</mmap_cache_size>
<!-- Cache size for compiled expressions.-->
<!-- Cache size in bytes for compiled expressions.-->
<compiled_expression_cache_size>134217728</compiled_expression_cache_size>
<!-- Cache size in elements for compiled expressions.-->
<compiled_expression_cache_elements_size>10000</compiled_expression_cache_elements_size>
<!-- Path to data directory, with trailing slash. -->
<path>/var/lib/clickhouse/</path>

View File

@ -279,9 +279,12 @@ mark_cache_size: 5368709120
# also it can be dropped manually by the SYSTEM DROP MMAP CACHE query.
mmap_cache_size: 1000
# Cache size for compiled expressions.
# Cache size in bytes for compiled expressions.
compiled_expression_cache_size: 134217728
# Cache size in elements for compiled expressions.
compiled_expression_cache_elements_size: 10000
# Path to data directory, with trailing slash.
path: /var/lib/clickhouse/

View File

@ -549,13 +549,38 @@
document.getElementById('logo-container').style.display = 'block';
}
function formatReadable(number = 0, decimals = 2, units = []) {
const k = 1000;
const i = number ? Math.floor(Math.log(number) / Math.log(k)) : 0;
const unit = units[i];
const dm = unit ? decimals : 0;
return Number(number / Math.pow(k, i)).toFixed(dm) + unit;
}
function formatReadableBytes(bytes) {
const units = [' B', ' KB', ' MB', ' GB', ' TB', ' PB', ' EB', ' ZB', ' YB'];
return formatReadable(bytes, 2, units);
}
function formatReadableRows(rows) {
const units = ['', ' thousand', ' million', ' billion', ' trillion', ' quadrillion'];
return formatReadable(rows, 2, units);
}
function renderResult(response)
{
clear();
let stats = document.getElementById('stats');
stats.innerText = 'Elapsed: ' + response.statistics.elapsed.toFixed(3) + " sec, read " + response.statistics.rows_read + " rows.";
const seconds = response.statistics.elapsed.toFixed(3);
const rows = response.statistics.rows_read;
const bytes = response.statistics.bytes_read;
const formatted_bytes = formatReadableBytes(bytes);
const formatted_rows = formatReadableRows(rows);
stats.innerText = `Elapsed: ${seconds} sec, read ${formatted_rows} rows, ${formatted_bytes}.`;
/// We can also render graphs if user performed EXPLAIN PIPELINE graph=1.
if (response.data.length > 3 && response.data[0][0] === "digraph" && document.getElementById('query').value.match(/^\s*EXPLAIN/i)) {

View File

@ -110,7 +110,7 @@ enum class AccessType
(anyone can kill his own queries) */\
\
M(MOVE_PARTITION_BETWEEN_SHARDS, "", GLOBAL, ALL) /* required to be able to move a part/partition to a table
identified by it's ZooKeeper path */\
identified by its ZooKeeper path */\
\
M(CREATE_USER, "", GLOBAL, ACCESS_MANAGEMENT) \
M(ALTER_USER, "", GLOBAL, ACCESS_MANAGEMENT) \

View File

@ -36,12 +36,13 @@ public:
using Mapped = TMapped;
using MappedPtr = std::shared_ptr<Mapped>;
private:
using Clock = std::chrono::steady_clock;
public:
LRUCache(size_t max_size_)
: max_size(std::max(static_cast<size_t>(1), max_size_)) {}
/** Initialize LRUCache with max_size and max_elements_size.
* max_elements_size == 0 means no elements size restrictions.
*/
LRUCache(size_t max_size_, size_t max_elements_size_ = 0)
: max_size(std::max(static_cast<size_t>(1), max_size_))
, max_elements_size(max_elements_size_)
{}
MappedPtr get(const Key & key)
{
@ -252,6 +253,7 @@ private:
/// Total weight of values.
size_t current_size = 0;
const size_t max_size;
const size_t max_elements_size;
std::atomic<size_t> hits {0};
std::atomic<size_t> misses {0};
@ -311,7 +313,8 @@ private:
{
size_t current_weight_lost = 0;
size_t queue_size = cells.size();
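/// Evict while the total weight exceeds max_size or, when the element limit is enabled,
/// while the number of entries exceeds max_elements_size; always keep at least one entry.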
while ((current_size > max_size) && (queue_size > 1))
while ((current_size > max_size || (max_elements_size != 0 && queue_size > max_elements_size)) && (queue_size > 1))
{
const Key & key = queue.front();

View File

@ -227,7 +227,8 @@
M(CreatedHTTPConnections, "Total amount of created HTTP connections (counter increase every time connection is created).") \
\
M(CannotWriteToWriteBufferDiscard, "Number of stack traces dropped by query profiler or signal handler because pipe is full or cannot write to pipe.") \
M(QueryProfilerSignalOverruns, "Number of times we drop processing of a signal due to overrun plus the number of signals that OS has not delivered due to overrun.") \
M(QueryProfilerSignalOverruns, "Number of times we drop processing of a query profiler signal due to overrun plus the number of signals that OS has not delivered due to overrun.") \
M(QueryProfilerRuns, "Number of times QueryProfiler had been run.") \
\
M(CreatedLogEntryForMerge, "Successfully created log entry to merge parts in ReplicatedMergeTree.") \
M(NotCreatedLogEntryForMerge, "Log entry to merge parts in ReplicatedMergeTree is not created due to concurrent log update by another replica.") \

View File

@ -14,17 +14,19 @@
namespace
{
constexpr UInt64 ZERO = 0;
constexpr UInt64 ALL_THREADS = 0;
UInt64 calculateNewCoresNumber(DB::ThreadIdToTimeMap const & prev, DB::ThreadIdToTimeMap const& next)
{
if (next.find(ZERO) == next.end())
return ZERO;
auto accumulated = std::accumulate(next.cbegin(), next.cend(), ZERO,
[&prev](UInt64 acc, auto const & elem)
if (next.find(ALL_THREADS) == next.end())
return 0;
auto accumulated = std::accumulate(next.cbegin(), next.cend(), 0,
[&prev](UInt64 acc, const auto & elem)
{
if (elem.first == ZERO)
if (elem.first == ALL_THREADS)
return acc;
auto thread_time = elem.second.time();
auto it = prev.find(elem.first);
if (it != prev.end())
@ -32,9 +34,9 @@ namespace
return acc + thread_time;
});
auto elapsed = next.at(ZERO).time() - (prev.contains(ZERO) ? prev.at(ZERO).time() : ZERO);
if (elapsed == ZERO)
return ZERO;
auto elapsed = next.at(ALL_THREADS).time() - (prev.contains(ALL_THREADS) ? prev.at(ALL_THREADS).time() : 0);
if (elapsed == 0)
return 0;
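/// Ceiling division: the approximate number of cores busy during the elapsed interval.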
return (accumulated + elapsed - 1) / elapsed;
}
}
@ -109,7 +111,7 @@ size_t ProgressIndication::getUsedThreadsCount() const
UInt64 ProgressIndication::getApproximateCoresNumber() const
{
return std::accumulate(host_active_cores.cbegin(), host_active_cores.cend(), ZERO,
return std::accumulate(host_active_cores.cbegin(), host_active_cores.cend(), 0,
[](UInt64 acc, auto const & elem)
{
return acc + elem.second;
@ -121,11 +123,12 @@ ProgressIndication::MemoryUsage ProgressIndication::getMemoryUsage() const
return std::accumulate(thread_data.cbegin(), thread_data.cend(), MemoryUsage{},
[](MemoryUsage const & acc, auto const & host_data)
{
auto host_usage = std::accumulate(host_data.second.cbegin(), host_data.second.cend(), ZERO,
[](UInt64 memory, auto const & data)
{
return memory + data.second.memory_usage;
});
UInt64 host_usage = 0;
// In ProfileEvents packets thread id 0 specifies common profiling information
// for all threads executing current query on specific host. So instead of summing per thread
// memory consumption it's enough to look for data with thread id 0.
if (auto it = host_data.second.find(ALL_THREADS); it != host_data.second.end())
host_usage = it->second.memory_usage;
return MemoryUsage{.total = acc.total + host_usage, .max = std::max(acc.max, host_usage)};
});
}

View File

@ -15,6 +15,7 @@
namespace ProfileEvents
{
extern const Event QueryProfilerSignalOverruns;
extern const Event QueryProfilerRuns;
}
namespace DB
@ -60,6 +61,7 @@ namespace
const StackTrace stack_trace(signal_context);
TraceCollector::collect(trace_type, stack_trace, 0);
ProfileEvents::increment(ProfileEvents::QueryProfilerRuns);
errno = saved_errno;
}

View File

@ -116,9 +116,9 @@ public:
/// lower and uppercase variants of the first octet of the first character in `needle`
size_t length_l = UTF8::convertCodePointToUTF8(first_l_u32, l_seq, sizeof(l_seq));
size_t length_r = UTF8::convertCodePointToUTF8(first_u_u32, u_seq, sizeof(u_seq));
size_t length_u = UTF8::convertCodePointToUTF8(first_u_u32, u_seq, sizeof(u_seq));
if (length_l != length_r)
if (length_l != length_u)
throw Exception{"UTF8 sequences with different lowercase and uppercase lengths are not supported", ErrorCodes::UNSUPPORTED_PARAMETER};
}
@ -183,6 +183,31 @@ public:
#endif
}
template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
ALWAYS_INLINE bool compareTrivial(const CharT * haystack_pos, const CharT * const haystack_end, const uint8_t * needle_pos) const
{
while (haystack_pos < haystack_end && needle_pos < needle_end)
{
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
/// Invalid UTF-8, should not compare as equal
if (!haystack_code_point || !needle_code_point)
break;
/// Not equals case insensitive.
if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
break;
/// @note assuming sequences for lowercase and uppercase have exact same length (that is not always true)
const auto len = UTF8::seqLength(*haystack_pos);
haystack_pos += len;
needle_pos += len;
}
return needle_pos == needle_end;
}
template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
ALWAYS_INLINE bool compare(const CharT * /*haystack*/, const CharT * haystack_end, const CharT * pos) const
{
@ -200,34 +225,15 @@ public:
{
if (mask == cachemask)
{
pos += cache_valid_len;
auto needle_pos = needle + cache_valid_len;
while (needle_pos < needle_end)
{
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(pos, haystack_end - pos);
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
/// Invalid UTF-8, should not compare equals
if (!haystack_code_point || !needle_code_point)
break;
/// Not equals case insensitive.
if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
break;
/// @note assuming sequences for lowercase and uppercase have exact same length (that is not always true)
const auto len = UTF8::seqLength(*pos);
pos += len;
needle_pos += len;
}
if (needle_pos == needle_end)
if (compareTrivial(pos, haystack_end, needle))
return true;
}
}
else if ((mask & cachemask) == cachemask)
return true;
{
if (compareTrivial(pos, haystack_end, needle))
return true;
}
return false;
}
@ -238,25 +244,7 @@ public:
pos += first_needle_symbol_is_ascii;
auto needle_pos = needle + first_needle_symbol_is_ascii;
while (needle_pos < needle_end)
{
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(pos, haystack_end - pos);
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
/// Invalid UTF-8, should not compare equals
if (!haystack_code_point || !needle_code_point)
break;
/// Not equals case insensitive.
if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
break;
const auto len = UTF8::seqLength(*pos);
pos += len;
needle_pos += len;
}
if (needle_pos == needle_end)
if (compareTrivial(pos, haystack_end, needle_pos))
return true;
}
@ -299,40 +287,21 @@ public:
const auto v_against_l_offset = _mm_cmpeq_epi8(v_haystack_offset, cachel);
const auto v_against_u_offset = _mm_cmpeq_epi8(v_haystack_offset, cacheu);
const auto v_against_l_or_u_offset = _mm_or_si128(v_against_l_offset, v_against_u_offset);
const auto mask_offset = _mm_movemask_epi8(v_against_l_or_u_offset);
const auto mask_offset_both = _mm_movemask_epi8(v_against_l_or_u_offset);
if (0xffff == cachemask)
{
if (mask_offset == cachemask)
if (mask_offset_both == cachemask)
{
auto haystack_pos = haystack + cache_valid_len;
auto needle_pos = needle + cache_valid_len;
while (haystack_pos < haystack_end && needle_pos < needle_end)
{
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
/// Invalid UTF-8, should not compare equals
if (!haystack_code_point || !needle_code_point)
break;
/// Not equals case insensitive.
if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
break;
/// @note assuming sequences for lowercase and uppercase have exact same length (that is not always true)
const auto len = UTF8::seqLength(*haystack_pos);
haystack_pos += len;
needle_pos += len;
}
if (needle_pos == needle_end)
if (compareTrivial(haystack, haystack_end, needle))
return haystack;
}
}
else if ((mask_offset & cachemask) == cachemask)
return haystack;
else if ((mask_offset_both & cachemask) == cachemask)
{
if (compareTrivial(haystack, haystack_end, needle))
return haystack;
}
/// first octet was ok, but not the first 16, move to start of next sequence and reapply
haystack += UTF8::seqLength(*haystack);
@ -349,25 +318,7 @@ public:
auto haystack_pos = haystack + first_needle_symbol_is_ascii;
auto needle_pos = needle + first_needle_symbol_is_ascii;
while (haystack_pos < haystack_end && needle_pos < needle_end)
{
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
/// Invalid UTF-8, should not compare equals
if (!haystack_code_point || !needle_code_point)
break;
/// Not equals case insensitive.
if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
break;
const auto len = UTF8::seqLength(*haystack_pos);
haystack_pos += len;
needle_pos += len;
}
if (needle_pos == needle_end)
if (compareTrivial(haystack_pos, haystack_end, needle_pos))
return haystack;
}

View File

@ -730,7 +730,10 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition &
}
};
while (!condition || !condition())
/// do-while control structure to allow using this function in non-blocking
/// fashion with a wait condition which returns false by the time this
/// method is called.
do
{
/// Use getData instead of exists to avoid watch leak.
impl->get(path, callback, watch);
@ -746,7 +749,8 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition &
if (state->event_type == Coordination::DELETED)
return true;
}
} while (!condition || !condition());
return false;
}

View File

@ -631,6 +631,14 @@ LogEntryPtr Changelog::entryAt(uint64_t index)
return src;
}
LogEntryPtr Changelog::getLatestConfigChange() const
{
for (const auto & [_, entry] : logs)
if (entry->get_val_type() == nuraft::conf)
return entry;
return nullptr;
}
nuraft::ptr<nuraft::buffer> Changelog::serializeEntriesToBuffer(uint64_t index, int32_t count)
{
std::vector<nuraft::ptr<nuraft::buffer>> returned_logs;

View File

@ -102,6 +102,9 @@ public:
/// Last entry in log, or fake entry with term 0 if log is empty
LogEntryPtr getLastEntry() const;
/// Get entry with latest config in logstore
LogEntryPtr getLatestConfigChange() const;
/// Return log entries between [start, end)
LogEntriesPtr getLogEntriesBetween(uint64_t start_index, uint64_t end_index);

View File

@ -38,7 +38,8 @@ struct Settings;
M(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consensus with similar speed", 0) \
M(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0) \
M(Bool, compress_logs, true, "Write compressed coordination logs in ZSTD format", 0) \
M(Bool, compress_snapshots_with_zstd_format, true, "Write compressed snapshots in ZSTD format (instead of custom LZ4)", 0)
M(Bool, compress_snapshots_with_zstd_format, true, "Write compressed snapshots in ZSTD format (instead of custom LZ4)", 0) \
M(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0)
DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)

View File

@ -280,7 +280,6 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf
{
LOG_INFO(log, "Starting Keeper asynchronously, server will accept connections to Keeper when it will be ready");
}
}
catch (...)
{
@ -290,6 +289,8 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf
/// Start it after keeper server start
session_cleaner_thread = ThreadFromGlobalPool([this] { sessionCleanerTask(); });
update_configuration_thread = ThreadFromGlobalPool([this] { updateConfigurationThread(); });
updateConfiguration(config);
LOG_DEBUG(log, "Dispatcher initialized");
}
@ -325,6 +326,10 @@ void KeeperDispatcher::shutdown()
snapshots_queue.finish();
if (snapshot_thread.joinable())
snapshot_thread.join();
update_configuration_queue.finish();
if (update_configuration_thread.joinable())
update_configuration_thread.join();
}
if (server)
@ -505,4 +510,71 @@ int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms)
return future.get();
}
void KeeperDispatcher::updateConfigurationThread()
{
while (true)
{
if (shutdown_called)
return;
try
{
if (!server->checkInit())
{
LOG_INFO(log, "Server still not initialized, will not apply configuration until initialization finished");
std::this_thread::sleep_for(std::chrono::milliseconds(5000));
continue;
}
ConfigUpdateAction action;
if (!update_configuration_queue.pop(action))
break;
/// We must wait for this update from the leader or apply it ourselves (if we are the leader)
bool done = false;
while (!done)
{
if (shutdown_called)
return;
if (isLeader())
{
server->applyConfigurationUpdate(action);
done = true;
}
else
{
done = server->waitConfigurationUpdate(action);
if (!done)
LOG_INFO(log, "Cannot wait for configuration update, maybe we become leader, or maybe update is invalid, will try to wait one more time");
}
}
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
}
void KeeperDispatcher::updateConfiguration(const Poco::Util::AbstractConfiguration & config)
{
auto diff = server->getConfigurationDiff(config);
if (diff.empty())
LOG_TRACE(log, "Configuration update triggered, but nothing changed for RAFT");
else if (diff.size() > 1)
LOG_WARNING(log, "Configuration changed for more than one server ({}) from cluster, it's strictly not recommended", diff.size());
else
LOG_DEBUG(log, "Configuration change size ({})", diff.size());
for (auto & change : diff)
{
bool push_result = update_configuration_queue.push(change);
if (!push_result)
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push configuration update to queue");
}
}
}

View File

@ -33,12 +33,16 @@ private:
CoordinationSettingsPtr coordination_settings;
using RequestsQueue = ConcurrentBoundedQueue<KeeperStorage::RequestForSession>;
using SessionToResponseCallback = std::unordered_map<int64_t, ZooKeeperResponseCallback>;
using UpdateConfigurationQueue = ConcurrentBoundedQueue<ConfigUpdateAction>;
/// Size depends on coordination settings
std::unique_ptr<RequestsQueue> requests_queue;
ResponsesQueue responses_queue;
SnapshotsQueue snapshots_queue{1};
/// More than 1k updates is definitely misconfiguration.
UpdateConfigurationQueue update_configuration_queue{1000};
std::atomic<bool> shutdown_called{false};
std::mutex session_to_response_callback_mutex;
@ -62,6 +66,8 @@ private:
ThreadFromGlobalPool session_cleaner_thread;
/// Dumping new snapshots to disk
ThreadFromGlobalPool snapshot_thread;
/// Apply or wait for configuration changes
ThreadFromGlobalPool update_configuration_thread;
/// RAFT wrapper.
std::unique_ptr<KeeperServer> server;
@ -80,6 +86,8 @@ private:
void sessionCleanerTask();
/// Thread that creates snapshots in the background
void snapshotThread();
/// Thread that applies or waits for configuration changes from the leader
void updateConfigurationThread();
void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response);
@ -107,6 +115,10 @@ public:
return server && server->checkInit();
}
/// Registered in the ConfigReloader callback. Adds new configuration changes to
/// update_configuration_queue. KeeperDispatcher applies them asynchronously.
void updateConfiguration(const Poco::Util::AbstractConfiguration & config);
/// Shutdown internal keeper parts (server, state machine, log storage, etc)
void shutdown();

View File

@ -112,4 +112,10 @@ void KeeperLogStore::end_of_append_batch(uint64_t /*start_index*/, uint64_t /*co
changelog.flush();
}
nuraft::ptr<nuraft::log_entry> KeeperLogStore::getLatestConfigChange() const
{
std::lock_guard lock(changelog_lock);
return changelog.getLatestConfigChange();
}
}

View File

@ -58,6 +58,9 @@ public:
/// Flush batch of appended entries
void end_of_append_batch(uint64_t start_index, uint64_t count) override;
/// Get entry with latest config in logstore
nuraft::ptr<nuraft::log_entry> getLatestConfigChange() const;
private:
mutable std::mutex changelog_lock;
Poco::Logger * log;

View File

@ -121,23 +121,42 @@ void KeeperServer::startup()
state_manager->loadLogStore(state_machine->last_commit_index() + 1, coordination_settings->reserved_log_items);
bool single_server = state_manager->getTotalServers() == 1;
auto latest_snapshot_config = state_machine->getClusterConfig();
auto latest_log_store_config = state_manager->getLatestConfigFromLogStore();
nuraft::raft_params params;
if (single_server)
if (latest_snapshot_config && latest_log_store_config)
{
/// Don't make sense in single server mode
params.heart_beat_interval_ = 0;
params.election_timeout_lower_bound_ = 0;
params.election_timeout_upper_bound_ = 0;
if (latest_snapshot_config->get_log_idx() > latest_log_store_config->get_log_idx())
{
LOG_INFO(log, "Will use config from snapshot with log index {}", latest_snapshot_config->get_log_idx());
state_manager->save_config(*latest_snapshot_config);
}
else
{
LOG_INFO(log, "Will use config from log store with log index {}", latest_snapshot_config->get_log_idx());
state_manager->save_config(*latest_log_store_config);
}
}
else if (latest_snapshot_config)
{
LOG_INFO(log, "No config in log store, will use config from snapshot with log index {}", latest_snapshot_config->get_log_idx());
state_manager->save_config(*latest_snapshot_config);
}
else if (latest_log_store_config)
{
LOG_INFO(log, "No config in snapshot, will use config from log store with log index {}", latest_log_store_config->get_log_idx());
state_manager->save_config(*latest_log_store_config);
}
else
{
params.heart_beat_interval_ = coordination_settings->heart_beat_interval_ms.totalMilliseconds();
params.election_timeout_lower_bound_ = coordination_settings->election_timeout_lower_bound_ms.totalMilliseconds();
params.election_timeout_upper_bound_ = coordination_settings->election_timeout_upper_bound_ms.totalMilliseconds();
LOG_INFO(log, "No config in log store and snapshot, probably it's initial run. Will use config from .xml on disk");
}
nuraft::raft_params params;
params.heart_beat_interval_ = coordination_settings->heart_beat_interval_ms.totalMilliseconds();
params.election_timeout_lower_bound_ = coordination_settings->election_timeout_lower_bound_ms.totalMilliseconds();
params.election_timeout_upper_bound_ = coordination_settings->election_timeout_upper_bound_ms.totalMilliseconds();
params.reserved_log_items_ = coordination_settings->reserved_log_items;
params.snapshot_distance_ = coordination_settings->snapshot_distance;
params.stale_log_gap_ = coordination_settings->stale_log_gap;
@ -364,4 +383,139 @@ std::vector<int64_t> KeeperServer::getDeadSessions()
return state_machine->getDeadSessions();
}
ConfigUpdateActions KeeperServer::getConfigurationDiff(const Poco::Util::AbstractConfiguration & config)
{
return state_manager->getConfigurationDiff(config);
}
void KeeperServer::applyConfigurationUpdate(const ConfigUpdateAction & task)
{
size_t sleep_ms = 500;
if (task.action_type == ConfigUpdateActionType::AddServer)
{
LOG_INFO(log, "Will try to add server with id {}", task.server->get_id());
bool added = false;
for (size_t i = 0; i < coordination_settings->configuration_change_tries_count; ++i)
{
if (raft_instance->get_srv_config(task.server->get_id()) != nullptr)
{
LOG_INFO(log, "Server with id {} was successfully added", task.server->get_id());
added = true;
break;
}
if (!isLeader())
{
LOG_INFO(log, "We are not leader anymore, will not try to add server {}", task.server->get_id());
break;
}
auto result = raft_instance->add_srv(*task.server);
if (!result->get_accepted())
LOG_INFO(log, "Command to add server {} was not accepted for the {} time, will sleep for {} ms and retry", task.server->get_id(), i + 1, sleep_ms * (i + 1));
std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms * (i + 1)));
}
if (!added)
throw Exception(ErrorCodes::RAFT_ERROR, "Configuration change to add server (id {}) was not accepted by RAFT after all {} retries", task.server->get_id(), coordination_settings->configuration_change_tries_count);
}
else if (task.action_type == ConfigUpdateActionType::RemoveServer)
{
LOG_INFO(log, "Will try to remove server with id {}", task.server->get_id());
bool removed = false;
if (task.server->get_id() == state_manager->server_id())
{
LOG_INFO(log, "Trying to remove leader node (ourself), so will yield leadership and some other node (new leader) will try remove us. "
"Probably you will have to run SYSTEM RELOAD CONFIG on the new leader node");
raft_instance->yield_leadership();
return;
}
for (size_t i = 0; i < coordination_settings->configuration_change_tries_count; ++i)
{
if (raft_instance->get_srv_config(task.server->get_id()) == nullptr)
{
LOG_INFO(log, "Server with id {} was successfully removed", task.server->get_id());
removed = true;
break;
}
if (!isLeader())
{
LOG_INFO(log, "We are not leader anymore, will not try to remove server {}", task.server->get_id());
break;
}
auto result = raft_instance->remove_srv(task.server->get_id());
if (!result->get_accepted())
LOG_INFO(log, "Command to remove server {} was not accepted for the {} time, will sleep for {} ms and retry", task.server->get_id(), i + 1, sleep_ms * (i + 1));
std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms * (i + 1)));
}
if (!removed)
throw Exception(ErrorCodes::RAFT_ERROR, "Configuration change to remove server (id {}) was not accepted by RAFT after all {} retries", task.server->get_id(), coordination_settings->configuration_change_tries_count);
}
else if (task.action_type == ConfigUpdateActionType::UpdatePriority)
raft_instance->set_priority(task.server->get_id(), task.server->get_priority());
else
LOG_WARNING(log, "Unknown configuration update type {}", static_cast<uint64_t>(task.action_type));
}
bool KeeperServer::waitConfigurationUpdate(const ConfigUpdateAction & task)
{
size_t sleep_ms = 500;
if (task.action_type == ConfigUpdateActionType::AddServer)
{
LOG_INFO(log, "Will try to wait server with id {} to be added", task.server->get_id());
for (size_t i = 0; i < coordination_settings->configuration_change_tries_count; ++i)
{
if (raft_instance->get_srv_config(task.server->get_id()) != nullptr)
{
LOG_INFO(log, "Server with id {} was successfully added by leader", task.server->get_id());
return true;
}
if (isLeader())
{
LOG_INFO(log, "We are leader now, probably we will have to add server {}", task.server->get_id());
return false;
}
std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms * (i + 1)));
}
return false;
}
else if (task.action_type == ConfigUpdateActionType::RemoveServer)
{
LOG_INFO(log, "Will try to wait remove of server with id {}", task.server->get_id());
for (size_t i = 0; i < coordination_settings->configuration_change_tries_count; ++i)
{
if (raft_instance->get_srv_config(task.server->get_id()) == nullptr)
{
LOG_INFO(log, "Server with id {} was successfully removed by leader", task.server->get_id());
return true;
}
if (isLeader())
{
LOG_INFO(log, "We are leader now, probably we will have to remove server {}", task.server->get_id());
return false;
}
std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms * (i + 1)));
}
return false;
}
else if (task.action_type == ConfigUpdateActionType::UpdatePriority)
return true;
else
LOG_WARNING(log, "Unknown configuration update type {}", static_cast<uint64_t>(task.action_type));
return true;
}
}

View File

@ -89,6 +89,18 @@ public:
void shutdown();
int getServerID() const { return server_id; }
/// Get configuration diff between current configuration in RAFT and in XML file
ConfigUpdateActions getConfigurationDiff(const Poco::Util::AbstractConfiguration & config);
/// Apply action for configuration update. Actually call raft_instance->remove_srv or raft_instance->add_srv.
/// Synchronously check for update results with retries.
void applyConfigurationUpdate(const ConfigUpdateAction & task);
/// Wait configuration update for action. Used by followers.
/// Return true if update was successfully received.
bool waitConfigurationUpdate(const ConfigUpdateAction & task);
};
}

View File

@ -205,9 +205,17 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr
writeBinary(id, out);
}
}
/// Serialize cluster config
if (snapshot.cluster_config)
{
auto buffer = snapshot.cluster_config->serialize();
writeVarUInt(buffer->size(), out);
out.write(reinterpret_cast<const char *>(buffer->data_begin()), buffer->size());
}
}
SnapshotMetadataPtr KeeperStorageSnapshot::deserialize(KeeperStorage & storage, ReadBuffer & in)
void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserialization_result, ReadBuffer & in)
{
uint8_t version;
readBinary(version, in);
@ -215,11 +223,13 @@ SnapshotMetadataPtr KeeperStorageSnapshot::deserialize(KeeperStorage & storage,
if (current_version > CURRENT_SNAPSHOT_VERSION)
throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported snapshot version {}", version);
SnapshotMetadataPtr result = deserializeSnapshotMetadata(in);
deserialization_result.snapshot_meta = deserializeSnapshotMetadata(in);
KeeperStorage & storage = *deserialization_result.storage;
int64_t session_id;
readBinary(session_id, in);
storage.zxid = result->get_last_log_idx();
storage.zxid = deserialization_result.snapshot_meta->get_last_log_idx();
storage.session_id_counter = session_id;
/// Before V1 we serialized ACL without acl_map
@ -309,13 +319,24 @@ SnapshotMetadataPtr KeeperStorageSnapshot::deserialize(KeeperStorage & storage,
current_session_size++;
}
return result;
/// Optional cluster config
ClusterConfigPtr cluster_config = nullptr;
if (!in.eof())
{
size_t data_size;
readVarUInt(data_size, in);
auto buffer = nuraft::buffer::alloc(data_size);
in.readStrict(reinterpret_cast<char *>(buffer->data_begin()), data_size);
buffer->pos(0);
deserialization_result.cluster_config = ClusterConfig::deserialize(*buffer);
}
}
KeeperStorageSnapshot::KeeperStorageSnapshot(KeeperStorage * storage_, uint64_t up_to_log_idx_)
KeeperStorageSnapshot::KeeperStorageSnapshot(KeeperStorage * storage_, uint64_t up_to_log_idx_, const ClusterConfigPtr & cluster_config_)
: storage(storage_)
, snapshot_meta(std::make_shared<SnapshotMetadata>(up_to_log_idx_, 0, std::make_shared<nuraft::cluster_config>()))
, session_id(storage->session_id_counter)
, cluster_config(cluster_config_)
{
storage->enableSnapshotMode();
snapshot_container_size = storage->container.snapshotSize();
@ -325,10 +346,11 @@ KeeperStorageSnapshot::KeeperStorageSnapshot(KeeperStorage * storage_, uint64_t
session_and_auth = storage->session_and_auth;
}
KeeperStorageSnapshot::KeeperStorageSnapshot(KeeperStorage * storage_, const SnapshotMetadataPtr & snapshot_meta_)
KeeperStorageSnapshot::KeeperStorageSnapshot(KeeperStorage * storage_, const SnapshotMetadataPtr & snapshot_meta_, const ClusterConfigPtr & cluster_config_)
: storage(storage_)
, snapshot_meta(snapshot_meta_)
, session_id(storage->session_id_counter)
, cluster_config(cluster_config_)
{
storage->enableSnapshotMode();
snapshot_container_size = storage->container.snapshotSize();
@ -461,7 +483,7 @@ bool KeeperSnapshotManager::isZstdCompressed(nuraft::ptr<nuraft::buffer> buffer)
return magic_from_buffer == ZSTD_COMPRESSED_MAGIC;
}
SnapshotMetaAndStorage KeeperSnapshotManager::deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const
SnapshotDeserializationResult KeeperSnapshotManager::deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const
{
bool is_zstd_compressed = isZstdCompressed(buffer);
@ -473,12 +495,13 @@ SnapshotMetaAndStorage KeeperSnapshotManager::deserializeSnapshotFromBuffer(nura
else
compressed_reader = std::make_unique<CompressedReadBuffer>(*reader);
auto storage = std::make_unique<KeeperStorage>(storage_tick_time, superdigest);
auto snapshot_metadata = KeeperStorageSnapshot::deserialize(*storage, *compressed_reader);
return std::make_pair(snapshot_metadata, std::move(storage));
SnapshotDeserializationResult result;
result.storage = std::make_unique<KeeperStorage>(storage_tick_time, superdigest);
KeeperStorageSnapshot::deserialize(result, *compressed_reader);
return result;
}
SnapshotMetaAndStorage KeeperSnapshotManager::restoreFromLatestSnapshot()
SnapshotDeserializationResult KeeperSnapshotManager::restoreFromLatestSnapshot()
{
if (existing_snapshots.empty())
return {};
@ -502,7 +525,6 @@ void KeeperSnapshotManager::removeSnapshot(uint64_t log_idx)
throw Exception(ErrorCodes::UNKNOWN_SNAPSHOT, "Unknown snapshot with log index {}", log_idx);
std::filesystem::remove(itr->second);
existing_snapshots.erase(itr);
}
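After this change a snapshot file may end with an optional trailer: a varint length followed by that many bytes of the serialized nuraft cluster config, i.e. [writeVarUInt(size)][cluster_config->serialize() bytes]. Because deserialize() only reads the trailer when !in.eof(), snapshots produced by older versions (which simply end after the session data) still load, with deserialization_result.cluster_config left as nullptr.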

View File

@ -9,6 +9,8 @@ namespace DB
using SnapshotMetadata = nuraft::snapshot;
using SnapshotMetadataPtr = std::shared_ptr<SnapshotMetadata>;
using ClusterConfig = nuraft::cluster_config;
using ClusterConfigPtr = nuraft::ptr<ClusterConfig>;
enum SnapshotVersion : uint8_t
{
@ -20,6 +22,17 @@ enum SnapshotVersion : uint8_t
static constexpr auto CURRENT_SNAPSHOT_VERSION = SnapshotVersion::V3;
/// What is stored in a binary snapshot
struct SnapshotDeserializationResult
{
/// Storage
KeeperStoragePtr storage;
/// Snapshot metadata (up_to_log_idx and so on)
SnapshotMetadataPtr snapshot_meta;
/// Cluster config
ClusterConfigPtr cluster_config;
};
/// In-memory Keeper snapshot. KeeperStorage is based on a hash map which can be
/// turned into snapshot mode. This operation is fast, and the KeeperStorageSnapshot
/// class does it in its constructor. It also copies iterators from the storage hash table
@ -31,14 +44,15 @@ static constexpr auto CURRENT_SNAPSHOT_VERSION = SnapshotVersion::V3;
struct KeeperStorageSnapshot
{
public:
KeeperStorageSnapshot(KeeperStorage * storage_, uint64_t up_to_log_idx_);
KeeperStorageSnapshot(KeeperStorage * storage_, uint64_t up_to_log_idx_, const ClusterConfigPtr & cluster_config_ = nullptr);
KeeperStorageSnapshot(KeeperStorage * storage_, const SnapshotMetadataPtr & snapshot_meta_, const ClusterConfigPtr & cluster_config_ = nullptr);
KeeperStorageSnapshot(KeeperStorage * storage_, const SnapshotMetadataPtr & snapshot_meta_);
~KeeperStorageSnapshot();
static void serialize(const KeeperStorageSnapshot & snapshot, WriteBuffer & out);
static SnapshotMetadataPtr deserialize(KeeperStorage & storage, ReadBuffer & in);
static void deserialize(SnapshotDeserializationResult & deserialization_result, ReadBuffer & in);
KeeperStorage * storage;
@ -58,6 +72,8 @@ public:
KeeperStorage::SessionAndAuth session_and_auth;
/// ACLs cache for better performance. Without it we cannot deserialize storage.
std::unordered_map<uint64_t, Coordination::ACLs> acl_map;
/// Cluster config from snapshot, can be empty
ClusterConfigPtr cluster_config;
};
using KeeperStorageSnapshotPtr = std::shared_ptr<KeeperStorageSnapshot>;
@ -76,7 +92,7 @@ public:
bool compress_snapshots_zstd_ = true, const std::string & superdigest_ = "", size_t storage_tick_time_ = 500);
/// Restore storage from latest available snapshot
SnapshotMetaAndStorage restoreFromLatestSnapshot();
SnapshotDeserializationResult restoreFromLatestSnapshot();
/// Compress snapshot and serialize it to buffer
nuraft::ptr<nuraft::buffer> serializeSnapshotToBuffer(const KeeperStorageSnapshot & snapshot) const;
@ -84,7 +100,7 @@ public:
/// Serialize already compressed snapshot to disk (return path)
std::string serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx);
SnapshotMetaAndStorage deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const;
SnapshotDeserializationResult deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const;
/// Deserialize snapshot with log index up_to_log_idx from disk into compressed nuraft buffer.
nuraft::ptr<nuraft::buffer> deserializeSnapshotBufferFromDisk(uint64_t up_to_log_idx) const;

View File

@ -74,7 +74,10 @@ void KeeperStateMachine::init()
try
{
latest_snapshot_buf = snapshot_manager.deserializeSnapshotBufferFromDisk(latest_log_index);
std::tie(latest_snapshot_meta, storage) = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_buf);
auto snapshot_deserialization_result = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_buf);
storage = std::move(snapshot_deserialization_result.storage);
latest_snapshot_meta = snapshot_deserialization_result.snapshot_meta;
cluster_config = snapshot_deserialization_result.cluster_config;
last_committed_idx = latest_snapshot_meta->get_last_log_idx();
loaded = true;
break;
@ -152,13 +155,24 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
{ /// deserialize and apply snapshot to storage
std::lock_guard lock(storage_and_responses_lock);
std::tie(latest_snapshot_meta, storage) = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_ptr);
auto snapshot_deserialization_result = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_buf);
storage = std::move(snapshot_deserialization_result.storage);
latest_snapshot_meta = snapshot_deserialization_result.snapshot_meta;
cluster_config = snapshot_deserialization_result.cluster_config;
}
last_committed_idx = s.get_last_log_idx();
return true;
}
void KeeperStateMachine::commit_config(const uint64_t /*log_idx*/, nuraft::ptr<nuraft::cluster_config> & new_conf)
{
std::lock_guard lock(cluster_config_lock);
auto tmp = new_conf->serialize();
cluster_config = ClusterConfig::deserialize(*tmp);
}
nuraft::ptr<nuraft::snapshot> KeeperStateMachine::last_snapshot()
{
/// Just return the latest snapshot.
@ -177,7 +191,7 @@ void KeeperStateMachine::create_snapshot(
CreateSnapshotTask snapshot_task;
{ /// lock storage for a short period of time to turn on "snapshot mode". After that we can read a consistent storage state without locking.
std::lock_guard lock(storage_and_responses_lock);
snapshot_task.snapshot = std::make_shared<KeeperStorageSnapshot>(storage.get(), snapshot_meta_copy);
snapshot_task.snapshot = std::make_shared<KeeperStorageSnapshot>(storage.get(), snapshot_meta_copy, getClusterConfig());
}
/// create snapshot task for background execution (in snapshot thread)
@ -239,7 +253,7 @@ void KeeperStateMachine::save_logical_snp_obj(
if (obj_id == 0) /// Fake snapshot required by NuRaft at startup
{
std::lock_guard lock(storage_and_responses_lock);
KeeperStorageSnapshot snapshot(storage.get(), s.get_last_log_idx());
KeeperStorageSnapshot snapshot(storage.get(), s.get_last_log_idx(), getClusterConfig());
cloned_buffer = snapshot_manager.serializeSnapshotToBuffer(snapshot);
}
else
@ -324,4 +338,16 @@ void KeeperStateMachine::shutdownStorage()
storage->finalize();
}
ClusterConfigPtr KeeperStateMachine::getClusterConfig() const
{
std::lock_guard lock(cluster_config_lock);
if (cluster_config)
{
/// Dumb way to return a copy: round-trip through serialization...
auto tmp = cluster_config->serialize();
return ClusterConfig::deserialize(*tmp);
}
return nullptr;
}
}

View File

@ -32,6 +32,9 @@ public:
nuraft::ptr<nuraft::buffer> commit(const uint64_t log_idx, nuraft::buffer & data) override;
/// Save new cluster config to our snapshot (copy of the config stored in StateManager)
void commit_config(const uint64_t log_idx, nuraft::ptr<nuraft::cluster_config> & new_conf) override;
/// Currently not supported
void rollback(const uint64_t /*log_idx*/, nuraft::buffer & /*data*/) override {}
@ -76,6 +79,8 @@ public:
void shutdownStorage();
ClusterConfigPtr getClusterConfig() const;
private:
/// In our state machine we always have a single snapshot which is stored
@ -109,8 +114,15 @@ private:
/// Last committed Raft log number.
std::atomic<uint64_t> last_committed_idx;
Poco::Logger * log;
/// Cluster config for our quorum.
/// It's a copy of the config stored in StateManager, but here
/// we also write it to disk during snapshots. Must be accessed under cluster_config_lock.
mutable std::mutex cluster_config_lock;
ClusterConfigPtr cluster_config;
/// Special part of ACL system -- superdigest specified in server config.
const std::string superdigest;
};

View File

@ -31,42 +31,22 @@ namespace
}
KeeperStateManager::KeeperStateManager(int server_id_, const std::string & host, int port, const std::string & logs_path)
: my_server_id(server_id_)
, my_port(port)
, secure(false)
, log_store(nuraft::cs_new<KeeperLogStore>(logs_path, 5000, false, false))
, cluster_config(nuraft::cs_new<nuraft::cluster_config>())
{
auto peer_config = nuraft::cs_new<nuraft::srv_config>(my_server_id, host + ":" + std::to_string(port));
cluster_config->get_servers().push_back(peer_config);
}
KeeperStateManager::KeeperStateManager(
int my_server_id_,
const std::string & config_prefix,
const Poco::Util::AbstractConfiguration & config,
const CoordinationSettingsPtr & coordination_settings,
bool standalone_keeper)
: my_server_id(my_server_id_)
, secure(config.getBool(config_prefix + ".raft_configuration.secure", false))
, log_store(nuraft::cs_new<KeeperLogStore>(
getLogsPathFromConfig(config_prefix, config, standalone_keeper),
coordination_settings->rotate_log_storage_interval, coordination_settings->force_sync, coordination_settings->compress_logs))
, cluster_config(nuraft::cs_new<nuraft::cluster_config>())
KeeperConfigurationWrapper KeeperStateManager::parseServersConfiguration(const Poco::Util::AbstractConfiguration & config, bool allow_without_us) const
{
KeeperConfigurationWrapper result;
result.cluster_config = std::make_shared<nuraft::cluster_config>();
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(config_prefix + ".raft_configuration", keys);
total_servers = keys.size();
size_t total_servers = 0;
for (const auto & server_key : keys)
{
if (!startsWith(server_key, "server"))
continue;
std::string full_prefix = config_prefix + ".raft_configuration." + server_key;
int server_id = config.getInt(full_prefix + ".id");
int new_server_id = config.getInt(full_prefix + ".id");
std::string hostname = config.getString(full_prefix + ".hostname");
int port = config.getInt(full_prefix + ".port");
bool can_become_leader = config.getBool(full_prefix + ".can_become_leader", true);
@ -74,24 +54,55 @@ KeeperStateManager::KeeperStateManager(
bool start_as_follower = config.getBool(full_prefix + ".start_as_follower", false);
if (start_as_follower)
start_as_follower_servers.insert(server_id);
result.servers_start_as_followers.insert(new_server_id);
auto endpoint = hostname + ":" + std::to_string(port);
auto peer_config = nuraft::cs_new<nuraft::srv_config>(server_id, 0, endpoint, "", !can_become_leader, priority);
if (server_id == my_server_id)
auto peer_config = nuraft::cs_new<nuraft::srv_config>(new_server_id, 0, endpoint, "", !can_become_leader, priority);
if (my_server_id == new_server_id)
{
my_server_config = peer_config;
my_port = port;
result.config = peer_config;
result.port = port;
}
cluster_config->get_servers().push_back(peer_config);
result.cluster_config->get_servers().push_back(peer_config);
total_servers++;
}
if (!my_server_config)
if (!result.config && !allow_without_us)
throw Exception(ErrorCodes::RAFT_ERROR, "Our server id {} not found in raft_configuration section", my_server_id);
if (start_as_follower_servers.size() == cluster_config->get_servers().size())
if (result.servers_start_as_followers.size() == total_servers)
throw Exception(ErrorCodes::RAFT_ERROR, "At least one of servers should be able to start as leader (without <start_as_follower>)");
return result;
}
KeeperStateManager::KeeperStateManager(int server_id_, const std::string & host, int port, const std::string & logs_path)
: my_server_id(server_id_)
, secure(false)
, log_store(nuraft::cs_new<KeeperLogStore>(logs_path, 5000, false, false))
{
auto peer_config = nuraft::cs_new<nuraft::srv_config>(my_server_id, host + ":" + std::to_string(port));
configuration_wrapper.cluster_config = nuraft::cs_new<nuraft::cluster_config>();
configuration_wrapper.port = port;
configuration_wrapper.config = peer_config;
configuration_wrapper.cluster_config->get_servers().push_back(peer_config);
}
KeeperStateManager::KeeperStateManager(
int server_id_,
const std::string & config_prefix_,
const Poco::Util::AbstractConfiguration & config,
const CoordinationSettingsPtr & coordination_settings,
bool standalone_keeper)
: my_server_id(server_id_)
, secure(config.getBool(config_prefix_ + ".raft_configuration.secure", false))
, config_prefix(config_prefix_)
, configuration_wrapper(parseServersConfiguration(config, false))
, log_store(nuraft::cs_new<KeeperLogStore>(
getLogsPathFromConfig(config_prefix_, config, standalone_keeper),
coordination_settings->rotate_log_storage_interval, coordination_settings->force_sync, coordination_settings->compress_logs))
{
}
void KeeperStateManager::loadLogStore(uint64_t last_commited_index, uint64_t logs_to_keep)
@ -99,6 +110,14 @@ void KeeperStateManager::loadLogStore(uint64_t last_commited_index, uint64_t log
log_store->init(last_commited_index, logs_to_keep);
}
ClusterConfigPtr KeeperStateManager::getLatestConfigFromLogStore() const
{
auto entry_with_change = log_store->getLatestConfigChange();
if (entry_with_change)
return ClusterConfig::deserialize(entry_with_change->get_buf());
return nullptr;
}
void KeeperStateManager::flushLogStore()
{
log_store->flush();
@ -106,18 +125,67 @@ void KeeperStateManager::flushLogStore()
void KeeperStateManager::save_config(const nuraft::cluster_config & config)
{
// Just keep in memory in this example.
// Need to write to disk here, if you want to make it durable.
std::lock_guard lock(configuration_wrapper_mutex);
nuraft::ptr<nuraft::buffer> buf = config.serialize();
cluster_config = nuraft::cluster_config::deserialize(*buf);
configuration_wrapper.cluster_config = nuraft::cluster_config::deserialize(*buf);
}
void KeeperStateManager::save_state(const nuraft::srv_state & state)
{
// Just keep in memory in this example.
// Need to write to disk here, if you want to make it durable.
nuraft::ptr<nuraft::buffer> buf = state.serialize();
server_state = nuraft::srv_state::deserialize(*buf);
}
nuraft::ptr<nuraft::buffer> buf = state.serialize();
server_state = nuraft::srv_state::deserialize(*buf);
}
ConfigUpdateActions KeeperStateManager::getConfigurationDiff(const Poco::Util::AbstractConfiguration & config) const
{
auto new_configuration_wrapper = parseServersConfiguration(config, true);
std::unordered_map<int, KeeperServerConfigPtr> new_ids, old_ids;
for (const auto & new_server : new_configuration_wrapper.cluster_config->get_servers())
new_ids[new_server->get_id()] = new_server;
{
std::lock_guard lock(configuration_wrapper_mutex);
for (const auto & old_server : configuration_wrapper.cluster_config->get_servers())
old_ids[old_server->get_id()] = old_server;
}
ConfigUpdateActions result;
/// First of all add new servers
for (auto [new_id, server_config] : new_ids)
{
if (!old_ids.count(new_id))
result.emplace_back(ConfigUpdateAction{ConfigUpdateActionType::AddServer, server_config});
}
/// After that remove old ones
for (auto [old_id, server_config] : old_ids)
{
if (!new_ids.count(old_id))
result.emplace_back(ConfigUpdateAction{ConfigUpdateActionType::RemoveServer, server_config});
}
{
std::lock_guard lock(configuration_wrapper_mutex);
/// And update priority if required
for (const auto & old_server : configuration_wrapper.cluster_config->get_servers())
{
for (const auto & new_server : new_configuration_wrapper.cluster_config->get_servers())
{
if (old_server->get_id() == new_server->get_id())
{
if (old_server->get_priority() != new_server->get_priority())
{
result.emplace_back(ConfigUpdateAction{ConfigUpdateActionType::UpdatePriority, new_server});
}
break;
}
}
}
}
return result;
}
}
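As a worked example of getConfigurationDiff: if RAFT currently knows servers {1, 2, 3} and the new XML describes servers {2, 3, 4} with a changed priority for server 3, the returned ConfigUpdateActions are, in that order, AddServer(4), RemoveServer(1) and UpdatePriority(3). Additions are always emitted before removals, presumably so the quorum never shrinks before the replacement server is known.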

View File

@ -6,20 +6,57 @@
#include <Coordination/CoordinationSettings.h>
#include <libnuraft/nuraft.hxx> // Y_IGNORE
#include <Poco/Util/AbstractConfiguration.h>
#include <Coordination/KeeperSnapshotManager.h>
namespace DB
{
using KeeperServerConfigPtr = nuraft::ptr<nuraft::srv_config>;
/// Wrapper struct for Keeper cluster config. We parse this
/// info from XML files.
struct KeeperConfigurationWrapper
{
/// Our port
int port;
/// Our config
KeeperServerConfigPtr config;
/// Server ids to start as followers
std::unordered_set<int> servers_start_as_followers;
/// Cluster config
ClusterConfigPtr cluster_config;
};
/// When our configuration changes, the following
/// action types can happen
enum class ConfigUpdateActionType
{
RemoveServer,
AddServer,
UpdatePriority,
};
/// Action to update configuration
struct ConfigUpdateAction
{
ConfigUpdateActionType action_type;
KeeperServerConfigPtr server;
};
using ConfigUpdateActions = std::vector<ConfigUpdateAction>;
/// Responsible for managing our server's and the cluster's configuration
class KeeperStateManager : public nuraft::state_mgr
{
public:
KeeperStateManager(
int server_id_,
const std::string & config_prefix,
const std::string & config_prefix_,
const Poco::Util::AbstractConfiguration & config,
const CoordinationSettingsPtr & coordination_settings,
bool standalone_keeper);
/// Constructor for tests
KeeperStateManager(
int server_id_,
const std::string & host,
@ -30,8 +67,14 @@ public:
void flushLogStore();
nuraft::ptr<nuraft::cluster_config> load_config() override { return cluster_config; }
/// Called on server start; in our case we don't use any separate logic for loading
nuraft::ptr<nuraft::cluster_config> load_config() override
{
std::lock_guard lock(configuration_wrapper_mutex);
return configuration_wrapper.cluster_config;
}
/// Save cluster config (i.e. nodes, their priorities and so on)
void save_config(const nuraft::cluster_config & config) override;
void save_state(const nuraft::srv_state & state) override;
@ -40,17 +83,22 @@ public:
nuraft::ptr<nuraft::log_store> load_log_store() override { return log_store; }
Int32 server_id() override { return my_server_id; }
int32_t server_id() override { return my_server_id; }
nuraft::ptr<nuraft::srv_config> get_srv_config() const { return my_server_config; }
nuraft::ptr<nuraft::srv_config> get_srv_config() const { return configuration_wrapper.config; }
void system_exit(const int /* exit_code */) override {}
int getPort() const { return my_port; }
int getPort() const
{
std::lock_guard lock(configuration_wrapper_mutex);
return configuration_wrapper.port;
}
bool shouldStartAsFollower() const
{
return start_as_follower_servers.count(my_server_id);
std::lock_guard lock(configuration_wrapper_mutex);
return configuration_wrapper.servers_start_as_followers.count(my_server_id);
}
bool isSecure() const
@ -60,18 +108,31 @@ public:
nuraft::ptr<KeeperLogStore> getLogStore() const { return log_store; }
uint64_t getTotalServers() const { return total_servers; }
uint64_t getTotalServers() const
{
std::lock_guard lock(configuration_wrapper_mutex);
return configuration_wrapper.cluster_config->get_servers().size();
}
/// Read all log entries in the log store from the beginning and return the latest config (with the largest log_index)
ClusterConfigPtr getLatestConfigFromLogStore() const;
/// Get configuration diff between the proposed XML and the current state in RAFT
ConfigUpdateActions getConfigurationDiff(const Poco::Util::AbstractConfiguration & config) const;
private:
int my_server_id;
int my_port;
bool secure;
uint64_t total_servers{0};
std::unordered_set<int> start_as_follower_servers;
std::string config_prefix;
mutable std::mutex configuration_wrapper_mutex;
KeeperConfigurationWrapper configuration_wrapper;
nuraft::ptr<KeeperLogStore> log_store;
nuraft::ptr<nuraft::srv_config> my_server_config;
nuraft::ptr<nuraft::cluster_config> cluster_config;
nuraft::ptr<nuraft::srv_state> server_state;
/// Parse the servers' configuration from the XML config.
KeeperConfigurationWrapper parseServersConfiguration(const Poco::Util::AbstractConfiguration & config, bool allow_without_us) const;
};
}

View File

@ -962,7 +962,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotSimple)
auto debuf = manager.deserializeSnapshotBufferFromDisk(2);
auto [snapshot_meta, restored_storage] = manager.deserializeSnapshotFromBuffer(debuf);
auto [restored_storage, snapshot_meta, _] = manager.deserializeSnapshotFromBuffer(debuf);
EXPECT_EQ(restored_storage->container.size(), 3);
EXPECT_EQ(restored_storage->container.getValue("/").children.size(), 1);
@ -1011,7 +1011,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotMoreWrites)
auto debuf = manager.deserializeSnapshotBufferFromDisk(50);
auto [meta, restored_storage] = manager.deserializeSnapshotFromBuffer(debuf);
auto [restored_storage, meta, _] = manager.deserializeSnapshotFromBuffer(debuf);
EXPECT_EQ(restored_storage->container.size(), 51);
for (size_t i = 0; i < 50; ++i)
@ -1050,7 +1050,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotManySnapshots)
EXPECT_TRUE(fs::exists("./snapshots/snapshot_250.bin" + params.extension));
auto [meta, restored_storage] = manager.restoreFromLatestSnapshot();
auto [restored_storage, meta, _] = manager.restoreFromLatestSnapshot();
EXPECT_EQ(restored_storage->container.size(), 251);
@ -1103,7 +1103,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotMode)
EXPECT_FALSE(storage.container.contains("/hello_" + std::to_string(i)));
}
auto [meta, restored_storage] = manager.restoreFromLatestSnapshot();
auto [restored_storage, meta, _] = manager.restoreFromLatestSnapshot();
for (size_t i = 0; i < 50; ++i)
{
@ -1498,7 +1498,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotDifferentCompressions)
auto debuf = new_manager.deserializeSnapshotBufferFromDisk(2);
auto [snapshot_meta, restored_storage] = new_manager.deserializeSnapshotFromBuffer(debuf);
auto [restored_storage, snapshot_meta, _] = new_manager.deserializeSnapshotFromBuffer(debuf);
EXPECT_EQ(restored_storage->container.size(), 3);
EXPECT_EQ(restored_storage->container.getValue("/").children.size(), 1);

View File

@ -12,7 +12,9 @@
/// Minimum revision with exactly the same set of aggregation methods and rules to select them.
/// Two-level (bucketed) aggregation is incompatible if servers are inconsistent in these rules
/// (keys will be placed in different buckets and result will not be fully aggregated).
#define DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 54431
#define DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 54456
#define DBMS_MIN_MAJOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 21
#define DBMS_MIN_MINOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 4
#define DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA 54410
#define DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE 54405

View File

@ -298,10 +298,30 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt
/// Write host name to replica_path; it will protect against multiple replicas with the same name
auto host_id = getHostID(getContext(), db_uuid);
Coordination::Requests ops;
ops.emplace_back(zkutil::makeCreateRequest(replica_path, host_id, zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/log_ptr", "0", zkutil::CreateMode::Persistent));
current_zookeeper->multi(ops);
for (int attempts = 10; attempts > 0; --attempts)
{
Coordination::Stat stat;
String max_log_ptr_str = current_zookeeper->get(zookeeper_path + "/max_log_ptr", &stat);
Coordination::Requests ops;
ops.emplace_back(zkutil::makeCreateRequest(replica_path, host_id, zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/log_ptr", "0", zkutil::CreateMode::Persistent));
/// In addition to creating the replica nodes, we record the max_log_ptr at the instant when
/// we declared ourselves as an existing replica. We'll need this during recoverLostReplica to
/// notify other nodes that issued new queries while this node was recovering.
ops.emplace_back(zkutil::makeCheckRequest(zookeeper_path + "/max_log_ptr", stat.version));
Coordination::Responses responses;
const auto code = current_zookeeper->tryMulti(ops, responses);
if (code == Coordination::Error::ZOK)
{
max_log_ptr_at_creation = parse<UInt32>(max_log_ptr_str);
break;
}
else if (code == Coordination::Error::ZNODEEXISTS || attempts == 1)
{
/// If it's our last attempt, or if the replica already exists, fail immediately.
zkutil::KeeperMultiException::check(code, ops, responses);
}
}
createEmptyLogEntry(current_zookeeper);
}
@ -621,6 +641,21 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
InterpreterCreateQuery(query_ast, create_query_context).execute();
}
if (max_log_ptr_at_creation != 0)
{
/// If the replica is new and some of the queries applied during recovery
/// were issued after the replica was created, then other nodes might be
/// waiting for this node to notify them that the query was applied.
for (UInt32 ptr = max_log_ptr_at_creation; ptr <= max_log_ptr; ++ptr)
{
auto entry_name = DDLTaskBase::getLogEntryName(ptr);
auto path = fs::path(zookeeper_path) / "log" / entry_name / "finished" / getFullReplicaName();
auto status = ExecutionStatus(0).serializeText();
auto res = current_zookeeper->tryCreate(path, status, zkutil::CreateMode::Persistent);
if (res == Coordination::Error::ZOK)
LOG_INFO(log, "Marked recovered {} as finished", entry_name);
}
}
current_zookeeper->set(replica_path + "/log_ptr", toString(max_log_ptr));
}

View File

@ -94,6 +94,7 @@ private:
std::atomic_bool is_readonly = true;
std::unique_ptr<DatabaseReplicatedDDLWorker> ddl_worker;
UInt32 max_log_ptr_at_creation = 0;
mutable ClusterPtr cluster;
};

View File

@ -484,24 +484,37 @@ MutableColumns CacheDictionary<dictionary_key_type>::aggregateColumns(
}
template <DictionaryKeyType dictionary_key_type>
Pipe CacheDictionary<dictionary_key_type>::read(const Names & column_names, size_t max_block_size) const
Pipe CacheDictionary<dictionary_key_type>::read(const Names & column_names, size_t max_block_size, size_t num_streams) const
{
Pipe pipe;
std::optional<DictionarySourceData> data;
ColumnsWithTypeAndName key_columns;
{
/// Write lock on storage
const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
data.emplace(shared_from_this(), cache_storage_ptr->getCachedSimpleKeys(), column_names);
{
auto keys = cache_storage_ptr->getCachedSimpleKeys();
key_columns = {ColumnWithTypeAndName(getColumnFromPODArray(keys), std::make_shared<DataTypeUInt64>(), dict_struct.id->name)};
}
else
{
auto keys = cache_storage_ptr->getCachedComplexKeys();
data.emplace(shared_from_this(), keys, column_names);
key_columns = deserializeColumnsWithTypeAndNameFromKeys(dict_struct, keys, 0, keys.size());
}
}
return Pipe(std::make_shared<DictionarySource>(std::move(*data), max_block_size));
std::shared_ptr<const IDictionary> dictionary = shared_from_this();
auto coordinator = std::make_shared<DictionarySourceCoordinator>(dictionary, column_names, std::move(key_columns), max_block_size);
Pipes pipes;
for (size_t i = 0; i < num_streams; ++i)
{
auto source = std::make_shared<DictionarySource>(coordinator);
pipes.emplace_back(Pipe(std::move(source)));
}
return Pipe::unitePipes(std::move(pipes));
}
template <DictionaryKeyType dictionary_key_type>

View File

@ -136,7 +136,7 @@ public:
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
Pipe read(const Names & column_names, size_t max_block_size) const override;
Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override;
std::exception_ptr getLastException() const override;

View File

@ -668,6 +668,16 @@ static ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array)
return column_vector;
}
template <typename T>
static ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array, size_t start, size_t length)
{
auto column_vector = ColumnVector<T>::create();
column_vector->getData().reserve(length);
column_vector->getData().insert(array.begin() + start, array.begin() + start + length);
return column_vector;
}
}
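A hedged usage sketch of the new start/length overload; the key values are purely illustrative:

PaddedPODArray<UInt64> keys;
for (UInt64 i = 0; i < 5; ++i)
    keys.push_back(i * 10); /// keys = {0, 10, 20, 30, 40}

/// Copies only elements [1, 1 + 3) into a fresh ColumnVector<UInt64>: {10, 20, 30}.
ColumnPtr column = getColumnFromPODArray(keys, 1, 3);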

View File

@ -7,155 +7,179 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int UNSUPPORTED_METHOD;
}
DictionarySourceData::DictionarySourceData(
std::shared_ptr<const IDictionary> dictionary_, PaddedPODArray<UInt64> && ids_, const Names & column_names_)
: num_rows(ids_.size())
, dictionary(dictionary_)
, column_names(column_names_.begin(), column_names_.end())
, ids(std::move(ids_))
, key_type(DictionaryInputStreamKeyType::Id)
bool DictionarySourceCoordinator::getKeyColumnsNextRangeToRead(ColumnsWithTypeAndName & key_columns, ColumnsWithTypeAndName & data_columns)
{
size_t read_block_index = parallel_read_block_index++;
size_t start = read_block_index * max_block_size;
size_t end = (read_block_index + 1) * max_block_size;
size_t keys_size = key_columns_with_type[0].column->size();
if (start >= keys_size)
return false;
end = std::min(end, keys_size);
size_t length = end - start;
key_columns = cutColumns(key_columns_with_type, start, length);
data_columns = cutColumns(data_columns_with_type, start, length);
return true;
}
DictionarySourceData::DictionarySourceData(
std::shared_ptr<const IDictionary> dictionary_,
const PaddedPODArray<StringRef> & keys,
const Names & column_names_)
: num_rows(keys.size())
, dictionary(dictionary_)
, column_names(column_names_.begin(), column_names_.end())
, key_type(DictionaryInputStreamKeyType::ComplexKey)
void DictionarySourceCoordinator::initialize(const Names & column_names)
{
const DictionaryStructure & dictionary_structure = dictionary->getStructure();
key_columns = deserializeColumnsWithTypeAndNameFromKeys(dictionary_structure, keys, 0, keys.size());
}
ColumnsWithTypeAndName columns_with_type;
DictionarySourceData::DictionarySourceData(
std::shared_ptr<const IDictionary> dictionary_,
const Columns & data_columns_,
const Names & column_names_,
GetColumnsFunction && get_key_columns_function_,
GetColumnsFunction && get_view_columns_function_)
: num_rows(data_columns_.front()->size())
, dictionary(dictionary_)
, column_names(column_names_.begin(), column_names_.end())
, data_columns(data_columns_)
, get_key_columns_function(std::move(get_key_columns_function_))
, get_view_columns_function(std::move(get_view_columns_function_))
, key_type(DictionaryInputStreamKeyType::Callback)
{
}
const auto & dictionary_structure = dictionary->getStructure();
Block DictionarySourceData::getBlock(size_t start, size_t length) const
{
/// TODO: Rewrite
switch (key_type)
for (const auto & column_name : column_names)
{
case DictionaryInputStreamKeyType::ComplexKey:
ColumnWithTypeAndName column_with_type;
column_with_type.name = column_name;
auto it = dictionary_structure.attribute_name_to_index.find(column_name);
if (it == dictionary_structure.attribute_name_to_index.end())
{
Columns columns;
ColumnsWithTypeAndName view_columns;
columns.reserve(key_columns.size());
for (const auto & key_column : key_columns)
if (dictionary_structure.id.has_value() && column_name == dictionary_structure.id->name)
{
ColumnPtr column = key_column.column->cut(start, length);
columns.emplace_back(column);
view_columns.emplace_back(column, key_column.type, key_column.name);
column_with_type.type = std::make_shared<DataTypeUInt64>();
}
return fillBlock({}, columns, {}, std::move(view_columns));
}
case DictionaryInputStreamKeyType::Id:
{
PaddedPODArray<UInt64> ids_to_fill(ids.begin() + start, ids.begin() + start + length);
return fillBlock(ids_to_fill, {}, {}, {});
}
case DictionaryInputStreamKeyType::Callback:
{
Columns columns;
columns.reserve(data_columns.size());
for (const auto & data_column : data_columns)
columns.push_back(data_column->cut(start, length));
const DictionaryStructure & dictionaty_structure = dictionary->getStructure();
const auto & attributes = *dictionaty_structure.key;
ColumnsWithTypeAndName keys_with_type_and_name = get_key_columns_function(columns, attributes);
ColumnsWithTypeAndName view_with_type_and_name = get_view_columns_function(columns, attributes);
DataTypes types;
columns.clear();
for (const auto & key_column : keys_with_type_and_name)
else if (dictionary_structure.range_min.has_value() && column_name == dictionary_structure.range_min->name)
{
columns.push_back(key_column.column);
types.push_back(key_column.type);
column_with_type.type = dictionary_structure.range_min->type;
}
return fillBlock({}, columns, types, std::move(view_with_type_and_name));
}
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected DictionaryInputStreamKeyType.");
}
Block DictionarySourceData::fillBlock(
const PaddedPODArray<UInt64> & ids_to_fill,
const Columns & keys,
const DataTypes & types,
ColumnsWithTypeAndName && view) const
{
DataTypes data_types = types;
ColumnsWithTypeAndName block_columns;
data_types.reserve(keys.size());
const DictionaryStructure & dictionary_structure = dictionary->getStructure();
if (data_types.empty() && dictionary_structure.key)
for (const auto & key : *dictionary_structure.key)
data_types.push_back(key.type);
for (const auto & column : view)
if (column_names.find(column.name) != column_names.end())
block_columns.push_back(column);
const DictionaryStructure & structure = dictionary->getStructure();
ColumnPtr ids_column = getColumnFromPODArray(ids_to_fill);
if (structure.id && column_names.find(structure.id->name) != column_names.end())
{
block_columns.emplace_back(ids_column, std::make_shared<DataTypeUInt64>(), structure.id->name);
}
auto dictionary_key_type = dictionary->getKeyType();
for (const auto & attribute : structure.attributes)
{
if (column_names.find(attribute.name) != column_names.end())
{
ColumnPtr column;
if (dictionary_key_type == DictionaryKeyType::Simple)
else if (dictionary_structure.range_max.has_value() && column_name == dictionary_structure.range_max->name)
{
column = dictionary->getColumn(
attribute.name,
attribute.type,
{ids_column},
{std::make_shared<DataTypeUInt64>()},
nullptr /* default_values_column */);
column_with_type.type = dictionary_structure.range_max->type;
}
else if (dictionary_structure.key.has_value())
{
const auto & dictionary_key_attributes = *dictionary_structure.key;
for (const auto & attribute : dictionary_key_attributes)
{
if (column_name == attribute.name)
{
column_with_type.type = attribute.type;
break;
}
}
}
else
{
column = dictionary->getColumn(
attribute.name,
attribute.type,
keys,
data_types,
nullptr /* default_values_column*/);
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "No such column name {} in dictionary {}",
column_name,
dictionary->getDictionaryID().getNameForLogs());
}
block_columns.emplace_back(column, attribute.type, attribute.name);
}
else
{
const auto & attribute = dictionary_structure.attributes[it->second];
attributes_names_to_read.emplace_back(attribute.name);
attributes_types_to_read.emplace_back(attribute.type);
attributes_default_values_columns.emplace_back(nullptr);
column_with_type.type = attribute.type;
}
column_with_type.column = column_with_type.type->createColumn();
columns_with_type.emplace_back(std::move(column_with_type));
}
return Block(block_columns);
header = Block(std::move(columns_with_type));
}
ColumnsWithTypeAndName
DictionarySourceCoordinator::cutColumns(const ColumnsWithTypeAndName & columns_with_type, size_t start, size_t length)
{
ColumnsWithTypeAndName result;
result.reserve(columns_with_type.size());
for (const auto & column_with_type : columns_with_type)
{
ColumnWithTypeAndName result_column_with_type;
result_column_with_type.column = column_with_type.column->cut(start, length);
result_column_with_type.type = column_with_type.type;
result_column_with_type.name = column_with_type.name;
result.emplace_back(std::move(result_column_with_type));
}
return result;
}
Chunk DictionarySource::generate()
{
ColumnsWithTypeAndName key_columns_to_read;
ColumnsWithTypeAndName data_columns;
if (!coordinator->getKeyColumnsNextRangeToRead(key_columns_to_read, data_columns))
return {};
const auto & header = coordinator->getHeader();
std::vector<ColumnPtr> key_columns;
std::vector<DataTypePtr> key_types;
key_columns.reserve(key_columns_to_read.size());
key_types.reserve(key_columns_to_read.size());
std::unordered_map<std::string_view, ColumnPtr> name_to_column;
for (const auto & key_column_to_read : key_columns_to_read)
{
key_columns.emplace_back(key_column_to_read.column);
key_types.emplace_back(key_column_to_read.type);
if (header.has(key_column_to_read.name))
name_to_column.emplace(key_column_to_read.name, key_column_to_read.column);
}
for (const auto & data_column : data_columns)
{
if (header.has(data_column.name))
name_to_column.emplace(data_column.name, data_column.column);
}
const auto & attributes_names_to_read = coordinator->getAttributesNamesToRead();
const auto & attributes_types_to_read = coordinator->getAttributesTypesToRead();
const auto & attributes_default_values_columns = coordinator->getAttributesDefaultValuesColumns();
const auto & dictionary = coordinator->getDictionary();
auto attributes_columns = dictionary->getColumns(
attributes_names_to_read,
attributes_types_to_read,
key_columns,
key_types,
attributes_default_values_columns);
for (size_t i = 0; i < attributes_names_to_read.size(); ++i)
{
const auto & attribute_name = attributes_names_to_read[i];
name_to_column.emplace(attribute_name, attributes_columns[i]);
}
std::vector<ColumnPtr> result_columns;
result_columns.reserve(header.columns());
for (const auto & column_with_type : header)
{
const auto & header_name = column_with_type.name;
auto it = name_to_column.find(header_name);
if (it == name_to_column.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Column name {} not found in result columns", header_name);
result_columns.emplace_back(it->second);
}
size_t rows_size = result_columns[0]->size();
return Chunk(result_columns, rows_size);
}
}
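getKeyColumnsNextRangeToRead hands out non-overlapping [start, end) row ranges by bumping the atomic parallel_read_block_index, so several DictionarySource streams can drain one coordinator concurrently. For example, with max_block_size = 3 and 8 cached keys, successive calls return rows [0, 3), [3, 6) and [6, 8); the fourth call computes start = 9 >= 8 and returns false, which ends every stream.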

View File

@ -7,82 +7,91 @@
#include <Columns/IColumn.h>
#include <Core/Names.h>
#include <DataTypes/DataTypesNumber.h>
#include <Processors/Sources/SourceWithProgress.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Dictionaries/IDictionary.h>
#include <Dictionaries/DictionarySourceBase.h>
namespace DB
{
class DictionarySourceData
class DictionarySourceCoordinator
{
public:
DictionarySourceData(
std::shared_ptr<const IDictionary> dictionary,
PaddedPODArray<UInt64> && ids,
const Names & column_names);
DictionarySourceData(
std::shared_ptr<const IDictionary> dictionary,
const PaddedPODArray<StringRef> & keys,
const Names & column_names);
using GetColumnsFunction = std::function<ColumnsWithTypeAndName(const Columns &, const std::vector<DictionaryAttribute> & attributes)>;
// Used to separate key columns format for storage and view.
// Calls get_key_columns_function to get key columns for the dictionary get function call
// and get_view_columns_function to get the key representation.
// Now used in the trie dictionary, where columns are stored as IP and mask, and are shown as strings.
DictionarySourceData(
std::shared_ptr<const IDictionary> dictionary,
const Columns & data_columns,
explicit DictionarySourceCoordinator(
std::shared_ptr<const IDictionary> dictionary_,
const Names & column_names,
GetColumnsFunction && get_key_columns_function,
GetColumnsFunction && get_view_columns_function);
ColumnsWithTypeAndName && key_columns_with_type_,
size_t max_block_size_)
: dictionary(std::move(dictionary_))
, key_columns_with_type(std::move(key_columns_with_type_))
, max_block_size(max_block_size_)
{
initialize(column_names);
}
Block getBlock(size_t start, size_t length) const;
size_t getNumRows() const { return num_rows; }
explicit DictionarySourceCoordinator(
std::shared_ptr<const IDictionary> dictionary_,
const Names & column_names,
ColumnsWithTypeAndName && key_columns_with_type_,
ColumnsWithTypeAndName && data_columns_with_type_,
size_t max_block_size_)
: dictionary(std::move(dictionary_))
, key_columns_with_type(std::move(key_columns_with_type_))
, data_columns_with_type(std::move(data_columns_with_type_))
, max_block_size(max_block_size_)
{
initialize(column_names);
}
bool getKeyColumnsNextRangeToRead(ColumnsWithTypeAndName & key_columns, ColumnsWithTypeAndName & data_columns);
const Block & getHeader() const { return header; }
const std::vector<std::string> & getAttributesNamesToRead() const { return attributes_names_to_read; }
const std::vector<DataTypePtr> & getAttributesTypesToRead() const { return attributes_types_to_read; }
const std::vector<ColumnPtr> & getAttributesDefaultValuesColumns() const { return attributes_default_values_columns; }
const std::shared_ptr<const IDictionary> & getDictionary() const { return dictionary; }
private:
Block fillBlock(
const PaddedPODArray<UInt64> & ids_to_fill,
const Columns & keys,
const DataTypes & types,
ColumnsWithTypeAndName && view) const;
void initialize(const Names & column_names);
static ColumnsWithTypeAndName cutColumns(const ColumnsWithTypeAndName & columns_with_type, size_t start, size_t length);
const size_t num_rows;
std::shared_ptr<const IDictionary> dictionary;
std::unordered_set<std::string> column_names;
PaddedPODArray<UInt64> ids;
ColumnsWithTypeAndName key_columns;
Columns data_columns;
GetColumnsFunction get_key_columns_function;
GetColumnsFunction get_view_columns_function;
ColumnsWithTypeAndName key_columns_with_type;
ColumnsWithTypeAndName data_columns_with_type;
enum class DictionaryInputStreamKeyType
{
Id,
ComplexKey,
Callback
};
Block header;
DictionaryInputStreamKeyType key_type;
std::vector<std::string> attributes_names_to_read;
std::vector<DataTypePtr> attributes_types_to_read;
std::vector<ColumnPtr> attributes_default_values_columns;
const size_t max_block_size;
std::atomic<size_t> parallel_read_block_index = 0;
};
class DictionarySource final : public DictionarySourceBase
class DictionarySource : public SourceWithProgress
{
public:
DictionarySource(DictionarySourceData data_, UInt64 max_block_size)
: DictionarySourceBase(data_.getBlock(0, 0), data_.getNumRows(), max_block_size)
, data(std::move(data_))
{}
explicit DictionarySource(std::shared_ptr<DictionarySourceCoordinator> coordinator_)
: SourceWithProgress(coordinator_->getHeader()), coordinator(std::move(coordinator_))
{
}
private:
String getName() const override { return "DictionarySource"; }
Block getBlock(size_t start, size_t length) const override { return data.getBlock(start, length); }
DictionarySourceData data;
Chunk generate() override;
std::shared_ptr<DictionarySourceCoordinator> coordinator;
};
}
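A minimal sketch of the consumer side, mirroring the read() implementations elsewhere in this commit; dictionary, column_names, key_columns, max_block_size and num_streams are assumed to be prepared by the caller:

auto coordinator = std::make_shared<DictionarySourceCoordinator>(dictionary, column_names, std::move(key_columns), max_block_size);

Pipes pipes;
for (size_t i = 0; i < num_streams; ++i)
{
    /// Each source shares the coordinator and pulls the next block range on demand.
    auto source = std::make_shared<DictionarySource>(coordinator);
    pipes.emplace_back(Pipe(std::move(source)));
}

Pipe pipe = Pipe::unitePipes(std::move(pipes));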

View File

@ -1,21 +0,0 @@
#include "DictionarySourceBase.h"
namespace DB
{
DictionarySourceBase::DictionarySourceBase(const Block & header, size_t rows_count_, size_t max_block_size_)
: SourceWithProgress(header), rows_count(rows_count_), max_block_size(max_block_size_)
{
}
Chunk DictionarySourceBase::generate()
{
if (next_row == rows_count)
return {};
size_t size = std::min(max_block_size, rows_count - next_row);
auto block = getBlock(next_row, size);
next_row += size;
return Chunk(block.getColumns(), size);
}
}

View File

@ -1,22 +0,0 @@
#pragma once
#include <Processors/Sources/SourceWithProgress.h>
namespace DB
{
class DictionarySourceBase : public SourceWithProgress
{
protected:
DictionarySourceBase(const Block & header, size_t rows_count_, size_t max_block_size_);
virtual Block getBlock(size_t start, size_t length) const = 0;
private:
const size_t rows_count;
const size_t max_block_size;
size_t next_row = 0;
Chunk generate() override;
};
}

View File

@ -290,7 +290,7 @@ Pipe DirectDictionary<dictionary_key_type>::getSourceBlockInputStream(
}
template <DictionaryKeyType dictionary_key_type>
Pipe DirectDictionary<dictionary_key_type>::read(const Names & /* column_names */, size_t /* max_block_size */) const
Pipe DirectDictionary<dictionary_key_type>::read(const Names & /* column_names */, size_t /* max_block_size */, size_t /* num_streams */) const
{
return source_ptr->loadAll();
}

View File

@ -96,7 +96,7 @@ public:
ColumnPtr in_key_column,
const DataTypePtr & key_type) const override;
Pipe read(const Names & column_names, size_t max_block_size) const override;
Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override;
private:
Pipe getSourceBlockInputStream(const Columns & key_columns, const PaddedPODArray<KeyType> & requested_keys) const;

View File

@ -538,7 +538,7 @@ void FlatDictionary::setAttributeValue(Attribute & attribute, const UInt64 key,
callOnDictionaryAttributeType(attribute.type, type_call);
}
Pipe FlatDictionary::read(const Names & column_names, size_t max_block_size) const
Pipe FlatDictionary::read(const Names & column_names, size_t max_block_size, size_t num_streams) const
{
const auto keys_count = loaded_keys.size();
@ -549,8 +549,20 @@ Pipe FlatDictionary::read(const Names & column_names, size_t max_block_size) con
if (loaded_keys[key_index])
keys.push_back(key_index);
return Pipe(std::make_shared<DictionarySource>(
DictionarySourceData(shared_from_this(), std::move(keys), column_names), max_block_size));
ColumnsWithTypeAndName key_columns = {ColumnWithTypeAndName(getColumnFromPODArray(keys), std::make_shared<DataTypeUInt64>(), dict_struct.id->name)};
std::shared_ptr<const IDictionary> dictionary = shared_from_this();
auto coordinator = std::make_shared<DictionarySourceCoordinator>(dictionary, column_names, std::move(key_columns), max_block_size);
Pipes pipes;
for (size_t i = 0; i < num_streams; ++i)
{
auto source = std::make_shared<DictionarySource>(coordinator);
pipes.emplace_back(Pipe(std::move(source)));
}
return Pipe::unitePipes(std::move(pipes));
}
void registerDictionaryFlat(DictionaryFactory & factory)

View File

@ -97,7 +97,7 @@ public:
const DataTypePtr & key_type,
size_t level) const override;
Pipe read(const Names & column_names, size_t max_block_size) const override;
Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override;
private:
template <typename Value>

View File

@ -55,95 +55,93 @@ ColumnPtr HashedArrayDictionary<dictionary_key_type>::getColumn(
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
DictionaryKeysExtractor<dictionary_key_type> extractor(key_columns, arena_holder.getComplexKeyArena());
const size_t size = extractor.getKeysSize();
const size_t keys_size = extractor.getKeysSize();
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
auto & attribute = attributes[attribute_index];
bool is_attribute_nullable = attribute.is_index_null.has_value();
return getAttributeColumn(attribute, dictionary_attribute, keys_size, default_values_column, extractor);
}
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to = nullptr;
if (attribute.is_index_null)
template <DictionaryKeyType dictionary_key_type>
Columns HashedArrayDictionary<dictionary_key_type>::getColumns(
const Strings & attribute_names,
const DataTypes & result_types,
const Columns & key_columns,
const DataTypes & key_types,
const Columns & default_values_columns) const
{
if (dictionary_key_type == DictionaryKeyType::Complex)
dict_struct.validateKeyTypes(key_types);
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
DictionaryKeysExtractor<dictionary_key_type> extractor(key_columns, arena_holder.getComplexKeyArena());
const size_t keys_size = extractor.getKeysSize();
PaddedPODArray<ssize_t> key_index_to_element_index;
/** Optimization for multiple attributes.
  * For each key, save its element index in the key_index_to_element_index array.
  * Later, in type_call for each attribute, use the getItemsImpl specialization that takes
  * the key_index_to_element_index array instead of a DictionaryKeysExtractor.
  */
if (attribute_names.size() > 1)
{
col_null_map_to = ColumnUInt8::create(size, false);
vec_null_map_to = &col_null_map_to->getData();
const auto & key_attribute_container = key_attribute.container;
size_t keys_found = 0;
key_index_to_element_index.resize(keys_size);
for (size_t key_index = 0; key_index < keys_size; ++key_index)
{
auto key = extractor.extractCurrentKey();
auto it = key_attribute_container.find(key);
if (it == key_attribute_container.end())
{
key_index_to_element_index[key_index] = -1;
}
else
{
key_index_to_element_index[key_index] = it->getMapped();
++keys_found;
}
extractor.rollbackCurrentKey();
}
query_count.fetch_add(keys_size, std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
}
auto type_call = [&](const auto & dictionary_attribute_type)
size_t attribute_names_size = attribute_names.size();
Columns result_columns;
result_columns.reserve(attribute_names_size);
for (size_t i = 0; i < attribute_names_size; ++i)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
ColumnPtr result_column;
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(dictionary_attribute.null_value, default_values_column);
const auto & attribute_name = attribute_names[i];
const auto & result_type = result_types[i];
const auto & default_values_column = default_values_columns[i];
auto column = ColumnProvider::getColumn(dictionary_attribute, size);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
auto & attribute = attributes[attribute_index];
if constexpr (std::is_same_v<ValueType, Array>)
{
auto * out = column.get();
getItemsImpl<ValueType, false>(
attribute,
extractor,
[&](const size_t, const Array & value, bool) { out->insert(value); },
default_value_extractor);
}
else if constexpr (std::is_same_v<ValueType, StringRef>)
{
auto * out = column.get();
if (is_attribute_nullable)
getItemsImpl<ValueType, true>(
attribute,
extractor,
[&](size_t row, const StringRef value, bool is_null)
{
(*vec_null_map_to)[row] = is_null;
out->insertData(value.data, value.size);
},
default_value_extractor);
else
getItemsImpl<ValueType, false>(
attribute,
extractor,
[&](size_t, const StringRef value, bool) { out->insertData(value.data, value.size); },
default_value_extractor);
}
if (attribute_names_size > 1)
result_column = getAttributeColumn(attribute, dictionary_attribute, keys_size, default_values_column, key_index_to_element_index);
else
{
auto & out = column->getData();
result_column = getAttributeColumn(attribute, dictionary_attribute, keys_size, default_values_column, extractor);
if (is_attribute_nullable)
getItemsImpl<ValueType, true>(
attribute,
extractor,
[&](size_t row, const auto value, bool is_null)
{
(*vec_null_map_to)[row] = is_null;
out[row] = value;
},
default_value_extractor);
else
getItemsImpl<ValueType, false>(
attribute,
extractor,
[&](size_t row, const auto value, bool) { out[row] = value; },
default_value_extractor);
}
result_columns.emplace_back(std::move(result_column));
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
if (is_attribute_nullable)
result = ColumnNullable::create(std::move(result), std::move(col_null_map_to));
return result;
return result_columns;
}
template <DictionaryKeyType dictionary_key_type>
@ -499,6 +497,102 @@ void HashedArrayDictionary<dictionary_key_type>::resize(size_t added_rows)
key_attribute.container.reserve(added_rows);
}
template <DictionaryKeyType dictionary_key_type>
template <typename KeysProvider>
ColumnPtr HashedArrayDictionary<dictionary_key_type>::getAttributeColumn(
const Attribute & attribute,
const DictionaryAttribute & dictionary_attribute,
size_t keys_size,
ColumnPtr default_values_column,
KeysProvider && keys_object) const
{
ColumnPtr result;
bool is_attribute_nullable = attribute.is_index_null.has_value();
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to = nullptr;
if (attribute.is_index_null)
{
col_null_map_to = ColumnUInt8::create(keys_size, false);
vec_null_map_to = &col_null_map_to->getData();
}
auto type_call = [&](const auto & dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(dictionary_attribute.null_value, default_values_column);
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
if constexpr (std::is_same_v<ValueType, Array>)
{
auto * out = column.get();
getItemsImpl<ValueType, false>(
attribute,
keys_object,
[&](const size_t, const Array & value, bool) { out->insert(value); },
default_value_extractor);
}
else if constexpr (std::is_same_v<ValueType, StringRef>)
{
auto * out = column.get();
if (is_attribute_nullable)
getItemsImpl<ValueType, true>(
attribute,
keys_object,
[&](size_t row, const StringRef value, bool is_null)
{
(*vec_null_map_to)[row] = is_null;
out->insertData(value.data, value.size);
},
default_value_extractor);
else
getItemsImpl<ValueType, false>(
attribute,
keys_object,
[&](size_t, const StringRef value, bool) { out->insertData(value.data, value.size); },
default_value_extractor);
}
else
{
auto & out = column->getData();
if (is_attribute_nullable)
getItemsImpl<ValueType, true>(
attribute,
keys_object,
[&](size_t row, const auto value, bool is_null)
{
(*vec_null_map_to)[row] = is_null;
out[row] = value;
},
default_value_extractor);
else
getItemsImpl<ValueType, false>(
attribute,
keys_object,
[&](size_t row, const auto value, bool) { out[row] = value; },
default_value_extractor);
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
if (is_attribute_nullable)
result = ColumnNullable::create(std::move(result), std::move(col_null_map_to));
return result;
}
template <DictionaryKeyType dictionary_key_type>
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor>
void HashedArrayDictionary<dictionary_key_type>::getItemsImpl(
@ -547,6 +641,41 @@ void HashedArrayDictionary<dictionary_key_type>::getItemsImpl(
found_count.fetch_add(keys_found, std::memory_order_relaxed);
}
template <DictionaryKeyType dictionary_key_type>
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor>
void HashedArrayDictionary<dictionary_key_type>::getItemsImpl(
const Attribute & attribute,
const PaddedPODArray<ssize_t> & key_index_to_element_index,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto & attribute_container = std::get<AttributeContainerType<AttributeType>>(attribute.container);
const size_t keys_size = key_index_to_element_index.size();
for (size_t key_index = 0; key_index < keys_size; ++key_index)
{
bool key_exists = key_index_to_element_index[key_index] != -1;
if (key_exists)
{
size_t element_index = static_cast<size_t>(key_index_to_element_index[key_index]);
const auto & element = attribute_container[element_index];
if constexpr (is_nullable)
set_value(key_index, element, (*attribute.is_index_null)[element_index]);
else
set_value(key_index, element, false);
}
else
{
if constexpr (is_nullable)
set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index));
else
set_value(key_index, default_value_extractor[key_index], false);
}
}
}
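The key_index_to_element_index contract above is easy to miss: -1 marks a key that was not found (the default value extractor supplies the result), while any other value indexes directly into the attribute container. A toy illustration with hypothetical data:

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> attribute_container = {"red", "green"};
    std::vector<long> key_index_to_element_index = {0, -1, 1}; /// the second key is missing
    std::string default_value = "unknown";

    for (size_t key = 0; key < key_index_to_element_index.size(); ++key)
    {
        long element_index = key_index_to_element_index[key];
        if (element_index == -1)
            std::cout << default_value << '\n';
        else
            std::cout << attribute_container[static_cast<size_t>(element_index)] << '\n';
    }
    /// Prints: red, unknown, green
}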
template <DictionaryKeyType dictionary_key_type>
StringRef HashedArrayDictionary<dictionary_key_type>::copyKeyInArena(StringRef key)
{
@ -634,7 +763,7 @@ void HashedArrayDictionary<dictionary_key_type>::calculateBytesAllocated()
}
template <DictionaryKeyType dictionary_key_type>
Pipe HashedArrayDictionary<dictionary_key_type>::read(const Names & column_names, size_t max_block_size) const
Pipe HashedArrayDictionary<dictionary_key_type>::read(const Names & column_names, size_t max_block_size, size_t num_streams) const
{
PaddedPODArray<HashedArrayDictionary::KeyType> keys;
keys.reserve(key_attribute.container.size());
@ -642,7 +771,25 @@ Pipe HashedArrayDictionary<dictionary_key_type>::read(const Names & column_names
for (auto & [key, _] : key_attribute.container)
keys.emplace_back(key);
return Pipe(std::make_shared<DictionarySource>(DictionarySourceData(shared_from_this(), std::move(keys), column_names), max_block_size));
ColumnsWithTypeAndName key_columns;
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
key_columns = {ColumnWithTypeAndName(getColumnFromPODArray(keys), std::make_shared<DataTypeUInt64>(), dict_struct.id->name)};
else
key_columns = deserializeColumnsWithTypeAndNameFromKeys(dict_struct, keys, 0, keys.size());
std::shared_ptr<const IDictionary> dictionary = shared_from_this();
auto coordinator = std::make_shared<DictionarySourceCoordinator>(dictionary, column_names, std::move(key_columns), max_block_size);
Pipes pipes;
for (size_t i = 0; i < num_streams; ++i)
{
auto source = std::make_shared<DictionarySource>(coordinator);
pipes.emplace_back(Pipe(std::move(source)));
}
return Pipe::unitePipes(std::move(pipes));
}
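The replacement body above is this commit's recurring theme: instead of one DictionarySource streaming the whole dictionary, a single coordinator is shared by num_streams sources and the resulting pipes are united. A minimal sketch of that fan-out, with a hypothetical Coordinator standing in for the real DictionarySourceCoordinator:

#include <algorithm>
#include <atomic>
#include <cstddef>

/// Hypothetical coordinator: hands out disjoint [begin, end) key ranges to competing sources.
struct Coordinator
{
    Coordinator(size_t total_keys_, size_t block_size_)
        : total_keys(total_keys_), block_size(block_size_) {}

    bool nextRange(size_t & begin, size_t & end)
    {
        size_t start = cursor.fetch_add(block_size); /// atomic, so sources may run in parallel
        if (start >= total_keys)
            return false;
        begin = start;
        end = std::min(start + block_size, total_keys);
        return true;
    }

    const size_t total_keys;
    const size_t block_size;
    std::atomic<size_t> cursor{0};
};

Each of the num_streams sources loops on nextRange() and materializes only its own slice, which is what lets a full-dictionary read parallelize instead of flowing through a single pipe.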
template class HashedArrayDictionary<DictionaryKeyType::Simple>;

View File

@ -93,6 +93,13 @@ public:
const DataTypes & key_types,
const ColumnPtr & default_values_column) const override;
Columns getColumns(
const Strings & attribute_names,
const DataTypes & result_types,
const Columns & key_columns,
const DataTypes & key_types,
const Columns & default_values_columns) const override;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
bool hasHierarchy() const override { return dictionary_key_type == DictionaryKeyType::Simple && dict_struct.hierarchical_attribute_index.has_value(); }
@ -109,7 +116,7 @@ public:
const DataTypePtr & key_type,
size_t level) const override;
Pipe read(const Names & column_names, size_t max_block_size) const override;
Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override;
private:
@ -170,6 +177,14 @@ private:
void calculateBytesAllocated();
template <typename KeysProvider>
ColumnPtr getAttributeColumn(
const Attribute & attribute,
const DictionaryAttribute & dictionary_attribute,
size_t keys_size,
ColumnPtr default_values_column,
KeysProvider && keys_object) const;
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
const Attribute & attribute,
@ -177,6 +192,13 @@ private:
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
const Attribute & attribute,
const PaddedPODArray<ssize_t> & key_index_to_element_index,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename GetContainerFunc>
void getAttributeContainer(size_t attribute_index, GetContainerFunc && get_container_func);

View File

@ -6,6 +6,8 @@
#include <Columns/ColumnNullable.h>
#include <Functions/FunctionHelpers.h>
#include <Processors/Sources/SourceWithProgress.h>
#include <Dictionaries/DictionarySource.h>
#include <Dictionaries/DictionaryFactory.h>
#include <Dictionaries/HierarchyDictionariesUtils.h>
@ -640,7 +642,7 @@ void HashedDictionary<dictionary_key_type, sparse>::calculateBytesAllocated()
}
template <DictionaryKeyType dictionary_key_type, bool sparse>
Pipe HashedDictionary<dictionary_key_type, sparse>::read(const Names & column_names, size_t max_block_size) const
Pipe HashedDictionary<dictionary_key_type, sparse>::read(const Names & column_names, size_t max_block_size, size_t num_streams) const
{
PaddedPODArray<HashedDictionary::KeyType> keys;
@ -669,7 +671,25 @@ Pipe HashedDictionary<dictionary_key_type, sparse>::read(const Names & column_na
});
}
return Pipe(std::make_shared<DictionarySource>(DictionarySourceData(shared_from_this(), std::move(keys), column_names), max_block_size));
ColumnsWithTypeAndName key_columns;
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
key_columns = {ColumnWithTypeAndName(getColumnFromPODArray(keys), std::make_shared<DataTypeUInt64>(), dict_struct.id->name)};
else
key_columns = deserializeColumnsWithTypeAndNameFromKeys(dict_struct, keys, 0, keys.size());
std::shared_ptr<const IDictionary> dictionary = shared_from_this();
auto coordinator = std::make_shared<DictionarySourceCoordinator>(dictionary, column_names, std::move(key_columns), max_block_size);
Pipes pipes;
for (size_t i = 0; i < num_streams; ++i)
{
auto source = std::make_shared<DictionarySource>(coordinator);
pipes.emplace_back(Pipe(std::move(source)));
}
return Pipe::unitePipes(std::move(pipes));
}
template <DictionaryKeyType dictionary_key_type, bool sparse>

View File

@ -115,7 +115,7 @@ public:
const DataTypePtr & key_type,
size_t level) const override;
Pipe read(const Names & column_names, size_t max_block_size) const override;
Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override;
private:
template <typename Value>

View File

@ -106,7 +106,8 @@ public:
/// Insert default keys
virtual void insertDefaultKeys(const PaddedPODArray<StringRef> & keys) = 0;
/// Return cached simple keys
/// Return cached complex keys.
/// It is the client's responsibility to ensure the keys' proper lifetime.
virtual PaddedPODArray<StringRef> getCachedComplexKeys() const = 0;
/// Return size of keys in storage

View File

@ -110,7 +110,7 @@ struct IDictionary : public IExternalLoadable
*/
virtual DictionaryKeyType getKeyType() const = 0;
virtual DictionarySpecialKeyType getSpecialKeyType() const { return DictionarySpecialKeyType::None;}
virtual DictionarySpecialKeyType getSpecialKeyType() const { return DictionarySpecialKeyType::None; }
/** Subclass must validate key columns and keys types
* and return column representation of dictionary attribute.
@ -194,7 +194,7 @@ struct IDictionary : public IExternalLoadable
getDictionaryID().getNameForLogs());
}
virtual Pipe read(const Names & column_names, size_t max_block_size) const = 0;
virtual Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const = 0;
bool supportUpdates() const override { return true; }

View File

@ -807,13 +807,14 @@ Columns IPAddressDictionary::getKeyColumns() const
key_ip_column->insertData(data, IPV6_BINARY_LENGTH);
key_mask_column->insertValue(mask_column[row]);
}
return {std::move(key_ip_column), std::move(key_mask_column)};
}
template <typename KeyColumnType, bool IsIPv4>
static auto keyViewGetter()
{
return [](const Columns & columns, const std::vector<DictionaryAttribute> & dict_attributes)
return [](const Columns & columns, const std::vector<DictionaryAttribute> & dictionary_key_attributes)
{
auto column = ColumnString::create();
const auto & key_ip_column = assert_cast<const KeyColumnType &>(*columns.front());
@ -830,41 +831,52 @@ static auto keyViewGetter()
column->insertData(buffer, str_len);
}
return ColumnsWithTypeAndName{
ColumnWithTypeAndName(std::move(column), std::make_shared<DataTypeString>(), dict_attributes.front().name)};
ColumnWithTypeAndName(std::move(column), std::make_shared<DataTypeString>(), dictionary_key_attributes.front().name)};
};
}
Pipe IPAddressDictionary::read(const Names & column_names, size_t max_block_size) const
Pipe IPAddressDictionary::read(const Names & column_names, size_t max_block_size, size_t num_streams) const
{
const bool is_ipv4 = std::get_if<IPv4Container>(&ip_column) != nullptr;
auto get_keys = [is_ipv4](const Columns & columns, const std::vector<DictionaryAttribute> & dict_attributes)
{
const auto & attr = dict_attributes.front();
std::shared_ptr<const IDataType> key_typ;
if (is_ipv4)
key_typ = std::make_shared<DataTypeUInt32>();
else
key_typ = std::make_shared<DataTypeFixedString>(IPV6_BINARY_LENGTH);
auto key_columns = getKeyColumns();
return ColumnsWithTypeAndName({
ColumnWithTypeAndName(columns.front(), key_typ, attr.name),
ColumnWithTypeAndName(columns.back(), std::make_shared<DataTypeUInt8>(), attr.name + ".mask")
});
std::shared_ptr<const IDataType> key_type;
if (is_ipv4)
key_type = std::make_shared<DataTypeUInt32>();
else
key_type = std::make_shared<DataTypeFixedString>(IPV6_BINARY_LENGTH);
ColumnsWithTypeAndName key_columns_with_type = {
ColumnWithTypeAndName(key_columns.front(), key_type, ""),
ColumnWithTypeAndName(key_columns.back(), std::make_shared<DataTypeUInt8>(), "")
};
ColumnsWithTypeAndName view_columns;
if (is_ipv4)
{
auto get_view = keyViewGetter<ColumnVector<UInt32>, true>();
return Pipe(std::make_shared<DictionarySource>(
DictionarySourceData(shared_from_this(), getKeyColumns(), column_names, std::move(get_keys), std::move(get_view)),
max_block_size));
view_columns = get_view(key_columns, *dict_struct.key);
}
else
{
auto get_view = keyViewGetter<ColumnFixedString, false>();
view_columns = get_view(key_columns, *dict_struct.key);
}
auto get_view = keyViewGetter<ColumnFixedString, false>();
return Pipe(std::make_shared<DictionarySource>(
DictionarySourceData(shared_from_this(), getKeyColumns(), column_names, std::move(get_keys), std::move(get_view)),
max_block_size));
std::shared_ptr<const IDictionary> dictionary = shared_from_this();
auto coordinator = std::make_shared<DictionarySourceCoordinator>(dictionary, column_names, std::move(key_columns_with_type), std::move(view_columns), max_block_size);
Pipes pipes;
for (size_t i = 0; i < num_streams; ++i)
{
auto source = std::make_shared<DictionarySource>(coordinator);
pipes.emplace_back(Pipe(std::move(source)));
}
return Pipe::unitePipes(std::move(pipes));
}
IPAddressDictionary::RowIdxConstIter IPAddressDictionary::ipNotFound() const

View File

@ -78,7 +78,7 @@ public:
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
Pipe read(const Names & column_names, size_t max_block_size) const override;
Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override;
private:

View File

@ -118,7 +118,7 @@ ColumnPtr IPolygonDictionary::getColumn(
return result;
}
Pipe IPolygonDictionary::read(const Names & column_names, size_t) const
Pipe IPolygonDictionary::read(const Names & column_names, size_t, size_t) const
{
if (!configuration.store_polygon_key_column)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,

View File

@ -106,7 +106,7 @@ public:
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
Pipe read(const Names & column_names, size_t max_block_size) const override;
Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override;
/** Single coordinate type. */
using Coord = Float32;

View File

@ -1,219 +0,0 @@
#pragma once
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Columns/IColumn.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Dictionaries/IDictionary.h>
#include <Dictionaries/DictionarySourceBase.h>
#include <Dictionaries/DictionaryHelpers.h>
#include <Dictionaries/RangeHashedDictionary.h>
namespace DB
{
template <DictionaryKeyType dictionary_key_type, typename RangeType>
class RangeDictionarySourceData
{
public:
using KeyType = std::conditional_t<dictionary_key_type == DictionaryKeyType::Simple, UInt64, StringRef>;
RangeDictionarySourceData(
std::shared_ptr<const IDictionary> dictionary,
const Names & column_names,
PaddedPODArray<KeyType> && keys,
PaddedPODArray<RangeType> && start_dates,
PaddedPODArray<RangeType> && end_dates);
Block getBlock(size_t start, size_t length) const;
size_t getNumRows() const { return keys.size(); }
private:
Block fillBlock(
const PaddedPODArray<KeyType> & keys_to_fill,
const PaddedPODArray<RangeType> & block_start_dates,
const PaddedPODArray<RangeType> & block_end_dates,
size_t start,
size_t end) const;
PaddedPODArray<Int64> makeDateKeys(
const PaddedPODArray<RangeType> & block_start_dates,
const PaddedPODArray<RangeType> & block_end_dates) const;
std::shared_ptr<const IDictionary> dictionary;
NameSet column_names;
PaddedPODArray<KeyType> keys;
PaddedPODArray<RangeType> start_dates;
PaddedPODArray<RangeType> end_dates;
};
template <DictionaryKeyType dictionary_key_type, typename RangeType>
RangeDictionarySourceData<dictionary_key_type, RangeType>::RangeDictionarySourceData(
std::shared_ptr<const IDictionary> dictionary_,
const Names & column_names_,
PaddedPODArray<KeyType> && keys,
PaddedPODArray<RangeType> && block_start_dates,
PaddedPODArray<RangeType> && block_end_dates)
: dictionary(dictionary_)
, column_names(column_names_.begin(), column_names_.end())
, keys(std::move(keys))
, start_dates(std::move(block_start_dates))
, end_dates(std::move(block_end_dates))
{
}
template <DictionaryKeyType dictionary_key_type, typename RangeType>
Block RangeDictionarySourceData<dictionary_key_type, RangeType>::getBlock(size_t start, size_t length) const
{
PaddedPODArray<KeyType> block_keys;
PaddedPODArray<RangeType> block_start_dates;
PaddedPODArray<RangeType> block_end_dates;
block_keys.reserve(length);
block_start_dates.reserve(length);
block_end_dates.reserve(length);
for (size_t index = start; index < start + length; ++index)
{
block_keys.push_back(keys[index]);
block_start_dates.push_back(start_dates[index]);
block_end_dates.push_back(end_dates[index]);
}
return fillBlock(block_keys, block_start_dates, block_end_dates, start, start + length);
}
template <DictionaryKeyType dictionary_key_type, typename RangeType>
PaddedPODArray<Int64> RangeDictionarySourceData<dictionary_key_type, RangeType>::makeDateKeys(
const PaddedPODArray<RangeType> & block_start_dates,
const PaddedPODArray<RangeType> & block_end_dates) const
{
PaddedPODArray<Int64> keys(block_start_dates.size());
for (size_t i = 0; i < keys.size(); ++i)
{
if (Range::isCorrectDate(block_start_dates[i]))
keys[i] = block_start_dates[i];
else
keys[i] = block_end_dates[i];
}
return keys;
}
template <DictionaryKeyType dictionary_key_type, typename RangeType>
Block RangeDictionarySourceData<dictionary_key_type, RangeType>::fillBlock(
const PaddedPODArray<KeyType> & keys_to_fill,
const PaddedPODArray<RangeType> & block_start_dates,
const PaddedPODArray<RangeType> & block_end_dates,
size_t start,
size_t end) const
{
ColumnsWithTypeAndName columns;
const DictionaryStructure & dictionary_structure = dictionary->getStructure();
DataTypes keys_types;
Columns keys_columns;
Strings keys_names = dictionary_structure.getKeysNames();
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
{
keys_columns = {getColumnFromPODArray(keys_to_fill)};
keys_types = {std::make_shared<DataTypeUInt64>()};
}
else
{
for (const auto & attribute : *dictionary_structure.key)
keys_types.emplace_back(attribute.type);
auto deserialized_columns = deserializeColumnsFromKeys(dictionary_structure, keys, start, end);
for (auto & deserialized_column : deserialized_columns)
keys_columns.emplace_back(std::move(deserialized_column));
}
size_t keys_size = keys_names.size();
assert(keys_columns.size() == keys_size);
assert(keys_types.size() == keys_size);
for (size_t i = 0; i < keys_size; ++i)
{
auto & key_name = keys_names[i];
if (column_names.find(key_name) != column_names.end())
columns.emplace_back(keys_columns[i], keys_types[i], key_name);
}
auto date_key = makeDateKeys(block_start_dates, block_end_dates);
auto date_column = getColumnFromPODArray(date_key);
keys_columns.emplace_back(std::move(date_column));
keys_types.emplace_back(std::make_shared<DataTypeInt64>());
const auto & range_min_column_name = dictionary_structure.range_min->name;
if (column_names.find(range_min_column_name) != column_names.end())
{
auto range_min_column = getColumnFromPODArray(block_start_dates);
columns.emplace_back(range_min_column, dictionary_structure.range_max->type, range_min_column_name);
}
const auto & range_max_column_name = dictionary_structure.range_max->name;
if (column_names.find(range_max_column_name) != column_names.end())
{
auto range_max_column = getColumnFromPODArray(block_end_dates);
columns.emplace_back(range_max_column, dictionary_structure.range_max->type, range_max_column_name);
}
size_t attributes_size = dictionary_structure.attributes.size();
for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index)
{
const auto & attribute = dictionary_structure.attributes[attribute_index];
if (column_names.find(attribute.name) == column_names.end())
continue;
auto column = dictionary->getColumn(
attribute.name,
attribute.type,
keys_columns,
keys_types,
nullptr /* default_values_column*/);
columns.emplace_back(std::move(column), attribute.type, attribute.name);
}
return Block(columns);
}
template <DictionaryKeyType dictionary_key_type, typename RangeType>
class RangeDictionarySource : public DictionarySourceBase
{
public:
RangeDictionarySource(RangeDictionarySourceData<dictionary_key_type, RangeType> data_, size_t max_block_size);
String getName() const override { return "RangeDictionarySource"; }
protected:
Block getBlock(size_t start, size_t length) const override;
RangeDictionarySourceData<dictionary_key_type, RangeType> data;
};
template <DictionaryKeyType dictionary_key_type, typename RangeType>
RangeDictionarySource<dictionary_key_type, RangeType>::RangeDictionarySource(RangeDictionarySourceData<dictionary_key_type, RangeType> data_, size_t max_block_size)
: DictionarySourceBase(data_.getBlock(0, 0), data_.getNumRows(), max_block_size)
, data(std::move(data_))
{
}
template <DictionaryKeyType dictionary_key_type, typename RangeType>
Block RangeDictionarySource<dictionary_key_type, RangeType>::getBlock(size_t start, size_t length) const
{
return data.getBlock(start, length);
}
}

View File

@ -1,11 +1,14 @@
#include "RangeHashedDictionary.h"
#include <Dictionaries/RangeHashedDictionary.h>
#include <Columns/ColumnNullable.h>
#include <Functions/FunctionHelpers.h>
#include <base/Typelists.h>
#include <Interpreters/castColumn.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Dictionaries/DictionaryFactory.h>
#include <Dictionaries/RangeDictionarySource.h>
#include <Dictionaries/DictionarySource.h>
namespace
@ -219,6 +222,7 @@ ColumnUInt8::Ptr RangeHashedDictionary<dictionary_key_type>::hasKeys(const Colum
key_types_copy.pop_back();
dict_struct.validateKeyTypes(key_types_copy);
}
auto range_column_storage_type = std::make_shared<DataTypeInt64>();
auto range_storage_column = key_columns.back();
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""};
@ -567,7 +571,7 @@ void RangeHashedDictionary<dictionary_key_type>::getKeysAndDates(
{
const auto & attribute = attributes.front();
auto type_call = [&](const auto &dictionary_attribute_type)
auto type_call = [&](const auto & dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
@ -610,28 +614,6 @@ void RangeHashedDictionary<dictionary_key_type>::getKeysAndDates(
}
}
template <DictionaryKeyType dictionary_key_type>
template <typename RangeType>
Pipe RangeHashedDictionary<dictionary_key_type>::readImpl(const Names & column_names, size_t max_block_size) const
{
PaddedPODArray<KeyType> keys;
PaddedPODArray<RangeType> start_dates;
PaddedPODArray<RangeType> end_dates;
getKeysAndDates(keys, start_dates, end_dates);
using RangeDictionarySourceType = RangeDictionarySource<dictionary_key_type, RangeType>;
auto source_data = RangeDictionarySourceData<dictionary_key_type, RangeType>(
shared_from_this(),
column_names,
std::move(keys),
std::move(start_dates),
std::move(end_dates));
auto source = std::make_shared<RangeDictionarySourceType>(std::move(source_data), max_block_size);
return Pipe(source);
}
template <DictionaryKeyType dictionary_key_type>
StringRef RangeHashedDictionary<dictionary_key_type>::copyKeyInArena(StringRef key)
{
@ -643,38 +625,86 @@ StringRef RangeHashedDictionary<dictionary_key_type>::copyKeyInArena(StringRef k
}
template <DictionaryKeyType dictionary_key_type>
struct RangeHashedDictionaryCallGetSourceImpl
template <typename RangeType>
PaddedPODArray<Int64> RangeHashedDictionary<dictionary_key_type>::makeDateKeys(
const PaddedPODArray<RangeType> & block_start_dates,
const PaddedPODArray<RangeType> & block_end_dates) const
{
Pipe pipe;
const RangeHashedDictionary<dictionary_key_type> * dict;
const Names * column_names;
size_t max_block_size;
PaddedPODArray<Int64> keys(block_start_dates.size());
template <class RangeType>
void operator()(Id<RangeType>)
for (size_t i = 0; i < keys.size(); ++i)
{
const auto & type = dict->dict_struct.range_min->type;
if (pipe.empty() && dynamic_cast<const DataTypeNumberBase<RangeType> *>(type.get()))
pipe = dict->template readImpl<RangeType>(*column_names, max_block_size);
if (Range::isCorrectDate(block_start_dates[i]))
keys[i] = block_start_dates[i]; // NOLINT
else
keys[i] = block_end_dates[i]; // NOLINT
}
};
return keys;
}
template <DictionaryKeyType dictionary_key_type>
Pipe RangeHashedDictionary<dictionary_key_type>::read(const Names & column_names, size_t max_block_size) const
Pipe RangeHashedDictionary<dictionary_key_type>::read(const Names & column_names, size_t max_block_size, size_t num_streams) const
{
RangeHashedDictionaryCallGetSourceImpl<dictionary_key_type> callable;
callable.dict = this;
callable.column_names = &column_names;
callable.max_block_size = max_block_size;
auto type = dict_struct.range_min->type;
TLUtils::forEach(TLIntegral{}, callable);
ColumnsWithTypeAndName key_columns;
ColumnWithTypeAndName range_min_column;
ColumnWithTypeAndName range_max_column;
if (callable.pipe.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Unexpected range type for RangeHashed dictionary: {}",
dict_struct.range_min->type->getName());
auto type_call = [&](const auto & types) mutable -> bool
{
using Types = std::decay_t<decltype(types)>;
using LeftDataType = typename Types::LeftType;
return std::move(callable.pipe);
if constexpr (IsDataTypeNumber<LeftDataType> ||
std::is_same_v<LeftDataType, DataTypeDate> ||
std::is_same_v<LeftDataType, DataTypeDate32> ||
std::is_same_v<LeftDataType, DataTypeDateTime>)
{
using RangeType = typename LeftDataType::FieldType;
PaddedPODArray<KeyType> keys;
PaddedPODArray<RangeType> start_dates;
PaddedPODArray<RangeType> end_dates;
getKeysAndDates(keys, start_dates, end_dates);
range_min_column = ColumnWithTypeAndName{getColumnFromPODArray(start_dates), dict_struct.range_min->type, dict_struct.range_min->name};
range_max_column = ColumnWithTypeAndName{getColumnFromPODArray(end_dates), dict_struct.range_max->type, dict_struct.range_max->name};
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
key_columns = {ColumnWithTypeAndName(getColumnFromPODArray(keys), std::make_shared<DataTypeUInt64>(), dict_struct.id->name)};
else
key_columns = deserializeColumnsWithTypeAndNameFromKeys(dict_struct, keys, 0, keys.size());
auto date_column = getColumnFromPODArray(makeDateKeys(start_dates, end_dates));
key_columns.emplace_back(ColumnWithTypeAndName{std::move(date_column), std::make_shared<DataTypeInt64>(), ""});
return true;
}
else
{
return false;
}
};
if (!callOnIndexAndDataType<void>(type->getTypeId(), type_call))
throw Exception(ErrorCodes::LOGICAL_ERROR, "RangeHashedDictionary min max range type should be numeric");
ColumnsWithTypeAndName data_columns = {std::move(range_min_column), std::move(range_max_column)};
std::shared_ptr<const IDictionary> dictionary = shared_from_this();
auto coordinator = std::make_shared<DictionarySourceCoordinator>(dictionary, column_names, std::move(key_columns), std::move(data_columns), max_block_size);
Pipes pipes;
for (size_t i = 0; i < num_streams; ++i)
{
auto source = std::make_shared<DictionarySource>(coordinator);
pipes.emplace_back(Pipe(std::move(source)));
}
return Pipe::unitePipes(std::move(pipes));
}

View File

@ -90,7 +90,7 @@ public:
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
Pipe read(const Names & column_names, size_t max_block_size) const override;
Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override;
private:
template <typename T>
@ -175,13 +175,12 @@ private:
PaddedPODArray<RangeType> & end_dates) const;
template <typename RangeType>
Pipe readImpl(const Names & column_names, size_t max_block_size) const;
PaddedPODArray<Int64> makeDateKeys(
const PaddedPODArray<RangeType> & block_start_dates,
const PaddedPODArray<RangeType> & block_end_dates) const;
StringRef copyKeyInArena(StringRef key);
template <DictionaryKeyType>
friend struct RangeHashedDictionaryCallGetSourceImpl;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;
const DictionaryLifetime dict_lifetime;

View File

@ -57,7 +57,7 @@ protected:
String getFileContents(const String & file_name)
{
auto buf = encrypted_disk->readFile(file_name, {}, 0);
auto buf = encrypted_disk->readFile(file_name, /* settings= */ {}, /* size= */ {});
String str;
readStringUntilEOF(str, *buf);
return str;
@ -65,7 +65,7 @@ protected:
static String getBinaryRepresentation(const String & abs_path)
{
auto buf = createReadBufferFromFileBase(abs_path, {}, 0);
auto buf = createReadBufferFromFileBase(abs_path, /* settings= */ {});
String str;
readStringUntilEOF(str, *buf);
return str;

View File

@ -1,9 +1,49 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsStringArray.h>
namespace
{
bool isNullableStringOrNullableNothing(DB::DataTypePtr type)
{
if (type->isNullable())
{
const auto & nested_type = assert_cast<const DB::DataTypeNullable &>(*type).getNestedType();
if (isString(nested_type) || isNothing(nested_type))
return true;
}
return false;
}
}
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
DataTypePtr FunctionArrayStringConcat::getReturnTypeImpl(const DataTypes & arguments) const
{
if (arguments.size() != 1 && arguments.size() != 2)
throw Exception(
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
+ ", should be 1 or 2.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get());
// An array consisting of only Null-s has type Array(Nullable(Nothing))
if (!array_type || !(isString(array_type->getNestedType()) || isNullableStringOrNullableNothing(array_type->getNestedType())))
throw Exception(
"First argument for function " + getName() + " must be an array of String-s or Nullable(String)-s.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (arguments.size() == 2 && !isString(arguments[1]))
throw Exception("Second argument for function " + getName() + " must be constant string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeString>();
}
void registerFunctionsStringArray(FunctionFactory & factory)
{

View File

@ -1,18 +1,21 @@
#pragma once
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnArray.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Functions/Regexps.h>
#include <Functions/FunctionHelpers.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context_fwd.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>
namespace DB
@ -21,7 +24,6 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_COLUMN;
}
@ -650,13 +652,15 @@ public:
class FunctionArrayStringConcat : public IFunction
{
private:
void executeInternal(
static void executeInternal(
const ColumnString::Chars & src_chars,
const ColumnString::Offsets & src_string_offsets,
const ColumnArray::Offsets & src_array_offsets,
const char * delimiter, const size_t delimiter_size,
const char * delimiter,
const size_t delimiter_size,
ColumnString::Chars & dst_chars,
ColumnString::Offsets & dst_string_offsets) const
ColumnString::Offsets & dst_string_offsets,
const char8_t * null_map)
{
size_t size = src_array_offsets.size();
@ -674,29 +678,33 @@ private:
dst_string_offsets.resize(src_array_offsets.size());
ColumnArray::Offset current_src_array_offset = 0;
ColumnString::Offset current_src_string_offset = 0;
ColumnString::Offset current_dst_string_offset = 0;
/// Loop through the array of strings.
for (size_t i = 0; i < size; ++i)
{
bool first_non_null = true;
/// Loop through the rows within the array. /// NOTE You can do everything in one copy, if the separator has a size of 1.
for (auto next_src_array_offset = src_array_offsets[i]; current_src_array_offset < next_src_array_offset; ++current_src_array_offset)
{
if (unlikely(null_map && null_map[current_src_array_offset]))
continue;
if (!first_non_null)
{
memcpy(&dst_chars[current_dst_string_offset], delimiter, delimiter_size);
current_dst_string_offset += delimiter_size;
}
first_non_null = false;
const auto current_src_string_offset = current_src_array_offset ? src_string_offsets[current_src_array_offset - 1] : 0;
size_t bytes_to_copy = src_string_offsets[current_src_array_offset] - current_src_string_offset - 1;
memcpySmallAllowReadWriteOverflow15(
&dst_chars[current_dst_string_offset], &src_chars[current_src_string_offset], bytes_to_copy);
current_src_string_offset = src_string_offsets[current_src_array_offset];
current_dst_string_offset += bytes_to_copy;
if (current_src_array_offset + 1 != next_src_array_offset)
{
memcpy(&dst_chars[current_dst_string_offset], delimiter, delimiter_size);
current_dst_string_offset += delimiter_size;
}
}
dst_chars[current_dst_string_offset] = 0;
@ -708,6 +716,24 @@ private:
dst_chars.resize(dst_string_offsets.back());
}
static void executeInternal(
const ColumnString & col_string,
const ColumnArray & col_arr,
const String & delimiter,
ColumnString & col_res,
const char8_t * null_map = nullptr)
{
executeInternal(
col_string.getChars(),
col_string.getOffsets(),
col_arr.getOffsets(),
delimiter.data(),
delimiter.size(),
col_res.getChars(),
col_res.getOffsets(),
null_map);
}
public:
static constexpr auto name = "arrayStringConcat";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayStringConcat>(); }
@ -721,23 +747,7 @@ public:
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.size() != 1 && arguments.size() != 2)
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
+ toString(arguments.size()) + ", should be 1 or 2.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get());
if (!array_type || !isString(array_type->getNestedType()))
throw Exception("First argument for function " + getName() + " must be array of strings.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (arguments.size() == 2
&& !isString(arguments[1]))
throw Exception("Second argument for function " + getName() + " must be constant string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeString>();
}
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
{
@ -755,10 +765,14 @@ public:
{
Array src_arr = col_const_arr->getValue<Array>();
String dst_str;
bool first_non_null = true;
for (size_t i = 0, size = src_arr.size(); i < size; ++i)
{
if (i != 0)
if (src_arr[i].isNull())
continue;
if (!first_non_null)
dst_str += delimiter;
first_non_null = false;
dst_str += src_arr[i].get<const String &>();
}
@ -767,15 +781,20 @@ public:
else
{
const ColumnArray & col_arr = assert_cast<const ColumnArray &>(*arguments[0].column);
const ColumnString & col_string = assert_cast<const ColumnString &>(col_arr.getData());
auto col_res = ColumnString::create();
executeInternal(
col_string.getChars(), col_string.getOffsets(), col_arr.getOffsets(),
delimiter.data(), delimiter.size(),
col_res->getChars(), col_res->getOffsets());
if (WhichDataType(col_arr.getData().getDataType()).isString())
{
const ColumnString & col_string = assert_cast<const ColumnString &>(col_arr.getData());
executeInternal(col_string, col_arr, delimiter, *col_res);
}
else
{
const ColumnNullable & col_nullable = assert_cast<const ColumnNullable &>(col_arr.getData());
if (const ColumnString * col_string = typeid_cast<const ColumnString *>(col_nullable.getNestedColumnPtr().get()))
executeInternal(*col_string, col_arr, delimiter, *col_res, col_nullable.getNullMapData().data());
else
col_res->insertManyDefaults(col_arr.size());
}
return col_res;
}
}
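The behavior change carried by first_non_null is that NULL array elements are now skipped entirely, so no delimiter is emitted around them. A toy model of the same join loop over plain C++ optionals (hypothetical data, mirroring the constant-array branch above):

#include <iostream>
#include <optional>
#include <string>
#include <vector>

int main()
{
    std::vector<std::optional<std::string>> src = {"a", std::nullopt, "b"};
    std::string delimiter = ",";
    std::string dst;
    bool first_non_null = true;
    for (const auto & value : src)
    {
        if (!value)
            continue; /// plays the role of the null_map check above
        if (!first_non_null)
            dst += delimiter;
        first_non_null = false;
        dst += *value;
    }
    std::cout << dst << '\n'; /// prints "a,b" rather than "a,,b"
}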

View File

@ -200,7 +200,7 @@ struct MatchImpl
}
/// We check that the entry does not pass through the boundaries of strings.
if (pos + strstr_pattern.size() < begin + offsets[i])
if (pos + required_substring.size() < begin + offsets[i])
{
/// And if it does not, if necessary, we check the regexp.
@ -344,7 +344,7 @@ struct MatchImpl
const UInt8 * next_pos = begin;
/// If required substring is larger than string size - it cannot be found.
if (strstr_pattern.size() <= n)
if (required_substring.size() <= n)
{
Searcher searcher(required_substring.data(), required_substring.size(), end - pos);
@ -360,7 +360,7 @@ struct MatchImpl
}
next_pos += n;
if (pos + strstr_pattern.size() <= next_pos)
if (pos + required_substring.size() <= next_pos)
{
/// And if it does not, if necessary, we check the regexp.

View File

@ -43,13 +43,13 @@ private:
class FunctionBaseNow : public IFunctionBase
{
public:
explicit FunctionBaseNow(time_t time_, DataTypePtr return_type_) : time_value(time_), return_type(return_type_) {}
explicit FunctionBaseNow(time_t time_, DataTypes argument_types_, DataTypePtr return_type_)
: time_value(time_), argument_types(std::move(argument_types_)), return_type(std::move(return_type_)) {}
String getName() const override { return "now"; }
const DataTypes & getArgumentTypes() const override
{
static const DataTypes argument_types;
return argument_types;
}
@ -69,6 +69,7 @@ public:
private:
time_t time_value;
DataTypes argument_types;
DataTypePtr return_type;
};
@ -117,8 +118,10 @@ public:
}
if (arguments.size() == 1)
return std::make_unique<FunctionBaseNow>(
time(nullptr), std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 0, 0)));
return std::make_unique<FunctionBaseNow>(time(nullptr), std::make_shared<DataTypeDateTime>());
time(nullptr), DataTypes{arguments.front().type},
std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 0, 0)));
return std::make_unique<FunctionBaseNow>(time(nullptr), DataTypes(), std::make_shared<DataTypeDateTime>());
}
};
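The now/now64 change above fixes an introspection bug: getArgumentTypes() used to return a function-local static empty DataTypes, so a function built as now('UTC') claimed to take no arguments. A reduced illustration of the before/after, with std::string standing in for a data type:

#include <cassert>
#include <string>
#include <utility>
#include <vector>

using DataTypes = std::vector<std::string>;

struct Before
{
    const DataTypes & getArgumentTypes() const
    {
        static const DataTypes argument_types; /// always empty, shared by all instances
        return argument_types;
    }
};

struct After
{
    explicit After(DataTypes argument_types_) : argument_types(std::move(argument_types_)) {}
    const DataTypes & getArgumentTypes() const { return argument_types; }
    DataTypes argument_types;
};

int main()
{
    After now_with_tz(DataTypes{"String"}); /// e.g. now('UTC')
    assert(now_with_tz.getArgumentTypes().size() == 1);
    assert(Before{}.getArgumentTypes().empty()); /// the old behavior, even with an argument
}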

View File

@ -67,13 +67,13 @@ private:
class FunctionBaseNow64 : public IFunctionBase
{
public:
explicit FunctionBaseNow64(Field time_, DataTypePtr return_type_) : time_value(time_), return_type(return_type_) {}
explicit FunctionBaseNow64(Field time_, DataTypes argument_types_, DataTypePtr return_type_)
: time_value(time_), argument_types(std::move(argument_types_)), return_type(std::move(return_type_)) {}
String getName() const override { return "now64"; }
const DataTypes & getArgumentTypes() const override
{
static const DataTypes argument_types;
return argument_types;
}
@ -93,6 +93,7 @@ public:
private:
Field time_value;
DataTypes argument_types;
DataTypePtr return_type;
};
@ -139,14 +140,19 @@ public:
return std::make_shared<DataTypeDateTime64>(scale, timezone_name);
}
FunctionBasePtr buildImpl(const ColumnsWithTypeAndName &, const DataTypePtr & result_type) const override
FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override
{
UInt32 scale = DataTypeDateTime64::default_scale;
auto res_type = removeNullable(result_type);
if (const auto * type = typeid_cast<const DataTypeDateTime64 *>(res_type.get()))
scale = type->getScale();
return std::make_unique<FunctionBaseNow64>(nowSubsecond(scale), result_type);
DataTypes arg_types;
arg_types.reserve(arguments.size());
for (const auto & arg : arguments)
arg_types.push_back(arg.type);
return std::make_unique<FunctionBaseNow64>(nowSubsecond(scale), std::move(arg_types), std::move(result_type));
}
};

View File

@ -407,14 +407,20 @@ try
}
StreamingFormatExecutor executor(header, format, std::move(on_error), std::move(adding_defaults_transform));
std::unique_ptr<ReadBuffer> buffer;
std::unique_ptr<ReadBuffer> last_buffer;
for (const auto & entry : data->entries)
{
buffer = std::make_unique<ReadBufferFromString>(entry->bytes);
auto buffer = std::make_unique<ReadBufferFromString>(entry->bytes);
current_entry = entry;
total_rows += executor.execute(*buffer);
/// Keep the previous buffer alive, because the format may still use it
/// (including in its destructor) while `buffer` is replaced on the next iteration.
last_buffer = std::move(buffer);
}
format->addBuffer(std::move(last_buffer));
auto chunk = Chunk(executor.getResultColumns(), total_rows);
size_t total_bytes = chunk.bytes();
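The last_buffer dance above is a lifetime fix: the format may keep a pointer into the buffer it just consumed, so destroying that buffer at the top of the next loop iteration would leave a dangling reference. A minimal sketch of the pattern (hypothetical Buffer type):

#include <memory>
#include <string>
#include <vector>

struct Buffer { explicit Buffer(std::string s) : data(std::move(s)) {} std::string data; };

int main()
{
    std::vector<std::string> entries = {"one", "two"};
    std::unique_ptr<Buffer> last_buffer;
    for (const auto & entry : entries)
    {
        auto buffer = std::make_unique<Buffer>(entry);
        /// ... the consumer reads from *buffer and may remember a pointer into it ...
        last_buffer = std::move(buffer); /// keep the previous buffer alive past this iteration
    }
    /// Finally, transfer ownership to something that outlives the loop, as format->addBuffer() does.
}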

View File

@ -84,6 +84,14 @@ public:
UInt64 client_version_patch = 0;
unsigned client_tcp_protocol_version = 0;
/// In the case of a distributed query, the client info of the query is actually the client info of the initial client.
/// To get the version of the server-initiator, use the connection_* values below.
/// These values are filled for TCP connections only.
UInt64 connection_client_version_major = 0;
UInt64 connection_client_version_minor = 0;
UInt64 connection_client_version_patch = 0;
unsigned connection_tcp_protocol_version = 0;
/// For http
HTTPMethod http_method = HTTPMethod::UNKNOWN;
String http_user_agent;

View File

@ -161,8 +161,8 @@ struct ContextSharedPart
ConfigurationPtr zookeeper_config; /// Stores zookeeper configs
#if USE_NURAFT
mutable std::mutex keeper_storage_dispatcher_mutex;
mutable std::shared_ptr<KeeperDispatcher> keeper_storage_dispatcher;
mutable std::mutex keeper_dispatcher_mutex;
mutable std::shared_ptr<KeeperDispatcher> keeper_dispatcher;
#endif
mutable std::mutex auxiliary_zookeepers_mutex;
mutable std::map<String, zkutil::ZooKeeperPtr> auxiliary_zookeepers; /// Map for auxiliary ZooKeeper clients.
@ -1901,10 +1901,9 @@ void Context::setSystemZooKeeperLogAfterInitializationIfNeeded()
void Context::initializeKeeperDispatcher([[maybe_unused]] bool start_async) const
{
#if USE_NURAFT
std::lock_guard lock(shared->keeper_storage_dispatcher_mutex);
std::lock_guard lock(shared->keeper_dispatcher_mutex);
if (shared->keeper_storage_dispatcher)
if (shared->keeper_dispatcher)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to initialize Keeper multiple times");
const auto & config = getConfigRef();
@ -1914,17 +1913,17 @@ void Context::initializeKeeperDispatcher([[maybe_unused]] bool start_async) cons
if (start_async)
{
assert(!is_standalone_app);
LOG_INFO(shared->log, "Connected to ZooKeeper (or Keeper) before internal Keeper start or we don't depend on our Keeper cluster"
", will wait for Keeper asynchronously");
LOG_INFO(shared->log, "Connected to ZooKeeper (or Keeper) before internal Keeper start or we don't depend on our Keeper cluster, "
"will wait for Keeper asynchronously");
}
else
{
LOG_INFO(shared->log, "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start,"
LOG_INFO(shared->log, "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start, "
"will wait for Keeper synchronously");
}
shared->keeper_storage_dispatcher = std::make_shared<KeeperDispatcher>();
shared->keeper_storage_dispatcher->initialize(config, is_standalone_app, start_async);
shared->keeper_dispatcher = std::make_shared<KeeperDispatcher>();
shared->keeper_dispatcher->initialize(config, is_standalone_app, start_async);
}
#endif
}
@ -1932,27 +1931,39 @@ void Context::initializeKeeperDispatcher([[maybe_unused]] bool start_async) cons
#if USE_NURAFT
std::shared_ptr<KeeperDispatcher> & Context::getKeeperDispatcher() const
{
std::lock_guard lock(shared->keeper_storage_dispatcher_mutex);
if (!shared->keeper_storage_dispatcher)
std::lock_guard lock(shared->keeper_dispatcher_mutex);
if (!shared->keeper_dispatcher)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Keeper must be initialized before requests");
return shared->keeper_storage_dispatcher;
return shared->keeper_dispatcher;
}
#endif
void Context::shutdownKeeperDispatcher() const
{
#if USE_NURAFT
std::lock_guard lock(shared->keeper_storage_dispatcher_mutex);
if (shared->keeper_storage_dispatcher)
std::lock_guard lock(shared->keeper_dispatcher_mutex);
if (shared->keeper_dispatcher)
{
shared->keeper_storage_dispatcher->shutdown();
shared->keeper_storage_dispatcher.reset();
shared->keeper_dispatcher->shutdown();
shared->keeper_dispatcher.reset();
}
#endif
}
void Context::updateKeeperConfiguration(const Poco::Util::AbstractConfiguration & config)
{
#if USE_NURAFT
std::lock_guard lock(shared->keeper_dispatcher_mutex);
if (!shared->keeper_dispatcher)
return;
shared->keeper_dispatcher->updateConfiguration(config);
#endif
}
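The new updateKeeperConfiguration above deliberately no-ops while the dispatcher is uninitialized, so a config reload can neither race with nor force Keeper initialization. A small sketch of that guard with hypothetical types:

#include <memory>
#include <mutex>

struct Config {};
struct Dispatcher { void updateConfiguration(const Config &) {} };

struct Shared
{
    std::mutex keeper_dispatcher_mutex;
    std::shared_ptr<Dispatcher> keeper_dispatcher;
};

void updateKeeperConfiguration(Shared & shared, const Config & config)
{
    std::lock_guard lock(shared.keeper_dispatcher_mutex);
    if (!shared.keeper_dispatcher)
        return; /// not initialized yet: nothing to update
    shared.keeper_dispatcher->updateConfiguration(config);
}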
zkutil::ZooKeeperPtr Context::getAuxiliaryZooKeeper(const String & name) const
{
std::lock_guard lock(shared->auxiliary_zookeepers_mutex);

Some files were not shown because too many files have changed in this diff.