Merge branch 'master' into feat/server_settings

skyoct 2023-12-20 21:30:36 +08:00 committed by GitHub
commit 296a0bf981
223 changed files with 4555 additions and 2326 deletions

.gitmodules

@ -360,3 +360,6 @@
[submodule "contrib/sqids-cpp"]
path = contrib/sqids-cpp
url = https://github.com/sqids/sqids-cpp.git
[submodule "contrib/idna"]
path = contrib/idna
url = https://github.com/ada-url/idna.git


@ -154,6 +154,7 @@ add_contrib (libpqxx-cmake libpqxx)
add_contrib (libpq-cmake libpq)
add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float)
add_contrib (idna-cmake idna)
add_contrib (datasketches-cpp-cmake datasketches-cpp)
add_contrib (incbin-cmake incbin)
add_contrib (sqids-cpp-cmake sqids-cpp)

contrib/idna (submodule)

@ -0,0 +1 @@
Subproject commit 3c8be01d42b75649f1ac9b697d0ef757eebfe667


@ -0,0 +1,24 @@
option(ENABLE_IDNA "Enable idna support" ${ENABLE_LIBRARIES})
if ((NOT ENABLE_IDNA))
message (STATUS "Not using idna")
return()
endif()
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/idna")
set (SRCS
"${LIBRARY_DIR}/src/idna.cpp"
"${LIBRARY_DIR}/src/mapping.cpp"
"${LIBRARY_DIR}/src/mapping_tables.cpp"
"${LIBRARY_DIR}/src/normalization.cpp"
"${LIBRARY_DIR}/src/normalization_tables.cpp"
"${LIBRARY_DIR}/src/punycode.cpp"
"${LIBRARY_DIR}/src/to_ascii.cpp"
"${LIBRARY_DIR}/src/to_unicode.cpp"
"${LIBRARY_DIR}/src/unicode_transcoding.cpp"
"${LIBRARY_DIR}/src/validity.cpp"
)
add_library (_idna ${SRCS})
target_include_directories(_idna PUBLIC "${LIBRARY_DIR}/include")
add_library (ch_contrib::idna ALIAS _idna)


@ -11,7 +11,9 @@ option (ENABLE_EMBEDDED_COMPILER "Enable support for JIT compilation during quer
option (ENABLE_DWARF_PARSER "Enable support for DWARF input format (uses LLVM library)" ${ENABLE_DWARF_PARSER_DEFAULT})
if (NOT ENABLE_EMBEDDED_COMPILER AND NOT ENABLE_DWARF_PARSER)
option (ENABLE_BLAKE3 "Enable BLAKE3 function" ${ENABLE_LIBRARIES})
if (NOT ENABLE_EMBEDDED_COMPILER AND NOT ENABLE_DWARF_PARSER AND NOT ENABLE_BLAKE3)
message(STATUS "Not using LLVM")
return()
endif()
@ -26,61 +28,75 @@ set (LLVM_LIBRARY_DIRS "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm")
# and llvm cannot be compiled with bundled libcxx and 20 standard.
set (CMAKE_CXX_STANDARD 14)
# This list was generated by listing all LLVM libraries, compiling the binary and removing all libraries while it still compiles.
set (REQUIRED_LLVM_LIBRARIES
LLVMExecutionEngine
LLVMRuntimeDyld
LLVMAsmPrinter
LLVMDebugInfoDWARF
LLVMGlobalISel
LLVMSelectionDAG
LLVMMCDisassembler
LLVMPasses
LLVMCodeGen
LLVMipo
LLVMBitWriter
LLVMInstrumentation
LLVMScalarOpts
LLVMAggressiveInstCombine
LLVMInstCombine
LLVMVectorize
LLVMTransformUtils
LLVMTarget
LLVMAnalysis
LLVMProfileData
LLVMObject
LLVMBitReader
LLVMCore
LLVMRemarks
LLVMBitstreamReader
LLVMMCParser
LLVMMC
LLVMBinaryFormat
LLVMDebugInfoCodeView
LLVMSupport
LLVMDemangle
)
if (ARCH_AMD64)
set (LLVM_TARGETS_TO_BUILD "X86" CACHE INTERNAL "")
elseif (ARCH_AARCH64)
set (LLVM_TARGETS_TO_BUILD "AArch64" CACHE INTERNAL "")
elseif (ARCH_PPC64LE)
set (LLVM_TARGETS_TO_BUILD "PowerPC" CACHE INTERNAL "")
elseif (ARCH_S390X)
set (LLVM_TARGETS_TO_BUILD "SystemZ" CACHE INTERNAL "")
elseif (ARCH_RISCV64)
set (LLVM_TARGETS_TO_BUILD "RISCV" CACHE INTERNAL "")
endif ()
if (NOT ENABLE_EMBEDDED_COMPILER AND NOT ENABLE_DWARF_PARSER)
# Only compiling blake3
set (REQUIRED_LLVM_LIBRARIES LLVMSupport)
else()
# This list was generated by listing all LLVM libraries, compiling the binary and removing all libraries while it still compiles.
set (REQUIRED_LLVM_LIBRARIES
LLVMExecutionEngine
LLVMRuntimeDyld
LLVMAsmPrinter
LLVMDebugInfoDWARF
LLVMGlobalISel
LLVMSelectionDAG
LLVMMCDisassembler
LLVMPasses
LLVMCodeGen
LLVMipo
LLVMBitWriter
LLVMInstrumentation
LLVMScalarOpts
LLVMAggressiveInstCombine
LLVMInstCombine
LLVMVectorize
LLVMTransformUtils
LLVMTarget
LLVMAnalysis
LLVMProfileData
LLVMObject
LLVMBitReader
LLVMCore
LLVMRemarks
LLVMBitstreamReader
LLVMMCParser
LLVMMC
LLVMBinaryFormat
LLVMDebugInfoCodeView
LLVMSupport
LLVMDemangle
)
if (ARCH_AMD64)
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMX86Info LLVMX86Desc LLVMX86CodeGen)
elseif (ARCH_AARCH64)
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen)
elseif (ARCH_PPC64LE)
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMPowerPCInfo LLVMPowerPCDesc LLVMPowerPCCodeGen)
elseif (ARCH_S390X)
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMSystemZInfo LLVMSystemZDesc LLVMSystemZCodeGen)
elseif (ARCH_RISCV64)
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMRISCVInfo LLVMRISCVDesc LLVMRISCVCodeGen)
endif ()
endif()
# Skip useless "install" instructions from CMake:
set (LLVM_INSTALL_TOOLCHAIN_ONLY 1 CACHE INTERNAL "")
if (ARCH_AMD64)
set (LLVM_TARGETS_TO_BUILD "X86" CACHE INTERNAL "")
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMX86Info LLVMX86Desc LLVMX86CodeGen)
elseif (ARCH_AARCH64)
set (LLVM_TARGETS_TO_BUILD "AArch64" CACHE INTERNAL "")
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen)
elseif (ARCH_PPC64LE)
set (LLVM_TARGETS_TO_BUILD "PowerPC" CACHE INTERNAL "")
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMPowerPCInfo LLVMPowerPCDesc LLVMPowerPCCodeGen)
elseif (ARCH_S390X)
set (LLVM_TARGETS_TO_BUILD "SystemZ" CACHE INTERNAL "")
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMSystemZInfo LLVMSystemZDesc LLVMSystemZCodeGen)
elseif (ARCH_RISCV64)
set (LLVM_TARGETS_TO_BUILD "RISCV" CACHE INTERNAL "")
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMRISCVInfo LLVMRISCVDesc LLVMRISCVCodeGen)
endif ()
message (STATUS "LLVM TARGETS TO BUILD ${LLVM_TARGETS_TO_BUILD}")
set (CMAKE_INSTALL_RPATH "ON") # Do not adjust RPATH in llvm, since then it will not be able to find libcxx/libcxxabi/libunwind


@ -216,11 +216,11 @@ export -f run_tests
if [ "$NUM_TRIES" -gt "1" ]; then
# We don't run tests with Ordinary database in PRs, only in master.
# So run new/changed tests with Ordinary at least once in flaky check.
timeout "$MAX_RUN_TIME" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \
timeout_with_logging "$MAX_RUN_TIME" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \
| sed 's/All tests have finished//' | sed 's/No tests were run//' ||:
fi
timeout "$MAX_RUN_TIME" bash -c run_tests ||:
timeout_with_logging "$MAX_RUN_TIME" bash -c run_tests ||:
echo "Files in current directory"
ls -la ./


@ -35,4 +35,17 @@ function fn_exists() {
declare -F "$1" > /dev/null;
}
function timeout_with_logging() {
local exit_code=0
timeout "${@}" || exit_code="${?}"
if [[ "${exit_code}" -eq "124" ]]
then
echo "The command 'timeout ${*}' has been killed by timeout"
fi
return $exit_code
}
# vi: ft=bash


@ -67,7 +67,6 @@ Engines in the family:
Engines in the family:
- [Distributed](../../engines/table-engines/special/distributed.md#distributed)
- [MaterializedView](../../engines/table-engines/special/materializedview.md#materializedview)
- [Dictionary](../../engines/table-engines/special/dictionary.md#dictionary)
- [Merge](../../engines/table-engines/special/merge.md#merge)
- [File](../../engines/table-engines/special/file.md#file)


@ -12,7 +12,7 @@ In most cases you do not need a partition key, and in most other cases you do no
You should never use partitioning that is too granular. Don't partition your data by client identifiers or names; instead, make the client identifier or name the first column in the ORDER BY expression.
:::
Partitioning is available for the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). [Materialized views](../../../engines/table-engines/special/materializedview.md#materializedview) based on MergeTree tables support partitioning, as well.
Partitioning is available for the [MergeTree family tables](../../../engines/table-engines/mergetree-family/mergetree.md), including [replicated tables](../../../engines/table-engines/mergetree-family/replication.md) and [materialized views](../../../sql-reference/statements/create/view.md#materialized-view).
A partition is a logical combination of records in a table by a specified criterion. You can set a partition by an arbitrary criterion, such as by month, by day, or by event type. Each partition is stored separately to simplify manipulations of this data. When accessing the data, ClickHouse uses the smallest subset of partitions possible. Partitions improve performance for queries containing a partitioning key because ClickHouse will filter for that partition before selecting the parts and granules within the partition.
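For illustration, a minimal sketch of a monthly partitioning key on a hypothetical `events` table (the table and column names are not part of this change):

```sql
-- Hypothetical table: partition by month; keep the client identifier first in ORDER BY
CREATE TABLE events
(
    event_date Date,
    client_id UInt64,
    event_type String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(event_date)
ORDER BY (client_id, event_date);
```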


@ -1,9 +0,0 @@
---
slug: /en/engines/table-engines/special/materializedview
sidebar_position: 100
sidebar_label: MaterializedView
---
# MaterializedView Table Engine
Used for implementing materialized views (for more information, see [CREATE VIEW](../../../sql-reference/statements/create/view.md#materialized)). For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses that engine.


@ -29,6 +29,10 @@ Transactionally inconsistent caching is traditionally provided by client tools o
the same caching logic and configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side.
This reduces maintenance effort and avoids redundancy.
:::note Security consideration
The cached query result is tied to the user executing it. Authorization checks are performed when the query is executed. This means that if there are any alterations to the user's role or permissions between the time the query is cached and when the cache is accessed, the result will not reflect these changes. We recommend using different users to distinguish between different levels of access, instead of actively toggling roles for a single user between queries, as this practice may lead to unexpected query results.
:::
## Configuration Settings and Usage
Setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries of the


@ -460,6 +460,12 @@ Possible values:
Default value: 1048576.
## http_make_head_request {#http-make-head-request}
The `http_make_head_request` setting allows the execution of a `HEAD` request while reading data from HTTP to retrieve information about the file to be read, such as its size. Since it's enabled by default, it may be desirable to disable this setting in cases where the server does not support `HEAD` requests.
Default value: `true`.
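A minimal sketch of disabling the setting for a single query; the URL and format are illustrative only:

```sql
-- Skip the HEAD request for a server that rejects it (endpoint is hypothetical)
SELECT count()
FROM url('https://example.com/data.csv', 'CSVWithNames')
SETTINGS http_make_head_request = 0;
```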
## table_function_remote_max_addresses {#table_function_remote_max_addresses}
Sets the maximum number of addresses generated from patterns for the [remote](../../sql-reference/table-functions/remote.md) function.
@ -4158,6 +4164,41 @@ Result:
└─────┴─────┴───────┘
```
## enable_order_by_all {#enable-order-by-all}
Enables or disables sorting by `ALL` columns, i.e. [ORDER BY ALL](../../sql-reference/statements/select/order-by.md).
Possible values:
- 0 — Disable ORDER BY ALL.
- 1 — Enable ORDER BY ALL.
Default value: `1`.
**Example**
Query:
```sql
CREATE TABLE TAB(C1 Int, C2 Int, ALL Int) ENGINE=Memory();
INSERT INTO TAB VALUES (10, 20, 30), (20, 20, 10), (30, 10, 20);
SELECT * FROM TAB ORDER BY ALL; -- returns an error that ALL is ambiguous
SELECT * FROM TAB ORDER BY ALL SETTINGS enable_order_by_all = 0;
```
Result:
```text
┌─C1─┬─C2─┬─ALL─┐
│ 20 │ 20 │ 10 │
│ 30 │ 10 │ 20 │
│ 10 │ 20 │ 30 │
└────┴────┴─────┘
```
## splitby_max_substrings_includes_remaining_string {#splitby_max_substrings_includes_remaining_string}
Controls whether function [splitBy*()](../../sql-reference/functions/splitting-merging-functions.md) with argument `max_substrings` > 0 will include the remaining string in the last element of the result array.
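A sketch of the intended effect, assuming the documented semantics of `max_substrings` (the input string is arbitrary):

```sql
-- With the setting enabled, the last array element keeps the remainder of the string
SELECT splitByChar(',', 'a,b,c,d', 2)
SETTINGS splitby_max_substrings_includes_remaining_string = 1;
-- expected: ['a','b,c,d']; with the setting disabled the result would be ['a','b']
```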
@ -5093,3 +5134,25 @@ When set to `true` than for all s3 requests first two attempts are made with low
When set to `false` than all attempts are made with identical timeouts.
Default value: `true`.
## max_partition_size_to_drop
Restriction on dropping partitions at query time.
Default value: 50 GB.
The value 0 means that you can drop partitions without any restrictions.
:::note
This query setting overwrites its server setting equivalent, see [max_partition_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-partition-size-to-drop)
:::
## max_table_size_to_drop
Restriction on deleting tables at query time.
Default value: 50 GB.
The value 0 means that you can delete all tables without any restrictions.
:::note
This query setting overwrites its server setting equivalent, see [max_table_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-table-size-to-drop)
:::
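A sketch of lifting the guard for a single session; the table name is hypothetical:

```sql
SET max_table_size_to_drop = 0;   -- remove the 50 GB restriction for this session
DROP TABLE IF EXISTS big_events;  -- hypothetical table
```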


@ -29,7 +29,7 @@ Columns:
- `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database dependencies.
- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table).
- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table dependencies ([materialized views](../../sql-reference/statements/create/view.md#materialized-view) based on the current table).
- `create_table_query` ([String](../../sql-reference/data-types/string.md)) - The query that was used to create the table.
@ -57,6 +57,8 @@ Columns:
- If the table stores data on disk, returns used space on disk (i.e. compressed).
- If the table stores data in memory, returns approximated number of used bytes in memory.
- `total_bytes_uncompressed` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of uncompressed bytes, if it's possible to quickly determine the exact number of bytes from the part checksums for the table on storage, otherwise `NULL` (does not take underlying storage (if any) into account).
- `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows INSERTed since server start (only for `Buffer` tables).
- `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes INSERTed since server start (only for `Buffer` tables).
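For example, the new column can be inspected next to the compressed size; a sketch:

```sql
-- Compare on-disk (compressed) and uncompressed sizes per table
SELECT name, total_bytes, total_bytes_uncompressed
FROM system.tables
WHERE database = currentDatabase()
ORDER BY total_bytes DESC;
```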


@ -394,7 +394,7 @@ Configuration example:
or
``` sql
LAYOUT(HASHED_ARRAY())
LAYOUT(HASHED_ARRAY([SHARDS 1]))
```
### complex_key_hashed_array
@ -412,7 +412,7 @@ Configuration example:
or
``` sql
LAYOUT(COMPLEX_KEY_HASHED_ARRAY())
LAYOUT(COMPLEX_KEY_HASHED_ARRAY([SHARDS 1]))
```
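A sketch of a dictionary DDL that uses the new `SHARDS` clause; the dictionary name, structure, and source are illustrative only:

```sql
CREATE DICTIONARY my_dict
(
    id UInt64,
    value String
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'my_dict_source'))
LIFETIME(MIN 0 MAX 3600)
LAYOUT(HASHED_ARRAY(SHARDS 4));
```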
### range_hashed {#range_hashed}
@ -2415,8 +2415,8 @@ clickhouse client \
--secure \
--password MY_PASSWORD \
--query "
INSERT INTO regexp_dictionary_source_table
SELECT * FROM input ('id UInt64, parent_id UInt64, regexp String, keys Array(String), values Array(String)')
INSERT INTO regexp_dictionary_source_table
SELECT * FROM input ('id UInt64, parent_id UInt64, regexp String, keys Array(String), values Array(String)')
FORMAT CSV" < regexp_dict.csv
```


@ -2831,3 +2831,92 @@ Result:
│ SELECT a, b FROM tab WHERE (a > 3) AND (b < 3)
└─────────────────────────────────────────────────────────────────────────┘
```
## minSampleSizeConversion
Calculates minimum required sample size for an A/B test comparing conversions (proportions) in two samples.
**Syntax**
``` sql
minSampleSizeConversion(baseline, mde, power, alpha)
```
Uses the formula described in [this article](https://towardsdatascience.com/required-sample-size-for-a-b-testing-6f6608dd330a). Assumes equal sizes of treatment and control groups. Returns the sample size required for one group (i.e. the sample size required for the whole experiment is twice the returned value).
**Arguments**
- `baseline` — Baseline conversion. [Float](../data-types/float.md).
- `mde` — Minimum detectable effect (MDE) as percentage points (e.g. for a baseline conversion 0.25 the MDE 0.03 means an expected change to 0.25 ± 0.03). [Float](../data-types/float.md).
- `power` — Required statistical power of a test (1 - probability of Type II error). [Float](../data-types/float.md).
- `alpha` — Required significance level of a test (probability of Type I error). [Float](../data-types/float.md).
**Returned value**
A named [Tuple](../data-types/tuple.md) with 3 elements:
- `"minimum_sample_size"` — Required sample size. [Float64](../data-types/float.md).
- `"detect_range_lower"` — Lower bound of the range of values not detectable with the returned required sample size (i.e. all values less than or equal to `"detect_range_lower"` are detectable with the provided `alpha` and `power`). Calculated as `baseline - mde`. [Float64](../data-types/float.md).
- `"detect_range_upper"` — Upper bound of the range of values not detectable with the returned required sample size (i.e. all values greater than or equal to `"detect_range_upper"` are detectable with the provided `alpha` and `power`). Calculated as `baseline + mde`. [Float64](../data-types/float.md).
**Example**
The following query calculates the required sample size for an A/B test with baseline conversion of 25%, MDE of 3%, significance level of 5%, and the desired statistical power of 80%:
``` sql
SELECT minSampleSizeConversion(0.25, 0.03, 0.80, 0.05) AS sample_size;
```
Result:
``` text
┌─sample_size───────────────────┐
│ (3396.077603219163,0.22,0.28) │
└───────────────────────────────┘
```
## minSampleSizeContinuous
Calculates minimum required sample size for an A/B test comparing means of a continuous metric in two samples.
**Syntax**
``` sql
minSampleSizeContinuous(baseline, sigma, mde, power, alpha)
```
Alias: `minSampleSizeContinous`
Uses the formula described in [this article](https://towardsdatascience.com/required-sample-size-for-a-b-testing-6f6608dd330a). Assumes equal sizes of treatment and control groups. Returns the required sample size for one group (i.e. the sample size required for the whole experiment is twice the returned value). Also assumes equal variance of the test metric in treatment and control groups.
**Arguments**
- `baseline` — Baseline value of a metric. [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md).
- `sigma` — Baseline standard deviation of a metric. [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md).
- `mde` — Minimum detectable effect (MDE) as percentage of the baseline value (e.g. for a baseline value 112.25 the MDE 0.03 means an expected change to 112.25 ± 112.25*0.03). [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md).
- `power` — Required statistical power of a test (1 - probability of Type II error). [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md).
- `alpha` — Required significance level of a test (probability of Type I error). [Integer](../data-types/int-uint.md) or [Float](../data-types/float.md).
**Returned value**
A named [Tuple](../data-types/tuple.md) with 3 elements:
- `"minimum_sample_size"` — Required sample size. [Float64](../data-types/float.md).
- `"detect_range_lower"` — Lower bound of the range of values not detectable with the returned required sample size (i.e. all values less than or equal to `"detect_range_lower"` are detectable with the provided `alpha` and `power`). Calculated as `baseline * (1 - mde)`. [Float64](../data-types/float.md).
- `"detect_range_upper"` — Upper bound of the range of values not detectable with the returned required sample size (i.e. all values greater than or equal to `"detect_range_upper"` are detectable with the provided `alpha` and `power`). Calculated as `baseline * (1 + mde)`. [Float64](../data-types/float.md).
**Example**
The following query calculates the required sample size for an A/B test on a metric with baseline value of 112.25, standard deviation of 21.1, MDE of 3%, significance level of 5%, and the desired statistical power of 80%:
``` sql
SELECT minSampleSizeContinous(112.25, 21.1, 0.03, 0.80, 0.05) AS sample_size;
```
Result:
``` text
┌─sample_size───────────────────────────┐
│ (616.2931945826209,108.8825,115.6175) │
└───────────────────────────────────────┘
```


@ -1383,6 +1383,71 @@ Result:
└──────────────────┘
```
## punycodeEncode
Returns the [Punycode](https://en.wikipedia.org/wiki/Punycode) of a string.
The string must be UTF8-encoded, otherwise results are undefined.
**Syntax**
``` sql
punycodeEncode(val)
```
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
**Returned value**
- A Punycode representation of the input value. [String](../data-types/string.md)
**Example**
``` sql
select punycodeEncode('München');
```
Result:
```result
┌─punycodeEncode('München')─┐
│ Mnchen-3ya │
└───────────────────────────┘
```
## punycodeDecode
Returns the UTF8-encoded plaintext of a [Punycode](https://en.wikipedia.org/wiki/Punycode)-encoded string.
**Syntax**
``` sql
punycodeDecode(val)
```
**Arguments**
- `val` - Punycode-encoded string. [String](../data-types/string.md)
**Returned value**
- The plaintext of the input value. [String](../data-types/string.md)
**Example**
``` sql
select punycodeDecode('Mnchen-3ya');
```
Result:
```result
┌─punycodeDecode('Mnchen-3ya')─┐
│ München │
└──────────────────────────────┘
```
## byteHammingDistance
Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings.


@ -17,7 +17,7 @@ This query tries to initialize an unscheduled merge of data parts for tables. No
OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
```
The `OPTIMIZE` query is supported for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family, the [MaterializedView](../../engines/table-engines/special/materializedview.md) and the [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren't supported.
The `OPTIMIZE` query is supported for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family (including [materialized views](../../sql-reference/statements/create/view.md#materialized-view)) and the [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren't supported.
When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family of table engines, ClickHouse creates a task for merging and waits for execution on all replicas (if the [alter_sync](../../operations/settings/settings.md#alter-sync) setting is set to `2`) or on current replica (if the [alter_sync](../../operations/settings/settings.md#alter-sync) setting is set to `1`).
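For reference, a minimal sketch of the statement against a hypothetical MergeTree table partitioned by month:

```sql
-- Force an unscheduled merge of one partition and deduplicate identical rows
OPTIMIZE TABLE events PARTITION 202312 FINAL DEDUPLICATE;
```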


@ -5,12 +5,22 @@ sidebar_label: ORDER BY
# ORDER BY Clause
The `ORDER BY` clause contains a list of expressions, which can each be attributed with a `DESC` (descending) or `ASC` (ascending) modifier that determines the sorting direction. If the direction is not specified, `ASC` is assumed, so it's usually omitted. The sorting direction applies to a single expression, not to the entire list. Example: `ORDER BY Visits DESC, SearchPhrase`. Sorting is case-sensitive.
The `ORDER BY` clause contains
If you want to sort by column numbers instead of column names, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments).
- a list of expressions, e.g. `ORDER BY visits, search_phrase`,
- a list of numbers referring to columns in the `SELECT` clause, e.g. `ORDER BY 2, 1`, or
- `ALL` which means all columns of the `SELECT` clause, e.g. `ORDER BY ALL`.
Rows that have identical values for the list of sorting expressions are output in an arbitrary order, which can also be non-deterministic (different each time).
If the ORDER BY clause is omitted, the order of the rows is also undefined, and may be non-deterministic as well.
To disable sorting by column numbers, set setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments) = 0.
To disable sorting by `ALL`, set setting [enable_order_by_all](../../../operations/settings/settings.md#enable-order-by-all) = 0.
The `ORDER BY` clause can be attributed with a `DESC` (descending) or `ASC` (ascending) modifier which determines the sorting direction.
Unless an explicit sort order is specified, `ASC` is used by default.
The sorting direction applies to a single expression, not to the entire list, e.g. `ORDER BY Visits DESC, SearchPhrase`.
Also, sorting is performed case-sensitively.
Rows with identical values for the sort expressions are returned in an arbitrary and non-deterministic order.
If the `ORDER BY` clause is omitted in a `SELECT` statement, the row order is also arbitrary and non-deterministic.
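A sketch of the three forms side by side, on a hypothetical table; all three statements sort identically here:

```sql
SELECT visits, search_phrase FROM hits ORDER BY visits, search_phrase; -- by expression
SELECT visits, search_phrase FROM hits ORDER BY 1, 2;                  -- by column number
SELECT visits, search_phrase FROM hits ORDER BY ALL;                   -- all SELECT columns
```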
## Sorting of Special Values
@ -265,8 +275,9 @@ Consider disabling `optimize_read_in_order` manually, when running queries that
Optimization is supported in the following table engines:
- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)
- [Merge](../../../engines/table-engines/special/merge.md), [Buffer](../../../engines/table-engines/special/buffer.md), and [MaterializedView](../../../engines/table-engines/special/materializedview.md) table engines over `MergeTree`-engine tables
- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) (including [materialized views](../../../sql-reference/statements/create/view.md#materialized-view)),
- [Merge](../../../engines/table-engines/special/merge.md),
- [Buffer](../../../engines/table-engines/special/buffer.md)
In `MaterializedView`-engine tables the optimization works with views like `SELECT ... FROM merge_tree_table ORDER BY pk`. But it is not supported in queries like `SELECT ... FROM view ORDER BY pk` if the view query does not have an `ORDER BY` clause.


@ -61,6 +61,22 @@ sidebar_label: ORDER BY
We only recommend using `COLLATE` for the final sorting of a small number of rows, because sorting with `COLLATE` is less efficient than normal byte-wise sorting.
## ORDER BY ALL
`ORDER BY ALL` sorts all selected columns in ascending order.
Example:
``` sql
SELECT a, b, c FROM t ORDER BY ALL
```
Is equivalent to:
``` sql
SELECT a, b, c FROM t ORDER BY a, b, c
```
## Implementation Details {#implementation-details}
Less RAM is used if a small enough [LIMIT](../../../sql-reference/statements/select/limit.md) is specified in addition to `ORDER BY`. Otherwise, the amount of memory spent is proportional to the volume of data to sort. For distributed query processing, if [GROUP BY](../../../sql-reference/statements/select/group-by.md) is omitted, sorting is partially done on remote servers and the results are merged on the requester server. This means that for distributed sorting, the volume of data to sort can be larger than the amount of memory on a single server.


@ -2128,10 +2128,9 @@ void Server::createServers(
{
const Settings & settings = global_context->getSettingsRef();
Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0);
Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams;
http_params->setTimeout(settings.http_receive_timeout);
http_params->setKeepAliveTimeout(keep_alive_timeout);
http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout);
Poco::Util::AbstractConfiguration::Keys protocols;
config.keys("protocols", protocols);
@ -2385,10 +2384,9 @@ void Server::createInterserverServers(
{
const Settings & settings = global_context->getSettingsRef();
Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0);
Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams;
http_params->setTimeout(settings.http_receive_timeout);
http_params->setKeepAliveTimeout(keep_alive_timeout);
http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout);
/// Now iterate over interserver_listen_hosts
for (const auto & interserver_listen_host : interserver_listen_hosts)


@ -1,3 +0,0 @@
clickhouse_import_crate(MANIFEST_PATH Cargo.toml)
target_include_directories(_ch_rust_blake3 INTERFACE include)
add_library(ch_rust::blake3 ALIAS _ch_rust_blake3)


@ -1,20 +0,0 @@
[package]
name = "_ch_rust_blake3"
version = "0.1.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
blake3 = "1.2.0"
libc = "0.2.132"
[lib]
crate-type = ["staticlib"]
[profile.release]
debug = true
[profile.release-thinlto]
inherits = "release"
# BLAKE3 module requires "full" LTO (not "thin") to get additional 10% performance benefit
lto = true


@ -1,15 +0,0 @@
#ifndef BLAKE3_H
#define BLAKE3_H
#include <cstdint>
extern "C" {
char *blake3_apply_shim(const char *begin, uint32_t _size, uint8_t *out_char_data);
void blake3_free_char_pointer(char *ptr_to_free);
} // extern "C"
#endif /* BLAKE3_H */


@ -1,31 +0,0 @@
extern crate blake3;
extern crate libc;
use std::ffi::{CString};
use std::slice;
use std::os::raw::c_char;
#[no_mangle]
pub unsafe extern "C" fn blake3_apply_shim(
begin: *const c_char,
size: u32,
out_char_data: *mut u8,
) -> *mut c_char {
if begin.is_null() {
let err_str = CString::new("input was a null pointer").unwrap();
return err_str.into_raw();
}
let input_res = slice::from_raw_parts(begin as *const u8, size as usize);
let mut hasher = blake3::Hasher::new();
hasher.update(input_res);
let mut reader = hasher.finalize_xof();
reader.fill(std::slice::from_raw_parts_mut(out_char_data, blake3::OUT_LEN));
std::ptr::null_mut()
}
// Freeing memory according to docs: https://doc.rust-lang.org/std/ffi/struct.CString.html#method.into_raw
#[no_mangle]
pub unsafe extern "C" fn blake3_free_char_pointer(ptr_to_free: *mut c_char) {
std::mem::drop(CString::from_raw(ptr_to_free));
}


@ -99,6 +99,5 @@ function(add_rust_subdirectory src)
VERBATIM)
endfunction()
add_rust_subdirectory (BLAKE3)
add_rust_subdirectory (skim)
add_rust_subdirectory (prql)

rust/Cargo.lock (generated)

File diff suppressed because it is too large.

@ -1,7 +1,6 @@
# workspace is required to vendor crates for all packages.
[workspace]
members = [
"BLAKE3",
"skim",
"prql",
]


@ -119,6 +119,7 @@ namespace ErrorCodes
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
extern const int FUNCTION_CANNOT_HAVE_PARAMETERS;
extern const int SYNTAX_ERROR;
extern const int UNEXPECTED_EXPRESSION;
}
/** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h first.
@ -1209,6 +1210,8 @@ private:
static void expandGroupByAll(QueryNode & query_tree_node_typed);
static void expandOrderByAll(QueryNode & query_tree_node_typed);
static std::string
rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context);
@ -2312,6 +2315,35 @@ void QueryAnalyzer::expandGroupByAll(QueryNode & query_tree_node_typed)
recursivelyCollectMaxOrdinaryExpressions(node, group_by_nodes);
}
void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed)
{
auto * all_node = query_tree_node_typed.getOrderBy().getNodes()[0]->as<SortNode>();
if (!all_node)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not sort node.");
auto & projection_nodes = query_tree_node_typed.getProjection().getNodes();
auto list_node = std::make_shared<ListNode>();
list_node->getNodes().reserve(projection_nodes.size());
for (auto & node : projection_nodes)
{
if (auto * identifier_node = node->as<IdentifierNode>(); identifier_node != nullptr)
if (Poco::toUpper(identifier_node->getIdentifier().getFullName()) == "ALL" || Poco::toUpper(identifier_node->getAlias()) == "ALL")
throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION,
"Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again");
if (auto * function_node = node->as<FunctionNode>(); function_node != nullptr)
if (Poco::toUpper(function_node->getAlias()) == "ALL")
throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION,
"Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again");
auto sort_node = std::make_shared<SortNode>(node, all_node->getSortDirection(), all_node->getNullsSortDirection());
list_node->getNodes().push_back(sort_node);
}
query_tree_node_typed.getOrderByNode() = list_node;
}
std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(
const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context)
{
@ -6975,6 +7007,9 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
if (query_node_typed.hasHaving() && query_node_typed.isGroupByWithTotals() && is_rollup_or_cube)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS and WITH ROLLUP or CUBE are not supported together in presence of HAVING");
if (settings.enable_order_by_all && query_node_typed.isOrderByAll())
expandOrderByAll(query_node_typed);
/// Initialize aliases in query node scope
QueryExpressionsAliasVisitor visitor(scope);


@ -219,6 +219,18 @@ public:
is_group_by_all = is_group_by_all_value;
}
/// Returns true, if query node has ORDER BY ALL modifier, false otherwise
bool isOrderByAll() const
{
return is_order_by_all;
}
/// Set query node ORDER BY ALL modifier value
void setIsOrderByAll(bool is_order_by_all_value)
{
is_order_by_all = is_order_by_all_value;
}
/// Returns true if query node WITH section is not empty, false otherwise
bool hasWith() const
{
@ -590,6 +602,7 @@ private:
bool is_group_by_with_cube = false;
bool is_group_by_with_grouping_sets = false;
bool is_group_by_all = false;
bool is_order_by_all = false;
std::string cte_name;
NamesAndTypes projection_columns;


@ -284,6 +284,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
current_query_tree->setIsGroupByWithRollup(select_query_typed.group_by_with_rollup);
current_query_tree->setIsGroupByWithGroupingSets(select_query_typed.group_by_with_grouping_sets);
current_query_tree->setIsGroupByAll(select_query_typed.group_by_all);
current_query_tree->setIsOrderByAll(select_query_typed.order_by_all);
current_query_tree->setOriginalAST(select_query);
auto current_context = current_query_tree->getContext();


@ -12,7 +12,7 @@ LibraryBridgeHelper::LibraryBridgeHelper(ContextPtr context_)
, http_timeout(context_->getGlobalContext()->getSettingsRef().http_receive_timeout.value)
, bridge_host(config.getString("library_bridge.host", DEFAULT_HOST))
, bridge_port(config.getUInt("library_bridge.port", DEFAULT_PORT))
, http_timeouts(ConnectionTimeouts::getHTTPTimeouts(context_->getSettingsRef(), {context_->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}))
, http_timeouts(ConnectionTimeouts::getHTTPTimeouts(context_->getSettingsRef(), context_->getServerSettings().keep_alive_timeout))
{
}


@ -162,7 +162,7 @@ private:
ConnectionTimeouts getHTTPTimeouts()
{
return ConnectionTimeouts::getHTTPTimeouts(getContext()->getSettingsRef(), {getContext()->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0});
return ConnectionTimeouts::getHTTPTimeouts(getContext()->getSettingsRef(), getContext()->getServerSettings().keep_alive_timeout);
}
protected:


@ -179,13 +179,22 @@ private:
{
ptr = mmap(address_hint, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (MAP_FAILED == ptr)
throw ErrnoException(DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Allocator: Cannot mmap {}", ReadableSize(size));
throw DB::ErrnoException(DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Allocator: Cannot mmap {}", ReadableSize(size));
}
~Chunk()
{
if (ptr && 0 != munmap(ptr, size))
throw ErrnoException(DB::ErrorCodes::CANNOT_MUNMAP, "Allocator: Cannot munmap {}", ReadableSize(size));
{
try
{
throw DB::ErrnoException(DB::ErrorCodes::CANNOT_MUNMAP, "Allocator: Cannot munmap {}", ReadableSize(size));
}
catch (DB::ErrnoException &)
{
DB::tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
}
Chunk(Chunk && other) noexcept : ptr(other.ptr), size(other.size)


@ -28,6 +28,7 @@
#cmakedefine01 USE_S2_GEOMETRY
#cmakedefine01 USE_FASTOPS
#cmakedefine01 USE_SQIDS
#cmakedefine01 USE_IDNA
#cmakedefine01 USE_NLP
#cmakedefine01 USE_VECTORSCAN
#cmakedefine01 USE_LIBURING


@ -115,6 +115,10 @@ enum class RuntimeReloadType
MW(M, Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0, RuntimeReloadType::NO) \
MW(M, Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0, RuntimeReloadType::NO) \
\
MW(M, Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0, RuntimeReloadType::NO) \
MW(M, Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0, RuntimeReloadType::NO) \
MW(M, Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0, RuntimeReloadType::NO) \
MW(M, Seconds, replicated_fetches_http_receive_timeout, 0, "HTTP receive timeout for fetch part requests. Inherited from default profile `http_receive_timeout` if not set explicitly.", 0, RuntimeReloadType::NO) \
MW(M, UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0, RuntimeReloadType::NO) \
MW(M, Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0, RuntimeReloadType::NO) \
MW(M, UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0, RuntimeReloadType::NO) \
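Once exposed as a server setting, the keep-alive timeout can be inspected at runtime; a sketch, assuming the `system.server_settings` table is available in this build:

```sql
SELECT name, value, description
FROM system.server_settings
WHERE name = 'keep_alive_timeout';
```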


@ -338,6 +338,7 @@ class IColumn;
M(UInt64, http_max_field_value_size, 128 * 1024, "Maximum length of field value in HTTP header", 0) \
M(UInt64, http_max_chunk_size, 100_GiB, "Maximum value of a chunk size in HTTP chunked transfer encoding", 0) \
M(Bool, http_skip_not_found_url_for_globs, true, "Skip url's for globs with HTTP_NOT_FOUND error", 0) \
M(Bool, http_make_head_request, true, "Allows the execution of a `HEAD` request while reading data from HTTP to retrieve information about the file to be read, such as its size", 0) \
M(Bool, optimize_throw_if_noop, false, "If setting is enabled and OPTIMIZE query didn't actually assign a merge then an explanatory exception is thrown", 0) \
M(Bool, use_index_for_in_with_subqueries, true, "Try using an index if there is a subquery or a table expression on the right side of the IN operator.", 0) \
M(UInt64, use_index_for_in_with_subqueries_max_values, 0, "The maximum size of set in the right hand side of the IN operator to use table index for filtering. It allows to avoid performance degradation and higher memory usage due to preparation of additional data structures for large queries. Zero means no limit.", 0) \
@ -527,8 +528,8 @@ class IColumn;
M(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited.", 0) \
M(Bool, check_query_single_value_result, true, "Return check query result as single 1/0 value", 0) \
M(Bool, allow_drop_detached, false, "Allow ALTER TABLE ... DROP DETACHED PART[ITION] ... queries", 0) \
M(UInt64, max_table_size_to_drop, 0, "Only available in ClickHouse Cloud", 0) \
M(UInt64, max_partition_size_to_drop, 0, "Only available in ClickHouse Cloud", 0) \
M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \
M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \
\
M(UInt64, postgresql_connection_pool_size, 16, "Connection pool size for PostgreSQL table engine and database engine.", 0) \
M(UInt64, postgresql_connection_pool_wait_timeout, 5000, "Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on empty pool.", 0) \
@ -845,6 +846,7 @@ class IColumn;
M(UInt64, cache_warmer_threads, 4, "Only available in ClickHouse Cloud", 0) \
M(Int64, ignore_cold_parts_seconds, 0, "Only available in ClickHouse Cloud", 0) \
M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud", 0) \
M(Bool, enable_order_by_all, true, "Enable sorting expression ORDER BY ALL.", 0)\
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS.


@ -323,7 +323,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, context))
{
configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection, false);
configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection, context, false);
use_table_cache = named_collection->getOrDefault<UInt64>("use_table_cache", 0);
}
else
@ -386,7 +386,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, context))
{
configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection, false);
configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection, context, false);
}
else
{


@ -38,7 +38,7 @@ HTTPDictionarySource::HTTPDictionarySource(
, configuration(configuration_)
, sample_block(sample_block_)
, context(context_)
, timeouts(ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), {context->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}))
, timeouts(ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), context->getServerSettings().keep_alive_timeout))
{
credentials.setUsername(credentials_.getUsername());
credentials.setPassword(credentials_.getPassword());
@ -51,7 +51,7 @@ HTTPDictionarySource::HTTPDictionarySource(const HTTPDictionarySource & other)
, configuration(other.configuration)
, sample_block(other.sample_block)
, context(Context::createCopy(other.context))
, timeouts(ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), {context->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}))
, timeouts(ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), context->getServerSettings().keep_alive_timeout))
{
credentials.setUsername(other.credentials.getUsername());
credentials.setPassword(other.credentials.getPassword());


@ -20,17 +20,19 @@ namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int DICTIONARY_IS_EMPTY;
extern const int LOGICAL_ERROR;
extern const int UNSUPPORTED_METHOD;
}
template <DictionaryKeyType dictionary_key_type>
HashedArrayDictionary<dictionary_key_type>::HashedArrayDictionary(
template <DictionaryKeyType dictionary_key_type, bool sharded>
HashedArrayDictionary<dictionary_key_type, sharded>::HashedArrayDictionary(
const StorageID & dict_id_,
const DictionaryStructure & dict_struct_,
DictionarySourcePtr source_ptr_,
const HashedArrayDictionaryStorageConfiguration & configuration_,
BlockPtr update_field_loaded_block_)
: IDictionary(dict_id_)
, log(&Poco::Logger::get("HashedArrayDictionary"))
, dict_struct(dict_struct_)
, source_ptr(std::move(source_ptr_))
, configuration(configuration_)
@ -42,8 +44,8 @@ HashedArrayDictionary<dictionary_key_type>::HashedArrayDictionary(
calculateBytesAllocated();
}
template <DictionaryKeyType dictionary_key_type>
ColumnPtr HashedArrayDictionary<dictionary_key_type>::getColumn(
template <DictionaryKeyType dictionary_key_type, bool sharded>
ColumnPtr HashedArrayDictionary<dictionary_key_type, sharded>::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
@ -67,8 +69,8 @@ ColumnPtr HashedArrayDictionary<dictionary_key_type>::getColumn(
return getAttributeColumn(attribute, dictionary_attribute, keys_size, default_values_column, extractor);
}
template <DictionaryKeyType dictionary_key_type>
Columns HashedArrayDictionary<dictionary_key_type>::getColumns(
template <DictionaryKeyType dictionary_key_type, bool sharded>
Columns HashedArrayDictionary<dictionary_key_type, sharded>::getColumns(
const Strings & attribute_names,
const DataTypes & result_types,
const Columns & key_columns,
@ -83,7 +85,7 @@ Columns HashedArrayDictionary<dictionary_key_type>::getColumns(
const size_t keys_size = extractor.getKeysSize();
PaddedPODArray<ssize_t> key_index_to_element_index;
KeyIndexToElementIndex key_index_to_element_index;
/** Optimization for multiple attributes.
* For each key save element index in key_index_to_element_index array.
@ -92,7 +94,6 @@ Columns HashedArrayDictionary<dictionary_key_type>::getColumns(
*/
if (attribute_names.size() > 1)
{
const auto & key_attribute_container = key_attribute.container;
size_t keys_found = 0;
key_index_to_element_index.resize(keys_size);
@ -100,15 +101,23 @@ Columns HashedArrayDictionary<dictionary_key_type>::getColumns(
for (size_t key_index = 0; key_index < keys_size; ++key_index)
{
auto key = extractor.extractCurrentKey();
auto shard = getShard(key);
const auto & key_attribute_container = key_attribute.containers[shard];
auto it = key_attribute_container.find(key);
if (it == key_attribute_container.end())
{
key_index_to_element_index[key_index] = -1;
if constexpr (sharded)
key_index_to_element_index[key_index] = std::make_pair(-1, shard);
else
key_index_to_element_index[key_index] = -1;
}
else
{
key_index_to_element_index[key_index] = it->getMapped();
if constexpr (sharded)
key_index_to_element_index[key_index] = std::make_pair(it->getMapped(), shard);
else
key_index_to_element_index[key_index] = it->getMapped();
++keys_found;
}
@ -147,8 +156,8 @@ Columns HashedArrayDictionary<dictionary_key_type>::getColumns(
return result_columns;
}
template <DictionaryKeyType dictionary_key_type>
ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
template <DictionaryKeyType dictionary_key_type, bool sharded>
ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type, sharded>::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
{
if (dictionary_key_type == DictionaryKeyType::Complex)
dict_struct.validateKeyTypes(key_types);
@ -166,8 +175,10 @@ ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::hasKeys(const Colum
for (size_t requested_key_index = 0; requested_key_index < keys_size; ++requested_key_index)
{
auto requested_key = extractor.extractCurrentKey();
auto shard = getShard(requested_key);
const auto & key_attribute_container = key_attribute.containers[shard];
out[requested_key_index] = key_attribute.container.find(requested_key) != key_attribute.container.end();
out[requested_key_index] = key_attribute_container.find(requested_key) != key_attribute_container.end();
keys_found += out[requested_key_index];
extractor.rollbackCurrentKey();
@ -179,8 +190,8 @@ ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::hasKeys(const Colum
return result;
}
template <DictionaryKeyType dictionary_key_type>
ColumnPtr HashedArrayDictionary<dictionary_key_type>::getHierarchy(ColumnPtr key_column [[maybe_unused]], const DataTypePtr &) const
template <DictionaryKeyType dictionary_key_type, bool sharded>
ColumnPtr HashedArrayDictionary<dictionary_key_type, sharded>::getHierarchy(ColumnPtr key_column [[maybe_unused]], const DataTypePtr &) const
{
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
{
@ -197,16 +208,20 @@ ColumnPtr HashedArrayDictionary<dictionary_key_type>::getHierarchy(ColumnPtr key
if (!dictionary_attribute.null_value.isNull())
null_value = dictionary_attribute.null_value.get<UInt64>();
const auto & key_attribute_container = key_attribute.container;
const AttributeContainerType<UInt64> & parent_keys_container = std::get<AttributeContainerType<UInt64>>(hierarchical_attribute.container);
auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); };
auto is_key_valid_func = [&, this](auto & key)
{
const auto & key_attribute_container = key_attribute.containers[getShard(key)];
return key_attribute_container.find(key) != key_attribute_container.end();
};
size_t keys_found = 0;
auto get_parent_func = [&](auto & hierarchy_key)
auto get_parent_func = [&, this](auto & hierarchy_key)
{
std::optional<UInt64> result;
auto shard = getShard(hierarchy_key);
const auto & key_attribute_container = key_attribute.containers[shard];
auto it = key_attribute_container.find(hierarchy_key);
@ -215,8 +230,9 @@ ColumnPtr HashedArrayDictionary<dictionary_key_type>::getHierarchy(ColumnPtr key
size_t key_index = it->getMapped();
if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[key_index])
if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[shard][key_index])
return result;
const auto & parent_keys_container = std::get<AttributeContainerShardsType<UInt64>>(hierarchical_attribute.containers)[shard];
UInt64 parent_key = parent_keys_container[key_index];
if (null_value && *null_value == parent_key)
@ -241,8 +257,8 @@ ColumnPtr HashedArrayDictionary<dictionary_key_type>::getHierarchy(ColumnPtr key
}
}
template <DictionaryKeyType dictionary_key_type>
ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::isInHierarchy(
template <DictionaryKeyType dictionary_key_type, bool sharded>
ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type, sharded>::isInHierarchy(
ColumnPtr key_column [[maybe_unused]],
ColumnPtr in_key_column [[maybe_unused]],
const DataTypePtr &) const
@ -265,16 +281,20 @@ ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::isInHierarchy(
if (!dictionary_attribute.null_value.isNull())
null_value = dictionary_attribute.null_value.get<UInt64>();
const auto & key_attribute_container = key_attribute.container;
const AttributeContainerType<UInt64> & parent_keys_container = std::get<AttributeContainerType<UInt64>>(hierarchical_attribute.container);
auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); };
auto is_key_valid_func = [&](auto & key)
{
const auto & key_attribute_container = key_attribute.containers[getShard(key)];
return key_attribute_container.find(key) != key_attribute_container.end();
};
size_t keys_found = 0;
auto get_parent_func = [&](auto & hierarchy_key)
{
std::optional<UInt64> result;
auto shard = getShard(hierarchy_key);
const auto & key_attribute_container = key_attribute.containers[shard];
auto it = key_attribute_container.find(hierarchy_key);
@ -283,9 +303,10 @@ ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::isInHierarchy(
size_t key_index = it->getMapped();
if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[key_index])
if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[shard][key_index])
return result;
const auto & parent_keys_container = std::get<AttributeContainerShardsType<UInt64>>(hierarchical_attribute.containers)[shard];
UInt64 parent_key = parent_keys_container[key_index];
if (null_value && *null_value == parent_key)
return result;
@ -309,8 +330,8 @@ ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::isInHierarchy(
}
}
template <DictionaryKeyType dictionary_key_type>
DictionaryHierarchicalParentToChildIndexPtr HashedArrayDictionary<dictionary_key_type>::getHierarchicalIndex() const
template <DictionaryKeyType dictionary_key_type, bool sharded>
DictionaryHierarchicalParentToChildIndexPtr HashedArrayDictionary<dictionary_key_type, sharded>::getHierarchicalIndex() const
{
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
{
@ -318,33 +339,35 @@ DictionaryHierarchicalParentToChildIndexPtr HashedArrayDictionary<dictionary_key
return hierarchical_index;
size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index;
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
const AttributeContainerType<UInt64> & parent_keys_container = std::get<AttributeContainerType<UInt64>>(hierarchical_attribute.container);
const auto & key_attribute_container = key_attribute.container;
HashMap<size_t, UInt64> index_to_key;
index_to_key.reserve(key_attribute.container.size());
for (auto & [key, value] : key_attribute_container)
index_to_key[value] = key;
DictionaryHierarchicalParentToChildIndex::ParentToChildIndex parent_to_child;
parent_to_child.reserve(index_to_key.size());
size_t parent_keys_container_size = parent_keys_container.size();
for (size_t i = 0; i < parent_keys_container_size; ++i)
for (size_t shard = 0; shard < configuration.shards; ++shard)
{
if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[i])
continue;
HashMap<size_t, UInt64> index_to_key;
index_to_key.reserve(element_counts[shard]);
const auto * it = index_to_key.find(i);
if (it == index_to_key.end())
continue;
for (auto & [key, value] : key_attribute.containers[shard])
index_to_key[value] = key;
auto child_key = it->getMapped();
auto parent_key = parent_keys_container[i];
parent_to_child[parent_key].emplace_back(child_key);
parent_to_child.reserve(parent_to_child.size() + index_to_key.size());
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
const auto & parent_keys_container = std::get<AttributeContainerShardsType<UInt64>>(hierarchical_attribute.containers)[shard];
size_t parent_keys_container_size = parent_keys_container.size();
for (size_t i = 0; i < parent_keys_container_size; ++i)
{
if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[shard][i])
continue;
const auto * it = index_to_key.find(i);
if (it == index_to_key.end())
continue;
auto child_key = it->getMapped();
auto parent_key = parent_keys_container[i];
parent_to_child[parent_key].emplace_back(child_key);
}
}
return std::make_shared<DictionaryHierarchicalParentToChildIndex>(parent_to_child);
@ -355,8 +378,8 @@ DictionaryHierarchicalParentToChildIndexPtr HashedArrayDictionary<dictionary_key
}
}
template <DictionaryKeyType dictionary_key_type>
ColumnPtr HashedArrayDictionary<dictionary_key_type>::getDescendants(
template <DictionaryKeyType dictionary_key_type, bool sharded>
ColumnPtr HashedArrayDictionary<dictionary_key_type, sharded>::getDescendants(
ColumnPtr key_column [[maybe_unused]],
const DataTypePtr &,
size_t level [[maybe_unused]],
@ -381,8 +404,8 @@ ColumnPtr HashedArrayDictionary<dictionary_key_type>::getDescendants(
}
}
template <DictionaryKeyType dictionary_key_type>
void HashedArrayDictionary<dictionary_key_type>::createAttributes()
template <DictionaryKeyType dictionary_key_type, bool sharded>
void HashedArrayDictionary<dictionary_key_type, sharded>::createAttributes()
{
const auto size = dict_struct.attributes.size();
attributes.reserve(size);
@ -395,17 +418,24 @@ void HashedArrayDictionary<dictionary_key_type>::createAttributes()
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
auto is_index_null = dictionary_attribute.is_nullable ? std::make_optional<std::vector<bool>>() : std::optional<std::vector<bool>>{};
Attribute attribute{dictionary_attribute.underlying_type, AttributeContainerType<ValueType>(), std::move(is_index_null)};
auto is_index_null = dictionary_attribute.is_nullable ? std::make_optional<std::vector<typename Attribute::RowsMask>>(configuration.shards) : std::nullopt;
Attribute attribute{dictionary_attribute.underlying_type, AttributeContainerShardsType<ValueType>(configuration.shards), std::move(is_index_null)};
attributes.emplace_back(std::move(attribute));
};
callOnDictionaryAttributeType(dictionary_attribute.underlying_type, type_call);
}
key_attribute.containers.resize(configuration.shards);
element_counts.resize(configuration.shards);
string_arenas.resize(configuration.shards);
for (auto & arena : string_arenas)
arena = std::make_unique<Arena>();
}
template <DictionaryKeyType dictionary_key_type>
void HashedArrayDictionary<dictionary_key_type>::updateData()
template <DictionaryKeyType dictionary_key_type, bool sharded>
void HashedArrayDictionary<dictionary_key_type, sharded>::updateData()
{
if (!update_field_loaded_block || update_field_loaded_block->rows() == 0)
{
@ -445,13 +475,17 @@ void HashedArrayDictionary<dictionary_key_type>::updateData()
if (update_field_loaded_block)
{
resize(update_field_loaded_block->rows());
blockToAttributes(*update_field_loaded_block.get());
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
blockToAttributes(*update_field_loaded_block.get(), arena_holder, /* shard = */ 0);
}
}
template <DictionaryKeyType dictionary_key_type>
void HashedArrayDictionary<dictionary_key_type>::blockToAttributes(const Block & block [[maybe_unused]])
template <DictionaryKeyType dictionary_key_type, bool sharded>
void HashedArrayDictionary<dictionary_key_type, sharded>::blockToAttributes(const Block & block, DictionaryKeysArenaHolder<dictionary_key_type> & arena_holder, size_t shard)
{
if (unlikely(shard >= configuration.shards))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Shard number {} is out of range: 0..{}", shard, configuration.shards - 1);
size_t skip_keys_size_offset = dict_struct.getKeysSize();
Columns key_columns;
@ -461,7 +495,6 @@ void HashedArrayDictionary<dictionary_key_type>::blockToAttributes(const Block &
for (size_t i = 0; i < skip_keys_size_offset; ++i)
key_columns.emplace_back(block.safeGetByPosition(i).column);
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
DictionaryKeysExtractor<dictionary_key_type> keys_extractor(key_columns, arena_holder.getComplexKeyArena());
const size_t keys_size = keys_extractor.getKeysSize();
@ -471,18 +504,18 @@ void HashedArrayDictionary<dictionary_key_type>::blockToAttributes(const Block &
{
auto key = keys_extractor.extractCurrentKey();
auto it = key_attribute.container.find(key);
auto it = key_attribute.containers[shard].find(key);
if (it != key_attribute.container.end())
if (it != key_attribute.containers[shard].end())
{
keys_extractor.rollbackCurrentKey();
continue;
}
if constexpr (std::is_same_v<KeyType, StringRef>)
key = copyStringInArena(string_arena, key);
key = copyStringInArena(*string_arenas[shard], key);
key_attribute.container.insert({key, element_count});
key_attribute.containers[shard].insert({key, element_counts[shard]});
for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index)
{
@ -498,16 +531,16 @@ void HashedArrayDictionary<dictionary_key_type>::blockToAttributes(const Block &
using AttributeType = typename Type::AttributeType;
using AttributeValueType = DictionaryValueType<AttributeType>;
auto & attribute_container = std::get<AttributeContainerType<AttributeValueType>>(attribute.container);
auto & attribute_container = std::get<AttributeContainerShardsType<AttributeValueType>>(attribute.containers)[shard];
attribute_container.emplace_back();
if (attribute_is_nullable)
{
attribute.is_index_null->emplace_back();
(*attribute.is_index_null)[shard].emplace_back();
if (column_value_to_insert.isNull())
{
(*attribute.is_index_null).back() = true;
(*attribute.is_index_null)[shard].back() = true;
return;
}
}
@ -515,7 +548,7 @@ void HashedArrayDictionary<dictionary_key_type>::blockToAttributes(const Block &
if constexpr (std::is_same_v<AttributeValueType, StringRef>)
{
String & value_to_insert = column_value_to_insert.get<String>();
StringRef string_in_arena_reference = copyStringInArena(string_arena, value_to_insert);
StringRef string_in_arena_reference = copyStringInArena(*string_arenas[shard], value_to_insert);
attribute_container.back() = string_in_arena_reference;
}
else
@ -528,23 +561,29 @@ void HashedArrayDictionary<dictionary_key_type>::blockToAttributes(const Block &
callOnDictionaryAttributeType(attribute.type, type_call);
}
++element_count;
++element_counts[shard];
++total_element_count;
keys_extractor.rollbackCurrentKey();
}
}
template <DictionaryKeyType dictionary_key_type>
void HashedArrayDictionary<dictionary_key_type>::resize(size_t total_rows)
template <DictionaryKeyType dictionary_key_type, bool sharded>
void HashedArrayDictionary<dictionary_key_type, sharded>::resize(size_t total_rows)
{
if (unlikely(!total_rows))
return;
key_attribute.container.reserve(total_rows);
/// In a multi-shard configuration this is pointless.
if constexpr (sharded)
return;
for (auto & container : key_attribute.containers)
container.reserve(total_rows);
}
template <DictionaryKeyType dictionary_key_type>
template <DictionaryKeyType dictionary_key_type, bool sharded>
template <typename KeysProvider>
ColumnPtr HashedArrayDictionary<dictionary_key_type>::getAttributeColumn(
ColumnPtr HashedArrayDictionary<dictionary_key_type, sharded>::getAttributeColumn(
const Attribute & attribute,
const DictionaryAttribute & dictionary_attribute,
size_t keys_size,
@ -638,16 +677,14 @@ ColumnPtr HashedArrayDictionary<dictionary_key_type>::getAttributeColumn(
return result;
}
template <DictionaryKeyType dictionary_key_type>
template <DictionaryKeyType dictionary_key_type, bool sharded>
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor>
void HashedArrayDictionary<dictionary_key_type>::getItemsImpl(
void HashedArrayDictionary<dictionary_key_type, sharded>::getItemsImpl(
const Attribute & attribute,
DictionaryKeysExtractor<dictionary_key_type> & keys_extractor,
ValueSetter && set_value [[maybe_unused]],
DefaultValueExtractor & default_value_extractor) const
{
const auto & key_attribute_container = key_attribute.container;
const auto & attribute_container = std::get<AttributeContainerType<AttributeType>>(attribute.container);
const size_t keys_size = keys_extractor.getKeysSize();
size_t keys_found = 0;
@ -655,6 +692,9 @@ void HashedArrayDictionary<dictionary_key_type>::getItemsImpl(
for (size_t key_index = 0; key_index < keys_size; ++key_index)
{
auto key = keys_extractor.extractCurrentKey();
auto shard = getShard(key);
const auto & key_attribute_container = key_attribute.containers[shard];
const auto & attribute_container = std::get<AttributeContainerShardsType<AttributeType>>(attribute.containers)[shard];
const auto it = key_attribute_container.find(key);
@ -665,7 +705,7 @@ void HashedArrayDictionary<dictionary_key_type>::getItemsImpl(
const auto & element = attribute_container[element_index];
if constexpr (is_nullable)
set_value(key_index, element, (*attribute.is_index_null)[element_index]);
set_value(key_index, element, (*attribute.is_index_null)[shard][element_index]);
else
set_value(key_index, element, false);
@ -686,28 +726,39 @@ void HashedArrayDictionary<dictionary_key_type>::getItemsImpl(
found_count.fetch_add(keys_found, std::memory_order_relaxed);
}
template <DictionaryKeyType dictionary_key_type>
template <DictionaryKeyType dictionary_key_type, bool sharded>
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor>
void HashedArrayDictionary<dictionary_key_type>::getItemsImpl(
void HashedArrayDictionary<dictionary_key_type, sharded>::getItemsImpl(
const Attribute & attribute,
const PaddedPODArray<ssize_t> & key_index_to_element_index,
const KeyIndexToElementIndex & key_index_to_element_index,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto & attribute_container = std::get<AttributeContainerType<AttributeType>>(attribute.container);
const size_t keys_size = key_index_to_element_index.size();
size_t shard = 0;
for (size_t key_index = 0; key_index < keys_size; ++key_index)
{
bool key_exists = key_index_to_element_index[key_index] != -1;
if (key_exists)
ssize_t element_index;
if constexpr (sharded)
{
size_t element_index = static_cast<size_t>(key_index_to_element_index[key_index]);
const auto & element = attribute_container[element_index];
element_index = key_index_to_element_index[key_index].first;
shard = key_index_to_element_index[key_index].second;
}
else
{
element_index = key_index_to_element_index[key_index];
}
if (element_index != -1)
{
const auto & attribute_container = std::get<AttributeContainerShardsType<AttributeType>>(attribute.containers)[shard];
size_t found_element_index = static_cast<size_t>(element_index);
const auto & element = attribute_container[found_element_index];
if constexpr (is_nullable)
set_value(key_index, element, (*attribute.is_index_null)[element_index]);
set_value(key_index, element, (*attribute.is_index_null)[shard][found_element_index]);
else
set_value(key_index, element, false);
}
@ -721,13 +772,17 @@ void HashedArrayDictionary<dictionary_key_type>::getItemsImpl(
}
}
template <DictionaryKeyType dictionary_key_type>
void HashedArrayDictionary<dictionary_key_type>::loadData()
template <DictionaryKeyType dictionary_key_type, bool sharded>
void HashedArrayDictionary<dictionary_key_type, sharded>::loadData()
{
if (!source_ptr->hasUpdateField())
{
QueryPipeline pipeline;
pipeline = QueryPipeline(source_ptr->loadAll());
std::optional<DictionaryParallelLoaderType> parallel_loader;
if constexpr (sharded)
parallel_loader.emplace(*this);
QueryPipeline pipeline(source_ptr->loadAll());
DictionaryPipelineExecutor executor(pipeline, configuration.use_async_executor);
UInt64 pull_time_microseconds = 0;
@ -751,10 +806,22 @@ void HashedArrayDictionary<dictionary_key_type>::loadData()
Stopwatch watch_process;
resize(total_rows);
blockToAttributes(block);
if (parallel_loader)
{
parallel_loader->addBlock(block);
}
else
{
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
blockToAttributes(block, arena_holder, /* shard = */ 0);
}
process_time_microseconds += watch_process.elapsedMicroseconds();
}
if (parallel_loader)
parallel_loader->finish();
LOG_DEBUG(&Poco::Logger::get("HashedArrayDictionary"),
"Finished {}reading {} blocks with {} rows from pipeline in {:.2f} sec and inserted into hashtable in {:.2f} sec",
configuration.use_async_executor ? "asynchronous " : "",
@ -765,14 +832,14 @@ void HashedArrayDictionary<dictionary_key_type>::loadData()
updateData();
}
if (configuration.require_nonempty && 0 == element_count)
if (configuration.require_nonempty && 0 == total_element_count)
throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY,
"{}: dictionary source is empty and 'require_nonempty' property is set.",
getFullName());
}
template <DictionaryKeyType dictionary_key_type>
void HashedArrayDictionary<dictionary_key_type>::buildHierarchyParentToChildIndexIfNeeded()
template <DictionaryKeyType dictionary_key_type, bool sharded>
void HashedArrayDictionary<dictionary_key_type, sharded>::buildHierarchyParentToChildIndexIfNeeded()
{
if (!dict_struct.hierarchical_attribute_index)
return;
@ -781,12 +848,13 @@ void HashedArrayDictionary<dictionary_key_type>::buildHierarchyParentToChildInde
hierarchical_index = getHierarchicalIndex();
}
template <DictionaryKeyType dictionary_key_type>
void HashedArrayDictionary<dictionary_key_type>::calculateBytesAllocated()
template <DictionaryKeyType dictionary_key_type, bool sharded>
void HashedArrayDictionary<dictionary_key_type, sharded>::calculateBytesAllocated()
{
bytes_allocated += attributes.size() * sizeof(attributes.front());
bytes_allocated += key_attribute.container.size();
for (const auto & container : key_attribute.containers)
bytes_allocated += container.size();
for (auto & attribute : attributes)
{
@ -796,26 +864,29 @@ void HashedArrayDictionary<dictionary_key_type>::calculateBytesAllocated()
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
const auto & container = std::get<AttributeContainerType<ValueType>>(attribute.container);
bytes_allocated += sizeof(AttributeContainerType<ValueType>);
if constexpr (std::is_same_v<ValueType, Array>)
for (const auto & container : std::get<AttributeContainerShardsType<ValueType>>(attribute.containers))
{
/// This is not an accurate calculation
bytes_allocated += sizeof(Array) * container.size();
}
else
{
bytes_allocated += container.allocated_bytes();
}
bytes_allocated += sizeof(AttributeContainerType<ValueType>);
bucket_count = container.capacity();
if constexpr (std::is_same_v<ValueType, Array>)
{
/// This is not an accurate calculation
bytes_allocated += sizeof(Array) * container.size();
}
else
{
bytes_allocated += container.allocated_bytes();
}
bucket_count = container.capacity();
}
};
callOnDictionaryAttributeType(attribute.type, type_call);
if (attribute.is_index_null.has_value())
bytes_allocated += (*attribute.is_index_null).size();
for (const auto & container : attribute.is_index_null.value())
bytes_allocated += container.size();
}
if (update_field_loaded_block)
@ -826,18 +897,19 @@ void HashedArrayDictionary<dictionary_key_type>::calculateBytesAllocated()
hierarchical_index_bytes_allocated = hierarchical_index->getSizeInBytes();
bytes_allocated += hierarchical_index_bytes_allocated;
}
bytes_allocated += string_arena.allocatedBytes();
for (const auto & string_arena : string_arenas)
bytes_allocated += string_arena->allocatedBytes();
}
template <DictionaryKeyType dictionary_key_type>
Pipe HashedArrayDictionary<dictionary_key_type>::read(const Names & column_names, size_t max_block_size, size_t num_streams) const
template <DictionaryKeyType dictionary_key_type, bool sharded>
Pipe HashedArrayDictionary<dictionary_key_type, sharded>::read(const Names & column_names, size_t max_block_size, size_t num_streams) const
{
PaddedPODArray<HashedArrayDictionary::KeyType> keys;
keys.reserve(key_attribute.container.size());
keys.reserve(total_element_count);
for (auto & [key, _] : key_attribute.container)
keys.emplace_back(key);
for (const auto & container : key_attribute.containers)
for (auto & [key, _] : container)
keys.emplace_back(key);
ColumnsWithTypeAndName key_columns;
@ -858,8 +930,10 @@ Pipe HashedArrayDictionary<dictionary_key_type>::read(const Names & column_names
return result;
}
template class HashedArrayDictionary<DictionaryKeyType::Simple>;
template class HashedArrayDictionary<DictionaryKeyType::Complex>;
template class HashedArrayDictionary<DictionaryKeyType::Simple, /* sharded */ false>;
template class HashedArrayDictionary<DictionaryKeyType::Simple, /* sharded */ true>;
template class HashedArrayDictionary<DictionaryKeyType::Complex, /* sharded */ false>;
template class HashedArrayDictionary<DictionaryKeyType::Complex, /* sharded */ true>;
void registerDictionaryArrayHashed(DictionaryFactory & factory)
{
@ -886,7 +960,14 @@ void registerDictionaryArrayHashed(DictionaryFactory & factory)
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
HashedArrayDictionaryStorageConfiguration configuration{require_nonempty, dict_lifetime};
std::string dictionary_layout_name = dictionary_key_type == DictionaryKeyType::Simple ? "hashed_array" : "complex_key_hashed_array";
std::string dictionary_layout_prefix = ".layout." + dictionary_layout_name;
Int64 shards = config.getInt(config_prefix + dictionary_layout_prefix + ".shards", 1);
if (shards <= 0 || 128 < shards)
throw Exception(ErrorCodes::BAD_ARGUMENTS,"{}: SHARDS parameter should be within [1, 128]", full_name);
HashedArrayDictionaryStorageConfiguration configuration{require_nonempty, dict_lifetime, static_cast<size_t>(shards)};
ContextMutablePtr context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix);
const auto & settings = context->getSettingsRef();
@ -895,9 +976,17 @@ void registerDictionaryArrayHashed(DictionaryFactory & factory)
configuration.use_async_executor = clickhouse_source && clickhouse_source->isLocal() && settings.dictionary_use_async_executor;
if (dictionary_key_type == DictionaryKeyType::Simple)
return std::make_unique<HashedArrayDictionary<DictionaryKeyType::Simple>>(dict_id, dict_struct, std::move(source_ptr), configuration);
{
if (shards > 1)
return std::make_unique<HashedArrayDictionary<DictionaryKeyType::Simple, true>>(dict_id, dict_struct, std::move(source_ptr), configuration);
return std::make_unique<HashedArrayDictionary<DictionaryKeyType::Simple, false>>(dict_id, dict_struct, std::move(source_ptr), configuration);
}
else
return std::make_unique<HashedArrayDictionary<DictionaryKeyType::Complex>>(dict_id, dict_struct, std::move(source_ptr), configuration);
{
if (shards > 1)
return std::make_unique<HashedArrayDictionary<DictionaryKeyType::Complex, true>>(dict_id, dict_struct, std::move(source_ptr), configuration);
return std::make_unique<HashedArrayDictionary<DictionaryKeyType::Complex, false>>(dict_id, dict_struct, std::move(source_ptr), configuration);
}
};
factory.registerLayout("hashed_array",
View File
@ -13,6 +13,7 @@
#include <Dictionaries/IDictionary.h>
#include <Dictionaries/IDictionarySource.h>
#include <Dictionaries/DictionaryHelpers.h>
#include <Dictionaries/HashedDictionaryParallelLoader.h>
/** This dictionary stores all attributes in arrays.
* The key is stored in a hash table and the value is an index into the attribute arrays.
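A rough sketch of that layout with simplified types (ToyHashedArrayDictionary and its members are illustrative only; the real class keeps one key container and one set of attribute arrays per shard):

#include <cstdint>
#include <optional>
#include <string>
#include <unordered_map>
#include <vector>

// Sketch of the "hashed array" layout: the hash table maps key -> element index,
// and every attribute is a plain array addressed by that index.
struct ToyHashedArrayDictionary
{
    std::unordered_map<uint64_t, size_t> key_to_index;  // one such container per shard in the real code
    std::vector<std::string> attribute_name;            // attribute column 0
    std::vector<uint64_t> attribute_value;               // attribute column 1

    void insert(uint64_t key, std::string name, uint64_t value)
    {
        if (!key_to_index.emplace(key, attribute_name.size()).second)
            return;  // key already present
        attribute_name.push_back(std::move(name));
        attribute_value.push_back(value);
    }

    std::optional<uint64_t> getValue(uint64_t key) const
    {
        auto it = key_to_index.find(key);
        if (it == key_to_index.end())
            return std::nullopt;
        return attribute_value[it->second];
    }
};

int main()
{
    ToyHashedArrayDictionary dict;
    dict.insert(42, "answer", 4242);
    return dict.getValue(42).value() == 4242 ? 0 : 1;
}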
@ -25,12 +26,17 @@ struct HashedArrayDictionaryStorageConfiguration
{
const bool require_nonempty;
const DictionaryLifetime lifetime;
size_t shards = 1;
size_t shard_load_queue_backlog = 10000;
bool use_async_executor = false;
};
template <DictionaryKeyType dictionary_key_type>
template <DictionaryKeyType dictionary_key_type, bool sharded>
class HashedArrayDictionary final : public IDictionary
{
using DictionaryParallelLoaderType = HashedDictionaryImpl::HashedDictionaryParallelLoader<dictionary_key_type, HashedArrayDictionary<dictionary_key_type, sharded>>;
friend class HashedDictionaryImpl::HashedDictionaryParallelLoader<dictionary_key_type, HashedArrayDictionary<dictionary_key_type, sharded>>;
public:
using KeyType = std::conditional_t<dictionary_key_type == DictionaryKeyType::Simple, UInt64, StringRef>;
@ -63,13 +69,13 @@ public:
double getHitRate() const override { return 1.0; }
size_t getElementCount() const override { return element_count; }
size_t getElementCount() const override { return total_element_count; }
double getLoadFactor() const override { return static_cast<double>(element_count) / bucket_count; }
double getLoadFactor() const override { return static_cast<double>(total_element_count) / bucket_count; }
std::shared_ptr<const IExternalLoadable> clone() const override
{
return std::make_shared<HashedArrayDictionary<dictionary_key_type>>(getDictionaryID(), dict_struct, source_ptr->clone(), configuration, update_field_loaded_block);
return std::make_shared<HashedArrayDictionary<dictionary_key_type, sharded>>(getDictionaryID(), dict_struct, source_ptr->clone(), configuration, update_field_loaded_block);
}
DictionarySourcePtr getSource() const override { return source_ptr; }
@ -132,50 +138,54 @@ private:
template <typename Value>
using AttributeContainerType = std::conditional_t<std::is_same_v<Value, Array>, std::vector<Value>, PaddedPODArray<Value>>;
template <typename Value>
using AttributeContainerShardsType = std::vector<AttributeContainerType<Value>>;
struct Attribute final
{
AttributeUnderlyingType type;
std::variant<
AttributeContainerType<UInt8>,
AttributeContainerType<UInt16>,
AttributeContainerType<UInt32>,
AttributeContainerType<UInt64>,
AttributeContainerType<UInt128>,
AttributeContainerType<UInt256>,
AttributeContainerType<Int8>,
AttributeContainerType<Int16>,
AttributeContainerType<Int32>,
AttributeContainerType<Int64>,
AttributeContainerType<Int128>,
AttributeContainerType<Int256>,
AttributeContainerType<Decimal32>,
AttributeContainerType<Decimal64>,
AttributeContainerType<Decimal128>,
AttributeContainerType<Decimal256>,
AttributeContainerType<DateTime64>,
AttributeContainerType<Float32>,
AttributeContainerType<Float64>,
AttributeContainerType<UUID>,
AttributeContainerType<IPv4>,
AttributeContainerType<IPv6>,
AttributeContainerType<StringRef>,
AttributeContainerType<Array>>
container;
AttributeContainerShardsType<UInt8>,
AttributeContainerShardsType<UInt16>,
AttributeContainerShardsType<UInt32>,
AttributeContainerShardsType<UInt64>,
AttributeContainerShardsType<UInt128>,
AttributeContainerShardsType<UInt256>,
AttributeContainerShardsType<Int8>,
AttributeContainerShardsType<Int16>,
AttributeContainerShardsType<Int32>,
AttributeContainerShardsType<Int64>,
AttributeContainerShardsType<Int128>,
AttributeContainerShardsType<Int256>,
AttributeContainerShardsType<Decimal32>,
AttributeContainerShardsType<Decimal64>,
AttributeContainerShardsType<Decimal128>,
AttributeContainerShardsType<Decimal256>,
AttributeContainerShardsType<DateTime64>,
AttributeContainerShardsType<Float32>,
AttributeContainerShardsType<Float64>,
AttributeContainerShardsType<UUID>,
AttributeContainerShardsType<IPv4>,
AttributeContainerShardsType<IPv6>,
AttributeContainerShardsType<StringRef>,
AttributeContainerShardsType<Array>>
containers;
std::optional<std::vector<bool>> is_index_null;
/// One container per shard
using RowsMask = std::vector<bool>;
std::optional<std::vector<RowsMask>> is_index_null;
};
struct KeyAttribute final
{
KeyContainerType container;
/// One container per shard
std::vector<KeyContainerType> containers;
};
void createAttributes();
void blockToAttributes(const Block & block);
void blockToAttributes(const Block & block, DictionaryKeysArenaHolder<dictionary_key_type> & arena_holder, size_t shard);
void updateData();
@ -185,6 +195,22 @@ private:
void calculateBytesAllocated();
UInt64 getShard(UInt64 key) const
{
if constexpr (!sharded)
return 0;
/// NOTE: the hash function here must not match DefaultHash<>, since
/// that one is used for the HashMap/sparse_hash_map.
return intHashCRC32(key) % configuration.shards;
}
UInt64 getShard(StringRef key) const
{
if constexpr (!sharded)
return 0;
return StringRefHash()(key) % configuration.shards;
}
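A small standalone illustration of why that NOTE matters, using a toy hash rather than the real ones: if the shard were chosen with the same hash the per-shard table uses internally, every key routed to a shard could only ever reach 1/shards of that table's buckets.

#include <cstdint>
#include <iostream>
#include <set>

// Toy demonstration: when shard selection and the in-table hash coincide, keys
// routed to one shard cover only TABLE_SIZE / NUM_SHARDS of the table's buckets.
int main()
{
    constexpr uint64_t NUM_SHARDS = 8;
    constexpr uint64_t TABLE_SIZE = 1024;  // a multiple of NUM_SHARDS

    auto hash = [](uint64_t key) { return key * 0x9E3779B97F4A7C15ULL; };  // toy hash, not intHashCRC32

    std::set<uint64_t> buckets_used_in_shard_0;
    for (uint64_t key = 0; key < 100000; ++key)
    {
        uint64_t h = hash(key);
        if (h % NUM_SHARDS != 0)  // keep only keys routed to shard 0
            continue;
        buckets_used_in_shard_0.insert(h % TABLE_SIZE);
    }
    std::cout << buckets_used_in_shard_0.size() << '\n';  // prints 128, not anything close to 1024
}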
template <typename KeysProvider>
ColumnPtr getAttributeColumn(
const Attribute & attribute,
@ -200,10 +226,13 @@ private:
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
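/// When the dictionary is sharded, each element index is paired with the shard that owns it; an index of -1 still marks a key that was not found.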
using KeyIndexToElementIndex = std::conditional_t<sharded, PaddedPODArray<std::pair<ssize_t, UInt8>>, PaddedPODArray<ssize_t>>;
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor>
void getItemsImpl(
const Attribute & attribute,
const PaddedPODArray<ssize_t> & key_index_to_element_index,
const KeyIndexToElementIndex & key_index_to_element_index,
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
@ -215,6 +244,8 @@ private:
void resize(size_t total_rows);
Poco::Logger * log;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;
const HashedArrayDictionaryStorageConfiguration configuration;
@ -225,17 +256,20 @@ private:
size_t bytes_allocated = 0;
size_t hierarchical_index_bytes_allocated = 0;
size_t element_count = 0;
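/// Incremented concurrently by per-shard loading threads, hence atomic.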
std::atomic<size_t> total_element_count = 0;
std::vector<size_t> element_counts;
size_t bucket_count = 0;
mutable std::atomic<size_t> query_count{0};
mutable std::atomic<size_t> found_count{0};
BlockPtr update_field_loaded_block;
Arena string_arena;
std::vector<std::unique_ptr<Arena>> string_arenas;
DictionaryHierarchicalParentToChildIndexPtr hierarchical_index;
};
extern template class HashedArrayDictionary<DictionaryKeyType::Simple>;
extern template class HashedArrayDictionary<DictionaryKeyType::Complex>;
extern template class HashedArrayDictionary<DictionaryKeyType::Simple, false>;
extern template class HashedArrayDictionary<DictionaryKeyType::Simple, true>;
extern template class HashedArrayDictionary<DictionaryKeyType::Complex, false>;
extern template class HashedArrayDictionary<DictionaryKeyType::Complex, true>;
}
View File
@ -71,7 +71,8 @@ struct HashedDictionaryConfiguration
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
class HashedDictionary final : public IDictionary
{
friend class HashedDictionaryParallelLoader<dictionary_key_type, sparse, sharded>;
using DictionaryParallelLoaderType = HashedDictionaryParallelLoader<dictionary_key_type, HashedDictionary<dictionary_key_type, sparse, sharded>>;
friend class HashedDictionaryParallelLoader<dictionary_key_type, HashedDictionary<dictionary_key_type, sparse, sharded>>;
public:
using KeyType = std::conditional_t<dictionary_key_type == DictionaryKeyType::Simple, UInt64, StringRef>;
@ -987,7 +988,7 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::getItemsImpl(
auto key = keys_extractor.extractCurrentKey();
auto shard = getShard(key);
const auto & container = attribute_containers[getShard(key)];
const auto & container = attribute_containers[shard];
const auto it = container.find(key);
if (it != container.end())
@ -1020,11 +1021,11 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::loadData()
{
if (!source_ptr->hasUpdateField())
{
std::optional<HashedDictionaryParallelLoader<dictionary_key_type, sparse, sharded>> parallel_loader;
std::optional<DictionaryParallelLoaderType> parallel_loader;
if constexpr (sharded)
parallel_loader.emplace(*this);
QueryPipeline pipeline = QueryPipeline(source_ptr->loadAll());
QueryPipeline pipeline(source_ptr->loadAll());
DictionaryPipelineExecutor executor(pipeline, configuration.use_async_executor);
Block block;
View File
@ -38,13 +38,12 @@ namespace DB::HashedDictionaryImpl
{
/// Implementation of parallel dictionary loading for SHARDS
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
template <DictionaryKeyType dictionary_key_type, typename DictionaryType>
class HashedDictionaryParallelLoader : public boost::noncopyable
{
using HashedDictionary = HashedDictionary<dictionary_key_type, sparse, sharded>;
public:
explicit HashedDictionaryParallelLoader(HashedDictionary & dictionary_)
explicit HashedDictionaryParallelLoader(DictionaryType & dictionary_)
: dictionary(dictionary_)
, shards(dictionary.configuration.shards)
, pool(CurrentMetrics::HashedDictionaryThreads, CurrentMetrics::HashedDictionaryThreadsActive, CurrentMetrics::HashedDictionaryThreadsScheduled, shards)
@ -118,7 +117,7 @@ public:
}
private:
HashedDictionary & dictionary;
DictionaryType & dictionary;
const size_t shards;
ThreadPool pool;
std::vector<std::optional<ConcurrentBoundedQueue<Block>>> shards_queues;
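For context, a minimal sketch of the per-shard loading pattern the loader implements (hypothetical names, std::async standing in for the real ThreadPool and ConcurrentBoundedQueue):

#include <cstddef>
#include <cstdint>
#include <future>
#include <unordered_map>
#include <vector>

// Sketch of the per-shard loading idea: rows are partitioned by a shard hash and
// each partition is inserted by its own worker, so no per-shard table is ever
// touched by two threads at once.
int main()
{
    constexpr size_t num_shards = 4;
    std::vector<std::unordered_map<uint64_t, uint64_t>> shard_maps(num_shards);

    std::vector<uint64_t> keys(100000);
    for (size_t i = 0; i < keys.size(); ++i)
        keys[i] = i * 2654435761ULL;  // toy key stream standing in for a loaded block

    // Partition the "block" by shard.
    std::vector<std::vector<uint64_t>> partitions(num_shards);
    for (uint64_t key : keys)
        partitions[key % num_shards].push_back(key);

    // One worker per shard; each worker owns its shard's map exclusively.
    std::vector<std::future<void>> workers;
    for (size_t shard = 0; shard < num_shards; ++shard)
        workers.push_back(std::async(std::launch::async, [&, shard]
        {
            auto & map = shard_maps[shard];
            for (uint64_t key : partitions[shard])
                map.emplace(key, key / 2);  // toy attribute value
        }));

    for (auto & worker : workers)
        worker.get();
}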
View File
@ -76,7 +76,7 @@ XDBCDictionarySource::XDBCDictionarySource(
, load_all_query(query_builder.composeLoadAllQuery())
, bridge_helper(bridge_)
, bridge_url(bridge_helper->getMainURI())
, timeouts(ConnectionTimeouts::getHTTPTimeouts(context_->getSettingsRef(), {context_->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}))
, timeouts(ConnectionTimeouts::getHTTPTimeouts(context_->getSettingsRef(), context_->getServerSettings().keep_alive_timeout))
{
auto url_params = bridge_helper->getURLParams(max_block_size);
for (const auto & [name, value] : url_params)
View File
@ -54,8 +54,7 @@ std::unique_ptr<ReadBuffer> ReadBufferFromWebServer::initialize()
}
const auto & settings = context->getSettingsRef();
const auto & config = context->getConfigRef();
Poco::Timespan http_keep_alive_timeout{config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0};
const auto & server_settings = context->getServerSettings();
auto res = std::make_unique<ReadWriteBufferFromHTTP>(
uri,
@ -65,7 +64,7 @@ std::unique_ptr<ReadBuffer> ReadBufferFromWebServer::initialize()
settings.http_send_timeout,
std::max(Poco::Timespan(settings.http_receive_timeout.totalSeconds(), 0), Poco::Timespan(20, 0)),
settings.tcp_keep_alive_timeout,
http_keep_alive_timeout),
server_settings.keep_alive_timeout),
credentials,
0,
buf_size,
View File
@ -47,7 +47,7 @@ void WebObjectStorage::initialize(const String & uri_path, const std::unique_loc
ReadWriteBufferFromHTTP::OutStreamCallback(),
ConnectionTimeouts::getHTTPTimeouts(
getContext()->getSettingsRef(),
{getContext()->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}),
getContext()->getServerSettings().keep_alive_timeout),
credentials,
/* max_redirects= */ 0,
/* buffer_size_= */ DBMS_DEFAULT_BUFFER_SIZE,
View File
@ -83,6 +83,10 @@ if (TARGET ch_contrib::sqids)
list (APPEND PRIVATE_LIBS ch_contrib::sqids)
endif()
if (TARGET ch_contrib::idna)
list (APPEND PRIVATE_LIBS ch_contrib::idna)
endif()
if (TARGET ch_contrib::h3)
list (APPEND PRIVATE_LIBS ch_contrib::h3)
endif()
View File
@ -1,6 +1,6 @@
#include "config.h"
#ifdef ENABLE_SQIDS
#if USE_SQIDS
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
View File
@ -15,24 +15,13 @@
#endif
#include <xxhash.h>
#if USE_BLAKE3
# include <blake3.h>
#endif
#include <Common/SipHash.h>
#include <Common/typeid_cast.h>
#include <Common/safe_cast.h>
#include <Common/HashTable/Hash.h>
#if USE_SSL
# include <openssl/md4.h>
# include <openssl/md5.h>
# include <openssl/sha.h>
#if USE_BORINGSSL
# include <openssl/digest.h>
#else
# include <openssl/evp.h>
#endif
#endif
#include <bit>
@ -73,7 +62,6 @@ namespace ErrorCodes
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int NOT_IMPLEMENTED;
extern const int ILLEGAL_COLUMN;
extern const int SUPPORT_IS_DISABLED;
}
namespace impl
@ -191,6 +179,40 @@ T combineHashesFunc(T t1, T t2)
}
struct SipHash64Impl
{
static constexpr auto name = "sipHash64";
using ReturnType = UInt64;
static UInt64 apply(const char * begin, size_t size) { return sipHash64(begin, size); }
static UInt64 combineHashes(UInt64 h1, UInt64 h2) { return combineHashesFunc<UInt64, SipHash64Impl>(h1, h2); }
static constexpr bool use_int_hash_for_pods = false;
};
struct SipHash64KeyedImpl
{
static constexpr auto name = "sipHash64Keyed";
using ReturnType = UInt64;
using Key = impl::SipHashKey;
using KeyColumns = impl::SipHashKeyColumns;
static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
static UInt64 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash64Keyed(key.key0, key.key1, begin, size); }
static UInt64 combineHashesKeyed(const Key & key, UInt64 h1, UInt64 h2)
{
transformEndianness<std::endian::little>(h1);
transformEndianness<std::endian::little>(h2);
const UInt64 hashes[]{h1, h2};
return applyKeyed(key, reinterpret_cast<const char *>(hashes), sizeof(hashes));
}
static constexpr bool use_int_hash_for_pods = false;
};
#if USE_SSL
struct HalfMD5Impl
{
@ -225,159 +247,8 @@ struct HalfMD5Impl
static constexpr bool use_int_hash_for_pods = false;
};
struct MD4Impl
{
static constexpr auto name = "MD4";
enum { length = MD4_DIGEST_LENGTH };
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
MD4_CTX ctx;
MD4_Init(&ctx);
MD4_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
MD4_Final(out_char_data, &ctx);
}
};
struct MD5Impl
{
static constexpr auto name = "MD5";
enum { length = MD5_DIGEST_LENGTH };
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
MD5_CTX ctx;
MD5_Init(&ctx);
MD5_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
MD5_Final(out_char_data, &ctx);
}
};
struct SHA1Impl
{
static constexpr auto name = "SHA1";
enum { length = SHA_DIGEST_LENGTH };
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
SHA_CTX ctx;
SHA1_Init(&ctx);
SHA1_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
SHA1_Final(out_char_data, &ctx);
}
};
struct SHA224Impl
{
static constexpr auto name = "SHA224";
enum { length = SHA224_DIGEST_LENGTH };
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
SHA256_CTX ctx;
SHA224_Init(&ctx);
SHA224_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
SHA224_Final(out_char_data, &ctx);
}
};
struct SHA256Impl
{
static constexpr auto name = "SHA256";
enum { length = SHA256_DIGEST_LENGTH };
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
SHA256_CTX ctx;
SHA256_Init(&ctx);
SHA256_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
SHA256_Final(out_char_data, &ctx);
}
};
struct SHA384Impl
{
static constexpr auto name = "SHA384";
enum { length = SHA384_DIGEST_LENGTH };
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
SHA512_CTX ctx;
SHA384_Init(&ctx);
SHA384_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
SHA384_Final(out_char_data, &ctx);
}
};
struct SHA512Impl
{
static constexpr auto name = "SHA512";
enum { length = 64 };
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
SHA512_CTX ctx;
SHA512_Init(&ctx);
SHA512_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
SHA512_Final(out_char_data, &ctx);
}
};
struct SHA512Impl256
{
static constexpr auto name = "SHA512_256";
enum { length = 32 };
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
/// Here, we use the EVP interface that is common to both BoringSSL and OpenSSL. Though BoringSSL is the default
/// SSL library that we use, for S390X architecture only OpenSSL is supported. But the SHA512-256, SHA512_256_Init,
/// SHA512_256_Update, SHA512_256_Final methods to calculate hash (similar to the other SHA functions) aren't available
/// in the current version of OpenSSL that we use which necessitates the use of the EVP interface.
auto md_ctx = EVP_MD_CTX_create();
EVP_DigestInit_ex(md_ctx, EVP_sha512_256(), nullptr /*engine*/);
EVP_DigestUpdate(md_ctx, begin, size);
EVP_DigestFinal_ex(md_ctx, out_char_data, nullptr /*size*/);
EVP_MD_CTX_destroy(md_ctx);
}
};
#endif
struct SipHash64Impl
{
static constexpr auto name = "sipHash64";
using ReturnType = UInt64;
static UInt64 apply(const char * begin, size_t size) { return sipHash64(begin, size); }
static UInt64 combineHashes(UInt64 h1, UInt64 h2) { return combineHashesFunc<UInt64, SipHash64Impl>(h1, h2); }
static constexpr bool use_int_hash_for_pods = false;
};
struct SipHash64KeyedImpl
{
static constexpr auto name = "sipHash64Keyed";
using ReturnType = UInt64;
using Key = impl::SipHashKey;
using KeyColumns = impl::SipHashKeyColumns;
static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
static UInt64 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash64Keyed(key.key0, key.key1, begin, size); }
static UInt64 combineHashesKeyed(const Key & key, UInt64 h1, UInt64 h2)
{
transformEndianness<std::endian::little>(h1);
transformEndianness<std::endian::little>(h2);
const UInt64 hashes[]{h1, h2};
return applyKeyed(key, reinterpret_cast<const char *>(hashes), sizeof(hashes));
}
static constexpr bool use_int_hash_for_pods = false;
};
struct SipHash128Impl
{
static constexpr auto name = "sipHash128";
@ -820,121 +691,6 @@ struct ImplXXH3
static constexpr bool use_int_hash_for_pods = false;
};
struct ImplBLAKE3
{
static constexpr auto name = "BLAKE3";
enum { length = 32 };
#if !USE_BLAKE3
[[noreturn]] static void apply(const char * /*begin*/, const size_t /*size*/, unsigned char * /*out_char_data*/)
{
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "BLAKE3 is not available. Rust code or BLAKE3 itself may be disabled.");
}
#else
static void apply(const char * begin, const size_t size, unsigned char* out_char_data)
{
auto err_msg = blake3_apply_shim(begin, safe_cast<uint32_t>(size), out_char_data);
if (err_msg != nullptr)
{
auto err_st = std::string(err_msg);
blake3_free_char_pointer(err_msg);
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function returned error message: {}", err_st);
}
}
#endif
};
template <typename Impl>
class FunctionStringHashFixedString : public IFunction
{
public:
static constexpr auto name = Impl::name;
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionStringHashFixedString>(); }
String getName() const override
{
return name;
}
size_t getNumberOfArguments() const override { return 1; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isStringOrFixedString(arguments[0]) && !isIPv6(arguments[0]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}",
arguments[0]->getName(), getName());
return std::make_shared<DataTypeFixedString>(Impl::length);
}
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{
if (const ColumnString * col_from = checkAndGetColumn<ColumnString>(arguments[0].column.get()))
{
auto col_to = ColumnFixedString::create(Impl::length);
const typename ColumnString::Chars & data = col_from->getChars();
const typename ColumnString::Offsets & offsets = col_from->getOffsets();
auto & chars_to = col_to->getChars();
const auto size = offsets.size();
chars_to.resize(size * Impl::length);
ColumnString::Offset current_offset = 0;
for (size_t i = 0; i < size; ++i)
{
Impl::apply(
reinterpret_cast<const char *>(&data[current_offset]),
offsets[i] - current_offset - 1,
reinterpret_cast<uint8_t *>(&chars_to[i * Impl::length]));
current_offset = offsets[i];
}
return col_to;
}
else if (
const ColumnFixedString * col_from_fix = checkAndGetColumn<ColumnFixedString>(arguments[0].column.get()))
{
auto col_to = ColumnFixedString::create(Impl::length);
const typename ColumnFixedString::Chars & data = col_from_fix->getChars();
const auto size = col_from_fix->size();
auto & chars_to = col_to->getChars();
const auto length = col_from_fix->getN();
chars_to.resize(size * Impl::length);
for (size_t i = 0; i < size; ++i)
{
Impl::apply(
reinterpret_cast<const char *>(&data[i * length]), length, reinterpret_cast<uint8_t *>(&chars_to[i * Impl::length]));
}
return col_to;
}
else if (
const ColumnIPv6 * col_from_ip = checkAndGetColumn<ColumnIPv6>(arguments[0].column.get()))
{
auto col_to = ColumnFixedString::create(Impl::length);
const typename ColumnIPv6::Container & data = col_from_ip->getData();
const auto size = col_from_ip->size();
auto & chars_to = col_to->getChars();
const auto length = IPV6_BINARY_LENGTH;
chars_to.resize(size * Impl::length);
for (size_t i = 0; i < size; ++i)
{
Impl::apply(
reinterpret_cast<const char *>(&data[i * length]), length, reinterpret_cast<uint8_t *>(&chars_to[i * Impl::length]));
}
return col_to;
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
arguments[0].column->getName(), getName());
}
};
DECLARE_MULTITARGET_CODE(
template <typename Impl, typename Name>
@ -1817,15 +1573,7 @@ using FunctionSipHash64Keyed = FunctionAnyHash<SipHash64KeyedImpl, true, SipHash
using FunctionIntHash32 = FunctionIntHash<IntHash32Impl, NameIntHash32>;
using FunctionIntHash64 = FunctionIntHash<IntHash64Impl, NameIntHash64>;
#if USE_SSL
using FunctionMD4 = FunctionStringHashFixedString<MD4Impl>;
using FunctionHalfMD5 = FunctionAnyHash<HalfMD5Impl>;
using FunctionMD5 = FunctionStringHashFixedString<MD5Impl>;
using FunctionSHA1 = FunctionStringHashFixedString<SHA1Impl>;
using FunctionSHA224 = FunctionStringHashFixedString<SHA224Impl>;
using FunctionSHA256 = FunctionStringHashFixedString<SHA256Impl>;
using FunctionSHA384 = FunctionStringHashFixedString<SHA384Impl>;
using FunctionSHA512 = FunctionStringHashFixedString<SHA512Impl>;
using FunctionSHA512_256 = FunctionStringHashFixedString<SHA512Impl256>;
#endif
using FunctionSipHash128 = FunctionAnyHash<SipHash128Impl>;
using FunctionSipHash128Keyed = FunctionAnyHash<SipHash128KeyedImpl, true, SipHash128KeyedImpl::Key, SipHash128KeyedImpl::KeyColumns>;
@ -1854,7 +1602,6 @@ using FunctionXxHash64 = FunctionAnyHash<ImplXxHash64>;
using FunctionXXH3 = FunctionAnyHash<ImplXXH3>;
using FunctionWyHash64 = FunctionAnyHash<ImplWyHash64>;
using FunctionBLAKE3 = FunctionStringHashFixedString<ImplBLAKE3>;
}
#ifdef __clang__
View File
@ -46,19 +46,34 @@ REGISTER_FUNCTION(Hashing)
factory.registerFunction<FunctionWyHash64>();
#if USE_SSL
factory.registerFunction<FunctionHalfMD5>(FunctionDocumentation{
.description = R"(
[Interprets](../..//sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input
parameters as strings and calculates the MD5 hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the
resulting string, and interprets them as [UInt64](../../../sql-reference/data-types/int-uint.md) in big-endian byte order. The function is
relatively slow (5 million short strings per second per processor core).
factory.registerFunction<FunctionBLAKE3>(
FunctionDocumentation{
.description=R"(
Calculates BLAKE3 hash string and returns the resulting set of bytes as FixedString.
This cryptographic hash-function is integrated into ClickHouse with BLAKE3 Rust library.
The function is rather fast and shows approximately two times faster performance compared to SHA-2, while generating hashes of the same length as SHA-256.
It returns a BLAKE3 hash as a byte array with type FixedString(32).
)",
.examples{
{"hash", "SELECT hex(BLAKE3('ABC'))", ""}},
.categories{"Hash"}
},
FunctionFactory::CaseSensitive);
Consider using the [sipHash64](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash64) function instead.
)",
.syntax = "SELECT halfMD5(par1,par2,...,parN);",
.arguments
= {{"par1,par2,...,parN",
R"(
The function takes a variable number of input parameters. Arguments can be any of the supported data types. For some data types calculated
value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed
Tuple with the same data, Map and the corresponding Array(Tuple(key, value)) type with the same data).
)"}},
.returned_value = "The computed half MD5 hash of the given input params returned as a "
"[UInt64](../../../sql-reference/data-types/int-uint.md) in big-endian byte order.",
.examples
= {{"",
"SELECT HEX(halfMD5('abc', 'cde', 'fgh'));",
R"(
hex(halfMD5('abc', 'cde', 'fgh'))
2C9506B7374CFAF4
)"}}});
#endif
}
}
View File
@ -1,177 +0,0 @@
#include "config.h"
#if USE_SSL
#include "FunctionsHashing.h"
#include <Functions/FunctionFactory.h>
/// FunctionsHashing instantiations are separated into files FunctionsHashing*.cpp
/// to better parallelize the build procedure and avoid MSan build failure
/// due to excessive resource consumption.
namespace DB
{
REGISTER_FUNCTION(HashingSSL)
{
factory.registerFunction<FunctionMD4>(FunctionDocumentation{
.description = R"(Calculates the MD4 hash of the given string.)",
.syntax = "SELECT MD4(s);",
.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
.returned_value
= "The MD4 hash of the given input string returned as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).",
.examples
= {{"",
"SELECT HEX(MD4('abc'));",
R"(
hex(MD4('abc'))
A448017AAF21D8525FC10AE87AA6729D
)"
}}
});
factory.registerFunction<FunctionHalfMD5>(FunctionDocumentation{
.description = R"(
[Interprets](../..//sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input
parameters as strings and calculates the MD5 hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the
resulting string, and interprets them as [UInt64](../../../sql-reference/data-types/int-uint.md) in big-endian byte order. The function is
relatively slow (5 million short strings per second per processor core).
Consider using the [sipHash64](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash64) function instead.
)",
.syntax = "SELECT halfMD5(par1,par2,...,parN);",
.arguments = {{"par1,par2,...,parN",
R"(
The function takes a variable number of input parameters. Arguments can be any of the supported data types. For some data types calculated
value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed
Tuple with the same data, Map and the corresponding Array(Tuple(key, value)) type with the same data).
)"
}},
.returned_value
= "The computed half MD5 hash of the given input params returned as a [UInt64](../../../sql-reference/data-types/int-uint.md) in big-endian byte order.",
.examples
= {{"",
"SELECT HEX(halfMD5('abc', 'cde', 'fgh'));",
R"(
hex(halfMD5('abc', 'cde', 'fgh'))
2C9506B7374CFAF4
)"
}}
});
factory.registerFunction<FunctionMD5>(FunctionDocumentation{
.description = R"(Calculates the MD5 hash of the given string.)",
.syntax = "SELECT MD5(s);",
.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
.returned_value
= "The MD5 hash of the given input string returned as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).",
.examples
= {{"",
"SELECT HEX(MD5('abc'));",
R"(
hex(MD5('abc'))
900150983CD24FB0D6963F7D28E17F72
)"
}}
});
factory.registerFunction<FunctionSHA1>(FunctionDocumentation{
.description = R"(Calculates the SHA1 hash of the given string.)",
.syntax = "SELECT SHA1(s);",
.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
.returned_value
= "The SHA1 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).",
.examples
= {{"",
"SELECT HEX(SHA1('abc'));",
R"(
hex(SHA1('abc'))
A9993E364706816ABA3E25717850C26C9CD0D89D
)"
}}
});
factory.registerFunction<FunctionSHA224>(FunctionDocumentation{
.description = R"(Calculates the SHA224 hash of the given string.)",
.syntax = "SELECT SHA224(s);",
.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
.returned_value
= "The SHA224 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).",
.examples
= {{"",
"SELECT HEX(SHA224('abc'));",
R"(
hex(SHA224('abc'))
23097D223405D8228642A477BDA255B32AADBCE4BDA0B3F7E36C9DA7
)"
}}
});
factory.registerFunction<FunctionSHA256>(FunctionDocumentation{
.description = R"(Calculates the SHA256 hash of the given string.)",
.syntax = "SELECT SHA256(s);",
.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
.returned_value
= "The SHA256 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).",
.examples
= {{"",
"SELECT HEX(SHA256('abc'));",
R"(
hex(SHA256('abc'))
BA7816BF8F01CFEA414140DE5DAE2223B00361A396177A9CB410FF61F20015AD
)"
}}
});
factory.registerFunction<FunctionSHA384>(FunctionDocumentation{
.description = R"(Calculates the SHA384 hash of the given string.)",
.syntax = "SELECT SHA384(s);",
.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
.returned_value
= "The SHA384 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).",
.examples
= {{"",
"SELECT HEX(SHA384('abc'));",
R"(
hex(SHA384('abc'))
CB00753F45A35E8BB5A03D699AC65007272C32AB0EDED1631A8B605A43FF5BED8086072BA1E7CC2358BAECA134C825A7
)"
}}
});
factory.registerFunction<FunctionSHA512>(FunctionDocumentation{
.description = R"(Calculates the SHA512 hash of the given string.)",
.syntax = "SELECT SHA512(s);",
.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
.returned_value
= "The SHA512 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).",
.examples
= {{"",
"SELECT HEX(SHA512('abc'));",
R"(
hex(SHA512('abc'))
DDAF35A193617ABACC417349AE20413112E6FA4E89A97EA20A9EEEE64B55D39A2192992A274FC1A836BA3C23A3FEEBBD454D4423643CE80E2A9AC94FA54CA49F
)"
}}
});
factory.registerFunction<FunctionSHA512_256>(FunctionDocumentation{
.description = R"(Calculates the SHA512_256 hash of the given string.)",
.syntax = "SELECT SHA512_256(s);",
.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
.returned_value
= "The SHA512_256 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).",
.examples
= {{"",
"SELECT HEX(SHA512_256('abc'));",
R"(
hex(SHA512_256('abc'))
53048E2681941EF99B2E29B76B4C7DABE4C2D0C634FC6D46E0E2F13107E7AF23
)"
}}
});
}
}
#endif
View File
@ -0,0 +1,440 @@
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeFixedString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <base/IPv4andIPv6.h>
#include "config.h"
#if USE_BLAKE3
# include <llvm/Support/BLAKE3.h>
#endif
#if USE_SSL
# include <openssl/md4.h>
# include <openssl/md5.h>
# include <openssl/sha.h>
# if USE_BORINGSSL
# include <openssl/digest.h>
# else
# include <openssl/evp.h>
# endif
#endif
/// Instantiating only the functions that require FunctionStringHashFixedString in a separate file
/// to better parallelize the build procedure and avoid MSan build failure
/// due to excessive resource consumption.
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
#if USE_SSL
struct MD4Impl
{
static constexpr auto name = "MD4";
enum
{
length = MD4_DIGEST_LENGTH
};
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
MD4_CTX ctx;
MD4_Init(&ctx);
MD4_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
MD4_Final(out_char_data, &ctx);
}
};
struct MD5Impl
{
static constexpr auto name = "MD5";
enum
{
length = MD5_DIGEST_LENGTH
};
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
MD5_CTX ctx;
MD5_Init(&ctx);
MD5_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
MD5_Final(out_char_data, &ctx);
}
};
struct SHA1Impl
{
static constexpr auto name = "SHA1";
enum
{
length = SHA_DIGEST_LENGTH
};
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
SHA_CTX ctx;
SHA1_Init(&ctx);
SHA1_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
SHA1_Final(out_char_data, &ctx);
}
};
struct SHA224Impl
{
static constexpr auto name = "SHA224";
enum
{
length = SHA224_DIGEST_LENGTH
};
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
SHA256_CTX ctx;
SHA224_Init(&ctx);
SHA224_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
SHA224_Final(out_char_data, &ctx);
}
};
struct SHA256Impl
{
static constexpr auto name = "SHA256";
enum
{
length = SHA256_DIGEST_LENGTH
};
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
SHA256_CTX ctx;
SHA256_Init(&ctx);
SHA256_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
SHA256_Final(out_char_data, &ctx);
}
};
struct SHA384Impl
{
static constexpr auto name = "SHA384";
enum
{
length = SHA384_DIGEST_LENGTH
};
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
SHA512_CTX ctx;
SHA384_Init(&ctx);
SHA384_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
SHA384_Final(out_char_data, &ctx);
}
};
struct SHA512Impl
{
static constexpr auto name = "SHA512";
enum
{
length = 64
};
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
SHA512_CTX ctx;
SHA512_Init(&ctx);
SHA512_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
SHA512_Final(out_char_data, &ctx);
}
};
struct SHA512Impl256
{
static constexpr auto name = "SHA512_256";
enum
{
length = 32
};
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
/// Here, we use the EVP interface that is common to both BoringSSL and OpenSSL. Though BoringSSL is the default
/// SSL library that we use, for S390X architecture only OpenSSL is supported. But the SHA512-256, SHA512_256_Init,
/// SHA512_256_Update, SHA512_256_Final methods to calculate hash (similar to the other SHA functions) aren't available
/// in the current version of OpenSSL that we use which necessitates the use of the EVP interface.
auto md_ctx = EVP_MD_CTX_create();
EVP_DigestInit_ex(md_ctx, EVP_sha512_256(), nullptr /*engine*/);
EVP_DigestUpdate(md_ctx, begin, size);
EVP_DigestFinal_ex(md_ctx, out_char_data, nullptr /*size*/);
EVP_MD_CTX_destroy(md_ctx);
}
};
#endif
#if USE_BLAKE3
struct ImplBLAKE3
{
static constexpr auto name = "BLAKE3";
enum
{
length = 32
};
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
static_assert(LLVM_BLAKE3_OUT_LEN == ImplBLAKE3::length);
auto & result = *reinterpret_cast<std::array<uint8_t, LLVM_BLAKE3_OUT_LEN> *>(out_char_data);
llvm::BLAKE3 hasher;
if (size > 0)
hasher.update(llvm::StringRef(begin, size));
hasher.final(result);
}
};
#endif
template <typename Impl>
class FunctionStringHashFixedString : public IFunction
{
public:
static constexpr auto name = Impl::name;
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionStringHashFixedString>(); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isStringOrFixedString(arguments[0]) && !isIPv6(arguments[0]))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName());
return std::make_shared<DataTypeFixedString>(Impl::length);
}
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{
if (const ColumnString * col_from = checkAndGetColumn<ColumnString>(arguments[0].column.get()))
{
auto col_to = ColumnFixedString::create(Impl::length);
const typename ColumnString::Chars & data = col_from->getChars();
const typename ColumnString::Offsets & offsets = col_from->getOffsets();
auto & chars_to = col_to->getChars();
const auto size = offsets.size();
chars_to.resize(size * Impl::length);
ColumnString::Offset current_offset = 0;
for (size_t i = 0; i < size; ++i)
{
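/// ColumnString stores a terminating zero byte after every value; subtracting 1 below excludes it from the hashed data.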
Impl::apply(
reinterpret_cast<const char *>(&data[current_offset]),
offsets[i] - current_offset - 1,
reinterpret_cast<uint8_t *>(&chars_to[i * Impl::length]));
current_offset = offsets[i];
}
return col_to;
}
else if (const ColumnFixedString * col_from_fix = checkAndGetColumn<ColumnFixedString>(arguments[0].column.get()))
{
auto col_to = ColumnFixedString::create(Impl::length);
const typename ColumnFixedString::Chars & data = col_from_fix->getChars();
const auto size = col_from_fix->size();
auto & chars_to = col_to->getChars();
const auto length = col_from_fix->getN();
chars_to.resize(size * Impl::length);
for (size_t i = 0; i < size; ++i)
{
Impl::apply(
reinterpret_cast<const char *>(&data[i * length]), length, reinterpret_cast<uint8_t *>(&chars_to[i * Impl::length]));
}
return col_to;
}
else if (const ColumnIPv6 * col_from_ip = checkAndGetColumn<ColumnIPv6>(arguments[0].column.get()))
{
auto col_to = ColumnFixedString::create(Impl::length);
const typename ColumnIPv6::Container & data = col_from_ip->getData();
const auto size = col_from_ip->size();
auto & chars_to = col_to->getChars();
const auto length = IPV6_BINARY_LENGTH;
chars_to.resize(size * Impl::length);
for (size_t i = 0; i < size; ++i)
{
Impl::apply(
reinterpret_cast<const char *>(&data[i * length]), length, reinterpret_cast<uint8_t *>(&chars_to[i * Impl::length]));
}
return col_to;
}
else
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal column {} of first argument of function {}",
arguments[0].column->getName(),
getName());
}
};
#if USE_SSL || USE_BLAKE3
REGISTER_FUNCTION(HashFixedStrings)
{
# if USE_SSL
using FunctionMD4 = FunctionStringHashFixedString<MD4Impl>;
using FunctionMD5 = FunctionStringHashFixedString<MD5Impl>;
using FunctionSHA1 = FunctionStringHashFixedString<SHA1Impl>;
using FunctionSHA224 = FunctionStringHashFixedString<SHA224Impl>;
using FunctionSHA256 = FunctionStringHashFixedString<SHA256Impl>;
using FunctionSHA384 = FunctionStringHashFixedString<SHA384Impl>;
using FunctionSHA512 = FunctionStringHashFixedString<SHA512Impl>;
using FunctionSHA512_256 = FunctionStringHashFixedString<SHA512Impl256>;
factory.registerFunction<FunctionMD4>(FunctionDocumentation{
.description = R"(Calculates the MD4 hash of the given string.)",
.syntax = "SELECT MD4(s);",
.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
.returned_value
= "The MD4 hash of the given input string returned as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).",
.examples
= {{"",
"SELECT HEX(MD4('abc'));",
R"(
hex(MD4('abc'))
A448017AAF21D8525FC10AE87AA6729D
)"}}});
factory.registerFunction<FunctionMD5>(FunctionDocumentation{
.description = R"(Calculates the MD5 hash of the given string.)",
.syntax = "SELECT MD5(s);",
.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
.returned_value
= "The MD5 hash of the given input string returned as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).",
.examples
= {{"",
"SELECT HEX(MD5('abc'));",
R"(
hex(MD5('abc'))
900150983CD24FB0D6963F7D28E17F72
)"}}});
factory.registerFunction<FunctionSHA1>(FunctionDocumentation{
.description = R"(Calculates the SHA1 hash of the given string.)",
.syntax = "SELECT SHA1(s);",
.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
.returned_value
= "The SHA1 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).",
.examples
= {{"",
"SELECT HEX(SHA1('abc'));",
R"(
hex(SHA1('abc'))
A9993E364706816ABA3E25717850C26C9CD0D89D
)"}}});
factory.registerFunction<FunctionSHA224>(FunctionDocumentation{
.description = R"(Calculates the SHA224 hash of the given string.)",
.syntax = "SELECT SHA224(s);",
.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
.returned_value
= "The SHA224 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).",
.examples
= {{"",
"SELECT HEX(SHA224('abc'));",
R"(
hex(SHA224('abc'))
23097D223405D8228642A477BDA255B32AADBCE4BDA0B3F7E36C9DA7
)"}}});
factory.registerFunction<FunctionSHA256>(FunctionDocumentation{
.description = R"(Calculates the SHA256 hash of the given string.)",
.syntax = "SELECT SHA256(s);",
.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
.returned_value
= "The SHA256 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).",
.examples
= {{"",
"SELECT HEX(SHA256('abc'));",
R"(
hex(SHA256('abc'))
BA7816BF8F01CFEA414140DE5DAE2223B00361A396177A9CB410FF61F20015AD
)"}}});
factory.registerFunction<FunctionSHA384>(FunctionDocumentation{
.description = R"(Calculates the SHA384 hash of the given string.)",
.syntax = "SELECT SHA384(s);",
.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
.returned_value
= "The SHA384 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).",
.examples
= {{"",
"SELECT HEX(SHA384('abc'));",
R"(
hex(SHA384('abc'))
CB00753F45A35E8BB5A03D699AC65007272C32AB0EDED1631A8B605A43FF5BED8086072BA1E7CC2358BAECA134C825A7
)"}}});
factory.registerFunction<FunctionSHA512>(FunctionDocumentation{
.description = R"(Calculates the SHA512 hash of the given string.)",
.syntax = "SELECT SHA512(s);",
.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
.returned_value
= "The SHA512 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).",
.examples
= {{"",
"SELECT HEX(SHA512('abc'));",
R"(
hex(SHA512('abc'))
DDAF35A193617ABACC417349AE20413112E6FA4E89A97EA20A9EEEE64B55D39A2192992A274FC1A836BA3C23A3FEEBBD454D4423643CE80E2A9AC94FA54CA49F
)"}}});
factory.registerFunction<FunctionSHA512_256>(FunctionDocumentation{
.description = R"(Calculates the SHA512_256 hash of the given string.)",
.syntax = "SELECT SHA512_256(s);",
.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
.returned_value
= "The SHA512_256 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).",
.examples
= {{"",
"SELECT HEX(SHA512_256('abc'));",
R"(
hex(SHA512_256('abc'))
53048E2681941EF99B2E29B76B4C7DABE4C2D0C634FC6D46E0E2F13107E7AF23
)"}}});
# endif
# if USE_BLAKE3
using FunctionBLAKE3 = FunctionStringHashFixedString<ImplBLAKE3>;
factory.registerFunction<FunctionBLAKE3>(
FunctionDocumentation{
.description = R"(
Calculates the BLAKE3 hash of the given string and returns the resulting bytes as a FixedString.
This cryptographic hash function is integrated into ClickHouse via the BLAKE3 library.
The function is rather fast: roughly twice as fast as SHA-2, while producing hashes of the same length as SHA-256.
It returns the BLAKE3 hash as a byte array of type FixedString(32).
)",
.examples{{"hash", "SELECT hex(BLAKE3('ABC'))", ""}},
.categories{"Hash"}},
FunctionFactory::CaseSensitive);
# endif
}
#endif
}

src/Functions/punycode.cpp Normal file

@ -0,0 +1,165 @@
#include "config.h"
#if USE_IDNA
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wnewline-eof"
#endif
# include <ada/idna/punycode.h>
# include <ada/idna/unicode_transcoding.h>
#ifdef __clang__
# pragma clang diagnostic pop
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_COLUMN;
}
struct PunycodeEncodeImpl
{
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
const size_t rows = offsets.size();
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
res_offsets.reserve(rows);
size_t prev_offset = 0;
std::u32string value_utf32;
std::string value_puny;
for (size_t row = 0; row < rows; ++row)
{
const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
const size_t value_length = offsets[row] - prev_offset - 1;
const size_t value_utf32_length = ada::idna::utf32_length_from_utf8(value, value_length);
value_utf32.resize(value_utf32_length);
ada::idna::utf8_to_utf32(value, value_length, value_utf32.data());
const bool ok = ada::idna::utf32_to_punycode(value_utf32, value_puny);
if (!ok)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Internal error during Punycode encoding");
res_data.insert(value_puny.c_str(), value_puny.c_str() + value_puny.size() + 1);
res_offsets.push_back(res_data.size());
prev_offset = offsets[row];
value_utf32.clear();
value_puny.clear(); /// utf32_to_punycode() appends to its output string
}
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by punycodeEncode function");
}
};
struct PunycodeDecodeImpl
{
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
const size_t rows = offsets.size();
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
res_offsets.reserve(rows);
size_t prev_offset = 0;
std::u32string value_utf32;
std::string value_utf8;
for (size_t row = 0; row < rows; ++row)
{
const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
const size_t value_length = offsets[row] - prev_offset - 1;
const std::string_view value_punycode(value, value_length);
const bool ok = ada::idna::punycode_to_utf32(value_punycode, value_utf32);
if (!ok)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Internal error during Punycode decoding");
const size_t utf8_length = ada::idna::utf8_length_from_utf32(value_utf32.data(), value_utf32.size());
value_utf8.resize(utf8_length);
ada::idna::utf32_to_utf8(value_utf32.data(), value_utf32.size(), value_utf8.data());
res_data.insert(value_utf8.c_str(), value_utf8.c_str() + value_utf8.size() + 1);
res_offsets.push_back(res_data.size());
prev_offset = offsets[row];
value_utf32.clear(); /// punycode_to_utf32() appends to its output string
value_utf8.clear();
}
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by punycodeDecode function");
}
};
struct NamePunycodeEncode
{
static constexpr auto name = "punycodeEncode";
};
struct NamePunycodeDecode
{
static constexpr auto name = "punycodeDecode";
};
REGISTER_FUNCTION(Punycode)
{
factory.registerFunction<FunctionStringToString<PunycodeEncodeImpl, NamePunycodeEncode>>(FunctionDocumentation{
.description=R"(
Computes a Punycode representation of a string.)",
.syntax="punycodeEncode(str)",
.arguments={{"str", "Input string"}},
.returned_value="The punycode representation [String](/docs/en/sql-reference/data-types/string.md).",
.examples={
{"simple",
"SELECT punycodeEncode('München') AS puny;",
R"(
puny
Mnchen-3ya
)"
}}
});
factory.registerFunction<FunctionStringToString<PunycodeDecodeImpl, NamePunycodeDecode>>(FunctionDocumentation{
.description=R"(
Computes the plaintext (UTF-8) representation of a Punycode-encoded string.)",
.syntax="punycodeDecode(str)",
.arguments={{"str", "A Punycode-encoded string"}},
.returned_value="The plaintext representation [String](/docs/en/sql-reference/data-types/string.md).",
.examples={
{"simple",
"SELECT punycodeDecode('Mnchen-3ya') AS plain;",
R"(
plain
München
)"
}}
});
}
}
#endif
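For reference, a minimal standalone sketch of the ada::idna round trip that the two implementations above perform: UTF-8 to UTF-32 to Punycode on encode, and the reverse on decode. The sample string and the expected outputs are taken from the documentation examples above; error handling is reduced to early returns.
#include <iostream>
#include <string>
#include <ada/idna/punycode.h>
#include <ada/idna/unicode_transcoding.h>

int main()
{
    const std::string utf8 = "München";

    /// Encode: UTF-8 -> UTF-32 -> Punycode, as in PunycodeEncodeImpl above.
    std::u32string utf32(ada::idna::utf32_length_from_utf8(utf8.data(), utf8.size()), U'\0');
    ada::idna::utf8_to_utf32(utf8.data(), utf8.size(), utf32.data());
    std::string puny;
    if (!ada::idna::utf32_to_punycode(utf32, puny))
        return 1;
    std::cout << puny << '\n';      /// expected: Mnchen-3ya

    /// Decode: Punycode -> UTF-32 -> UTF-8, as in PunycodeDecodeImpl above.
    std::u32string decoded32;
    if (!ada::idna::punycode_to_utf32(puny, decoded32))
        return 1;
    std::string decoded(ada::idna::utf8_length_from_utf32(decoded32.data(), decoded32.size()), '\0');
    ada::idna::utf32_to_utf8(decoded32.data(), decoded32.size(), decoded.data());
    std::cout << decoded << '\n';   /// expected: München
}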


@ -137,7 +137,7 @@ AIOContext::AIOContext(unsigned int)
{
ctx = io_setup();
if (ctx < 0)
throw ErrnoException(DB::ErrorCodes::CANNOT_IOSETUP, "io_setup failed");
throw DB::ErrnoException(DB::ErrorCodes::CANNOT_IOSETUP, "io_setup failed");
}
AIOContext::~AIOContext()


@ -133,6 +133,22 @@ ConnectionTimeouts ConnectionTimeouts::getHTTPTimeouts(const Settings & settings
settings.http_receive_timeout);
}
ConnectionTimeouts ConnectionTimeouts::getFetchPartHTTPTimeouts(const ServerSettings & server_settings, const Settings & user_settings)
{
auto timeouts = getHTTPTimeouts(user_settings, server_settings.keep_alive_timeout);
if (server_settings.replicated_fetches_http_connection_timeout.changed)
timeouts.connection_timeout = server_settings.replicated_fetches_http_connection_timeout;
if (server_settings.replicated_fetches_http_send_timeout.changed)
timeouts.send_timeout = server_settings.replicated_fetches_http_send_timeout;
if (server_settings.replicated_fetches_http_receive_timeout.changed)
timeouts.receive_timeout = server_settings.replicated_fetches_http_receive_timeout;
return timeouts;
}
class SendReceiveTimeoutsForFirstAttempt
{
private:


@ -1,6 +1,7 @@
#pragma once
#include <Core/Defines.h>
#include <Core/ServerSettings.h>
#include <Interpreters/Context_fwd.h>
#include <Poco/Timespan.h>
@ -68,6 +69,8 @@ struct ConnectionTimeouts
static ConnectionTimeouts getTCPTimeoutsWithFailover(const Settings & settings);
static ConnectionTimeouts getHTTPTimeouts(const Settings & settings, Poco::Timespan http_keep_alive_timeout);
static ConnectionTimeouts getFetchPartHTTPTimeouts(const ServerSettings & server_settings, const Settings & user_settings);
ConnectionTimeouts getAdaptiveTimeouts(const String & method, bool first_attempt, bool first_byte) const;
};


@ -120,6 +120,7 @@ struct ReadSettings
size_t http_retry_initial_backoff_ms = 100;
size_t http_retry_max_backoff_ms = 1600;
bool http_skip_not_found_url_for_globs = true;
bool http_make_head_request = true;
/// Monitoring
bool for_object_storage = false; // to choose which profile events should be incremented


@ -808,6 +808,11 @@ std::optional<time_t> ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::tryGetLa
template <typename UpdatableSessionPtr>
HTTPFileInfo ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::getFileInfo()
{
/// May be disabled in case the user knows in advance that the server doesn't support HEAD requests.
/// This avoids making unnecessary requests in such cases.
if (!settings.http_make_head_request)
return HTTPFileInfo{};
Poco::Net::HTTPResponse response;
try
{
@ -920,13 +925,12 @@ PooledReadWriteBufferFromHTTP::PooledReadWriteBufferFromHTTP(
Poco::URI uri_,
const std::string & method_,
OutStreamCallback out_stream_callback_,
const ConnectionTimeouts & timeouts_,
const Poco::Net::HTTPBasicCredentials & credentials_,
size_t buffer_size_,
const UInt64 max_redirects,
size_t max_connections_per_endpoint)
PooledSessionFactoryPtr session_factory)
: Parent(
std::make_shared<SessionType>(uri_, max_redirects, std::make_shared<PooledSessionFactory>(timeouts_, max_connections_per_endpoint)),
std::make_shared<SessionType>(uri_, max_redirects, session_factory),
uri_,
credentials_,
method_,


@ -265,6 +265,8 @@ private:
size_t per_endpoint_pool_size;
};
using PooledSessionFactoryPtr = std::shared_ptr<PooledSessionFactory>;
class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession<PooledSessionFactory>>>
{
using SessionType = UpdatableSession<PooledSessionFactory>;
@ -273,13 +275,12 @@ class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase
public:
explicit PooledReadWriteBufferFromHTTP(
Poco::URI uri_,
const std::string & method_ = {},
OutStreamCallback out_stream_callback_ = {},
const ConnectionTimeouts & timeouts_ = {},
const Poco::Net::HTTPBasicCredentials & credentials_ = {},
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
const UInt64 max_redirects = 0,
size_t max_connections_per_endpoint = DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT);
const std::string & method_,
OutStreamCallback out_stream_callback_,
const Poco::Net::HTTPBasicCredentials & credentials_,
size_t buffer_size_,
const UInt64 max_redirects,
PooledSessionFactoryPtr session_factory);
};
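Since the constructor no longer takes timeouts or a per-endpoint connection count, callers are expected to build a PooledSessionFactory once and pass it in, as Context does for background fetches later in this diff. A hedged sketch of the new calling convention; `timeouts`, `uri` and `credentials` are assumed to exist in the caller.
auto session_factory = std::make_shared<PooledSessionFactory>(timeouts, DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT);
auto buf = std::make_unique<PooledReadWriteBufferFromHTTP>(
    uri,                                     /// Poco::URI
    Poco::Net::HTTPRequest::HTTP_GET,
    /* out_stream_callback_ = */ nullptr,
    credentials,                             /// Poco::Net::HTTPBasicCredentials
    DBMS_DEFAULT_BUFFER_SIZE,
    /* max_redirects = */ 0,
    session_factory);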


@ -707,7 +707,7 @@ KeyMetadata::iterator FileCache::addFileSegment(
stash_records.emplace(
stash_key, stash->queue->add(locked_key.getKeyMetadata(), offset, 0, *lock));
if (stash->queue->getElementsCount(*lock) > stash->queue->getElementsLimit())
if (stash->queue->getElementsCount(*lock) > stash->queue->getElementsLimit(*lock))
stash->queue->pop(*lock);
result_state = FileSegment::State::DETACHED;
@ -748,7 +748,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCa
LOG_TEST(
log, "Trying to reserve space ({} bytes) for {}:{}, current usage {}/{}",
size, file_segment.key(), file_segment.offset(),
main_priority->getSize(cache_lock), main_priority->getSizeLimit());
main_priority->getSize(cache_lock), main_priority->getSizeLimit(cache_lock));
/// In case of per query cache limit (by default disabled), we add/remove entries from both
/// (main_priority and query_priority) priority queues, but iterate entries in order of query_priority,
@ -760,7 +760,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCa
{
query_priority = &query_context->getPriority();
const bool query_limit_exceeded = query_priority->getSize(cache_lock) + size > query_priority->getSizeLimit();
const bool query_limit_exceeded = query_priority->getSize(cache_lock) + size > query_priority->getSizeLimit(cache_lock);
if (query_limit_exceeded && !query_context->recacheOnFileCacheQueryLimitExceeded())
{
LOG_TEST(log, "Query limit exceeded, space reservation failed, "
@ -771,7 +771,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCa
LOG_TEST(
log, "Using query limit, current usage: {}/{} (while reserving for {}:{})",
query_priority->getSize(cache_lock), query_priority->getSizeLimit(),
query_priority->getSize(cache_lock), query_priority->getSizeLimit(cache_lock),
file_segment.key(), file_segment.offset());
}
@ -1066,9 +1066,11 @@ void FileCache::loadMetadataForKeys(const fs::path & keys_dir)
bool limits_satisfied;
IFileCachePriority::IteratorPtr cache_it;
size_t size_limit = 0;
{
auto lock = lockCache();
size_limit = main_priority->getSizeLimit(lock);
limits_satisfied = main_priority->canFit(size, lock);
if (limits_satisfied)
@ -1118,7 +1120,7 @@ void FileCache::loadMetadataForKeys(const fs::path & keys_dir)
log,
"Cache capacity changed (max size: {}), "
"cached file `{}` does not fit in cache anymore (size: {})",
main_priority->getSizeLimit(), offset_it->path().string(), size);
size_limit, offset_it->path().string(), size);
fs::remove(offset_it->path());
}
@ -1222,7 +1224,8 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings,
std::lock_guard lock(apply_settings_mutex);
if (metadata.setBackgroundDownloadQueueSizeLimit(new_settings.background_download_queue_size_limit))
if (new_settings.background_download_queue_size_limit != actual_settings.background_download_queue_size_limit
&& metadata.setBackgroundDownloadQueueSizeLimit(new_settings.background_download_queue_size_limit))
{
LOG_INFO(log, "Changed background_download_queue_size from {} to {}",
actual_settings.background_download_queue_size_limit,
@ -1231,24 +1234,57 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings,
actual_settings.background_download_queue_size_limit = new_settings.background_download_queue_size_limit;
}
bool updated;
try
if (new_settings.background_download_threads != actual_settings.background_download_threads)
{
updated = metadata.setBackgroundDownloadThreads(new_settings.background_download_threads);
}
catch (...)
{
actual_settings.background_download_threads = metadata.getBackgroundDownloadThreads();
throw;
bool updated = false;
try
{
updated = metadata.setBackgroundDownloadThreads(new_settings.background_download_threads);
}
catch (...)
{
actual_settings.background_download_threads = metadata.getBackgroundDownloadThreads();
throw;
}
if (updated)
{
LOG_INFO(log, "Changed background_download_threads from {} to {}",
actual_settings.background_download_threads,
new_settings.background_download_threads);
actual_settings.background_download_threads = new_settings.background_download_threads;
}
}
if (updated)
{
LOG_INFO(log, "Changed background_download_threads from {} to {}",
actual_settings.background_download_threads,
new_settings.background_download_threads);
actual_settings.background_download_threads = new_settings.background_download_threads;
if (new_settings.max_size != actual_settings.max_size
|| new_settings.max_elements != actual_settings.max_elements)
{
auto cache_lock = lockCache();
bool updated = false;
try
{
updated = main_priority->modifySizeLimits(
new_settings.max_size, new_settings.max_elements, new_settings.slru_size_ratio, cache_lock);
}
catch (...)
{
actual_settings.max_size = main_priority->getSizeLimit(cache_lock);
actual_settings.max_elements = main_priority->getElementsLimit(cache_lock);
throw;
}
if (updated)
{
LOG_INFO(log, "Changed max_size from {} to {}, max_elements from {} to {}",
actual_settings.max_size, new_settings.max_size,
actual_settings.max_elements, new_settings.max_elements);
actual_settings.max_size = main_priority->getSizeLimit(cache_lock);
actual_settings.max_elements = main_priority->getElementsLimit(cache_lock);
}
}
}


@ -25,6 +25,12 @@ FileCacheSettings FileCacheFactory::FileCacheData::getSettings() const
return settings;
}
void FileCacheFactory::FileCacheData::setSettings(const FileCacheSettings & new_settings)
{
std::lock_guard lock(settings_mutex);
settings = new_settings;
}
FileCacheFactory & FileCacheFactory::instance()
{
static FileCacheFactory ret;
@ -100,21 +106,23 @@ void FileCacheFactory::updateSettingsFromConfig(const Poco::Util::AbstractConfig
FileCacheSettings new_settings;
new_settings.loadFromConfig(config, cache_info->config_path);
FileCacheSettings old_settings;
{
std::lock_guard lock(cache_info->settings_mutex);
if (new_settings == cache_info->settings)
continue;
FileCacheSettings old_settings = cache_info->getSettings();
if (old_settings == new_settings)
continue;
old_settings = cache_info->settings;
try
{
cache_info->cache->applySettingsIfPossible(new_settings, old_settings);
}
catch (...)
{
/// Settings changes could be partially applied in case of exception,
/// make sure cache_info->settings show correct state of applied settings.
cache_info->setSettings(old_settings);
throw;
}
cache_info->cache->applySettingsIfPossible(new_settings, old_settings);
{
std::lock_guard lock(cache_info->settings_mutex);
cache_info->settings = old_settings;
}
cache_info->setSettings(old_settings);
}
}


@ -24,6 +24,8 @@ public:
FileCacheSettings getSettings() const;
void setSettings(const FileCacheSettings & new_settings);
const FileCachePtr cache;
const std::string config_path;


@ -55,9 +55,9 @@ public:
virtual ~IFileCachePriority() = default;
size_t getElementsLimit() const { return max_elements; }
size_t getElementsLimit(const CacheGuard::Lock &) const { return max_elements; }
size_t getSizeLimit() const { return max_size; }
size_t getSizeLimit(const CacheGuard::Lock &) const { return max_size; }
virtual size_t getSize(const CacheGuard::Lock &) const = 0;
@ -86,9 +86,11 @@ public:
FinalizeEvictionFunc & finalize_eviction_func,
const CacheGuard::Lock &) = 0;
virtual bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock &) = 0;
protected:
const size_t max_size = 0;
const size_t max_elements = 0;
size_t max_size = 0;
size_t max_elements = 0;
};
}
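Because modifySizeLimits() can now change the limits at runtime, both getters take the cache lock. A short fragment of the resulting calling convention, assuming code inside a FileCache member function (it mirrors loadMetadataForKeys() earlier in this diff):
{
    auto lock = lockCache();                                         /// CacheGuard::Lock
    const size_t size_limit = main_priority->getSizeLimit(lock);
    const size_t elements_limit = main_priority->getElementsLimit(lock);
    /// ... use both limits before the lock goes out of scope ...
}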


@ -16,6 +16,9 @@ namespace ProfileEvents
{
extern const Event FilesystemCacheEvictionSkippedFileSegments;
extern const Event FilesystemCacheEvictionTries;
extern const Event FilesystemCacheEvictMicroseconds;
extern const Event FilesystemCacheEvictedBytes;
extern const Event FilesystemCacheEvictedFileSegments;
}
namespace DB
@ -36,7 +39,7 @@ IFileCachePriority::IteratorPtr LRUFileCachePriority::add( /// NOLINT
return std::make_shared<LRUIterator>(add(Entry(key_metadata->key, offset, size, key_metadata), lock));
}
LRUFileCachePriority::LRUIterator LRUFileCachePriority::add(Entry && entry, const CacheGuard::Lock &)
LRUFileCachePriority::LRUIterator LRUFileCachePriority::add(Entry && entry, const CacheGuard::Lock & lock)
{
if (entry.size == 0)
{
@ -59,7 +62,7 @@ LRUFileCachePriority::LRUIterator LRUFileCachePriority::add(Entry && entry, cons
}
#endif
const auto & size_limit = getSizeLimit();
const auto & size_limit = getSizeLimit(lock);
if (size_limit && current_size + entry.size > size_limit)
{
throw Exception(
@ -288,6 +291,51 @@ std::vector<FileSegmentInfo> LRUFileCachePriority::dump(const CacheGuard::Lock &
return res;
}
bool LRUFileCachePriority::modifySizeLimits(
size_t max_size_, size_t max_elements_, double /* size_ratio_ */, const CacheGuard::Lock & lock)
{
if (max_size == max_size_ && max_elements == max_elements_)
return false; /// Nothing to change.
auto check_limits_satisfied = [&]()
{
return (max_size_ == 0 || current_size <= max_size_)
&& (max_elements_ == 0 || current_elements_num <= max_elements_);
};
if (check_limits_satisfied())
{
max_size = max_size_;
max_elements = max_elements_;
return true;
}
auto iterate_func = [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata)
{
chassert(segment_metadata->file_segment->assertCorrectness());
if (!segment_metadata->releasable())
return IterationResult::CONTINUE;
auto segment = segment_metadata->file_segment;
locked_key.removeFileSegment(segment->offset(), segment->lock());
ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedFileSegments);
ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedBytes, segment->getDownloadedSize());
return IterationResult::REMOVE_AND_CONTINUE;
};
auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::FilesystemCacheEvictMicroseconds);
iterate(
[&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata)
{ return check_limits_satisfied() ? IterationResult::BREAK : iterate_func(locked_key, segment_metadata); },
lock);
max_size = max_size_;
max_elements = max_elements_;
return true;
}
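The shrink path above only evicts releasable segments and goes through the usual iterate()/removeFileSegment() machinery. Stripped of those details, the idea reduces to the following standalone toy (not ClickHouse code), shown purely to make the evict-until-the-new-limit-fits loop explicit:
#include <cstddef>
#include <list>

struct ToyLRU
{
    std::list<size_t> entry_sizes;   /// front = least recently used
    size_t current_size = 0;
    size_t max_size = 0;

    void modifySizeLimit(size_t new_max_size)
    {
        /// 0 means "unlimited", as in the cache settings.
        while (new_max_size != 0 && current_size > new_max_size && !entry_sizes.empty())
        {
            current_size -= entry_sizes.front();
            entry_sizes.pop_front();
        }
        max_size = new_max_size;
    }
};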
void LRUFileCachePriority::LRUIterator::remove(const CacheGuard::Lock & lock)
{
assertValid();


@ -48,6 +48,8 @@ public:
void pop(const CacheGuard::Lock & lock) { remove(queue.begin(), lock); }
bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock &) override;
private:
void updateElementsCount(int64_t num);
void updateSize(int64_t size);


@ -687,7 +687,7 @@ void CacheMetadata::startup()
download_threads.emplace_back(std::make_shared<DownloadThread>());
download_threads.back()->thread = std::make_unique<ThreadFromGlobalPool>([this, thread = download_threads.back()] { downloadThreadFunc(thread->stop_flag); });
}
cleanup_thread = std::make_unique<ThreadFromGlobalPool>(std::function{ [this]{ cleanupThreadFunc(); }});
cleanup_thread = std::make_unique<ThreadFromGlobalPool>([this]{ cleanupThreadFunc(); });
}
void CacheMetadata::shutdown()
@ -714,10 +714,10 @@ bool CacheMetadata::setBackgroundDownloadThreads(size_t threads_num)
if (threads_num == download_threads_num)
return false;
SCOPE_EXIT({ download_threads_num = download_threads.size(); });
if (threads_num > download_threads_num)
{
SCOPE_EXIT({ download_threads_num = download_threads.size(); });
size_t add_threads = threads_num - download_threads_num;
for (size_t i = 0; i < add_threads; ++i)
{
@ -745,7 +745,6 @@ bool CacheMetadata::setBackgroundDownloadThreads(size_t threads_num)
}
download_queue->cv.notify_all();
SCOPE_EXIT({ download_threads_num = download_threads.size(); });
for (size_t i = 0; i < remove_threads; ++i)
{


@ -21,14 +21,15 @@ namespace
SLRUFileCachePriority::SLRUFileCachePriority(
size_t max_size_,
size_t max_elements_,
double size_ratio)
double size_ratio_)
: IFileCachePriority(max_size_, max_elements_)
, size_ratio(size_ratio_)
, protected_queue(LRUFileCachePriority(getRatio(max_size_, size_ratio), getRatio(max_elements_, size_ratio)))
, probationary_queue(LRUFileCachePriority(getRatio(max_size_, 1 - size_ratio), getRatio(max_elements_, 1 - size_ratio)))
{
LOG_DEBUG(
log, "Using probationary queue size: {}, protected queue size: {}",
probationary_queue.getSizeLimit(), protected_queue.getSizeLimit());
probationary_queue.max_size, protected_queue.max_size);
}
size_t SLRUFileCachePriority::getSize(const CacheGuard::Lock & lock) const
@ -151,7 +152,7 @@ void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const Cach
/// Entry is in probationary queue.
/// We need to move it to protected queue.
const size_t size = iterator.getEntry().size;
if (size > protected_queue.getSizeLimit())
if (size > protected_queue.getSizeLimit(lock))
{
/// Entry size is bigger than the whole protected queue limit.
/// This is only possible if protected_queue_size_limit is less than max_file_segment_size,
@ -235,6 +236,21 @@ void SLRUFileCachePriority::shuffle(const CacheGuard::Lock & lock)
probationary_queue.shuffle(lock);
}
bool SLRUFileCachePriority::modifySizeLimits(
size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock & lock)
{
if (max_size == max_size_ && max_elements == max_elements_ && size_ratio == size_ratio_)
return false; /// Nothing to change.
protected_queue.modifySizeLimits(getRatio(max_size_, size_ratio_), getRatio(max_elements_, size_ratio_), 0, lock);
probationary_queue.modifySizeLimits(getRatio(max_size_, 1 - size_ratio_), getRatio(max_elements_, 1 - size_ratio_), 0, lock);
max_size = max_size_;
max_elements = max_elements_;
size_ratio = size_ratio_;
return true;
}
SLRUFileCachePriority::SLRUIterator::SLRUIterator(
SLRUFileCachePriority * cache_priority_,
LRUFileCachePriority::LRUIterator && lru_iterator_,


@ -18,7 +18,7 @@ private:
public:
class SLRUIterator;
SLRUFileCachePriority(size_t max_size_, size_t max_elements_, double size_ratio);
SLRUFileCachePriority(size_t max_size_, size_t max_elements_, double size_ratio_);
size_t getSize(const CacheGuard::Lock & lock) const override;
@ -45,7 +45,10 @@ public:
std::vector<FileSegmentInfo> dump(const CacheGuard::Lock &) override;
bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock &) override;
private:
double size_ratio;
LRUFileCachePriority protected_queue;
LRUFileCachePriority probationary_queue;
Poco::Logger * log = &Poco::Logger::get("SLRUFileCachePriority");


@ -135,7 +135,7 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster,
}
/// disable parallel replicas if cluster contains only shards with 1 replica
if (context->canUseParallelReplicas())
if (context->canUseTaskBasedParallelReplicas())
{
bool disable_parallel_replicas = true;
for (const auto & shard : cluster.getShardsInfo())
@ -265,7 +265,7 @@ void executeQuery(
// decide for each shard if parallel reading from replicas should be enabled
// according to settings and number of replicas declared per shard
const auto & addresses = cluster->getShardsAddresses().at(i);
bool parallel_replicas_enabled = addresses.size() > 1 && context->canUseParallelReplicas();
bool parallel_replicas_enabled = addresses.size() > 1 && context->canUseTaskBasedParallelReplicas();
stream_factory.createForShard(
shard_info,


@ -74,6 +74,7 @@
#include <Interpreters/Session.h>
#include <Interpreters/TraceCollector.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadWriteBufferFromHTTP.h>
#include <IO/UncompressedCache.h>
#include <IO/MMappedFileCache.h>
#include <IO/WriteSettings.h>
@ -361,6 +362,8 @@ struct ContextSharedPart : boost::noncopyable
OrdinaryBackgroundExecutorPtr moves_executor TSA_GUARDED_BY(background_executors_mutex);
OrdinaryBackgroundExecutorPtr fetch_executor TSA_GUARDED_BY(background_executors_mutex);
OrdinaryBackgroundExecutorPtr common_executor TSA_GUARDED_BY(background_executors_mutex);
/// The global pool of HTTP sessions for background fetches.
PooledSessionFactoryPtr fetches_session_factory TSA_GUARDED_BY(background_executors_mutex);
RemoteHostFilter remote_host_filter TSA_GUARDED_BY(mutex); /// Allowed URL from config.xml
HTTPHeaderFilter http_header_filter TSA_GUARDED_BY(mutex); /// Forbidden HTTP headers from config.xml
@ -4050,7 +4053,8 @@ void Context::checkCanBeDropped(const String & database, const String & table, c
"2. File '{}' intended to force DROP {}\n"
"How to fix this:\n"
"1. Either increase (or set to zero) max_[table/partition]_size_to_drop in server config\n"
"2. Either create forcing file {} and make sure that ClickHouse has write permission for it.\n"
"2. Either pass a bigger (or set to zero) max_[table/partition]_size_to_drop through query settings\n"
"3. Either create forcing file {} and make sure that ClickHouse has write permission for it.\n"
"Example:\nsudo touch '{}' && sudo chmod 666 '{}'",
backQuoteIfNeed(database), backQuoteIfNeed(table),
size_str, max_size_to_drop_str,
@ -4078,6 +4082,10 @@ void Context::checkTableCanBeDropped(const String & database, const String & tab
checkCanBeDropped(database, table, table_size, max_table_size_to_drop);
}
void Context::checkTableCanBeDropped(const String & database, const String & table, const size_t & table_size, const size_t & max_table_size_to_drop) const
{
checkCanBeDropped(database, table, table_size, max_table_size_to_drop);
}
void Context::setMaxPartitionSizeToDrop(size_t max_size)
{
@ -4097,6 +4105,10 @@ void Context::checkPartitionCanBeDropped(const String & database, const String &
checkCanBeDropped(database, table, partition_size, max_partition_size_to_drop);
}
void Context::checkPartitionCanBeDropped(const String & database, const String & table, const size_t & partition_size, const size_t & max_partition_size_to_drop) const
{
checkCanBeDropped(database, table, partition_size, max_partition_size_to_drop);
}
InputFormatPtr Context::getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, UInt64 max_block_size, const std::optional<FormatSettings> & format_settings, const std::optional<size_t> max_parsing_threads) const
{
@ -4815,6 +4827,11 @@ void Context::initializeBackgroundExecutorsIfNeeded()
);
LOG_INFO(shared->log, "Initialized background executor for move operations with num_threads={}, num_tasks={}", background_move_pool_size, background_move_pool_size);
auto timeouts = ConnectionTimeouts::getFetchPartHTTPTimeouts(getServerSettings(), getSettingsRef());
/// The number of background fetches is limited by the number of threads in the background thread pool.
/// It doesn't make any sense to limit the number of connections per host any further.
shared->fetches_session_factory = std::make_shared<PooledSessionFactory>(timeouts, background_fetches_pool_size);
shared->fetch_executor = std::make_shared<OrdinaryBackgroundExecutor>
(
"Fetch",
@ -4868,6 +4885,12 @@ OrdinaryBackgroundExecutorPtr Context::getCommonExecutor() const
return shared->common_executor;
}
PooledSessionFactoryPtr Context::getCommonFetchesSessionFactory() const
{
SharedLockGuard lock(shared->background_executors_mutex);
return shared->fetches_session_factory;
}
IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) const
{
callOnce(shared->readers_initialized, [&] {
@ -4973,6 +4996,7 @@ ReadSettings Context::getReadSettings() const
res.http_retry_initial_backoff_ms = settings.http_retry_initial_backoff_ms;
res.http_retry_max_backoff_ms = settings.http_retry_max_backoff_ms;
res.http_skip_not_found_url_for_globs = settings.http_skip_not_found_url_for_globs;
res.http_make_head_request = settings.http_make_head_request;
res.mmap_cache = getMMappedFileCache().get();
@ -5017,7 +5041,7 @@ Context::ParallelReplicasMode Context::getParallelReplicasMode() const
return SAMPLE_KEY;
}
bool Context::canUseParallelReplicas() const
bool Context::canUseTaskBasedParallelReplicas() const
{
const auto & settings_ref = getSettingsRef();
return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS && settings_ref.max_parallel_replicas > 1;
@ -5025,12 +5049,12 @@ bool Context::canUseParallelReplicas() const
bool Context::canUseParallelReplicasOnInitiator() const
{
return canUseParallelReplicas() && !getClientInfo().collaborate_with_initiator;
return canUseTaskBasedParallelReplicas() && !getClientInfo().collaborate_with_initiator;
}
bool Context::canUseParallelReplicasOnFollower() const
{
return canUseParallelReplicas() && getClientInfo().collaborate_with_initiator;
return canUseTaskBasedParallelReplicas() && getClientInfo().collaborate_with_initiator;
}
void Context::setPreparedSetsCache(const PreparedSetsCachePtr & cache)


@ -202,6 +202,9 @@ using TemporaryDataOnDiskScopePtr = std::shared_ptr<TemporaryDataOnDiskScope>;
class PreparedSetsCache;
using PreparedSetsCachePtr = std::shared_ptr<PreparedSetsCache>;
class PooledSessionFactory;
using PooledSessionFactoryPtr = std::shared_ptr<PooledSessionFactory>;
class SessionTracker;
struct ServerSettings;
@ -1081,11 +1084,13 @@ public:
void setMaxTableSizeToDrop(size_t max_size);
size_t getMaxTableSizeToDrop() const;
void checkTableCanBeDropped(const String & database, const String & table, const size_t & table_size) const;
void checkTableCanBeDropped(const String & database, const String & table, const size_t & table_size, const size_t & max_table_size_to_drop) const;
/// Prevents DROP PARTITION if its size is greater than max_size (50GB by default, max_size=0 turn off this check)
void setMaxPartitionSizeToDrop(size_t max_size);
size_t getMaxPartitionSizeToDrop() const;
void checkPartitionCanBeDropped(const String & database, const String & table, const size_t & partition_size) const;
void checkPartitionCanBeDropped(const String & database, const String & table, const size_t & partition_size, const size_t & max_partition_size_to_drop) const;
/// Lets you select the compression codec according to the conditions described in the configuration file.
std::shared_ptr<ICompressionCodec> chooseCompressionCodec(size_t part_size, double part_size_ratio) const;
@ -1211,6 +1216,7 @@ public:
OrdinaryBackgroundExecutorPtr getMovesExecutor() const;
OrdinaryBackgroundExecutorPtr getFetchesExecutor() const;
OrdinaryBackgroundExecutorPtr getCommonExecutor() const;
PooledSessionFactoryPtr getCommonFetchesSessionFactory() const;
IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const;
#if USE_LIBURING
@ -1228,7 +1234,7 @@ public:
WriteSettings getWriteSettings() const;
/** There are multiple conditions that have to be met to be able to use parallel replicas */
bool canUseParallelReplicas() const;
bool canUseTaskBasedParallelReplicas() const;
bool canUseParallelReplicasOnInitiator() const;
bool canUseParallelReplicasOnFollower() const;


@ -858,11 +858,8 @@ const ASTSelectQuery * ExpressionAnalyzer::getSelectQuery() const
bool ExpressionAnalyzer::isRemoteStorage() const
{
const Settings & csettings = getContext()->getSettingsRef();
// Consider any storage used in parallel replicas as remote, so the query is executed in multiple servers
const bool enable_parallel_processing_of_joins
= csettings.max_parallel_replicas > 1 && csettings.allow_experimental_parallel_reading_from_replicas > 0;
return syntax->is_remote_storage || enable_parallel_processing_of_joins;
return syntax->is_remote_storage || getContext()->canUseTaskBasedParallelReplicas();
}
const ASTSelectQuery * SelectQueryExpressionAnalyzer::getAggregatingQuery() const


@ -481,7 +481,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
/// Check support for FINAL for parallel replicas
bool is_query_with_final = isQueryWithFinal(query_info);
if (is_query_with_final && settings.allow_experimental_parallel_reading_from_replicas > 0)
if (is_query_with_final && context->canUseTaskBasedParallelReplicas())
{
if (settings.allow_experimental_parallel_reading_from_replicas == 1)
{
@ -2942,6 +2942,7 @@ void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan)
auto sorting_step = std::make_unique<SortingStep>(
query_plan.getCurrentDataStream(),
window.full_sort_description,
window.partition_by,
0 /* LIMIT */,
sort_settings,
settings.optimize_sorting_by_input_stream_properties);


@ -8,6 +8,7 @@
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTInterpolateElement.h>
#include <Parsers/ASTLiteral.h>
namespace DB
{
@ -126,7 +127,7 @@ void RequiredSourceColumnsMatcher::visit(const ASTSelectQuery & select, const AS
if (const auto * identifier = node->as<ASTIdentifier>())
data.addColumnIdentifier(*identifier);
else
else if (!node->as<ASTLiteral>())
data.addColumnAliasIfAny(*node);
}


@ -73,6 +73,7 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int UNKNOWN_IDENTIFIER;
extern const int UNEXPECTED_EXPRESSION;
}
namespace
@ -776,6 +777,37 @@ void expandGroupByAll(ASTSelectQuery * select_query)
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, group_expression_list);
}
void expandOrderByAll(ASTSelectQuery * select_query)
{
auto * all_elem = select_query->orderBy()->children[0]->as<ASTOrderByElement>();
if (!all_elem)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not order by asts.");
auto order_expression_list = std::make_shared<ASTExpressionList>();
for (const auto & expr : select_query->select()->children)
{
if (auto * identifier = expr->as<ASTIdentifier>(); identifier != nullptr)
if (Poco::toUpper(identifier->name()) == "ALL" || Poco::toUpper(identifier->alias) == "ALL")
throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION,
"Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again");
if (auto * function = expr->as<ASTFunction>(); function != nullptr)
if (Poco::toUpper(function->alias) == "ALL")
throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION,
"Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again");
auto elem = std::make_shared<ASTOrderByElement>();
elem->direction = all_elem->direction;
elem->nulls_direction = all_elem->nulls_direction;
elem->nulls_direction_was_explicitly_specified = all_elem->nulls_direction_was_explicitly_specified;
elem->children.push_back(expr);
order_expression_list->children.push_back(elem);
}
select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, order_expression_list);
}
ASTs getAggregates(ASTPtr & query, const ASTSelectQuery & select_query)
{
/// There can not be aggregate functions inside the WHERE and PREWHERE.
@ -1292,6 +1324,10 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
if (select_query->group_by_all)
expandGroupByAll(select_query);
// expand ORDER BY ALL
if (settings.enable_order_by_all && select_query->order_by_all)
expandOrderByAll(select_query);
/// Remove unneeded columns according to 'required_result_columns'.
/// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.
/// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost)

View File

@ -144,7 +144,7 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F
window()->as<ASTExpressionList &>().formatImplMultiline(s, state, frame);
}
if (orderBy())
if (!order_by_all && orderBy())
{
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY" << (s.hilite ? hilite_none : "");
s.one_line
@ -163,6 +163,24 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F
}
}
if (order_by_all)
{
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY ALL" << (s.hilite ? hilite_none : "");
auto * elem = orderBy()->children[0]->as<ASTOrderByElement>();
s.ostr << (s.hilite ? hilite_keyword : "")
<< (elem->direction == -1 ? " DESC" : " ASC")
<< (s.hilite ? hilite_none : "");
if (elem->nulls_direction_was_explicitly_specified)
{
s.ostr << (s.hilite ? hilite_keyword : "")
<< " NULLS "
<< (elem->nulls_direction == elem->direction ? "LAST" : "FIRST")
<< (s.hilite ? hilite_none : "");
}
}
if (limitByLength())
{
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "LIMIT " << (s.hilite ? hilite_none : "");


@ -87,6 +87,7 @@ public:
bool group_by_with_cube = false;
bool group_by_with_constant_keys = false;
bool group_by_with_grouping_sets = false;
bool order_by_all = false;
bool limit_with_ties = false;
ASTPtr & refSelect() { return getExpression(Expression::SELECT); }


@ -14,6 +14,7 @@
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTInterpolateElement.h>
#include <Parsers/ASTIdentifier.h>
#include <Poco/String.h>
namespace DB
@ -287,6 +288,13 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
interpolate_expression_list = std::make_shared<ASTExpressionList>();
}
}
else if (order_expression_list->children.size() == 1)
{
/// ORDER BY ALL
auto * identifier = order_expression_list->children[0]->as<ASTOrderByElement>()->children[0]->as<ASTIdentifier>();
if (identifier != nullptr && Poco::toUpper(identifier->name()) == "ALL")
select_query->order_by_all = true;
}
}
/// This is needed for TOP expression, because it can also use WITH TIES.


@ -915,6 +915,7 @@ void addWindowSteps(QueryPlan & query_plan,
auto sorting_step = std::make_unique<SortingStep>(
query_plan.getCurrentDataStream(),
window_description.full_sort_description,
window_description.partition_by,
0 /*limit*/,
sort_settings,
settings.optimize_sorting_by_input_stream_properties);
@ -1340,7 +1341,7 @@ void Planner::buildPlanForQueryNode()
const auto & settings = query_context->getSettingsRef();
if (settings.allow_experimental_parallel_reading_from_replicas > 0)
if (query_context->canUseTaskBasedParallelReplicas())
{
const auto & table_expression_nodes = planner_context->getTableExpressionNodeToData();
for (const auto & it : table_expression_nodes)
@ -1366,7 +1367,7 @@ void Planner::buildPlanForQueryNode()
}
}
if (settings.allow_experimental_parallel_reading_from_replicas > 0 || !settings.parallel_replicas_custom_key.value.empty())
if (query_context->canUseTaskBasedParallelReplicas() || !settings.parallel_replicas_custom_key.value.empty())
{
/// Check support for JOIN for parallel replicas with custom key
if (planner_context->getTableExpressionNodeToData().size() > 1)


@ -236,7 +236,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact
scalars["_shard_num"]
= Block{{DataTypeUInt32().createColumnConst(1, shard.shard_info.shard_num), std::make_shared<DataTypeUInt32>(), "_shard_num"}};
if (context->canUseParallelReplicas())
if (context->canUseTaskBasedParallelReplicas())
{
if (context->getSettingsRef().cluster_for_parallel_replicas.changed)
{
@ -258,7 +258,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact
shard.shard_info.pool, query_string, output_stream->header, context, throttler, scalars, external_tables, stage);
remote_query_executor->setLogger(log);
if (context->canUseParallelReplicas())
if (context->canUseTaskBasedParallelReplicas())
{
// when doing parallel reading from replicas (ParallelReplicasMode::READ_TASKS) on a shard:
// establish a connection to a replica on the shard, the replica will instantiate coordinator to manage parallel reading from replicas on the shard.


@ -1,3 +1,4 @@
#include <memory>
#include <stdexcept>
#include <IO/Operators.h>
#include <Processors/Merges/MergingSortedTransform.h>
@ -9,6 +10,8 @@
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <Common/JSONBuilder.h>
#include <Processors/ResizeProcessor.h>
#include <Processors/Transforms/ScatterByPartitionTransform.h>
namespace CurrentMetrics
{
@ -76,6 +79,21 @@ SortingStep::SortingStep(
output_stream->sort_scope = DataStream::SortScope::Global;
}
SortingStep::SortingStep(
const DataStream & input_stream,
const SortDescription & description_,
const SortDescription & partition_by_description_,
UInt64 limit_,
const Settings & settings_,
bool optimize_sorting_by_input_stream_properties_)
: SortingStep(input_stream, description_, limit_, settings_, optimize_sorting_by_input_stream_properties_)
{
partition_by_description = partition_by_description_;
output_stream->sort_description = result_description;
output_stream->sort_scope = DataStream::SortScope::Stream;
}
SortingStep::SortingStep(
const DataStream & input_stream_,
SortDescription prefix_description_,
@ -117,7 +135,11 @@ void SortingStep::updateOutputStream()
{
output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits());
output_stream->sort_description = result_description;
output_stream->sort_scope = DataStream::SortScope::Global;
if (partition_by_description.empty())
output_stream->sort_scope = DataStream::SortScope::Global;
else
output_stream->sort_scope = DataStream::SortScope::Stream;
}
void SortingStep::updateLimit(size_t limit_)
@ -135,6 +157,55 @@ void SortingStep::convertToFinishSorting(SortDescription prefix_description_)
prefix_description = std::move(prefix_description_);
}
void SortingStep::scatterByPartitionIfNeeded(QueryPipelineBuilder& pipeline)
{
size_t threads = pipeline.getNumThreads();
size_t streams = pipeline.getNumStreams();
if (!partition_by_description.empty() && threads > 1)
{
Block stream_header = pipeline.getHeader();
ColumnNumbers key_columns;
key_columns.reserve(partition_by_description.size());
for (auto & col : partition_by_description)
{
key_columns.push_back(stream_header.getPositionByName(col.column_name));
}
pipeline.transform([&](OutputPortRawPtrs ports)
{
Processors processors;
for (auto * port : ports)
{
auto scatter = std::make_shared<ScatterByPartitionTransform>(stream_header, threads, key_columns);
connect(*port, scatter->getInputs().front());
processors.push_back(scatter);
}
return processors;
});
if (streams > 1)
{
pipeline.transform([&](OutputPortRawPtrs ports)
{
Processors processors;
for (size_t i = 0; i < threads; ++i)
{
size_t output_it = i;
auto resize = std::make_shared<ResizeProcessor>(stream_header, streams, 1);
auto & inputs = resize->getInputs();
for (auto input_it = inputs.begin(); input_it != inputs.end(); output_it += threads, ++input_it)
connect(*ports[output_it], *input_it);
processors.push_back(resize);
}
return processors;
});
}
}
}
void SortingStep::finishSorting(
QueryPipelineBuilder & pipeline, const SortDescription & input_sort_desc, const SortDescription & result_sort_desc, const UInt64 limit_)
{
@ -260,10 +331,12 @@ void SortingStep::fullSortStreams(
void SortingStep::fullSort(
QueryPipelineBuilder & pipeline, const SortDescription & result_sort_desc, const UInt64 limit_, const bool skip_partial_sort)
{
scatterByPartitionIfNeeded(pipeline);
fullSortStreams(pipeline, sort_settings, result_sort_desc, limit_, skip_partial_sort);
/// If there are several streams, then we merge them into one
if (pipeline.getNumStreams() > 1)
if (pipeline.getNumStreams() > 1 && (partition_by_description.empty() || pipeline.getNumThreads() == 1))
{
auto transform = std::make_shared<MergingSortedTransform>(
pipeline.getHeader(),
@ -295,6 +368,7 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build
{
bool need_finish_sorting = (prefix_description.size() < result_description.size());
mergingSorted(pipeline, prefix_description, (need_finish_sorting ? 0 : limit));
if (need_finish_sorting)
{
finishSorting(pipeline, prefix_description, result_description, limit);


@ -40,6 +40,15 @@ public:
const Settings & settings_,
bool optimize_sorting_by_input_stream_properties_);
/// Full with partitioning
SortingStep(
const DataStream & input_stream,
const SortDescription & description_,
const SortDescription & partition_by_description_,
UInt64 limit_,
const Settings & settings_,
bool optimize_sorting_by_input_stream_properties_);
/// FinishSorting
SortingStep(
const DataStream & input_stream_,
@ -83,14 +92,24 @@ public:
bool skip_partial_sort = false);
private:
void scatterByPartitionIfNeeded(QueryPipelineBuilder& pipeline);
void updateOutputStream() override;
static void
mergeSorting(QueryPipelineBuilder & pipeline, const Settings & sort_settings, const SortDescription & result_sort_desc, UInt64 limit_);
static void mergeSorting(
QueryPipelineBuilder & pipeline,
const Settings & sort_settings,
const SortDescription & result_sort_desc,
UInt64 limit_);
void mergingSorted(QueryPipelineBuilder & pipeline, const SortDescription & result_sort_desc, UInt64 limit_);
void mergingSorted(
QueryPipelineBuilder & pipeline,
const SortDescription & result_sort_desc,
UInt64 limit_);
void finishSorting(
QueryPipelineBuilder & pipeline, const SortDescription & input_sort_desc, const SortDescription & result_sort_desc, UInt64 limit_);
QueryPipelineBuilder & pipeline,
const SortDescription & input_sort_desc,
const SortDescription & result_sort_desc,
UInt64 limit_);
void fullSort(
QueryPipelineBuilder & pipeline,
const SortDescription & result_sort_desc,
@ -101,6 +120,9 @@ private:
SortDescription prefix_description;
const SortDescription result_description;
SortDescription partition_by_description;
UInt64 limit;
bool always_read_till_end = false;


@ -67,7 +67,8 @@ void WindowStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ
// This resize is needed for cases such as `over ()` when we don't have a
// sort node, and the input might have multiple streams. The sort node would
// have resized it.
pipeline.resize(1);
if (window_description.full_sort_description.empty())
pipeline.resize(1);
pipeline.addSimpleTransform(
[&](const Block & /*header*/)


@ -377,9 +377,7 @@ private:
auto & output = outputs.front();
auto chunk = std::move(single_level_chunks.back());
single_level_chunks.pop_back();
const auto has_rows = chunk.hasRows();
if (has_rows)
output.push(std::move(chunk));
output.push(std::move(chunk));
if (finished && single_level_chunks.empty())
{
@ -387,7 +385,7 @@ private:
return Status::Finished;
}
return has_rows ? Status::PortFull : Status::Ready;
return Status::PortFull;
}
/// Read all sources and try to push current bucket.
@ -466,7 +464,8 @@ private:
auto block = params->aggregator.prepareBlockAndFillWithoutKey(
*first, params->final, first->type != AggregatedDataVariants::Type::without_key);
single_level_chunks.emplace_back(convertToChunk(block));
if (block.rows() > 0)
single_level_chunks.emplace_back(convertToChunk(block));
}
}
@ -493,7 +492,8 @@ private:
auto blocks = params->aggregator.prepareBlockAndFillSingleLevel</* return_single_block */ false>(*first, params->final);
for (auto & block : blocks)
single_level_chunks.emplace_back(convertToChunk(block));
if (block.rows() > 0)
single_level_chunks.emplace_back(convertToChunk(block));
finished = true;
data.reset();


@ -0,0 +1,129 @@
#include <Processors/Transforms/ScatterByPartitionTransform.h>
#include <Common/PODArray.h>
#include <Core/ColumnNumbers.h>
namespace DB
{
ScatterByPartitionTransform::ScatterByPartitionTransform(Block header, size_t output_size_, ColumnNumbers key_columns_)
: IProcessor(InputPorts{header}, OutputPorts{output_size_, header})
, output_size(output_size_)
, key_columns(std::move(key_columns_))
, hash(0)
{}
IProcessor::Status ScatterByPartitionTransform::prepare()
{
auto & input = getInputs().front();
/// Check all outputs are finished or ready to get data.
bool all_finished = true;
for (auto & output : outputs)
{
if (output.isFinished())
continue;
all_finished = false;
}
if (all_finished)
{
input.close();
return Status::Finished;
}
if (!all_outputs_processed)
{
auto output_it = outputs.begin();
bool can_push = false;
for (size_t i = 0; i < output_size; ++i, ++output_it)
if (!was_output_processed[i] && output_it->canPush())
can_push = true;
if (!can_push)
return Status::PortFull;
return Status::Ready;
}
/// Try get chunk from input.
if (input.isFinished())
{
for (auto & output : outputs)
output.finish();
return Status::Finished;
}
input.setNeeded();
if (!input.hasData())
return Status::NeedData;
chunk = input.pull();
has_data = true;
was_output_processed.assign(outputs.size(), false);
return Status::Ready;
}
void ScatterByPartitionTransform::work()
{
if (all_outputs_processed)
generateOutputChunks();
all_outputs_processed = true;
size_t chunk_number = 0;
for (auto & output : outputs)
{
auto & was_processed = was_output_processed[chunk_number];
auto & output_chunk = output_chunks[chunk_number];
++chunk_number;
if (was_processed)
continue;
if (output.isFinished())
continue;
if (!output.canPush())
{
all_outputs_processed = false;
continue;
}
output.push(std::move(output_chunk));
was_processed = true;
}
if (all_outputs_processed)
{
has_data = false;
output_chunks.clear();
}
}
void ScatterByPartitionTransform::generateOutputChunks()
{
auto num_rows = chunk.getNumRows();
const auto & columns = chunk.getColumns();
hash.reset(num_rows);
for (const auto & column_number : key_columns)
columns[column_number]->updateWeakHash32(hash);
const auto & hash_data = hash.getData();
IColumn::Selector selector(num_rows);
for (size_t row = 0; row < num_rows; ++row)
selector[row] = hash_data[row] % output_size;
output_chunks.resize(output_size);
for (const auto & column : columns)
{
auto filtered_columns = column->scatter(output_size, selector);
for (size_t i = 0; i < output_size; ++i)
output_chunks[i].addColumn(std::move(filtered_columns[i]));
}
}
}


@ -0,0 +1,34 @@
#pragma once
#include <Common/WeakHash.h>
#include <Core/ColumnNumbers.h>
#include <Processors/IProcessor.h>
namespace DB
{
struct ScatterByPartitionTransform : IProcessor
{
ScatterByPartitionTransform(Block header, size_t output_size_, ColumnNumbers key_columns_);
String getName() const override { return "ScatterByPartitionTransform"; }
Status prepare() override;
void work() override;
private:
void generateOutputChunks();
size_t output_size;
ColumnNumbers key_columns;
bool has_data = false;
bool all_outputs_processed = true;
std::vector<char> was_output_processed;
Chunk chunk;
WeakHash32 hash;
Chunks output_chunks;
};
}


@ -1585,17 +1585,21 @@ struct WindowFunctionExponentialTimeDecayedSum final : public StatefulWindowFunc
static constexpr size_t ARGUMENT_VALUE = 0;
static constexpr size_t ARGUMENT_TIME = 1;
WindowFunctionExponentialTimeDecayedSum(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeFloat64>())
static Float64 getDecayLength(const Array & parameters_, const std::string & name_)
{
if (parameters_.size() != 1)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Function {} takes exactly one parameter", name_);
}
decay_length = applyVisitor(FieldVisitorConvertToNumber<Float64>(), parameters_[0]);
return applyVisitor(FieldVisitorConvertToNumber<Float64>(), parameters_[0]);
}
WindowFunctionExponentialTimeDecayedSum(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeFloat64>())
, decay_length(getDecayLength(parameters_, name_))
{
if (argument_types.size() != 2)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
@ -1670,7 +1674,7 @@ struct WindowFunctionExponentialTimeDecayedSum final : public StatefulWindowFunc
}
private:
Float64 decay_length;
const Float64 decay_length;
};
struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction
@ -1678,17 +1682,21 @@ struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction
static constexpr size_t ARGUMENT_VALUE = 0;
static constexpr size_t ARGUMENT_TIME = 1;
WindowFunctionExponentialTimeDecayedMax(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: WindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeFloat64>())
static Float64 getDecayLength(const Array & parameters_, const std::string & name_)
{
if (parameters_.size() != 1)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Function {} takes exactly one parameter", name_);
}
decay_length = applyVisitor(FieldVisitorConvertToNumber<Float64>(), parameters_[0]);
return applyVisitor(FieldVisitorConvertToNumber<Float64>(), parameters_[0]);
}
WindowFunctionExponentialTimeDecayedMax(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: WindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeFloat64>())
, decay_length(getDecayLength(parameters_, name_))
{
if (argument_types.size() != 2)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
@ -1742,24 +1750,28 @@ struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction
}
private:
Float64 decay_length;
const Float64 decay_length;
};
struct WindowFunctionExponentialTimeDecayedCount final : public StatefulWindowFunction<ExponentialTimeDecayedSumState>
{
static constexpr size_t ARGUMENT_TIME = 0;
WindowFunctionExponentialTimeDecayedCount(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeFloat64>())
static Float64 getDecayLength(const Array & parameters_, const std::string & name_)
{
if (parameters_.size() != 1)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Function {} takes exactly one parameter", name_);
}
decay_length = applyVisitor(FieldVisitorConvertToNumber<Float64>(), parameters_[0]);
return applyVisitor(FieldVisitorConvertToNumber<Float64>(), parameters_[0]);
}
WindowFunctionExponentialTimeDecayedCount(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeFloat64>())
, decay_length(getDecayLength(parameters_, name_))
{
if (argument_types.size() != 1)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
@ -1823,7 +1835,7 @@ struct WindowFunctionExponentialTimeDecayedCount final : public StatefulWindowFu
}
private:
Float64 decay_length;
const Float64 decay_length;
};
struct WindowFunctionExponentialTimeDecayedAvg final : public StatefulWindowFunction<ExponentialTimeDecayedAvgState>
@ -1831,17 +1843,21 @@ struct WindowFunctionExponentialTimeDecayedAvg final : public StatefulWindowFunc
static constexpr size_t ARGUMENT_VALUE = 0;
static constexpr size_t ARGUMENT_TIME = 1;
WindowFunctionExponentialTimeDecayedAvg(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeFloat64>())
static Float64 getDecayLength(const Array & parameters_, const std::string & name_)
{
if (parameters_.size() != 1)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Function {} takes exactly one parameter", name_);
}
decay_length = applyVisitor(FieldVisitorConvertToNumber<Float64>(), parameters_[0]);
return applyVisitor(FieldVisitorConvertToNumber<Float64>(), parameters_[0]);
}
WindowFunctionExponentialTimeDecayedAvg(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeFloat64>())
, decay_length(getDecayLength(parameters_, name_))
{
if (argument_types.size() != 2)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
@ -1933,7 +1949,7 @@ struct WindowFunctionExponentialTimeDecayedAvg final : public StatefulWindowFunc
}
private:
Float64 decay_length;
const Float64 decay_length;
};
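
All four exponentialTimeDecayed* functions get the same treatment here: parameter validation moves into a static helper so decay_length can be initialized in the constructor's initializer list and declared const. A reduced, self-contained sketch of that pattern (the class and parameter types below are illustrative, not the ClickHouse ones):

#include <stdexcept>
#include <string>
#include <vector>

// Sketch of the "validate in a static helper, then initialize a const member" pattern.
class DecayedSum
{
public:
    DecayedSum(const std::string & name, const std::vector<double> & parameters)
        : decay_length(getDecayLength(parameters, name))   // set exactly once, never reassigned
    {
    }

    double decayLength() const { return decay_length; }

private:
    static double getDecayLength(const std::vector<double> & parameters, const std::string & name)
    {
        if (parameters.size() != 1)
            throw std::invalid_argument("Function " + name + " takes exactly one parameter");
        return parameters[0];
    }

    const double decay_length;   // previously a mutable member assigned in the constructor body
};

int main()
{
    DecayedSum f("exponentialTimeDecayedSum", {0.5});
    return f.decayLength() == 0.5 ? 0 : 1;
}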
struct WindowFunctionRowNumber final : public WindowFunction
@ -1955,12 +1971,30 @@ struct WindowFunctionRowNumber final : public WindowFunction
}
};
namespace
{
struct NtileState
{
UInt64 buckets = 0;
RowNumber start_row;
UInt64 current_partition_rows = 0;
UInt64 current_partition_inserted_row = 0;
void windowInsertResultInto(
const WindowTransform * transform,
size_t function_index,
const DataTypes & argument_types);
static void checkWindowFrameType(const WindowTransform * transform);
};
}
// Usage: ntile(n). n is the number of buckets.
struct WindowFunctionNtile final : public WindowFunction
struct WindowFunctionNtile final : public StatefulWindowFunction<NtileState>
{
WindowFunctionNtile(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: WindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeUInt64>())
: StatefulWindowFunction<NtileState>(name_, argument_types_, parameters_, std::make_shared<DataTypeUInt64>())
{
if (argument_types.size() != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} takes exactly one argument", name_);
@ -1982,6 +2016,19 @@ struct WindowFunctionNtile final : public WindowFunction
void windowInsertResultInto(const WindowTransform * transform,
size_t function_index) override
{
const auto & workspace = transform->workspaces[function_index];
auto & state = getState(workspace);
state.windowInsertResultInto(transform, function_index, argument_types);
}
};
namespace
{
void NtileState::windowInsertResultInto(
const WindowTransform * transform,
size_t function_index,
const DataTypes & argument_types)
{
if (!buckets) [[unlikely]]
{
@ -2072,13 +2119,8 @@ struct WindowFunctionNtile final : public WindowFunction
bucket_num += 1;
}
}
private:
UInt64 buckets = 0;
RowNumber start_row;
UInt64 current_partition_rows = 0;
UInt64 current_partition_inserted_row = 0;
static void checkWindowFrameType(const WindowTransform * transform)
void NtileState::checkWindowFrameType(const WindowTransform * transform)
{
if (transform->order_by_indices.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for 'ntile' function must have ORDER BY clause");
@ -2093,7 +2135,7 @@ private:
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for function 'ntile' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'");
}
}
};
}
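
ntile now follows the StatefulWindowFunction pattern: the mutable bookkeeping (buckets, current_partition_rows, and so on) moves out of the function object into a per-workspace NtileState, so one function instance can serve many partitions concurrently. A toy standalone sketch of that separation (NtileStateSketch/NtileFunctionSketch are hypothetical names, and the bucket math is a simplified even split, not the exact ntile distribution, which also spreads the remainder over the leading buckets):

#include <cstdint>
#include <iostream>

// Sketch: per-partition bookkeeping lives in a separate state object instead of
// in the window function itself, so the function object stays immutable.
struct NtileStateSketch
{
    uint64_t buckets = 0;
    uint64_t rows_seen = 0;
};

struct NtileFunctionSketch
{
    uint64_t nextBucket(NtileStateSketch & state, uint64_t partition_rows, uint64_t n) const
    {
        if (state.buckets == 0)
            state.buckets = n;                      // lazily read the argument once per partition
        uint64_t row = state.rows_seen++;
        uint64_t per_bucket = (partition_rows + state.buckets - 1) / state.buckets;  // even split
        return row / per_bucket + 1;
    }
};

int main()
{
    NtileFunctionSketch fn;
    NtileStateSketch state;                          // one per partition/workspace
    for (int i = 0; i < 6; ++i)
        std::cout << fn.nextBucket(state, 6, 3) << ' ';   // prints: 1 1 2 2 3 3
    std::cout << '\n';
}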
// ClickHouse-specific variant of lag/lead that respects the window frame.
template <bool is_lead>
@ -2298,16 +2340,18 @@ struct NonNegativeDerivativeState
Float64 previous_timestamp = 0;
};
// nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL 1 SECOND])
struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction<NonNegativeDerivativeState>
struct NonNegativeDerivativeParams
{
static constexpr size_t ARGUMENT_METRIC = 0;
static constexpr size_t ARGUMENT_TIMESTAMP = 1;
static constexpr size_t ARGUMENT_INTERVAL = 2;
WindowFunctionNonNegativeDerivative(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeFloat64>())
Float64 interval_length = 1;
bool interval_specified = false;
Int64 ts_scale_multiplier = 0;
NonNegativeDerivativeParams(
const std::string & name_, const DataTypes & argument_types, const Array & parameters)
{
if (!parameters.empty())
{
@ -2365,6 +2409,18 @@ struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction
interval_specified = true;
}
}
};
// nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL 1 SECOND])
struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction<NonNegativeDerivativeState>, public NonNegativeDerivativeParams
{
using Params = NonNegativeDerivativeParams;
WindowFunctionNonNegativeDerivative(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeFloat64>())
, NonNegativeDerivativeParams(name, argument_types, parameters)
{}
bool allocatesMemoryInArena() const override { return false; }
@ -2405,10 +2461,6 @@ struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction
WindowFunctionHelpers::setValueToOutputColumn<Float64>(transform, function_index, result >= 0 ? result : 0);
}
private:
Float64 interval_length = 1;
bool interval_specified = false;
Int64 ts_scale_multiplier = 0;
};
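
nonNegativeDerivative gets a similar split: argument and parameter parsing moves into a NonNegativeDerivativeParams struct that the function inherits next to its StatefulWindowFunction base, leaving the constructor body empty. A skeletal sketch of that mixin layout (the types below are placeholders, not the real ClickHouse classes):

#include <stdexcept>
#include <string>
#include <vector>

// Placeholder for StatefulWindowFunction<NonNegativeDerivativeState>.
struct StatefulBaseSketch {};

// All parameter parsing and validation lives in the params struct.
struct DerivativeParamsSketch
{
    double interval_length = 1;
    bool interval_specified = false;

    DerivativeParamsSketch(const std::string & name, const std::vector<double> & parameters)
    {
        if (parameters.size() > 1)
            throw std::invalid_argument("Function " + name + " takes at most one parameter");
        if (!parameters.empty())
        {
            interval_length = parameters[0];
            interval_specified = true;
        }
    }
};

// The window function just wires the two bases together.
struct NonNegativeDerivativeSketch : StatefulBaseSketch, DerivativeParamsSketch
{
    NonNegativeDerivativeSketch(const std::string & name, const std::vector<double> & parameters)
        : DerivativeParamsSketch(name, parameters)
    {
    }
};

int main()
{
    NonNegativeDerivativeSketch fn("nonNegativeDerivative", {0.5});
    return fn.interval_specified ? 0 : 1;
}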

View File

@ -136,7 +136,7 @@ void WriteBufferFromHTTPServerResponse::nextImpl()
WriteBufferFromHTTPServerResponse::WriteBufferFromHTTPServerResponse(
HTTPServerResponse & response_,
bool is_http_method_head_,
size_t keep_alive_timeout_,
UInt64 keep_alive_timeout_,
bool compress_,
CompressionMethod compression_method_)
: BufferWithOwnMemory<WriteBuffer>(DBMS_DEFAULT_BUFFER_SIZE)

View File

@ -36,7 +36,7 @@ public:
WriteBufferFromHTTPServerResponse(
HTTPServerResponse & response_,
bool is_http_method_head_,
size_t keep_alive_timeout_,
UInt64 keep_alive_timeout_,
bool compress_ = false, /// If true - set Content-Encoding header and compress the result.
CompressionMethod compression_method_ = CompressionMethod::None);

View File

@ -616,12 +616,10 @@ void HTTPHandler::processQuery(
size_t buffer_size_http = DBMS_DEFAULT_BUFFER_SIZE;
size_t buffer_size_memory = (buffer_size_total > buffer_size_http) ? buffer_size_total : 0;
unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT);
used_output.out = std::make_shared<WriteBufferFromHTTPServerResponse>(
response,
request.getMethod() == HTTPRequest::HTTP_HEAD,
keep_alive_timeout,
context->getServerSettings().keep_alive_timeout.totalSeconds(),
client_supports_http_compression,
http_response_compression_method);
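
The HTTP handlers in this part of the diff stop parsing keep_alive_timeout out of the raw Poco config (each with its own DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT fallback) and instead read the already-typed value from Context::getServerSettings(). A standalone sketch of the before/after shape (ConfigSketch and ServerSettingsSketch are hypothetical stand-ins, not the real classes):

#include <chrono>
#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// Before: every handler parsed the raw config key itself, with its own default.
struct ConfigSketch
{
    std::map<std::string, uint64_t> values;
    uint64_t getUInt(const std::string & key, uint64_t def) const
    {
        auto it = values.find(key);
        return it == values.end() ? def : it->second;
    }
};

// After: one typed server-settings object owns the parsed value and its default.
struct ServerSettingsSketch
{
    std::chrono::seconds keep_alive_timeout{10};
};

int main()
{
    ConfigSketch config{{{"keep_alive_timeout", 30}}};
    uint64_t from_config = config.getUInt("keep_alive_timeout", 10);   // old style

    ServerSettingsSketch settings;
    uint64_t from_settings = settings.keep_alive_timeout.count();      // new style

    std::cout << from_config << ' ' << from_settings << '\n';
}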

View File

@ -87,8 +87,7 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe
response.setChunkedTransferEncoding(true);
Output used_output;
const auto & config = server.config();
unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT);
const auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds();
used_output.out = std::make_shared<WriteBufferFromHTTPServerResponse>(
response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);

View File

@ -17,6 +17,9 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe
{
try
{
/// Raw config reference is used here to avoid dependency on Context and ServerSettings.
/// This is painful, because this class is also used in a build with CLICKHOUSE_KEEPER_STANDALONE_BUILD=1
/// and there the ordinary Context is replaced with a tiny clone.
const auto & config = server.config();
unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT);

View File

@ -84,7 +84,8 @@ void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServe
}
}
setResponseDefaultHeaders(response, config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT));
const auto & server_settings = getContext()->getServerSettings();
setResponseDefaultHeaders(response, server_settings.keep_alive_timeout.totalSeconds());
if (!ok)
{

View File

@ -34,7 +34,7 @@ namespace ErrorCodes
}
static inline WriteBufferPtr
responseWriteBuffer(HTTPServerRequest & request, HTTPServerResponse & response, unsigned int keep_alive_timeout)
responseWriteBuffer(HTTPServerRequest & request, HTTPServerResponse & response, UInt64 keep_alive_timeout)
{
/// The client can pass a HTTP header indicating supported compression method (gzip or deflate).
String http_response_compression_methods = request.get("Accept-Encoding", "");
@ -90,7 +90,7 @@ static inline void trySendExceptionToClient(
void StaticRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
auto keep_alive_timeout = server.config().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT);
auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds();
const auto & out = responseWriteBuffer(request, response, keep_alive_timeout);
try

View File

@ -5,6 +5,7 @@
#include <Poco/Net/HTTPServerResponse.h>
#include <Poco/Util/LayeredConfiguration.h>
#include <Interpreters/Context.h>
#include <IO/HTTPCommon.h>
#ifdef __clang__
@ -37,7 +38,7 @@ WebUIRequestHandler::WebUIRequestHandler(IServer & server_)
void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
auto keep_alive_timeout = server.config().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT);
auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds();
response.setContentType("text/html; charset=UTF-8");

View File

@ -692,6 +692,15 @@ public:
/// when considering in-memory blocks.
virtual std::optional<UInt64> totalBytes(const Settings &) const { return {}; }
/// If it is possible to quickly determine the exact number of uncompressed bytes for the table on storage:
/// - disk (uncompressed)
///
/// Used for:
/// - the total_bytes_uncompressed column in system.tables
///
/// Does not take the underlying Storage (if any) into account.
virtual std::optional<UInt64> totalBytesUncompressed(const Settings &) const { return {}; }
/// Number of rows INSERTed since server start.
///
/// Does not take the underlying Storage (if any) into account.

View File

@ -522,11 +522,10 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> Fetcher::fetchSelected
uri,
Poco::Net::HTTPRequest::HTTP_POST,
nullptr,
timeouts,
creds,
DBMS_DEFAULT_BUFFER_SIZE,
0, /* no redirects */
static_cast<uint64_t>(data_settings->replicated_max_parallel_fetches_for_host));
context->getCommonFetchesSessionFactory());
int server_protocol_version = parse<int>(in->getResponseCookie("server_protocol_version", "0"));
String remote_fs_metadata = parse<String>(in->getResponseCookie("remote_fs_metadata", ""));

View File

@ -593,6 +593,23 @@ UInt64 IMergeTreeDataPart::getMarksCount() const
return index_granularity.getMarksCount();
}
UInt64 IMergeTreeDataPart::getExistingBytesOnDisk() const
{
if (!supportLightweightDeleteMutate() || !hasLightweightDelete() || !rows_count
|| !storage.getSettings()->exclude_deleted_rows_for_part_size_in_merge)
return bytes_on_disk;
/// existing_rows_count is still uninitialized
/// (if existing_rows_count equals rows_count, it means that a previous attempt to read it failed)
if (existing_rows_count > rows_count)
readExistingRowsCount();
if (existing_rows_count < rows_count)
return bytes_on_disk * existing_rows_count / rows_count;
else /// Load failed
return bytes_on_disk;
}
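
getExistingBytesOnDisk scales the on-disk size by the fraction of rows that survive lightweight deletes. A quick worked example with hypothetical numbers:

#include <cstdint>
#include <iostream>

int main()
{
    // Hypothetical part: 1'073'741'824 bytes on disk, 1'000'000 rows,
    // of which 750'000 survive lightweight deletes.
    uint64_t bytes_on_disk = 1'073'741'824;
    uint64_t rows_count = 1'000'000;
    uint64_t existing_rows_count = 750'000;

    // Same proportional estimate as getExistingBytesOnDisk() above.
    uint64_t estimate = bytes_on_disk * existing_rows_count / rows_count;
    std::cout << estimate << '\n';   // 805306368, i.e. ~768 MiB
}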
size_t IMergeTreeDataPart::getFileSizeOrZero(const String & file_name) const
{
auto checksum = checksums.files.find(file_name);
@ -1142,6 +1159,7 @@ void IMergeTreeDataPart::loadChecksums(bool require)
{
assertEOF(*buf);
bytes_on_disk = checksums.getTotalSizeOnDisk();
bytes_uncompressed_on_disk = checksums.getTotalSizeUncompressedOnDisk();
}
else
bytes_on_disk = getDataPartStorage().calculateTotalSizeOnDisk();
@ -1159,6 +1177,7 @@ void IMergeTreeDataPart::loadChecksums(bool require)
writeChecksums(checksums, {});
bytes_on_disk = checksums.getTotalSizeOnDisk();
bytes_uncompressed_on_disk = checksums.getTotalSizeUncompressedOnDisk();
}
}
@ -1285,6 +1304,85 @@ void IMergeTreeDataPart::loadRowsCount()
}
}
void IMergeTreeDataPart::readExistingRowsCount() const
{
if (!supportLightweightDeleteMutate() || !hasLightweightDelete() || !storage.getSettings()->exclude_deleted_rows_for_part_size_in_merge
|| existing_rows_count < rows_count || !getMarksCount())
return;
std::lock_guard lock(existing_rows_count_mutex);
/// Already read by another thread
if (existing_rows_count < rows_count)
return;
NamesAndTypesList cols;
cols.push_back(LightweightDeleteDescription::FILTER_COLUMN);
StorageMetadataPtr metadata_ptr = storage.getInMemoryMetadataPtr();
StorageSnapshotPtr storage_snapshot_ptr = std::make_shared<StorageSnapshot>(storage, metadata_ptr);
MergeTreeReaderPtr reader = getReader(
cols,
storage_snapshot_ptr,
MarkRanges{MarkRange(0, getMarksCount())},
nullptr,
storage.getContext()->getMarkCache().get(),
std::make_shared<AlterConversions>(),
MergeTreeReaderSettings{},
ValueSizeMap{},
ReadBufferFromFileBase::ProfileCallback{});
if (!reader)
{
LOG_WARNING(storage.log, "Create reader failed while reading existing rows count");
existing_rows_count = rows_count;
return;
}
size_t current_mark = 0;
const size_t total_mark = getMarksCount();
bool continue_reading = false;
size_t current_row = 0;
size_t existing_count = 0;
while (current_row < rows_count)
{
size_t rows_to_read = index_granularity.getMarkRows(current_mark);
continue_reading = (current_mark != 0);
Columns result;
result.resize(1);
size_t rows_read = reader->readRows(current_mark, total_mark, continue_reading, rows_to_read, result);
if (!rows_read)
{
LOG_WARNING(storage.log, "Part {} has lightweight delete, but _row_exists column not found", name);
existing_rows_count = rows_count;
return;
}
current_row += rows_read;
current_mark += (rows_to_read == rows_read);
const ColumnUInt8 * row_exists_col = typeid_cast<const ColumnUInt8 *>(result[0].get());
if (!row_exists_col)
{
LOG_WARNING(storage.log, "Part {} _row_exists column type is not UInt8", name);
existing_rows_count = rows_count;
return;
}
for (UInt8 row_exists : row_exists_col->getData())
if (row_exists)
existing_count++;
}
existing_rows_count = existing_count;
LOG_DEBUG(storage.log, "Part {} existing_rows_count = {}", name, existing_rows_count);
}
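
readExistingRowsCount uses a sentinel plus a mutex so the expensive _row_exists scan runs at most once: the sentinel is re-checked after locking, and every failure path collapses it to rows_count. A simplified sketch of that shape (the original also does a cheap pre-check before taking the mutex, omitted here for brevity; the names are illustrative):

#include <cstdint>
#include <limits>
#include <mutex>

// Sketch: compute the existing-rows count at most once; UINT64_MAX means "not read yet".
class LazyExistingRows
{
public:
    uint64_t get(uint64_t rows_count)
    {
        std::lock_guard lock(mutex);
        if (value <= rows_count)            // already read (or a previous read failed)
            return value;
        value = expensiveScan(rows_count);  // the real code stores rows_count here on failure
        return value;
    }

private:
    static uint64_t expensiveScan(uint64_t rows_count) { return rows_count / 2; }  // placeholder

    std::mutex mutex;
    uint64_t value = std::numeric_limits<uint64_t>::max();
};

int main()
{
    LazyExistingRows rows;
    return rows.get(1000) == 500 ? 0 : 1;
}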
void IMergeTreeDataPart::appendFilesOfRowsCount(Strings & files)
{
files.push_back("count.txt");

View File

@ -229,6 +229,13 @@ public:
size_t rows_count = 0;
/// Existing rows count (excluding lightweight-deleted rows)
/// UINT64_MAX -> uninitialized
/// 0 -> all rows were deleted
/// If reading fails, it is set to rows_count
mutable size_t existing_rows_count = UINT64_MAX;
mutable std::mutex existing_rows_count_mutex;
time_t modification_time = 0;
/// When the part is removed from the working set. Changes once.
mutable std::atomic<time_t> remove_time { std::numeric_limits<time_t>::max() };
@ -370,7 +377,13 @@ public:
UInt64 getIndexSizeFromFile() const;
UInt64 getBytesOnDisk() const { return bytes_on_disk; }
UInt64 getBytesUncompressedOnDisk() const { return bytes_uncompressed_on_disk; }
void setBytesOnDisk(UInt64 bytes_on_disk_) { bytes_on_disk = bytes_on_disk_; }
void setBytesUncompressedOnDisk(UInt64 bytes_uncompressed_on_disk_) { bytes_uncompressed_on_disk = bytes_uncompressed_on_disk_; }
/// Returns the estimated size of existing rows if the setting exclude_deleted_rows_for_part_size_in_merge is true.
/// Otherwise returns bytes_on_disk.
UInt64 getExistingBytesOnDisk() const;
size_t getFileSizeOrZero(const String & file_name) const;
auto getFilesChecksums() const { return checksums.files; }
@ -498,6 +511,9 @@ public:
/// True if here is lightweight deleted mask file in part.
bool hasLightweightDelete() const { return columns.contains(LightweightDeleteDescription::FILTER_COLUMN.name); }
/// Read existing rows count from _row_exists column
void readExistingRowsCount() const;
void writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings);
/// Checks the consistency of this data part.
@ -566,6 +582,7 @@ protected:
/// Total size on disk, not only columns. May not contain size of
/// checksums.txt and columns.txt. 0 - if not counted;
UInt64 bytes_on_disk{0};
UInt64 bytes_uncompressed_on_disk{0};
/// Columns description. Cannot be changed, after part initialization.
NamesAndTypesList columns;

View File

@ -160,7 +160,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare()
}
/// Start to make the main work
size_t estimated_space_for_merge = MergeTreeDataMergerMutator::estimateNeededDiskSpace(parts);
size_t estimated_space_for_merge = MergeTreeDataMergerMutator::estimateNeededDiskSpace(parts, true);
/// Can throw an exception while reserving space.
IMergeTreeDataPart::TTLInfos ttl_infos;

View File

@ -4835,10 +4835,18 @@ void MergeTreeData::checkPartitionCanBeDropped(const ASTPtr & partition, Context
partition_size += part->getBytesOnDisk();
auto table_id = getStorageID();
const auto & query_settings = local_context->getSettingsRef();
if (query_settings.max_partition_size_to_drop.changed)
{
getContext()->checkPartitionCanBeDropped(table_id.database_name, table_id.table_name, partition_size, query_settings.max_partition_size_to_drop);
return;
}
getContext()->checkPartitionCanBeDropped(table_id.database_name, table_id.table_name, partition_size);
}
void MergeTreeData::checkPartCanBeDropped(const String & part_name)
void MergeTreeData::checkPartCanBeDropped(const String & part_name, ContextPtr local_context)
{
if (!supportsReplication() && isStaticStorage())
return;
@ -4848,6 +4856,14 @@ void MergeTreeData::checkPartCanBeDropped(const String & part_name)
throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "No part {} in committed state", part_name);
auto table_id = getStorageID();
const auto & query_settings = local_context->getSettingsRef();
if (query_settings.max_partition_size_to_drop.changed)
{
getContext()->checkPartitionCanBeDropped(table_id.database_name, table_id.table_name, part->getBytesOnDisk(), query_settings.max_partition_size_to_drop);
return;
}
getContext()->checkPartitionCanBeDropped(table_id.database_name, table_id.table_name, part->getBytesOnDisk());
}
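
Both drop checks now honor a query-level max_partition_size_to_drop override when the user changed it, and fall back to the server-wide limit otherwise. A compact sketch of that precedence (checkDropAllowed and its parameters are made up for illustration):

#include <cstdint>
#include <optional>
#include <stdexcept>

// Sketch: prefer the query-level limit when it was explicitly changed,
// otherwise use the server-wide default; 0 means "no limit".
void checkDropAllowed(uint64_t partition_size,
                      std::optional<uint64_t> query_level_limit,
                      uint64_t server_level_limit)
{
    uint64_t limit = query_level_limit.value_or(server_level_limit);
    if (limit && partition_size > limit)
        throw std::runtime_error("Partition is bigger than max_[table/partition]_size_to_drop");
}

int main()
{
    checkDropAllowed(100, std::nullopt, 0);   // no limit configured: passes
    checkDropAllowed(100, 500, 50);           // query-level override wins: passes
}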
@ -5035,7 +5051,7 @@ Pipe MergeTreeData::alterPartition(
if (command.part)
{
auto part_name = command.partition->as<ASTLiteral &>().value.safeGet<String>();
checkPartCanBeDropped(part_name);
checkPartCanBeDropped(part_name, query_context);
dropPart(part_name, command.detach, query_context);
}
else

Some files were not shown because too many files have changed in this diff.