Merge branch 'master' into divanik/add_local_and_azure_iceberg_support

Daniil Ivanik 2024-08-13 18:24:02 +02:00 committed by GitHub
commit f9f41405cc
510 changed files with 5638 additions and 4762 deletions


@ -59,6 +59,9 @@ At a minimum, the following information should be added (but add more as needed)
- [ ] <!---ci_exclude_tsan|msan|ubsan|coverage--> Exclude: All with TSAN, MSAN, UBSAN, Coverage
- [ ] <!---ci_exclude_aarch64|release|debug--> Exclude: All with aarch64, release, debug
---
- [ ] <!---ci_include_fuzzer--> Run only fuzzers related jobs (libFuzzer fuzzers, AST fuzzers, etc.)
- [ ] <!---ci_exclude_ast--> Exclude: AST fuzzers
---
- [ ] <!---do_not_test--> Do not test
- [ ] <!---woolen_wolfdog--> Woolen Wolfdog
- [ ] <!---upload_all--> Upload binaries for special builds

.gitmodules vendored (3 changed lines)

@ -230,9 +230,6 @@
[submodule "contrib/minizip-ng"]
path = contrib/minizip-ng
url = https://github.com/zlib-ng/minizip-ng
[submodule "contrib/annoy"]
path = contrib/annoy
url = https://github.com/ClickHouse/annoy
[submodule "contrib/qpl"]
path = contrib/qpl
url = https://github.com/intel/qpl


@ -1,4 +1,4 @@
add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
add_compile_options("$<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>")
if (USE_CLANG_TIDY)
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")


@ -58,6 +58,10 @@ namespace Net
void setKeepAliveTimeout(Poco::Timespan keepAliveTimeout);
size_t getKeepAliveTimeout() const { return _keepAliveTimeout.totalSeconds(); }
size_t getMaxKeepAliveRequests() const { return _maxKeepAliveRequests; }
private:
bool _firstRequest;
Poco::Timespan _keepAliveTimeout;


@ -19,11 +19,11 @@ namespace Poco {
namespace Net {
HTTPServerSession::HTTPServerSession(const StreamSocket& socket, HTTPServerParams::Ptr pParams):
HTTPSession(socket, pParams->getKeepAlive()),
_firstRequest(true),
_keepAliveTimeout(pParams->getKeepAliveTimeout()),
_maxKeepAliveRequests(pParams->getMaxKeepAliveRequests())
HTTPServerSession::HTTPServerSession(const StreamSocket & socket, HTTPServerParams::Ptr pParams)
: HTTPSession(socket, pParams->getKeepAlive())
, _firstRequest(true)
, _keepAliveTimeout(pParams->getKeepAliveTimeout())
, _maxKeepAliveRequests(pParams->getMaxKeepAliveRequests())
{
setTimeout(pParams->getTimeout());
}
@ -52,11 +52,12 @@ bool HTTPServerSession::hasMoreRequests()
}
else if (_maxKeepAliveRequests != 0 && getKeepAlive())
{
if (_maxKeepAliveRequests > 0)
--_maxKeepAliveRequests;
return buffered() > 0 || socket().poll(_keepAliveTimeout, Socket::SELECT_READ);
}
else return false;
if (_maxKeepAliveRequests > 0)
--_maxKeepAliveRequests;
return buffered() > 0 || socket().poll(_keepAliveTimeout, Socket::SELECT_READ);
}
else
return false;
}


@ -57,8 +57,8 @@ option(WITH_COVERAGE "Instrumentation for code coverage with default implementat
if (WITH_COVERAGE)
message (STATUS "Enabled instrumentation for code coverage")
set(COVERAGE_FLAGS "SHELL:-fprofile-instr-generate -fcoverage-mapping")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
set (COVERAGE_FLAGS -fprofile-instr-generate -fcoverage-mapping)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
endif()
option (SANITIZE_COVERAGE "Instrumentation for code coverage with custom callbacks" OFF)


@ -205,9 +205,8 @@ add_contrib (morton-nd-cmake morton-nd)
if (ARCH_S390X)
add_contrib(crc32-s390x-cmake crc32-s390x)
endif()
add_contrib (annoy-cmake annoy)
option(ENABLE_USEARCH "Enable USearch (Approximate Neighborhood Search, HNSW) support" ${ENABLE_LIBRARIES})
option(ENABLE_USEARCH "Enable USearch" ${ENABLE_LIBRARIES})
if (ENABLE_USEARCH)
add_contrib (FP16-cmake FP16)
add_contrib (robin-map-cmake robin-map)

contrib/annoy vendored (1 changed line)

@ -1 +0,0 @@
Subproject commit f2ac8e7b48f9a9cf676d3b58286e5455aba8e956


@ -1,24 +0,0 @@
option(ENABLE_ANNOY "Enable Annoy index support" ${ENABLE_LIBRARIES})
# Annoy index should be disabled with undefined sanitizer. Because of memory storage optimizations
# (https://github.com/ClickHouse/annoy/blob/9d8a603a4cd252448589e84c9846f94368d5a289/src/annoylib.h#L442-L463)
# UBSan fails and leads to a crash. A similar issue is already open in the Annoy repo
# https://github.com/spotify/annoy/issues/456
# Problems with alignment can lead to errors like
# (https://stackoverflow.com/questions/46790550/c-undefined-behavior-strict-aliasing-rule-or-incorrect-alignment)
# or will lead to crash on arm https://developer.arm.com/documentation/ka003038/latest
# These issues should be resolved before Annoy becomes non-experimental (--> setting "allow_experimental_annoy_index")
if ((NOT ENABLE_ANNOY) OR (SANITIZE STREQUAL "undefined") OR (ARCH_AARCH64))
message (STATUS "Not using annoy")
return()
endif()
set(ANNOY_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/annoy")
set(ANNOY_SOURCE_DIR "${ANNOY_PROJECT_DIR}/src")
add_library(_annoy INTERFACE)
target_include_directories(_annoy SYSTEM INTERFACE ${ANNOY_SOURCE_DIR})
add_library(ch_contrib::annoy ALIAS _annoy)
target_compile_definitions(_annoy INTERFACE ENABLE_ANNOY)
target_compile_definitions(_annoy INTERFACE ANNOYLIB_MULTITHREADED_BUILD)

@ -1 +1 @@
Subproject commit 1f95f8083066f5b38fd2db172e7e7f9aa7c49d2d
Subproject commit b922c8ab9004ef9944982e4f165e2747b13223fa


@ -1,9 +1,7 @@
set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch")
set(USEARCH_SOURCE_DIR "${USEARCH_PROJECT_DIR}/include")
set(FP16_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/FP16")
set(ROBIN_MAP_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/robin-map")
set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD-map")
set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD")
set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch")
add_library(_usearch INTERFACE)
@ -11,7 +9,6 @@ target_include_directories(_usearch SYSTEM INTERFACE
${FP16_PROJECT_DIR}/include
${ROBIN_MAP_PROJECT_DIR}/include
${SIMSIMD_PROJECT_DIR}/include
${USEARCH_SOURCE_DIR})
${USEARCH_PROJECT_DIR}/include)
add_library(ch_contrib::usearch ALIAS _usearch)
target_compile_definitions(_usearch INTERFACE ENABLE_USEARCH)


@ -108,7 +108,8 @@ if [ -n "$MAKE_DEB" ]; then
bash -x /build/packages/build
fi
mv ./programs/clickhouse* /output || mv ./programs/*_fuzzer /output
mv ./programs/clickhouse* /output ||:
mv ./programs/*_fuzzer /output ||:
[ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output
[ -x ./programs/self-extracting/clickhouse-stripped ] && mv ./programs/self-extracting/clickhouse-stripped /output
[ -x ./programs/self-extracting/clickhouse-keeper ] && mv ./programs/self-extracting/clickhouse-keeper /output


@ -54,8 +54,6 @@ source /utils.lib
/usr/share/clickhouse-test/config/install.sh
./setup_minio.sh stateless
./mc admin trace clickminio > /test_output/minio.log &
MC_ADMIN_PID=$!
./setup_hdfs_minicluster.sh
@ -176,6 +174,55 @@ done
setup_logs_replication
attach_gdb_to_clickhouse
# create tables for minio log webhooks
clickhouse-client --query "CREATE TABLE minio_audit_logs
(
log String,
event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '\"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC')
)
ENGINE = MergeTree
ORDER BY tuple()"
clickhouse-client --query "CREATE TABLE minio_server_logs
(
log String,
event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '\"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC')
)
ENGINE = MergeTree
ORDER BY tuple()"
# create minio log webhooks for both audit and server logs
# use async inserts to avoid creating too many parts
./mc admin config set clickminio logger_webhook:ch_server_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_server_logs%20FORMAT%20LineAsString" queue_size=1000000 batch_size=500
./mc admin config set clickminio audit_webhook:ch_audit_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_audit_logs%20FORMAT%20LineAsString" queue_size=1000000 batch_size=500
max_retries=100
retry=1
while [ $retry -le $max_retries ]; do
echo "clickminio restart attempt $retry:"
output=$(./mc admin service restart clickminio --wait --json 2>&1 | jq -r .status)
echo "Output of restart status: $output"
expected_output="success
success"
if [ "$output" = "$expected_output" ]; then
echo "Restarted clickminio successfully."
break
fi
sleep 1
retry=$((retry + 1))
done
if [ $retry -gt $max_retries ]; then
echo "Failed to restart clickminio after $max_retries attempts."
fi
./mc admin trace clickminio > /test_output/minio.log &
MC_ADMIN_PID=$!
function fn_exists() {
declare -F "$1" > /dev/null;
}
@ -339,6 +386,14 @@ do
fi
done
# collect minio audit and server logs
# wait for minio to flush its batch if it has any
sleep 1
clickhouse-client -q "SYSTEM FLUSH ASYNC INSERT QUEUE"
clickhouse-client -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow"
clickhouse-client -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow"
# Stop server so we can safely read data with clickhouse-local.
# Why do we read data with clickhouse-local?
# Because it's the simplest way to read it when server has crashed.


@ -99,10 +99,9 @@ upload_data() {
# iterating over globs will cause redundant file variable to be
# a path to a file, not a filename
# shellcheck disable=SC2045
for file in $(ls "${data_path}"); do
echo "${file}";
./mc cp "${data_path}"/"${file}" clickminio/test/"${file}";
done
if [ -d "${data_path}" ]; then
./mc cp --recursive "${data_path}"/ clickminio/test/
fi
}
setup_aws_credentials() {


@ -14,7 +14,7 @@ Each functional test sends one or multiple queries to the running ClickHouse ser
Tests are located in the `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from ClickHouse, and this data is available to the general public.
Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`.
Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`.
:::note
A common mistake when testing data types `DateTime` and `DateTime64` is assuming that the server uses a specific time zone (e.g. "UTC"). This is not the case, time zones in CI test runs
@ -38,7 +38,7 @@ For more options, see `tests/clickhouse-test --help`. You can simply run all tes
### Adding a New Test
To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client --multiquery < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.
To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.
Tests should use (create, drop, etc) only tables in the `test` database, which is assumed to be created beforehand; tests can also use temporary tables.


@ -17,7 +17,7 @@ In terms of SQL, the nearest neighborhood problem can be expressed as follows:
``` sql
SELECT *
FROM table_with_ann_index
FROM table
ORDER BY Distance(vectors, Point)
LIMIT N
```
@ -27,75 +27,109 @@ Function `Distance` computes the distance between two vectors. Often, the Euclid
distance functions](/docs/en/sql-reference/functions/distance-functions.md) are also possible. `Point` is the reference point, e.g. `(0.17,
0.33, ...)`, and `N` limits the number of search results.
An alternative formulation of the nearest neighborhood search problem looks as follows:
This query returns the top-`N` closest points to the reference point. Parameter `N` limits the number of returned values which is useful for
situations where `MaxDistance` is difficult to determine in advance.
``` sql
SELECT *
FROM table_with_ann_index
WHERE Distance(vectors, Point) < MaxDistance
LIMIT N
```
While the first query returns the top-`N` closest points to the reference point, the second query returns all points closer to the reference
point than a maximally allowed radius `MaxDistance`. Parameter `N` limits the number of returned values which is useful for situations where
`MaxDistance` is difficult to determine in advance.
With brute force search, both queries are expensive (linear in the number of points) because the distance between all points in `vectors` and
With brute force search, the query is expensive (linear in the number of points) because the distance between all points in `vectors` and
`Point` must be computed. To speed this process up, Approximate Nearest Neighbor Search Indexes (ANN indexes) store a compact representation
of the search space (using clustering, search trees, etc.), which allows computing an approximate answer much more quickly (in sub-linear time).
# Creating and Using ANN Indexes {#creating_using_ann_indexes}
# Creating and Using Vector Similarity Indexes
Syntax to create an ANN index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column:
Syntax to create a vector similarity index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column:
```sql
CREATE TABLE table_with_ann_index
CREATE TABLE table
(
`id` Int64,
`vectors` Array(Float32),
INDEX [ann_index_name vectors TYPE [ann_index_type]([ann_index_parameters]) [GRANULARITY [N]]
id Int64,
vectors Array(Float32),
INDEX index_name vectors TYPE vector_similarity(method, distance_function[, quantization, connectivity, expansion_add, expansion_search]) [GRANULARITY N]
)
ENGINE = MergeTree
ORDER BY id;
```
Parameters:
- `method`: currently, only `hnsw` is supported.
- `distance_function`: either `L2Distance` (the [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance) - the length of a
line between two points in Euclidean space), or `cosineDistance` (the [cosine
distance](https://en.wikipedia.org/wiki/Cosine_similarity#Cosine_distance) - the angle between two non-zero vectors).
- `quantization`: either `f32`, `f16`, or `i8` for storing the vector with reduced precision (optional, default: `f32`)
- `m`: the number of neighbors per graph node (optional, default: 16)
- `ef_construction`: the size of the dynamic candidate list during index construction; larger values improve index quality at the cost of slower builds (optional, default: 128)
- `ef_search`: the size of the dynamic candidate list during search; larger values improve recall at the cost of slower queries (optional, default: 64)
Example:
```sql
CREATE TABLE table
(
id Int64,
vectors Array(Float32),
INDEX idx vectors TYPE vector_similarity('hnsw', 'L2Distance') -- Alternative syntax: TYPE vector_similarity(hnsw, L2Distance)
)
ENGINE = MergeTree
ORDER BY id;
```
Vector similarity indexes are based on the [USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW
algorithm](https://arxiv.org/abs/1603.09320), i.e., a hierarchical graph where each point represents a vector and the edges represent
similarity. Such hierarchical structures can be very efficient on large collections. They may often fetch 0.05% or less data from the
overall dataset while still providing 99% recall. This is especially useful when working with high-dimensional vectors that are expensive
to load and compare. The library also has several hardware-specific SIMD optimizations to further accelerate distance computations on modern
Arm (NEON and SVE) and x86 (AVX2 and AVX-512) CPUs, and OS-specific optimizations for efficient navigation around immutable persistent
files, without loading them into RAM.
USearch indexes are currently experimental; to use them, you first need to `SET allow_experimental_vector_similarity_index = 1`.
Vector similarity indexes currently support two distance functions:
- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space
([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)).
- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
Vector similarity indexes allow storing vectors in reduced-precision formats. Supported scalar kinds are `f64`, `f32`, `f16` or `i8`.
If no scalar kind was specified during index creation, `f16` is used as the default.
For normalized data, `L2Distance` is usually a better choice; otherwise, `cosineDistance` is recommended to compensate for scale. If no
distance function was specified during index creation, `L2Distance` is used as the default.
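As an illustration only, here is a minimal sketch of a fully parameterized index definition, assuming the positional argument order from the syntax above (method, distance function, quantization, then the tuning parameters); the table name and the concrete values are placeholders:
```sql
CREATE TABLE table_quantized
(
    id Int64,
    vectors Array(Float32),
    -- 'f16' stores the index at half precision; 64 / 256 / 128 are example tuning values
    INDEX idx vectors TYPE vector_similarity('hnsw', 'cosineDistance', 'f16', 64, 256, 128)
)
ENGINE = MergeTree
ORDER BY id;
```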
:::note
All arrays must have the same length. To avoid errors, you can use a
[CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints), for example, `CONSTRAINT constraint_name_1 CHECK
length(vectors) = 256`. Also, empty `Arrays` and unspecified `Array` values in INSERT statements (i.e. default values) are not supported.
:::
:::note
The vector similarity index currently does not work with per-table, non-default `index_granularity` settings (see
[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
:::
ANN indexes are built during column insertion and merge. As a result, `INSERT` and `OPTIMIZE` statements will be slower than for ordinary
tables. ANN indexes are ideally used only with immutable or rarely changed data, or when there are far more read requests than write
requests.
ANN indexes support two types of queries:
- ORDER BY queries:
ANN indexes support these queries:
``` sql
SELECT *
FROM table_with_ann_index
FROM table
[WHERE ...]
ORDER BY Distance(vectors, Point)
LIMIT N
```
- WHERE queries:
``` sql
SELECT *
FROM table_with_ann_index
WHERE Distance(vectors, Point) < MaxDistance
LIMIT N
```
:::tip
To avoid writing out large vectors, you can use [query
parameters](/docs/en/interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters), e.g.
```bash
clickhouse-client --param_vec='hello' --query="SELECT * FROM table_with_ann_index WHERE L2Distance(vectors, {vec: Array(Float32)}) < 1.0"
clickhouse-client --param_vec='hello' --query="SELECT * FROM table WHERE L2Distance(vectors, {vec: Array(Float32)}) < 1.0"
```
:::
**Restrictions**: Queries that contain both a `WHERE Distance(vectors, Point) < MaxDistance` and an `ORDER BY Distance(vectors, Point)`
clause cannot use ANN indexes. Also, the approximate algorithms used to determine the nearest neighbors require a limit, hence queries
without `LIMIT` clause cannot utilize ANN indexes. Also, ANN indexes are only used if the query has a `LIMIT` value smaller than setting
**Restrictions**: Approximate algorithms used to determine the nearest neighbors require a limit, hence queries without `LIMIT` clause
cannot utilize ANN indexes. Also, ANN indexes are only used if the query has a `LIMIT` value smaller than setting
`max_limit_for_ann_queries` (default: 1 million rows). This is a safeguard to prevent large memory allocations by external libraries for
approximate neighbor search.
@ -122,128 +156,3 @@ brute-force distance calculation over all rows of the granules. With a small `GR
equally good, only the processing performance differs. It is generally recommended to use a large `GRANULARITY` for ANN indexes and fall
back to smaller `GRANULARITY` values only in case of problems like excessive memory consumption of the ANN structures. If no `GRANULARITY`
was specified for ANN indexes, the default value is 100 million.
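For illustration, a hedged sketch of an index declared with an explicit `GRANULARITY` (the table name and the value are placeholders):
```sql
CREATE TABLE table_with_coarse_index
(
    id Int64,
    vectors Array(Float32),
    -- a single large GRANULARITY value, in the spirit of the 100 million default mentioned above
    INDEX idx vectors TYPE vector_similarity('hnsw', 'L2Distance') GRANULARITY 100000000
)
ENGINE = MergeTree
ORDER BY id;
```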
# Available ANN Indexes {#available_ann_indexes}
- [Annoy](/docs/en/engines/table-engines/mergetree-family/annindexes.md#annoy-annoy)
- [USearch](/docs/en/engines/table-engines/mergetree-family/annindexes.md#usearch-usearch)
## Annoy {#annoy}
Annoy indexes are currently experimental, to use them you first need to `SET allow_experimental_annoy_index = 1`. They are also currently
disabled on ARM due to memory safety problems with the algorithm.
This type of ANN index is based on the [Annoy library](https://github.com/spotify/annoy) which recursively divides the space into random
linear surfaces (lines in 2D, planes in 3D etc.).
<div class='vimeo-container'>
<iframe src="//www.youtube.com/embed/QkCCyLW0ehU"
width="640"
height="360"
frameborder="0"
allow="autoplay;
fullscreen;
picture-in-picture"
allowfullscreen>
</iframe>
</div>
Syntax to create an Annoy index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column:
```sql
CREATE TABLE table_with_annoy_index
(
id Int64,
vectors Array(Float32),
INDEX [ann_index_name] vectors TYPE annoy([Distance[, NumTrees]]) [GRANULARITY N]
)
ENGINE = MergeTree
ORDER BY id;
```
Annoy currently supports two distance functions:
- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space
([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)).
- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
distance function was specified during index creation, `L2Distance` is used as default.
Parameter `NumTrees` is the number of trees which the algorithm creates (default if not specified: 100). Higher values of `NumTree` mean
more accurate search results but slower index creation / query times (approximately linearly) as well as larger index sizes.
:::note
All arrays must have same length. To avoid errors, you can use a
[CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints), for example, `CONSTRAINT constraint_name_1 CHECK
length(vectors) = 256`. Also, empty `Arrays` and unspecified `Array` values in INSERT statements (i.e. default values) are not supported.
:::
The creation of Annoy indexes (whenever a new part is built, e.g. at the end of a merge) is a relatively slow process. You can increase
setting `max_threads_for_annoy_index_creation` (default: 4) which controls how many threads are used to create an Annoy index. Please be
careful with this setting, it is possible that multiple indexes are created in parallel in which case there can be overparallelization.
Setting `annoy_index_search_k_nodes` (default: `NumTrees * LIMIT`) determines how many tree nodes are inspected during SELECTs. Larger
values mean more accurate results at the cost of longer query runtime:
```sql
SELECT *
FROM table_name
ORDER BY L2Distance(vectors, Point)
LIMIT N
SETTINGS annoy_index_search_k_nodes=100;
```
:::note
The Annoy index currently does not work with per-table, non-default `index_granularity` settings (see
[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
:::
## USearch {#usearch}
This type of ANN index is based on the [USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW
algorithm](https://arxiv.org/abs/1603.09320), i.e., builds a hierarchical graph where each point represents a vector and the edges represent
similarity. Such hierarchical structures can be very efficient on large collections. They may often fetch 0.05% or less data from the
overall dataset, while still providing 99% recall. This is especially useful when working with high-dimensional vectors,
that are expensive to load and compare. The library also has several hardware-specific SIMD optimizations to accelerate further
distance computations on modern Arm (NEON and SVE) and x86 (AVX2 and AVX-512) CPUs and OS-specific optimizations to allow efficient
navigation around immutable persistent files, without loading them into RAM.
<div class='vimeo-container'>
<iframe src="//www.youtube.com/embed/UMrhB3icP9w"
width="640"
height="360"
frameborder="0"
allow="autoplay;
fullscreen;
picture-in-picture"
allowfullscreen>
</iframe>
</div>
Syntax to create an USearch index over an [Array](../../../sql-reference/data-types/array.md) column:
```sql
CREATE TABLE table_with_usearch_index
(
id Int64,
vectors Array(Float32),
INDEX [ann_index_name] vectors TYPE usearch([Distance[, ScalarKind]]) [GRANULARITY N]
)
ENGINE = MergeTree
ORDER BY id;
```
USearch currently supports two distance functions:
- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space
([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)).
- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
USearch allows storing the vectors in reduced precision formats. Supported scalar kinds are `f64`, `f32`, `f16` or `i8`. If no scalar kind
was specified during index creation, `f16` is used as default.
For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
distance function was specified during index creation, `L2Distance` is used as default.


@ -143,6 +143,18 @@ value can be specified at session, profile or query level using setting [query_c
Entries in the query cache are compressed by default. This reduces the overall memory consumption at the cost of slower writes into / reads
from the query cache. To disable compression, use setting [query_cache_compress_entries](settings/settings.md#query-cache-compress-entries).
Sometimes it is useful to keep multiple results for the same query cached. This can be achieved using setting
[query_cache_tag](settings/settings.md#query-cache-tag), which acts as a label (or namespace) for query cache entries. The query cache
considers results of the same query with different tags to be different.
Example for creating three different query cache entries for the same query:
```sql
SELECT 1 SETTINGS use_query_cache = true; -- query_cache_tag is implicitly '' (empty string)
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 1';
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 2';
```
ClickHouse reads table data in blocks of [max_block_size](settings/settings.md#setting-max_block_size) rows. Due to filtering, aggregation,
etc., result blocks are typically much smaller than 'max_block_size' but there are also cases where they are much bigger. Setting
[query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results) (enabled by default) controls if result blocks


@ -1400,6 +1400,16 @@ The number of seconds that ClickHouse waits for incoming requests before closing
<keep_alive_timeout>10</keep_alive_timeout>
```
## max_keep_alive_requests {#max-keep-alive-requests}
The maximum number of requests served over a single keep-alive connection before it is closed by the ClickHouse server. Defaults to 10000.
**Example**
``` xml
<max_keep_alive_requests>10</max_keep_alive_requests>
```
## listen_host {#listen_host}
Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`.


@ -1800,6 +1800,17 @@ Possible values:
Default value: `0`.
## query_cache_tag {#query-cache-tag}
A string which acts as a label for [query cache](../query-cache.md) entries.
The same queries with different tags are considered different by the query cache.
Possible values:
- Any string
Default value: `''`
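A minimal sketch of setting a tag and inspecting it afterwards, assuming the `tag` column of the `system.query_cache` table described elsewhere in this change:
```sql
-- cache the same query under two different tags
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'abc';
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'def';

-- both entries are kept; the tag is visible in the system table
SELECT query, tag FROM system.query_cache;
```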
## query_cache_max_size_in_bytes {#query-cache-max-size-in-bytes}
The maximum amount of memory (in bytes) the current user may allocate in the [query cache](../query-cache.md). 0 means unlimited.
@ -5627,6 +5638,12 @@ Disable all insert and mutations (alter table update / alter table delete / alte
Default value: `false`.
## use_hive_partitioning
When enabled, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) in file-like table engines [File](../../engines/table-engines/special/file.md#hive-style-partitioning)/[S3](../../engines/table-engines/integrations/s3.md#hive-style-partitioning)/[URL](../../engines/table-engines/special/url.md#hive-style-partitioning)/[HDFS](../../engines/table-engines/integrations/hdfs.md#hive-style-partitioning)/[AzureBlobStorage](../../engines/table-engines/integrations/azureBlobStorage.md#hive-style-partitioning) and will allow using partition columns as virtual columns in the query. These virtual columns have the same names as in the partitioned path, but start with `_`.
Default value: `false`.
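A short sketch of the behaviour, mirroring the examples in the table-function docs (the path and the resulting virtual column names are illustrative):
```sql
SET use_hive_partitioning = 1;

-- the date=*/country=* path segments become the virtual columns _date and _country
SELECT *
FROM file('data/path/date=*/country=*/*.parquet')
WHERE _date > '2020-01-01' AND _country = 'Netherlands';
```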
## allow_experimental_time_series_table {#allow-experimental-time-series-table}
Allows creation of tables with the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine.


@ -24,6 +24,7 @@ Columns:
- `num_rebalance_revocations`, (UInt64) - number of times the consumer was revoked its partitions
- `num_rebalance_assignments`, (UInt64) - number of times the consumer was assigned to Kafka cluster
- `is_currently_used`, (UInt8) - consumer is in use
- `last_used`, (UInt64) - last time this consumer was in use, unix time in microseconds
- `rdkafka_stat` (String) - library internal statistics. See https://github.com/ClickHouse/librdkafka/blob/master/STATISTICS.md . Set `statistics_interval_ms` to 0 to disable; the default is 3000 (once every three seconds).
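For example, a hedged sketch of reading `last_used` as a timestamp (it is stored as microseconds since the Unix epoch; the conversion via `fromUnixTimestamp64Micro` is an assumption about the most convenient way to render it):
```sql
SELECT
    is_currently_used,
    fromUnixTimestamp64Micro(toInt64(last_used)) AS last_used_time
FROM system.kafka_consumers;
```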
Example:


@ -9,6 +9,7 @@ Columns:
- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
- `result_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Size of the query cache entry.
- `tag` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Tag of the query cache entry.
- `stale` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is stale.
- `shared` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is shared between multiple users.
- `compressed` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is compressed.
@ -26,6 +27,7 @@ Row 1:
──────
query: SELECT 1 SETTINGS use_query_cache = 1
result_size: 128
tag:
stale: 0
shared: 0
compressed: 1


@ -4189,3 +4189,94 @@ Result:
│ 32 │
└─────────────────────────────┘
```
## getSubcolumn
Takes a table expression or identifier and a constant string with the name of the sub-column, and returns the requested sub-column extracted from the expression.
**Syntax**
```sql
getSubcolumn(col_name, subcol_name)
```
**Arguments**
- `col_name` — Table expression or identifier. [Expression](../syntax.md/#expressions), [Identifier](../syntax.md/#identifiers).
- `subcol_name` — The name of the sub-column. [String](../data-types/string.md).
**Returned value**
- Returns the extracted sub-column.
**Example**
Query:
```sql
CREATE TABLE t_arr (arr Array(Tuple(subcolumn1 UInt32, subcolumn2 String))) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t_arr VALUES ([(1, 'Hello'), (2, 'World')]), ([(3, 'This'), (4, 'is'), (5, 'subcolumn')]);
SELECT getSubcolumn(arr, 'subcolumn1'), getSubcolumn(arr, 'subcolumn2') FROM t_arr;
```
Result:
```response
┌─getSubcolumn(arr, 'subcolumn1')─┬─getSubcolumn(arr, 'subcolumn2')─┐
1. │ [1,2] │ ['Hello','World'] │
2. │ [3,4,5] │ ['This','is','subcolumn'] │
└─────────────────────────────────┴─────────────────────────────────┘
```
## getTypeSerializationStreams
Enumerates stream paths of a data type.
:::note
This function is intended for use by developers.
:::
**Syntax**
```sql
getTypeSerializationStreams(col)
```
**Arguments**
- `col` — Column or a string representation of a data type from which the data type will be detected.
**Returned value**
- Returns an array with all the serialization sub-stream paths. [Array](../data-types/array.md)([String](../data-types/string.md)).
**Examples**
Query:
```sql
SELECT getTypeSerializationStreams(tuple('a', 1, 'b', 2));
```
Result:
```response
┌─getTypeSerializationStreams(('a', 1, 'b', 2))─────────────────────────────────────────────────────────────────────────┐
1. │ ['{TupleElement(1), Regular}','{TupleElement(2), Regular}','{TupleElement(3), Regular}','{TupleElement(4), Regular}'] │
└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
Query:
```sql
SELECT getTypeSerializationStreams('Map(String, Int64)');
```
Result:
```response
┌─getTypeSerializationStreams('Map(String, Int64)')────────────────────────────────────────────────────────────────┐
1. │ ['{ArraySizes}','{ArrayElements, TupleElement(keys), Regular}','{ArrayElements, TupleElement(values), Regular}'] │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```


@ -8,26 +8,28 @@ sidebar_label: STATISTICS
The following operations are available:
- `ALTER TABLE [db].table ADD STATISTICS (columns list) TYPE (type list)` - Adds statistic description to tables metadata.
- `ALTER TABLE [db].table ADD STATISTICS [IF NOT EXISTS] (column list) TYPE (type list)` - Adds a statistics description to the table's metadata.
- `ALTER TABLE [db].table MODIFY STATISTICS (columns list) TYPE (type list)` - Modifies statistic description to tables metadata.
- `ALTER TABLE [db].table MODIFY STATISTICS (column list) TYPE (type list)` - Modifies the statistics description in the table's metadata.
- `ALTER TABLE [db].table DROP STATISTICS (columns list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns.
- `ALTER TABLE [db].table DROP STATISTICS [IF EXISTS] (column list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns.
- `ALTER TABLE [db].table CLEAR STATISTICS (columns list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuild using `ALTER TABLE MATERIALIZE STATISTICS`.
- `ALTER TABLE [db].table CLEAR STATISTICS [IF EXISTS] (column list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuilt using `ALTER TABLE MATERIALIZE STATISTICS`.
- `ALTER TABLE [db.]table MATERIALIZE STATISTICS (columns list)` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
- `ALTER TABLE [db.]table MATERIALIZE STATISTICS [IF EXISTS] (column list)` - Rebuilds statistics for the columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
The first two commands are lightweight in the sense that they only change metadata or remove files.
Also, they are replicated, syncing statistics metadata via ZooKeeper.
There is an example adding two statistics types to two columns:
## Example:
Adding two statistics types to two columns:
```
ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq;
```
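A hedged sketch of the new `IF NOT EXISTS` / `IF EXISTS` clauses from the list above (table and column names are illustrative); with these clauses the statements can typically be re-run without erroring:
```sql
ALTER TABLE t1 ADD STATISTICS IF NOT EXISTS c, d TYPE TDigest, Uniq;
ALTER TABLE t1 DROP STATISTICS IF EXISTS c, d;
```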
:::note
Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
Statistics are supported only for [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
:::


@ -77,3 +77,16 @@ SELECT count(*) FROM azureBlobStorage('DefaultEndpointsProtocol=https;AccountNam
**See Also**
- [AzureBlobStorage Table Engine](/docs/en/engines/table-engines/integrations/azureBlobStorage.md)
## Hive-style partitioning {#hive-style-partitioning}
When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow using partition columns as virtual columns in the query. These virtual columns have the same names as in the partitioned path, but start with `_`.
**Example**
Using a virtual column created with Hive-style partitioning:
``` sql
SET use_hive_partitioning = 1;
SELECT * from azureBlobStorage(config, storage_account_url='...', container='...', blob_path='http://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
```


@ -206,6 +206,19 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
## Hive-style partitioning {#hive-style-partitioning}
When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow using partition columns as virtual columns in the query. These virtual columns have the same names as in the partitioned path, but start with `_`.
**Example**
Using a virtual column created with Hive-style partitioning:
``` sql
SET use_hive_partitioning = 1;
SELECT * from file('data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
```
## Settings {#settings}
- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-empty_if-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default.


@ -100,6 +100,19 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
## Hive-style partitioning {#hive-style-partitioning}
When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow using partition columns as virtual columns in the query. These virtual columns have the same names as in the partitioned path, but start with `_`.
**Example**
Using a virtual column created with Hive-style partitioning:
``` sql
SET use_hive_partitioning = 1;
SELECT * from HDFS('hdfs://hdfs1:9000/data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
```
## Storage Settings {#storage-settings}
- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default.


@ -274,6 +274,19 @@ FROM s3(
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. In case of archive shows uncompressed file size of the file inside the archive.
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
## Hive-style partitioning {#hive-style-partitioning}
When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow using partition columns as virtual columns in the query. These virtual columns have the same names as in the partitioned path, but start with `_`.
**Example**
Using a virtual column created with Hive-style partitioning:
``` sql
SET use_hive_partitioning = 1;
SELECT * from s3('s3://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
```
## Storage Settings {#storage-settings}
- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default.


@ -55,6 +55,19 @@ Character `|` inside patterns is used to specify failover addresses. They are it
- `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
## Hive-style partitioning {#hive-style-partitioning}
When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow using partition columns as virtual columns in the query. These virtual columns have the same names as in the partitioned path, but start with `_`.
**Example**
Using a virtual column created with Hive-style partitioning:
``` sql
SET use_hive_partitioning = 1;
SELECT * from url('http://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
```
## Storage Settings {#storage-settings}
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.


@ -23,30 +23,30 @@ slug: /zh/operations/external-authenticators/kerberos
Example (in `config.xml`):
```xml
<yandex>
<clickhouse>
<!- ... -->
<kerberos />
</yandex>
</clickhouse>
```
Principal specification:
```xml
<yandex>
<clickhouse>
<!- ... -->
<kerberos>
<principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
</kerberos>
</yandex>
</clickhouse>
```
Filtering by realm:
```xml
<yandex>
<clickhouse>
<!- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</yandex>
</clickhouse>
```
!!! warning "Note"
@ -74,7 +74,7 @@ The Kerberos principal name format usually follows this pattern:
Example (in `users.xml`):
```
<yandex>
<clickhouse>
<!- ... -->
<users>
<!- ... -->
@ -85,7 +85,7 @@ Kerberos主体名称格式通常遵循以下模式:
</kerberos>
</my_user>
</users>
</yandex>
</clickhouse>
```
!!! warning "Warning"


@ -1,4 +1,4 @@
add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
add_compile_options("$<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>")
if (USE_CLANG_TIDY)
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")


@ -223,7 +223,7 @@ std::vector<String> Client::loadWarningMessages()
size_t rows = packet.block.rows();
for (size_t i = 0; i < rows; ++i)
messages.emplace_back(column[i].get<String>());
messages.emplace_back(column[i].safeGet<String>());
}
continue;


@ -95,7 +95,7 @@ void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
client->zookeeper->set(
client->getAbsolutePath(query->args[0].safeGet<String>()),
query->args[1].safeGet<String>(),
static_cast<Int32>(query->args[2].get<Int32>()));
static_cast<Int32>(query->args[2].safeGet<Int32>()));
}
bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
@ -494,7 +494,7 @@ void RMCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con
{
Int32 version{-1};
if (query->args.size() == 2)
version = static_cast<Int32>(query->args[1].get<Int32>());
version = static_cast<Int32>(query->args[1].safeGet<Int32>());
client->zookeeper->remove(client->getAbsolutePath(query->args[0].safeGet<String>()), version);
}
@ -549,7 +549,7 @@ void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient
String leaving;
String new_members;
auto operation = query->args[0].get<ReconfigCommand::Operation>();
auto operation = query->args[0].safeGet<ReconfigCommand::Operation>();
switch (operation)
{
case static_cast<UInt8>(ReconfigCommand::Operation::ADD):


@ -27,7 +27,7 @@ std::string LibraryBridge::bridgeName() const
LibraryBridge::HandlerFactoryPtr LibraryBridge::getHandlerFactoryPtr(ContextPtr context) const
{
return std::make_shared<LibraryBridgeHandlerFactory>("LibraryRequestHandlerFactory", keep_alive_timeout, context);
return std::make_shared<LibraryBridgeHandlerFactory>("LibraryRequestHandlerFactory", context);
}
}


@ -9,12 +9,10 @@ namespace DB
{
LibraryBridgeHandlerFactory::LibraryBridgeHandlerFactory(
const std::string & name_,
size_t keep_alive_timeout_,
ContextPtr context_)
: WithContext(context_)
, log(getLogger(name_))
, name(name_)
, keep_alive_timeout(keep_alive_timeout_)
{
}
@ -26,17 +24,17 @@ std::unique_ptr<HTTPRequestHandler> LibraryBridgeHandlerFactory::createRequestHa
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
{
if (uri.getPath() == "/extdict_ping")
return std::make_unique<ExternalDictionaryLibraryBridgeExistsHandler>(keep_alive_timeout, getContext());
return std::make_unique<ExternalDictionaryLibraryBridgeExistsHandler>(getContext());
else if (uri.getPath() == "/catboost_ping")
return std::make_unique<CatBoostLibraryBridgeExistsHandler>(keep_alive_timeout, getContext());
return std::make_unique<CatBoostLibraryBridgeExistsHandler>(getContext());
}
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
{
if (uri.getPath() == "/extdict_request")
return std::make_unique<ExternalDictionaryLibraryBridgeRequestHandler>(keep_alive_timeout, getContext());
return std::make_unique<ExternalDictionaryLibraryBridgeRequestHandler>(getContext());
else if (uri.getPath() == "/catboost_request")
return std::make_unique<CatBoostLibraryBridgeRequestHandler>(keep_alive_timeout, getContext());
return std::make_unique<CatBoostLibraryBridgeRequestHandler>(getContext());
}
return nullptr;


@ -13,7 +13,6 @@ class LibraryBridgeHandlerFactory : public HTTPRequestHandlerFactory, WithContex
public:
LibraryBridgeHandlerFactory(
const std::string & name_,
size_t keep_alive_timeout_,
ContextPtr context_);
std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) override;
@ -21,7 +20,6 @@ public:
private:
LoggerPtr log;
const std::string name;
const size_t keep_alive_timeout;
};
}


@ -87,10 +87,8 @@ static void writeData(Block data, OutputFormatPtr format)
}
ExternalDictionaryLibraryBridgeRequestHandler::ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, keep_alive_timeout(keep_alive_timeout_)
, log(getLogger("ExternalDictionaryLibraryBridgeRequestHandler"))
ExternalDictionaryLibraryBridgeRequestHandler::ExternalDictionaryLibraryBridgeRequestHandler(ContextPtr context_)
: WithContext(context_), log(getLogger("ExternalDictionaryLibraryBridgeRequestHandler"))
{
}
@ -137,7 +135,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
const String & dictionary_id = params.get("dictionary_id");
LOG_TRACE(log, "Library method: '{}', dictionary id: {}", method, dictionary_id);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
try
{
@ -374,10 +372,8 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
}
ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, keep_alive_timeout(keep_alive_timeout_)
, log(getLogger("ExternalDictionaryLibraryBridgeExistsHandler"))
ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExistsHandler(ContextPtr context_)
: WithContext(context_), log(getLogger("ExternalDictionaryLibraryBridgeExistsHandler"))
{
}
@ -401,7 +397,7 @@ void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerReque
String res = library_handler ? "1" : "0";
setResponseDefaultHeaders(response, keep_alive_timeout);
setResponseDefaultHeaders(response);
LOG_TRACE(log, "Sending ping response: {} (dictionary id: {})", res, dictionary_id);
response.sendBuffer(res.data(), res.size());
}
@ -412,11 +408,8 @@ void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerReque
}
CatBoostLibraryBridgeRequestHandler::CatBoostLibraryBridgeRequestHandler(
size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, keep_alive_timeout(keep_alive_timeout_)
, log(getLogger("CatBoostLibraryBridgeRequestHandler"))
CatBoostLibraryBridgeRequestHandler::CatBoostLibraryBridgeRequestHandler(ContextPtr context_)
: WithContext(context_), log(getLogger("CatBoostLibraryBridgeRequestHandler"))
{
}
@ -455,7 +448,7 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ
const String & method = params.get("method");
LOG_TRACE(log, "Library method: '{}'", method);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
try
{
@ -617,10 +610,8 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ
}
CatBoostLibraryBridgeExistsHandler::CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, keep_alive_timeout(keep_alive_timeout_)
, log(getLogger("CatBoostLibraryBridgeExistsHandler"))
CatBoostLibraryBridgeExistsHandler::CatBoostLibraryBridgeExistsHandler(ContextPtr context_)
: WithContext(context_), log(getLogger("CatBoostLibraryBridgeExistsHandler"))
{
}
@ -634,7 +625,7 @@ void CatBoostLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & reque
String res = "1";
setResponseDefaultHeaders(response, keep_alive_timeout);
setResponseDefaultHeaders(response);
LOG_TRACE(log, "Sending ping response: {}", res);
response.sendBuffer(res.data(), res.size());
}


@ -18,14 +18,13 @@ namespace DB
class ExternalDictionaryLibraryBridgeRequestHandler : public HTTPRequestHandler, WithContext
{
public:
ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_);
explicit ExternalDictionaryLibraryBridgeRequestHandler(ContextPtr context_);
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
static constexpr auto FORMAT = "RowBinary";
const size_t keep_alive_timeout;
LoggerPtr log;
};
@ -34,12 +33,11 @@ private:
class ExternalDictionaryLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContext
{
public:
ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_);
explicit ExternalDictionaryLibraryBridgeExistsHandler(ContextPtr context_);
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
const size_t keep_alive_timeout;
LoggerPtr log;
};
@ -63,12 +61,11 @@ private:
class CatBoostLibraryBridgeRequestHandler : public HTTPRequestHandler, WithContext
{
public:
CatBoostLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_);
explicit CatBoostLibraryBridgeRequestHandler(ContextPtr context_);
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
const size_t keep_alive_timeout;
LoggerPtr log;
};
@ -77,12 +74,11 @@ private:
class CatBoostLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContext
{
public:
CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_);
explicit CatBoostLibraryBridgeExistsHandler(ContextPtr context_);
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
const size_t keep_alive_timeout;
LoggerPtr log;
};


@ -1307,6 +1307,7 @@ try
throw ErrnoException(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Input must be seekable file (it will be read twice)");
SingleReadBufferIterator read_buffer_iterator(std::move(file));
schema_columns = readSchemaFromFormat(input_format, {}, read_buffer_iterator, context_const);
}
else


@ -202,10 +202,7 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ
if (columns.empty())
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Columns definition was not returned");
WriteBufferFromHTTPServerResponse out(
response,
request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD,
keep_alive_timeout);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
try
{
writeStringBinary(columns.toString(), out);


@ -15,18 +15,12 @@ namespace DB
class ODBCColumnsInfoHandler : public HTTPRequestHandler, WithContext
{
public:
ODBCColumnsInfoHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(getLogger("ODBCColumnsInfoHandler"))
, keep_alive_timeout(keep_alive_timeout_)
{
}
explicit ODBCColumnsInfoHandler(ContextPtr context_) : WithContext(context_), log(getLogger("ODBCColumnsInfoHandler")) { }
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
LoggerPtr log;
size_t keep_alive_timeout;
};
}


@ -74,7 +74,7 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ
auto identifier = getIdentifierQuote(std::move(connection));
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
try
{
writeStringBinary(identifier, out);


@ -14,18 +14,12 @@ namespace DB
class IdentifierQuoteHandler : public HTTPRequestHandler, WithContext
{
public:
IdentifierQuoteHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(getLogger("IdentifierQuoteHandler"))
, keep_alive_timeout(keep_alive_timeout_)
{
}
explicit IdentifierQuoteHandler(ContextPtr context_) : WithContext(context_), log(getLogger("IdentifierQuoteHandler")) { }
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
LoggerPtr log;
size_t keep_alive_timeout;
};
}


@ -132,7 +132,7 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
return;
}
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
try
{


@ -20,12 +20,10 @@ class ODBCHandler : public HTTPRequestHandler, WithContext
{
public:
ODBCHandler(
size_t keep_alive_timeout_,
ContextPtr context_,
const String & mode_)
: WithContext(context_)
, log(getLogger("ODBCHandler"))
, keep_alive_timeout(keep_alive_timeout_)
, mode(mode_)
{
}
@ -35,7 +33,6 @@ public:
private:
LoggerPtr log;
size_t keep_alive_timeout;
String mode;
static inline std::mutex mutex;


@ -27,7 +27,7 @@ std::string ODBCBridge::bridgeName() const
ODBCBridge::HandlerFactoryPtr ODBCBridge::getHandlerFactoryPtr(ContextPtr context) const
{
return std::make_shared<ODBCBridgeHandlerFactory>("ODBCRequestHandlerFactory-factory", keep_alive_timeout, context);
return std::make_shared<ODBCBridgeHandlerFactory>("ODBCRequestHandlerFactory-factory", context);
}
}


@ -9,11 +9,8 @@
namespace DB
{
ODBCBridgeHandlerFactory::ODBCBridgeHandlerFactory(const std::string & name_, size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(getLogger(name_))
, name(name_)
, keep_alive_timeout(keep_alive_timeout_)
ODBCBridgeHandlerFactory::ODBCBridgeHandlerFactory(const std::string & name_, ContextPtr context_)
: WithContext(context_), log(getLogger(name_)), name(name_)
{
}
@ -23,33 +20,33 @@ std::unique_ptr<HTTPRequestHandler> ODBCBridgeHandlerFactory::createRequestHandl
LOG_TRACE(log, "Request URI: {}", uri.toString());
if (uri.getPath() == "/ping" && request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
return std::make_unique<PingHandler>(keep_alive_timeout);
return std::make_unique<PingHandler>();
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
{
if (uri.getPath() == "/columns_info")
#if USE_ODBC
return std::make_unique<ODBCColumnsInfoHandler>(keep_alive_timeout, getContext());
return std::make_unique<ODBCColumnsInfoHandler>(getContext());
#else
return nullptr;
#endif
else if (uri.getPath() == "/identifier_quote")
#if USE_ODBC
return std::make_unique<IdentifierQuoteHandler>(keep_alive_timeout, getContext());
return std::make_unique<IdentifierQuoteHandler>(getContext());
#else
return nullptr;
#endif
else if (uri.getPath() == "/schema_allowed")
#if USE_ODBC
return std::make_unique<SchemaAllowedHandler>(keep_alive_timeout, getContext());
return std::make_unique<SchemaAllowedHandler>(getContext());
#else
return nullptr;
#endif
else if (uri.getPath() == "/write")
return std::make_unique<ODBCHandler>(keep_alive_timeout, getContext(), "write");
return std::make_unique<ODBCHandler>(getContext(), "write");
else
return std::make_unique<ODBCHandler>(keep_alive_timeout, getContext(), "read");
return std::make_unique<ODBCHandler>(getContext(), "read");
}
return nullptr;
}

View File

@ -17,14 +17,13 @@ namespace DB
class ODBCBridgeHandlerFactory : public HTTPRequestHandlerFactory, WithContext
{
public:
ODBCBridgeHandlerFactory(const std::string & name_, size_t keep_alive_timeout_, ContextPtr context_);
ODBCBridgeHandlerFactory(const std::string & name_, ContextPtr context_);
std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) override;
private:
LoggerPtr log;
std::string name;
size_t keep_alive_timeout;
};
}

View File

@ -10,7 +10,7 @@ void PingHandler::handleRequest(HTTPServerRequest & /* request */, HTTPServerRes
{
try
{
setResponseDefaultHeaders(response, keep_alive_timeout);
setResponseDefaultHeaders(response);
const char * data = "Ok.\n";
response.sendBuffer(data, strlen(data));
}

View File

@ -9,11 +9,7 @@ namespace DB
class PingHandler : public HTTPRequestHandler
{
public:
explicit PingHandler(size_t keep_alive_timeout_) : keep_alive_timeout(keep_alive_timeout_) {}
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
size_t keep_alive_timeout;
};
}

View File

@ -88,7 +88,7 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer
bool result = isSchemaAllowed(std::move(connection));
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
try
{
writeBoolText(result, out);

View File

@ -17,18 +17,12 @@ class Context;
class SchemaAllowedHandler : public HTTPRequestHandler, WithContext
{
public:
SchemaAllowedHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(getLogger("SchemaAllowedHandler"))
, keep_alive_timeout(keep_alive_timeout_)
{
}
explicit SchemaAllowedHandler(ContextPtr context_) : WithContext(context_), log(getLogger("SchemaAllowedHandler")) { }
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
LoggerPtr log;
size_t keep_alive_timeout;
};
}

View File

@ -2428,6 +2428,7 @@ void Server::createServers(
Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams;
http_params->setTimeout(settings.http_receive_timeout);
http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout);
http_params->setMaxKeepAliveRequests(static_cast<int>(global_context->getServerSettings().max_keep_alive_requests));
Poco::Util::AbstractConfiguration::Keys protocols;
config.keys("protocols", protocols);
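The server-side keep-alive configuration now lives entirely on the Poco HTTPServerParams object instead of being threaded through every bridge handler. A minimal sketch of that wiring, using only stock Poco::Net APIs; the port, timeout values and request limit are illustrative, and the handler factory is assumed to be created elsewhere:

#include <Poco/Net/HTTPRequestHandlerFactory.h>
#include <Poco/Net/HTTPServer.h>
#include <Poco/Net/HTTPServerParams.h>
#include <Poco/Net/ServerSocket.h>

void runHttpServer(Poco::Net::HTTPRequestHandlerFactory::Ptr handler_factory)
{
    Poco::Net::HTTPServerParams::Ptr params = new Poco::Net::HTTPServerParams;
    params->setTimeout(Poco::Timespan(30, 0));           /// receive timeout
    params->setKeepAliveTimeout(Poco::Timespan(10, 0));  /// keep_alive_timeout
    params->setMaxKeepAliveRequests(1000);               /// max_keep_alive_requests

    Poco::Net::ServerSocket socket(8123);                /// illustrative port
    Poco::Net::HTTPServer server(handler_factory, socket, params);
    server.start();
    /// ... run until a shutdown request arrives ...
    server.stop();
}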

View File

@ -0,0 +1 @@
../../../tests/config/config.d/transactions.xml

View File

@ -10,6 +10,7 @@
#include <Poco/Net/SocketAddress.h>
#include <Poco/Net/StreamSocket.h>
#include <Daemon/BaseDaemon.h>
#include <Interpreters/Context.h>
@ -25,6 +26,12 @@ static int64_t port = 9000;
using namespace std::chrono_literals;
void on_exit()
{
BaseDaemon::terminate();
main_app.wait();
}
extern "C"
int LLVMFuzzerInitialize(int * argc, char ***argv)
{
@ -60,6 +67,8 @@ int LLVMFuzzerInitialize(int * argc, char ***argv)
exit(-1);
}
atexit(on_exit);
return 0;
}
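The hunk above registers an atexit hook so the daemon started for fuzzing is torn down when libFuzzer exits. A hedged, self-contained sketch of that initialize/teardown shape; the hook body and the test body are placeholders, not the actual fuzzer in this commit:

#include <cstddef>
#include <cstdint>
#include <cstdlib>

static void on_exit_hook()
{
    /// Placeholder: stop the in-process server and join background threads here.
}

extern "C" int LLVMFuzzerInitialize(int *, char ***)
{
    /// Placeholder: start whatever service the fuzzer exercises.
    std::atexit(on_exit_hook);
    return 0;
}

extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
{
    (void)data;
    (void)size;
    return 0;   /// a real target would feed `data` to the component under test
}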

View File

@ -780,12 +780,12 @@ AggregateFunctionPtr createAggregateFunctionGroupArray(
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);
if ((type == Field::Types::Int64 && parameters[0].get<Int64>() < 0) ||
(type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
if ((type == Field::Types::Int64 && parameters[0].safeGet<Int64>() < 0) ||
(type == Field::Types::UInt64 && parameters[0].safeGet<UInt64>() == 0))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);
has_limit = true;
max_elems = parameters[0].get<UInt64>();
max_elems = parameters[0].safeGet<UInt64>();
}
else
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
@ -816,11 +816,11 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample(
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);
if ((type == Field::Types::Int64 && parameters[i].get<Int64>() < 0) ||
(type == Field::Types::UInt64 && parameters[i].get<UInt64>() == 0))
if ((type == Field::Types::Int64 && parameters[i].safeGet<Int64>() < 0) ||
(type == Field::Types::UInt64 && parameters[i].safeGet<UInt64>() == 0))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);
return parameters[i].get<UInt64>();
return parameters[i].safeGet<UInt64>();
};
UInt64 max_elems = get_parameter(0);
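The change that repeats in this and the following hunks is Field::get<T>() being replaced with Field::safeGet<T>(). A minimal sketch of the difference, assuming only that safeGet validates the stored alternative and throws on mismatch while get does not; the function and its fallback are illustrative:

#include <Core/Field.h>

using DB::Field;

UInt64 readPositiveParameter(const Field & parameter)
{
    /// get<T>() does not check which alternative is stored, so callers must
    /// guard it with an explicit getType() check.
    if (parameter.getType() != Field::Types::UInt64)
        return 0;   /// illustrative fallback; the real code throws BAD_ARGUMENTS

    /// safeGet<T>() repeats that check internally and throws DB::Exception on
    /// a mismatch instead of silently misreading the value.
    return parameter.safeGet<UInt64>();
}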

View File

@ -83,16 +83,16 @@ public:
if (version == 1)
{
for (size_t i = 0; i < arr_size; ++i)
set.insert(static_cast<T>((*data_column)[offset + i].get<T>()));
set.insert(static_cast<T>((*data_column)[offset + i].safeGet<T>()));
}
else if (!set.empty())
{
typename State::Set new_set;
for (size_t i = 0; i < arr_size; ++i)
{
typename State::Set::LookupResult set_value = set.find(static_cast<T>((*data_column)[offset + i].get<T>()));
typename State::Set::LookupResult set_value = set.find(static_cast<T>((*data_column)[offset + i].safeGet<T>()));
if (set_value != nullptr)
new_set.insert(static_cast<T>((*data_column)[offset + i].get<T>()));
new_set.insert(static_cast<T>((*data_column)[offset + i].safeGet<T>()));
}
set = std::move(new_set);
}

View File

@ -269,12 +269,12 @@ AggregateFunctionPtr createAggregateFunctionMoving(
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name);
if ((type == Field::Types::Int64 && parameters[0].get<Int64>() <= 0) ||
(type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
if ((type == Field::Types::Int64 && parameters[0].safeGet<Int64>() <= 0) ||
(type == Field::Types::UInt64 && parameters[0].safeGet<UInt64>() == 0))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name);
limit_size = true;
max_elems = parameters[0].get<UInt64>();
max_elems = parameters[0].safeGet<UInt64>();
}
else
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,

View File

@ -397,11 +397,11 @@ AggregateFunctionPtr createAggregateFunctionGroupArray(
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);
if ((type == Field::Types::Int64 && parameters[0].get<Int64>() < 0) ||
(type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
if ((type == Field::Types::Int64 && parameters[0].safeGet<Int64>() < 0) ||
(type == Field::Types::UInt64 && parameters[0].safeGet<UInt64>() == 0))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);
max_elems = parameters[0].get<UInt64>();
max_elems = parameters[0].safeGet<UInt64>();
}
else
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,

View File

@ -247,7 +247,7 @@ AggregateFunctionPtr createAggregateFunctionGroupConcat(
if (type != Field::Types::String)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First parameter for aggregate function {} should be string", name);
delimiter = parameters[0].get<String>();
delimiter = parameters[0].safeGet<String>();
}
if (parameters.size() == 2)
{
@ -256,12 +256,12 @@ AggregateFunctionPtr createAggregateFunctionGroupConcat(
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number", name);
if ((type == Field::Types::Int64 && parameters[1].get<Int64>() <= 0) ||
(type == Field::Types::UInt64 && parameters[1].get<UInt64>() == 0))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number, got: {}", name, parameters[1].get<Int64>());
if ((type == Field::Types::Int64 && parameters[1].safeGet<Int64>() <= 0) ||
(type == Field::Types::UInt64 && parameters[1].safeGet<UInt64>() == 0))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number, got: {}", name, parameters[1].safeGet<Int64>());
has_limit = true;
limit = parameters[1].get<UInt64>();
limit = parameters[1].safeGet<UInt64>();
}
if (has_limit)

View File

@ -323,12 +323,12 @@ AggregateFunctionPtr createAggregateFunctionGroupUniqArray(
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);
if ((type == Field::Types::Int64 && parameters[0].get<Int64>() < 0) ||
(type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
if ((type == Field::Types::Int64 && parameters[0].safeGet<Int64>() < 0) ||
(type == Field::Types::UInt64 && parameters[0].safeGet<UInt64>() == 0))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);
limit_size = true;
max_elems = parameters[0].get<UInt64>();
max_elems = parameters[0].safeGet<UInt64>();
}
else
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,

View File

@ -238,7 +238,7 @@ public:
if (params[0].getType() != Field::Types::String)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a String", getName());
const auto & param = params[0].get<String>();
const auto & param = params[0].safeGet<String>();
if (param == "two-sided")
alternative = Alternative::TwoSided;
else if (param == "less")
@ -255,7 +255,7 @@ public:
if (params[1].getType() != Field::Types::String)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require second parameter to be a String", getName());
method = params[1].get<String>();
method = params[1].safeGet<String>();
if (method != "auto" && method != "exact" && method != "asymp" && method != "asymptotic")
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown method in aggregate function {}. "
"It must be one of: 'auto', 'exact', 'asymp' (or 'asymptotic')", getName());

View File

@ -181,7 +181,7 @@ public:
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a UInt64", getName());
total_buckets = params[0].get<UInt64>();
total_buckets = params[0].safeGet<UInt64>();
this->x_type = WhichDataType(arguments[0]).idx;
this->y_type = WhichDataType(arguments[1]).idx;

View File

@ -152,7 +152,7 @@ public:
if (params[0].getType() != Field::Types::String)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a String", getName());
const auto & param = params[0].get<String>();
const auto & param = params[0].safeGet<String>();
if (param == "two-sided")
alternative = Alternative::TwoSided;
else if (param == "less")
@ -169,7 +169,7 @@ public:
if (params[1].getType() != Field::Types::UInt64)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require second parameter to be a UInt64", getName());
continuity_correction = static_cast<bool>(params[1].get<UInt64>());
continuity_correction = static_cast<bool>(params[1].safeGet<UInt64>());
}
String getName() const override

View File

@ -117,7 +117,7 @@ public:
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires relative accuracy parameter with Float64 type", getName());
relative_accuracy = relative_accuracy_field.get<Float64>();
relative_accuracy = relative_accuracy_field.safeGet<Float64>();
if (relative_accuracy <= 0 || relative_accuracy >= 1 || isNaN(relative_accuracy))
throw Exception(
@ -147,9 +147,9 @@ public:
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires accuracy parameter with integer type", getName());
if (accuracy_field.getType() == Field::Types::Int64)
accuracy = accuracy_field.get<Int64>();
accuracy = accuracy_field.safeGet<Int64>();
else
accuracy = accuracy_field.get<UInt64>();
accuracy = accuracy_field.safeGet<UInt64>();
if (accuracy <= 0)
throw Exception(

View File

@ -300,12 +300,12 @@ public:
/// Compatibility with previous versions.
if (value.getType() == Field::Types::Decimal32)
{
auto source = value.get<DecimalField<Decimal32>>();
auto source = value.safeGet<DecimalField<Decimal32>>();
value = DecimalField<Decimal128>(source.getValue(), source.getScale());
}
else if (value.getType() == Field::Types::Decimal64)
{
auto source = value.get<DecimalField<Decimal64>>();
auto source = value.safeGet<DecimalField<Decimal64>>();
value = DecimalField<Decimal128>(source.getValue(), source.getScale());
}
@ -355,7 +355,7 @@ public:
/// Compatibility with previous versions.
if (value.getType() == Field::Types::Decimal128)
{
auto source = value.get<DecimalField<Decimal128>>();
auto source = value.safeGet<DecimalField<Decimal128>>();
WhichDataType value_type(values_types[col_idx]);
if (value_type.isDecimal32())
{
@ -560,7 +560,7 @@ private:
template <typename FieldType>
bool compareImpl(FieldType & x) const
{
auto val = rhs.get<FieldType>();
auto val = rhs.safeGet<FieldType>();
if (val > x)
{
x = val;
@ -600,7 +600,7 @@ private:
template <typename FieldType>
bool compareImpl(FieldType & x) const
{
auto val = rhs.get<FieldType>();
auto val = rhs.safeGet<FieldType>();
if (val < x)
{
x = val;

View File

@ -1,2 +1,2 @@
clickhouse_add_executable(aggregate_function_state_deserialization_fuzzer aggregate_function_state_deserialization_fuzzer.cpp ${SRCS})
target_link_libraries(aggregate_function_state_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions clickhouse_functions)
target_link_libraries(aggregate_function_state_deserialization_fuzzer PRIVATE clickhouse_functions clickhouse_aggregate_functions)

View File

@ -137,7 +137,7 @@ private:
if (constant_node_value.getType() != Field::Types::Which::Tuple)
return {};
const auto & constant_tuple = constant_node_value.get<const Tuple &>();
const auto & constant_tuple = constant_node_value.safeGet<const Tuple &>();
const auto & function_arguments_nodes = function_node_typed.getArguments().getNodes();
size_t function_arguments_nodes_size = function_arguments_nodes.size();

View File

@ -89,7 +89,7 @@ public:
if (!pattern || !isString(pattern->getResultType()))
continue;
auto regexp = likePatternToRegexp(pattern->getValue().get<String>());
auto regexp = likePatternToRegexp(pattern->getValue().safeGet<String>());
/// Case insensitive. Works with UTF-8 as well.
if (is_ilike)
regexp = "(?i)" + regexp;

View File

@ -68,10 +68,10 @@ void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, Col
String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple)
{
if (value.getType() == Field::Types::String)
return value.get<const String &>();
return value.safeGet<const String &>();
if (value.getType() == Field::Types::UInt64)
return data_type_tuple.getNameByPosition(value.get<UInt64>());
return data_type_tuple.getNameByPosition(value.safeGet<UInt64>());
return "";
}
@ -79,7 +79,7 @@ String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & dat
String getSubcolumnNameForElement(const Field & value, const DataTypeVariant &)
{
if (value.getType() == Field::Types::String)
return value.get<const String &>();
return value.safeGet<const String &>();
return "";
}

View File

@ -187,7 +187,7 @@ FunctionNodePtr createFusedQuantilesNode(std::vector<QueryTreeNodePtr *> & nodes
/// Sort nodes and parameters in ascending order of quantile level
std::vector<size_t> permutation(nodes.size());
iota(permutation.data(), permutation.size(), size_t(0));
std::sort(permutation.begin(), permutation.end(), [&](size_t i, size_t j) { return parameters[i].get<Float64>() < parameters[j].get<Float64>(); });
std::sort(permutation.begin(), permutation.end(), [&](size_t i, size_t j) { return parameters[i].safeGet<Float64>() < parameters[j].safeGet<Float64>(); });
std::vector<QueryTreeNodePtr *> new_nodes;
new_nodes.reserve(permutation.size());

View File

@ -134,8 +134,8 @@ public:
return;
std::set<std::string> string_values;
string_values.insert(first_literal->getValue().get<std::string>());
string_values.insert(second_literal->getValue().get<std::string>());
string_values.insert(first_literal->getValue().safeGet<std::string>());
string_values.insert(second_literal->getValue().safeGet<std::string>());
changeIfArguments(*function_if_node, string_values, context);
wrapIntoToString(*function_node, std::move(modified_if_node), context);
@ -163,7 +163,7 @@ public:
if (!isArray(literal_to->getResultType()) || !isString(literal_default->getResultType()))
return;
auto array_to = literal_to->getValue().get<Array>();
auto array_to = literal_to->getValue().safeGet<Array>();
if (array_to.empty())
return;
@ -178,9 +178,9 @@ public:
std::set<std::string> string_values;
for (const auto & value : array_to)
string_values.insert(value.get<std::string>());
string_values.insert(value.safeGet<std::string>());
string_values.insert(literal_default->getValue().get<std::string>());
string_values.insert(literal_default->getValue().safeGet<std::string>());
changeTransformArguments(*function_modified_transform_node, string_values, context);
wrapIntoToString(*function_node, std::move(modified_transform_node), context);

View File

@ -54,7 +54,7 @@ public:
}
else if (function_node->getFunctionName() == "sum" &&
first_argument_constant_literal.getType() == Field::Types::UInt64 &&
first_argument_constant_literal.get<UInt64>() == 1)
first_argument_constant_literal.safeGet<UInt64>() == 1)
{
function_node->getArguments().getNodes().clear();
resolveAggregateFunctionNodeByName(*function_node, "count");

View File

@ -143,13 +143,13 @@ private:
const auto & column_type = column_node_typed.getColumnType().get();
if (isDateOrDate32(column_type))
{
start_date_or_date_time = date_lut.dateToString(range.first.get<DateLUTImpl::Time>());
end_date_or_date_time = date_lut.dateToString(range.second.get<DateLUTImpl::Time>());
start_date_or_date_time = date_lut.dateToString(range.first.safeGet<DateLUTImpl::Time>());
end_date_or_date_time = date_lut.dateToString(range.second.safeGet<DateLUTImpl::Time>());
}
else if (isDateTime(column_type) || isDateTime64(column_type))
{
start_date_or_date_time = date_lut.timeToString(range.first.get<DateLUTImpl::Time>());
end_date_or_date_time = date_lut.timeToString(range.second.get<DateLUTImpl::Time>());
start_date_or_date_time = date_lut.timeToString(range.first.safeGet<DateLUTImpl::Time>());
end_date_or_date_time = date_lut.timeToString(range.second.safeGet<DateLUTImpl::Time>());
}
else [[unlikely]]
return {};

View File

@ -60,7 +60,7 @@ public:
{
const auto & second_const_value = second_const_node->getValue();
if (second_const_value.isNull()
|| (lower_name == "sum" && isInt64OrUInt64FieldType(second_const_value.getType()) && second_const_value.get<UInt64>() == 0
|| (lower_name == "sum" && isInt64OrUInt64FieldType(second_const_value.getType()) && second_const_value.safeGet<UInt64>() == 0
&& !if_node->getResultType()->isNullable()))
{
/// avg(if(cond, a, null)) -> avgIf(a::ResultTypeIf, cond)
@ -89,7 +89,7 @@ public:
{
const auto & first_const_value = first_const_node->getValue();
if (first_const_value.isNull()
|| (lower_name == "sum" && isInt64OrUInt64FieldType(first_const_value.getType()) && first_const_value.get<UInt64>() == 0
|| (lower_name == "sum" && isInt64OrUInt64FieldType(first_const_value.getType()) && first_const_value.safeGet<UInt64>() == 0
&& !if_node->getResultType()->isNullable()))
{
/// avg(if(cond, null, a) -> avgIf(a::ResultTypeIf, !cond))

View File

@ -66,7 +66,7 @@ public:
resolveAggregateFunctionNodeByName(*function_node, "countIf");
if (constant_value_literal.get<UInt64>() != 1)
if (constant_value_literal.safeGet<UInt64>() != 1)
{
/// Rewrite `sumIf(123, cond)` into `123 * countIf(cond)`
node = getMultiplyFunction(std::move(multiplier_node), node);
@ -105,8 +105,8 @@ public:
const auto & if_true_condition_constant_value_literal = if_true_condition_constant_node->getValue();
const auto & if_false_condition_constant_value_literal = if_false_condition_constant_node->getValue();
auto if_true_condition_value = if_true_condition_constant_value_literal.get<UInt64>();
auto if_false_condition_value = if_false_condition_constant_value_literal.get<UInt64>();
auto if_true_condition_value = if_true_condition_constant_value_literal.safeGet<UInt64>();
auto if_false_condition_value = if_false_condition_constant_value_literal.safeGet<UInt64>();
if (if_false_condition_value == 0)
{

View File

@ -471,7 +471,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_express
std::shared_ptr<Collator> collator;
if (order_by_element.getCollation())
collator = std::make_shared<Collator>(order_by_element.getCollation()->as<ASTLiteral &>().value.get<String &>());
collator = std::make_shared<Collator>(order_by_element.getCollation()->as<ASTLiteral &>().value.safeGet<String &>());
const auto & sort_expression_ast = order_by_element.children.at(0);
auto sort_expression = buildExpression(sort_expression_ast, context);

View File

@ -1273,7 +1273,7 @@ QueryTreeNodePtr IdentifierResolver::matchArrayJoinSubcolumns(
const auto & constant_node_value = constant_node.getValue();
if (constant_node_value.getType() == Field::Types::String)
{
array_join_subcolumn_prefix = constant_node_value.get<String>() + ".";
array_join_subcolumn_prefix = constant_node_value.safeGet<String>() + ".";
array_join_parent_column = argument_nodes.at(0).get();
}
}
@ -1287,7 +1287,7 @@ QueryTreeNodePtr IdentifierResolver::matchArrayJoinSubcolumns(
if (!second_argument || second_argument->getValue().getType() != Field::Types::String)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected constant string as second argument of getSubcolumn function {}", resolved_function->dumpTree());
const auto & resolved_subcolumn_path = second_argument->getValue().get<String &>();
const auto & resolved_subcolumn_path = second_argument->getValue().safeGet<String &>();
if (!startsWith(resolved_subcolumn_path, array_join_subcolumn_prefix))
return {};
@ -1331,7 +1331,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveExpressionFromArrayJoinExpression
size_t nested_function_arguments_size = nested_function_arguments.size();
const auto & nested_keys_names_constant_node = nested_function_arguments[0]->as<ConstantNode & >();
const auto & nested_keys_names = nested_keys_names_constant_node.getValue().get<Array &>();
const auto & nested_keys_names = nested_keys_names_constant_node.getValue().safeGet<Array &>();
size_t nested_keys_names_size = nested_keys_names.size();
if (nested_keys_names_size == nested_function_arguments_size - 1)
@ -1344,7 +1344,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveExpressionFromArrayJoinExpression
auto array_join_column = std::make_shared<ColumnNode>(array_join_column_expression_typed.getColumn(),
array_join_column_expression_typed.getColumnSource());
const auto & nested_key_name = nested_keys_names[i - 1].get<String &>();
const auto & nested_key_name = nested_keys_names[i - 1].safeGet<String &>();
Identifier nested_identifier = Identifier(nested_key_name);
array_join_resolved_expression = wrapExpressionNodeInTupleElement(array_join_column, nested_identifier, scope.context);
break;

View File

@ -748,11 +748,11 @@ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_
UInt64 pos;
if (constant_node->getValue().getType() == Field::Types::UInt64)
{
pos = constant_node->getValue().get<UInt64>();
pos = constant_node->getValue().safeGet<UInt64>();
}
else // Int64
{
auto value = constant_node->getValue().get<Int64>();
auto value = constant_node->getValue().safeGet<Int64>();
if (value > 0)
pos = value;
else

View File

@ -99,7 +99,7 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes&
"Invalid type in set. Expected tuple, got {}",
value.getTypeName());
const auto & tuple = value.template get<const Tuple &>();
const auto & tuple = value.template safeGet<const Tuple &>();
const DataTypePtr & value_type = value_types[collection_index];
const DataTypes & tuple_value_type = typeid_cast<const DataTypeTuple *>(value_type.get())->getElements();
@ -175,15 +175,15 @@ Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const
if (rhs_which_type.isArray())
{
const DataTypeArray * value_array_type = assert_cast<const DataTypeArray *>(value_type.get());
size_t value_array_size = value.get<const Array &>().size();
size_t value_array_size = value.safeGet<const Array &>().size();
DataTypes value_types(value_array_size, value_array_type->getNestedType());
result_block = createBlockFromCollection(value.get<const Array &>(), value_types, set_element_types, transform_null_in);
result_block = createBlockFromCollection(value.safeGet<const Array &>(), value_types, set_element_types, transform_null_in);
}
else if (rhs_which_type.isTuple())
{
const DataTypeTuple * value_tuple_type = assert_cast<const DataTypeTuple *>(value_type.get());
const DataTypes & value_types = value_tuple_type->getElements();
result_block = createBlockFromCollection(value.get<const Tuple &>(), value_types, set_element_types, transform_null_in);
result_block = createBlockFromCollection(value.safeGet<const Tuple &>(), value_types, set_element_types, transform_null_in);
}
else
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,

View File

@ -126,7 +126,7 @@ std::vector<Strings> BackupSettings::Util::clusterHostIDsFromAST(const IAST & as
throw Exception(
ErrorCodes::CANNOT_PARSE_BACKUP_SETTINGS,
"Setting cluster_host_ids has wrong format, must be array of arrays of string literals");
const auto & replicas = array_of_replicas->value.get<const Array &>();
const auto & replicas = array_of_replicas->value.safeGet<const Array &>();
res[i].resize(replicas.size());
for (size_t j = 0; j != replicas.size(); ++j)
{
@ -135,7 +135,7 @@ std::vector<Strings> BackupSettings::Util::clusterHostIDsFromAST(const IAST & as
throw Exception(
ErrorCodes::CANNOT_PARSE_BACKUP_SETTINGS,
"Setting cluster_host_ids has wrong format, must be array of arrays of string literals");
res[i][j] = replica.get<const String &>();
res[i][j] = replica.safeGet<const String &>();
}
}
}

View File

@ -490,6 +490,8 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
/// process_list_element_holder is used to make an element in ProcessList live while BACKUP is working asynchronously.
auto process_list_element = context_in_use->getProcessListElement();
/// Update context to preserve query information in processlist (settings, current_database)
process_list_element->updateContext(context_in_use);
thread_pool.scheduleOrThrowOnError(
[this,
@ -853,6 +855,8 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
/// process_list_element_holder is used to make an element in ProcessList live while RESTORE is working asynchronously.
auto process_list_element = context_in_use->getProcessListElement();
/// Update context to preserve query information in processlist (settings, current_database)
process_list_element->updateContext(context_in_use);
thread_pool.scheduleOrThrowOnError(
[this,

View File

@ -46,8 +46,8 @@ namespace
if (zookeeper_path_ast && (zookeeper_path_ast->value.getType() == Field::Types::String) &&
replica_name_ast && (replica_name_ast->value.getType() == Field::Types::String))
{
String & zookeeper_path_arg = zookeeper_path_ast->value.get<String>();
String & replica_name_arg = replica_name_ast->value.get<String>();
String & zookeeper_path_arg = zookeeper_path_ast->value.safeGet<String>();
String & replica_name_arg = replica_name_ast->value.safeGet<String>();
if (create.uuid != UUIDHelpers::Nil)
{
String table_uuid_str = toString(create.uuid);

View File

@ -31,7 +31,7 @@ namespace
{
if (field.getType() == Field::Types::String)
{
const String & str = field.get<const String &>();
const String & str = field.safeGet<const String &>();
if (str == "1" || boost::iequals(str, "true") || boost::iequals(str, "create"))
{
value = RestoreTableCreationMode::kCreate;
@ -54,7 +54,7 @@ namespace
if (field.getType() == Field::Types::UInt64)
{
UInt64 number = field.get<UInt64>();
UInt64 number = field.safeGet<UInt64>();
if (number == 1)
{
value = RestoreTableCreationMode::kCreate;
@ -95,7 +95,7 @@ namespace
{
if (field.getType() == Field::Types::String)
{
const String & str = field.get<const String &>();
const String & str = field.safeGet<const String &>();
if (str == "1" || boost::iequals(str, "true") || boost::iequals(str, "create"))
{
value = RestoreAccessCreationMode::kCreate;
@ -118,7 +118,7 @@ namespace
if (field.getType() == Field::Types::UInt64)
{
UInt64 number = field.get<UInt64>();
UInt64 number = field.safeGet<UInt64>();
if (number == 1)
{
value = RestoreAccessCreationMode::kCreate;

View File

@ -19,7 +19,7 @@ SettingFieldOptionalString::SettingFieldOptionalString(const Field & field)
if (field.getType() == Field::Types::String)
{
value = field.get<const String &>();
value = field.safeGet<const String &>();
return;
}

View File

@ -22,7 +22,7 @@ namespace ErrorCodes
if (field.getType() == Field::Types::String)
{
const String & str = field.get<const String &>();
const String & str = field.safeGet<const String &>();
if (str.empty())
{
value = std::nullopt;

View File

@ -1,4 +1,4 @@
add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
add_compile_options("$<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>")
if (USE_INCLUDE_WHAT_YOU_USE)
set (CMAKE_CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})
@ -602,10 +602,6 @@ endif()
dbms_target_link_libraries(PUBLIC ch_contrib::consistent_hashing)
if (TARGET ch_contrib::annoy)
dbms_target_link_libraries(PUBLIC ch_contrib::annoy)
endif()
if (TARGET ch_contrib::usearch)
dbms_target_link_libraries(PUBLIC ch_contrib::usearch)
endif()

View File

@ -2751,7 +2751,7 @@ void ClientBase::runLibFuzzer()
for (auto & arg : fuzzer_args_holder)
fuzzer_args.emplace_back(arg.data());
int fuzzer_argc = fuzzer_args.size();
int fuzzer_argc = static_cast<int>(fuzzer_args.size());
char ** fuzzer_argv = fuzzer_args.data();
LLVMFuzzerRunDriver(&fuzzer_argc, &fuzzer_argv, [](const uint8_t * data, size_t size)

View File

@ -214,7 +214,7 @@ void Suggest::fillWordsFromBlock(const Block & block)
Words new_words;
new_words.reserve(rows);
for (size_t i = 0; i < rows; ++i)
new_words.emplace_back(column[i].get<String>());
new_words.emplace_back(column[i].safeGet<String>());
addWords(std::move(new_words));
}

View File

@ -457,9 +457,9 @@ MutableColumnPtr ColumnAggregateFunction::cloneEmpty() const
Field ColumnAggregateFunction::operator[](size_t n) const
{
Field field = AggregateFunctionStateData();
field.get<AggregateFunctionStateData &>().name = type_string;
field.safeGet<AggregateFunctionStateData &>().name = type_string;
{
WriteBufferFromString buffer(field.get<AggregateFunctionStateData &>().data);
WriteBufferFromString buffer(field.safeGet<AggregateFunctionStateData &>().data);
func->serialize(data[n], buffer, version);
}
return field;
@ -467,12 +467,7 @@ Field ColumnAggregateFunction::operator[](size_t n) const
void ColumnAggregateFunction::get(size_t n, Field & res) const
{
res = AggregateFunctionStateData();
res.get<AggregateFunctionStateData &>().name = type_string;
{
WriteBufferFromString buffer(res.get<AggregateFunctionStateData &>().data);
func->serialize(data[n], buffer, version);
}
res = operator[](n);
}
StringRef ColumnAggregateFunction::getDataAt(size_t n) const
@ -552,7 +547,7 @@ void ColumnAggregateFunction::insert(const Field & x)
"Inserting field of type {} into ColumnAggregateFunction. Expected {}",
x.getTypeName(), Field::Types::AggregateFunctionState);
const auto & field_name = x.get<const AggregateFunctionStateData &>().name;
const auto & field_name = x.safeGet<const AggregateFunctionStateData &>().name;
if (type_string != field_name)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot insert field with type {} into column with type {}",
field_name, type_string);
@ -560,7 +555,7 @@ void ColumnAggregateFunction::insert(const Field & x)
ensureOwnership();
Arena & arena = createOrGetArena();
pushBackAndCreateState(data, arena, func.get());
ReadBufferFromString read_buffer(x.get<const AggregateFunctionStateData &>().data);
ReadBufferFromString read_buffer(x.safeGet<const AggregateFunctionStateData &>().data);
func->deserialize(data.back(), read_buffer, version, &arena);
}
@ -569,14 +564,14 @@ bool ColumnAggregateFunction::tryInsert(const DB::Field & x)
if (x.getType() != Field::Types::AggregateFunctionState)
return false;
const auto & field_name = x.get<const AggregateFunctionStateData &>().name;
const auto & field_name = x.safeGet<const AggregateFunctionStateData &>().name;
if (type_string != field_name)
return false;
ensureOwnership();
Arena & arena = createOrGetArena();
pushBackAndCreateState(data, arena, func.get());
ReadBufferFromString read_buffer(x.get<const AggregateFunctionStateData &>().data);
ReadBufferFromString read_buffer(x.safeGet<const AggregateFunctionStateData &>().data);
func->deserialize(data.back(), read_buffer, version, &arena);
return true;
}

View File

@ -141,7 +141,7 @@ void ColumnArray::get(size_t n, Field & res) const
size, max_array_size_as_field);
res = Array();
Array & res_arr = res.get<Array &>();
Array & res_arr = res.safeGet<Array &>();
res_arr.reserve(size);
for (size_t i = 0; i < size; ++i)
@ -309,7 +309,7 @@ void ColumnArray::updateHashFast(SipHash & hash) const
void ColumnArray::insert(const Field & x)
{
const Array & array = x.get<const Array &>();
const Array & array = x.safeGet<const Array &>();
size_t size = array.size();
for (size_t i = 0; i < size; ++i)
getData().insert(array[i]);
@ -321,7 +321,7 @@ bool ColumnArray::tryInsert(const Field & x)
if (x.getType() != Field::Types::Which::Array)
return false;
const Array & array = x.get<const Array &>();
const Array & array = x.safeGet<const Array &>();
size_t size = array.size();
for (size_t i = 0; i < size; ++i)
{
@ -452,6 +452,22 @@ void ColumnArray::reserve(size_t n)
getData().reserve(n); /// The average size of arrays is not taken into account here. Or it is considered to be no more than 1.
}
void ColumnArray::prepareForSquashing(const Columns & source_columns)
{
size_t new_size = size();
Columns source_data_columns;
source_data_columns.reserve(source_columns.size());
for (const auto & source_column : source_columns)
{
const auto & source_array_column = assert_cast<const ColumnArray &>(*source_column);
new_size += source_array_column.size();
source_data_columns.push_back(source_array_column.getDataPtr());
}
getOffsets().reserve_exact(new_size);
data->prepareForSquashing(source_data_columns);
}
void ColumnArray::shrinkToFit()
{
getOffsets().shrink_to_fit();

View File

@ -118,6 +118,7 @@ public:
void updatePermutationWithCollation(const Collator & collator, PermutationSortDirection direction, PermutationSortStability stability,
size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override;
void reserve(size_t n) override;
void prepareForSquashing(const Columns & source_columns) override;
void shrinkToFit() override;
void ensureOwnership() override;
size_t byteSize() const override;
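prepareForSquashing(), added here and in the map, nullable and dynamic columns below, lets a destination column reserve exact capacity (including nested offsets, null maps and variants) before several source chunks are appended. A hypothetical caller sketched against the generic IColumn interface:

#include <Columns/IColumn.h>

using namespace DB;

/// Hypothetical squashing step: reserve once for all source chunks, then append them.
MutableColumnPtr squashInto(MutableColumnPtr destination, const Columns & sources)
{
    destination->prepareForSquashing(sources);   /// recursively reserves nested columns
    for (const auto & source : sources)
        destination->insertRangeFrom(*source, 0, source->size());
    return destination;
}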

View File

@ -74,7 +74,7 @@ public:
void insertData(const char * src, size_t /*length*/) override;
void insertDefault() override { data.push_back(T()); }
void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); }
void insert(const Field & x) override { data.push_back(x.get<T>()); }
void insert(const Field & x) override { data.push_back(x.safeGet<T>()); }
bool tryInsert(const Field & x) override;
#if !defined(DEBUG_OR_SANITIZER_BUILD)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;

View File

@ -643,6 +643,116 @@ ColumnPtr ColumnDynamic::compress() const
});
}
void ColumnDynamic::prepareForSquashing(const Columns & source_columns)
{
if (source_columns.empty())
return;
/// Internal variants of source dynamic columns may differ.
/// We want to preallocate memory for all variants we will have after squashing.
/// It may happen that the total number of variants in source columns will
exceed the limit; in this case we will choose the most frequent variants.
/// First, preallocate memory for variant discriminators and offsets.
size_t new_size = size();
for (const auto & source_column : source_columns)
new_size += source_column->size();
auto & variant_col = getVariantColumn();
variant_col.getLocalDiscriminators().reserve_exact(new_size);
variant_col.getOffsets().reserve_exact(new_size);
/// Second, collect all variants and their total sizes.
std::unordered_map<String, size_t> total_variant_sizes;
DataTypes all_variants;
auto add_variants = [&](const ColumnDynamic & source_dynamic)
{
const auto & source_variant_column = source_dynamic.getVariantColumn();
const auto & source_variant_info = source_dynamic.getVariantInfo();
const auto & source_variants = assert_cast<const DataTypeVariant &>(*source_variant_info.variant_type).getVariants();
for (size_t i = 0; i != source_variants.size(); ++i)
{
const auto & variant_name = source_variant_info.variant_names[i];
auto it = total_variant_sizes.find(variant_name);
/// Add this variant to the list of all variants if we haven't seen it yet.
if (it == total_variant_sizes.end())
{
all_variants.push_back(source_variants[i]);
it = total_variant_sizes.emplace(variant_name, 0).first;
}
it->second += source_variant_column.getVariantByGlobalDiscriminator(i).size();
}
};
for (const auto & source_column : source_columns)
add_variants(assert_cast<const ColumnDynamic &>(*source_column));
/// Add variants from this dynamic column.
add_variants(*this);
DataTypePtr result_variant_type;
/// Check if the number of all variants exceeds the limit.
if (all_variants.size() > max_dynamic_types || (all_variants.size() == max_dynamic_types && !total_variant_sizes.contains("String")))
{
/// We want to keep the most frequent variants in the resulting dynamic column.
DataTypes result_variants;
result_variants.reserve(max_dynamic_types);
/// Add variants from current variant column as we will not rewrite it.
for (const auto & variant : assert_cast<const DataTypeVariant &>(*variant_info.variant_type).getVariants())
result_variants.push_back(variant);
/// Add String variant in advance (if we didn't add it yet) as we must have it across variants when we reach the limit.
if (!variant_info.variant_name_to_discriminator.contains("String"))
result_variants.push_back(std::make_shared<DataTypeString>());
/// Create list of remaining variants with their sizes and sort it.
std::vector<std::pair<size_t, DataTypePtr>> variants_with_sizes;
variants_with_sizes.reserve(all_variants.size() - variant_info.variant_names.size());
for (const auto & variant : all_variants)
{
/// Add variant to the list only if we haven't added it yet.
auto variant_name = variant->getName();
if (variant_name != "String" && !variant_info.variant_name_to_discriminator.contains(variant_name))
variants_with_sizes.emplace_back(total_variant_sizes[variant->getName()], variant);
}
std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater());
/// Add the most frequent variants until we reach max_dynamic_types.
size_t num_new_variants = max_dynamic_types - result_variants.size();
for (size_t i = 0; i != num_new_variants; ++i)
result_variants.push_back(variants_with_sizes[i].second);
result_variant_type = std::make_shared<DataTypeVariant>(result_variants);
}
else
{
result_variant_type = std::make_shared<DataTypeVariant>(all_variants);
}
if (!result_variant_type->equals(*variant_info.variant_type))
updateVariantInfoAndExpandVariantColumn(result_variant_type);
/// Now the current dynamic column has all resulting variants, and we can call
/// prepareForSquashing on them to preallocate the memory.
for (size_t i = 0; i != variant_info.variant_names.size(); ++i)
{
Columns source_variant_columns;
source_variant_columns.reserve(source_columns.size());
for (const auto & source_column : source_columns)
{
const auto & source_dynamic_column = assert_cast<const ColumnDynamic &>(*source_column);
const auto & source_variant_info = source_dynamic_column.getVariantInfo();
/// Try to find this variant in the current source column.
auto it = source_variant_info.variant_name_to_discriminator.find(variant_info.variant_names[i]);
if (it != source_variant_info.variant_name_to_discriminator.end())
source_variant_columns.push_back(source_dynamic_column.getVariantColumn().getVariantPtrByGlobalDiscriminator(it->second));
}
variant_col.getVariantByGlobalDiscriminator(i).prepareForSquashing(source_variant_columns);
}
}
void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source_columns)
{
if (!empty())
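A standalone sketch of the variant-selection rule used by prepareForSquashing above, written with plain standard-library types rather than the ColumnDynamic API: keep every variant already present in the destination, always keep String once the limit is reached, and fill the remaining slots with the largest remaining variants by total size:

#include <algorithm>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

std::vector<std::string> selectVariants(
    const std::unordered_set<std::string> & existing,               /// variants already in the destination
    const std::unordered_map<std::string, size_t> & total_sizes,    /// every variant -> summed size across sources
    size_t max_dynamic_types)
{
    std::vector<std::string> result(existing.begin(), existing.end());
    if (!existing.contains("String"))
        result.push_back("String");   /// String must survive once the limit is hit

    /// Rank the remaining variants by how many values they hold in total.
    std::vector<std::pair<size_t, std::string>> candidates;
    for (const auto & [name, size] : total_sizes)
        if (name != "String" && !existing.contains(name))
            candidates.emplace_back(size, name);
    std::sort(candidates.begin(), candidates.end(), std::greater());

    /// Fill the free slots with the most frequent variants.
    for (size_t i = 0; i < candidates.size() && result.size() < max_dynamic_types; ++i)
        result.push_back(candidates[i].second);
    return result;
}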

View File

@ -254,6 +254,8 @@ public:
variant_column->reserve(n);
}
void prepareForSquashing(const Columns & source_columns) override;
void ensureOwnership() override
{
variant_column->ensureOwnership();

View File

@ -59,7 +59,7 @@ bool ColumnFixedString::isDefaultAt(size_t index) const
void ColumnFixedString::insert(const Field & x)
{
const String & s = x.get<const String &>();
const String & s = x.safeGet<const String &>();
insertData(s.data(), s.size());
}
@ -67,7 +67,7 @@ bool ColumnFixedString::tryInsert(const Field & x)
{
if (x.getType() != Field::Types::Which::String)
return false;
const String & s = x.get<const String &>();
const String & s = x.safeGet<const String &>();
if (s.size() > n)
return false;
insertData(s.data(), s.size());

View File

@ -72,7 +72,7 @@ void ColumnMap::get(size_t n, Field & res) const
size_t size = offsets[n] - offsets[n - 1];
res = Map();
auto & map = res.get<Map &>();
auto & map = res.safeGet<Map &>();
map.reserve(size);
for (size_t i = 0; i < size; ++i)
@ -96,7 +96,7 @@ void ColumnMap::insertData(const char *, size_t)
void ColumnMap::insert(const Field & x)
{
const auto & map = x.get<const Map &>();
const auto & map = x.safeGet<const Map &>();
nested->insert(Array(map.begin(), map.end()));
}
@ -105,7 +105,7 @@ bool ColumnMap::tryInsert(const Field & x)
if (x.getType() != Field::Types::Which::Map)
return false;
const auto & map = x.get<const Map &>();
const auto & map = x.safeGet<const Map &>();
return nested->tryInsert(Array(map.begin(), map.end()));
}
@ -249,6 +249,15 @@ void ColumnMap::reserve(size_t n)
nested->reserve(n);
}
void ColumnMap::prepareForSquashing(const Columns & source_columns)
{
Columns nested_source_columns;
nested_source_columns.reserve(source_columns.size());
for (const auto & source_column : source_columns)
nested_source_columns.push_back(assert_cast<const ColumnMap &>(*source_column).getNestedColumnPtr());
nested->prepareForSquashing(nested_source_columns);
}
void ColumnMap::shrinkToFit()
{
nested->shrinkToFit();
@ -288,8 +297,8 @@ void ColumnMap::getExtremes(Field & min, Field & max) const
/// Convert result Array fields to Map fields because client expect min and max field to have type Map
Array nested_min_value = nested_min.get<Array>();
Array nested_max_value = nested_max.get<Array>();
Array nested_min_value = nested_min.safeGet<Array>();
Array nested_max_value = nested_max.safeGet<Array>();
Map map_min_value(nested_min_value.begin(), nested_min_value.end());
Map map_max_value(nested_max_value.begin(), nested_max_value.end());

View File

@ -94,6 +94,7 @@ public:
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override;
void reserve(size_t n) override;
void prepareForSquashing(const Columns & source_columns) override;
void shrinkToFit() override;
void ensureOwnership() override;
size_t byteSize() const override;

View File

@ -706,6 +706,22 @@ void ColumnNullable::reserve(size_t n)
getNullMapData().reserve(n);
}
void ColumnNullable::prepareForSquashing(const Columns & source_columns)
{
size_t new_size = size();
Columns nested_source_columns;
nested_source_columns.reserve(source_columns.size());
for (const auto & source_column : source_columns)
{
const auto & source_nullable_column = assert_cast<const ColumnNullable &>(*source_column);
new_size += source_nullable_column.size();
nested_source_columns.push_back(source_nullable_column.getNestedColumnPtr());
}
nested_column->prepareForSquashing(nested_source_columns);
getNullMapData().reserve(new_size);
}
void ColumnNullable::shrinkToFit()
{
getNestedColumn().shrinkToFit();

View File

@ -125,6 +125,7 @@ public:
size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override;
size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override;
void reserve(size_t n) override;
void prepareForSquashing(const Columns & source_columns) override;
void shrinkToFit() override;
void ensureOwnership() override;
size_t byteSize() const override;

View File

@ -698,7 +698,7 @@ void ColumnObject::forEachSubcolumnRecursively(RecursiveMutableColumnCallback ca
void ColumnObject::insert(const Field & field)
{
const auto & object = field.get<const Object &>();
const auto & object = field.safeGet<const Object &>();
HashSet<StringRef, StringRefHash> inserted_paths;
size_t old_size = size();
@ -754,7 +754,7 @@ void ColumnObject::get(size_t n, Field & res) const
{
assert(n < size());
res = Object();
auto & object = res.get<Object &>();
auto & object = res.safeGet<Object &>();
for (const auto & entry : subcolumns)
{

Some files were not shown because too many files have changed in this diff.