mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Merge branch 'master' into divanik/add_local_and_azure_iceberg_support
This commit is contained in:
commit
f9f41405cc
3
.github/PULL_REQUEST_TEMPLATE.md
vendored
3
.github/PULL_REQUEST_TEMPLATE.md
vendored
@ -59,6 +59,9 @@ At a minimum, the following information should be added (but add more as needed)
|
||||
- [ ] <!---ci_exclude_tsan|msan|ubsan|coverage--> Exclude: All with TSAN, MSAN, UBSAN, Coverage
|
||||
- [ ] <!---ci_exclude_aarch64|release|debug--> Exclude: All with aarch64, release, debug
|
||||
---
|
||||
- [ ] <!---ci_include_fuzzer--> Run only fuzzers related jobs (libFuzzer fuzzers, AST fuzzers, etc.)
|
||||
- [ ] <!---ci_exclude_ast--> Exclude: AST fuzzers
|
||||
---
|
||||
- [ ] <!---do_not_test--> Do not test
|
||||
- [ ] <!---woolen_wolfdog--> Woolen Wolfdog
|
||||
- [ ] <!---upload_all--> Upload binaries for special builds
|
||||
|
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -230,9 +230,6 @@
|
||||
[submodule "contrib/minizip-ng"]
|
||||
path = contrib/minizip-ng
|
||||
url = https://github.com/zlib-ng/minizip-ng
|
||||
[submodule "contrib/annoy"]
|
||||
path = contrib/annoy
|
||||
url = https://github.com/ClickHouse/annoy
|
||||
[submodule "contrib/qpl"]
|
||||
path = contrib/qpl
|
||||
url = https://github.com/intel/qpl
|
||||
|
@ -1,4 +1,4 @@
|
||||
add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
|
||||
add_compile_options("$<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>")
|
||||
|
||||
if (USE_CLANG_TIDY)
|
||||
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
|
||||
|
@ -58,6 +58,10 @@ namespace Net
|
||||
|
||||
void setKeepAliveTimeout(Poco::Timespan keepAliveTimeout);
|
||||
|
||||
size_t getKeepAliveTimeout() const { return _keepAliveTimeout.totalSeconds(); }
|
||||
|
||||
size_t getMaxKeepAliveRequests() const { return _maxKeepAliveRequests; }
|
||||
|
||||
private:
|
||||
bool _firstRequest;
|
||||
Poco::Timespan _keepAliveTimeout;
|
||||
|
@ -19,11 +19,11 @@ namespace Poco {
|
||||
namespace Net {
|
||||
|
||||
|
||||
HTTPServerSession::HTTPServerSession(const StreamSocket& socket, HTTPServerParams::Ptr pParams):
|
||||
HTTPSession(socket, pParams->getKeepAlive()),
|
||||
_firstRequest(true),
|
||||
_keepAliveTimeout(pParams->getKeepAliveTimeout()),
|
||||
_maxKeepAliveRequests(pParams->getMaxKeepAliveRequests())
|
||||
HTTPServerSession::HTTPServerSession(const StreamSocket & socket, HTTPServerParams::Ptr pParams)
|
||||
: HTTPSession(socket, pParams->getKeepAlive())
|
||||
, _firstRequest(true)
|
||||
, _keepAliveTimeout(pParams->getKeepAliveTimeout())
|
||||
, _maxKeepAliveRequests(pParams->getMaxKeepAliveRequests())
|
||||
{
|
||||
setTimeout(pParams->getTimeout());
|
||||
}
|
||||
@ -52,11 +52,12 @@ bool HTTPServerSession::hasMoreRequests()
|
||||
}
|
||||
else if (_maxKeepAliveRequests != 0 && getKeepAlive())
|
||||
{
|
||||
if (_maxKeepAliveRequests > 0)
|
||||
--_maxKeepAliveRequests;
|
||||
return buffered() > 0 || socket().poll(_keepAliveTimeout, Socket::SELECT_READ);
|
||||
}
|
||||
else return false;
|
||||
if (_maxKeepAliveRequests > 0)
|
||||
--_maxKeepAliveRequests;
|
||||
return buffered() > 0 || socket().poll(_keepAliveTimeout, Socket::SELECT_READ);
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
@ -57,8 +57,8 @@ option(WITH_COVERAGE "Instrumentation for code coverage with default implementat
|
||||
|
||||
if (WITH_COVERAGE)
|
||||
message (STATUS "Enabled instrumentation for code coverage")
|
||||
set(COVERAGE_FLAGS "SHELL:-fprofile-instr-generate -fcoverage-mapping")
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
|
||||
set (COVERAGE_FLAGS -fprofile-instr-generate -fcoverage-mapping)
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
|
||||
endif()
|
||||
|
||||
option (SANITIZE_COVERAGE "Instrumentation for code coverage with custom callbacks" OFF)
|
||||
|
3
contrib/CMakeLists.txt
vendored
3
contrib/CMakeLists.txt
vendored
@ -205,9 +205,8 @@ add_contrib (morton-nd-cmake morton-nd)
|
||||
if (ARCH_S390X)
|
||||
add_contrib(crc32-s390x-cmake crc32-s390x)
|
||||
endif()
|
||||
add_contrib (annoy-cmake annoy)
|
||||
|
||||
option(ENABLE_USEARCH "Enable USearch (Approximate Neighborhood Search, HNSW) support" ${ENABLE_LIBRARIES})
|
||||
option(ENABLE_USEARCH "Enable USearch" ${ENABLE_LIBRARIES})
|
||||
if (ENABLE_USEARCH)
|
||||
add_contrib (FP16-cmake FP16)
|
||||
add_contrib (robin-map-cmake robin-map)
|
||||
|
1
contrib/annoy
vendored
1
contrib/annoy
vendored
@ -1 +0,0 @@
|
||||
Subproject commit f2ac8e7b48f9a9cf676d3b58286e5455aba8e956
|
@ -1,24 +0,0 @@
|
||||
option(ENABLE_ANNOY "Enable Annoy index support" ${ENABLE_LIBRARIES})
|
||||
|
||||
# Annoy index should be disabled with undefined sanitizer. Because of memory storage optimizations
|
||||
# (https://github.com/ClickHouse/annoy/blob/9d8a603a4cd252448589e84c9846f94368d5a289/src/annoylib.h#L442-L463)
|
||||
# UBSan fails and leads to crash. Simmilar issue is already opened in Annoy repo
|
||||
# https://github.com/spotify/annoy/issues/456
|
||||
# Problem with aligment can lead to errors like
|
||||
# (https://stackoverflow.com/questions/46790550/c-undefined-behavior-strict-aliasing-rule-or-incorrect-alignment)
|
||||
# or will lead to crash on arm https://developer.arm.com/documentation/ka003038/latest
|
||||
# This issues should be resolved before annoy became non-experimental (--> setting "allow_experimental_annoy_index")
|
||||
if ((NOT ENABLE_ANNOY) OR (SANITIZE STREQUAL "undefined") OR (ARCH_AARCH64))
|
||||
message (STATUS "Not using annoy")
|
||||
return()
|
||||
endif()
|
||||
|
||||
set(ANNOY_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/annoy")
|
||||
set(ANNOY_SOURCE_DIR "${ANNOY_PROJECT_DIR}/src")
|
||||
|
||||
add_library(_annoy INTERFACE)
|
||||
target_include_directories(_annoy SYSTEM INTERFACE ${ANNOY_SOURCE_DIR})
|
||||
|
||||
add_library(ch_contrib::annoy ALIAS _annoy)
|
||||
target_compile_definitions(_annoy INTERFACE ENABLE_ANNOY)
|
||||
target_compile_definitions(_annoy INTERFACE ANNOYLIB_MULTITHREADED_BUILD)
|
2
contrib/libprotobuf-mutator
vendored
2
contrib/libprotobuf-mutator
vendored
@ -1 +1 @@
|
||||
Subproject commit 1f95f8083066f5b38fd2db172e7e7f9aa7c49d2d
|
||||
Subproject commit b922c8ab9004ef9944982e4f165e2747b13223fa
|
@ -1,9 +1,7 @@
|
||||
set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch")
|
||||
set(USEARCH_SOURCE_DIR "${USEARCH_PROJECT_DIR}/include")
|
||||
|
||||
set(FP16_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/FP16")
|
||||
set(ROBIN_MAP_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/robin-map")
|
||||
set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD-map")
|
||||
set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD")
|
||||
set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch")
|
||||
|
||||
add_library(_usearch INTERFACE)
|
||||
|
||||
@ -11,7 +9,6 @@ target_include_directories(_usearch SYSTEM INTERFACE
|
||||
${FP16_PROJECT_DIR}/include
|
||||
${ROBIN_MAP_PROJECT_DIR}/include
|
||||
${SIMSIMD_PROJECT_DIR}/include
|
||||
${USEARCH_SOURCE_DIR})
|
||||
${USEARCH_PROJECT_DIR}/include)
|
||||
|
||||
add_library(ch_contrib::usearch ALIAS _usearch)
|
||||
target_compile_definitions(_usearch INTERFACE ENABLE_USEARCH)
|
||||
|
@ -108,7 +108,8 @@ if [ -n "$MAKE_DEB" ]; then
|
||||
bash -x /build/packages/build
|
||||
fi
|
||||
|
||||
mv ./programs/clickhouse* /output || mv ./programs/*_fuzzer /output
|
||||
mv ./programs/clickhouse* /output ||:
|
||||
mv ./programs/*_fuzzer /output ||:
|
||||
[ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output
|
||||
[ -x ./programs/self-extracting/clickhouse-stripped ] && mv ./programs/self-extracting/clickhouse-stripped /output
|
||||
[ -x ./programs/self-extracting/clickhouse-keeper ] && mv ./programs/self-extracting/clickhouse-keeper /output
|
||||
|
@ -54,8 +54,6 @@ source /utils.lib
|
||||
/usr/share/clickhouse-test/config/install.sh
|
||||
|
||||
./setup_minio.sh stateless
|
||||
./mc admin trace clickminio > /test_output/minio.log &
|
||||
MC_ADMIN_PID=$!
|
||||
|
||||
./setup_hdfs_minicluster.sh
|
||||
|
||||
@ -176,6 +174,55 @@ done
|
||||
setup_logs_replication
|
||||
attach_gdb_to_clickhouse
|
||||
|
||||
# create tables for minio log webhooks
|
||||
clickhouse-client --query "CREATE TABLE minio_audit_logs
|
||||
(
|
||||
log String,
|
||||
event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '\"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC')
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY tuple()"
|
||||
|
||||
clickhouse-client --query "CREATE TABLE minio_server_logs
|
||||
(
|
||||
log String,
|
||||
event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '\"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC')
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY tuple()"
|
||||
|
||||
# create minio log webhooks for both audit and server logs
|
||||
# use async inserts to avoid creating too many parts
|
||||
./mc admin config set clickminio logger_webhook:ch_server_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_server_logs%20FORMAT%20LineAsString" queue_size=1000000 batch_size=500
|
||||
./mc admin config set clickminio audit_webhook:ch_audit_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_audit_logs%20FORMAT%20LineAsString" queue_size=1000000 batch_size=500
|
||||
|
||||
max_retries=100
|
||||
retry=1
|
||||
while [ $retry -le $max_retries ]; do
|
||||
echo "clickminio restart attempt $retry:"
|
||||
|
||||
output=$(./mc admin service restart clickminio --wait --json 2>&1 | jq -r .status)
|
||||
echo "Output of restart status: $output"
|
||||
|
||||
expected_output="success
|
||||
success"
|
||||
if [ "$output" = "$expected_output" ]; then
|
||||
echo "Restarted clickminio successfully."
|
||||
break
|
||||
fi
|
||||
|
||||
sleep 1
|
||||
|
||||
retry=$((retry + 1))
|
||||
done
|
||||
|
||||
if [ $retry -gt $max_retries ]; then
|
||||
echo "Failed to restart clickminio after $max_retries attempts."
|
||||
fi
|
||||
|
||||
./mc admin trace clickminio > /test_output/minio.log &
|
||||
MC_ADMIN_PID=$!
|
||||
|
||||
function fn_exists() {
|
||||
declare -F "$1" > /dev/null;
|
||||
}
|
||||
@ -339,6 +386,14 @@ do
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
# collect minio audit and server logs
|
||||
# wait for minio to flush its batch if it has any
|
||||
sleep 1
|
||||
clickhouse-client -q "SYSTEM FLUSH ASYNC INSERT QUEUE"
|
||||
clickhouse-client -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow"
|
||||
clickhouse-client -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow"
|
||||
|
||||
# Stop server so we can safely read data with clickhouse-local.
|
||||
# Why do we read data with clickhouse-local?
|
||||
# Because it's the simplest way to read it when server has crashed.
|
||||
|
@ -99,10 +99,9 @@ upload_data() {
|
||||
# iterating over globs will cause redundant file variable to be
|
||||
# a path to a file, not a filename
|
||||
# shellcheck disable=SC2045
|
||||
for file in $(ls "${data_path}"); do
|
||||
echo "${file}";
|
||||
./mc cp "${data_path}"/"${file}" clickminio/test/"${file}";
|
||||
done
|
||||
if [ -d "${data_path}" ]; then
|
||||
./mc cp --recursive "${data_path}"/ clickminio/test/
|
||||
fi
|
||||
}
|
||||
|
||||
setup_aws_credentials() {
|
||||
|
@ -14,7 +14,7 @@ Each functional test sends one or multiple queries to the running ClickHouse ser
|
||||
|
||||
Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from ClickHouse and it is available to general public.
|
||||
|
||||
Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`.
|
||||
Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`.
|
||||
|
||||
:::note
|
||||
A common mistake when testing data types `DateTime` and `DateTime64` is assuming that the server uses a specific time zone (e.g. "UTC"). This is not the case, time zones in CI test runs
|
||||
@ -38,7 +38,7 @@ For more options, see `tests/clickhouse-test --help`. You can simply run all tes
|
||||
|
||||
### Adding a New Test
|
||||
|
||||
To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client --multiquery < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.
|
||||
To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.
|
||||
|
||||
Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables.
|
||||
|
||||
|
@ -17,7 +17,7 @@ In terms of SQL, the nearest neighborhood problem can be expressed as follows:
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM table_with_ann_index
|
||||
FROM table
|
||||
ORDER BY Distance(vectors, Point)
|
||||
LIMIT N
|
||||
```
|
||||
@ -27,75 +27,109 @@ Function `Distance` computes the distance between two vectors. Often, the Euclid
|
||||
distance functions](/docs/en/sql-reference/functions/distance-functions.md) are also possible. `Point` is the reference point, e.g. `(0.17,
|
||||
0.33, ...)`, and `N` limits the number of search results.
|
||||
|
||||
An alternative formulation of the nearest neighborhood search problem looks as follows:
|
||||
This query returns the top-`N` closest points to the reference point. Parameter `N` limits the number of returned values which is useful for
|
||||
situations where `MaxDistance` is difficult to determine in advance.
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM table_with_ann_index
|
||||
WHERE Distance(vectors, Point) < MaxDistance
|
||||
LIMIT N
|
||||
```
|
||||
|
||||
While the first query returns the top-`N` closest points to the reference point, the second query returns all points closer to the reference
|
||||
point than a maximally allowed radius `MaxDistance`. Parameter `N` limits the number of returned values which is useful for situations where
|
||||
`MaxDistance` is difficult to determine in advance.
|
||||
|
||||
With brute force search, both queries are expensive (linear in the number of points) because the distance between all points in `vectors` and
|
||||
With brute force search, the query is expensive (linear in the number of points) because the distance between all points in `vectors` and
|
||||
`Point` must be computed. To speed this process up, Approximate Nearest Neighbor Search Indexes (ANN indexes) store a compact representation
|
||||
of the search space (using clustering, search trees, etc.) which allows to compute an approximate answer much quicker (in sub-linear time).
|
||||
|
||||
# Creating and Using ANN Indexes {#creating_using_ann_indexes}
|
||||
# Creating and Using Vector Similarity Indexes
|
||||
|
||||
Syntax to create an ANN index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column:
|
||||
Syntax to create a vector similarity index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table_with_ann_index
|
||||
CREATE TABLE table
|
||||
(
|
||||
`id` Int64,
|
||||
`vectors` Array(Float32),
|
||||
INDEX [ann_index_name vectors TYPE [ann_index_type]([ann_index_parameters]) [GRANULARITY [N]]
|
||||
id Int64,
|
||||
vectors Array(Float32),
|
||||
INDEX index_name vectors TYPE vector_similarity(method, distance_function[, quantization, connectivity, expansion_add, expansion_search]) [GRANULARITY N]
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
```
|
||||
|
||||
Parameters:
|
||||
- `method`: Supports currently only `hnsw`.
|
||||
- `distance_function`: either `L2Distance` (the [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance) - the length of a
|
||||
line between two points in Euclidean space), or `cosineDistance` (the [cosine
|
||||
distance](https://en.wikipedia.org/wiki/Cosine_similarity#Cosine_distance)- the angle between two non-zero vectors).
|
||||
- `quantization`: either `f32`, `f16`, or `i8` for storing the vector with reduced precision (optional, default: `f32`)
|
||||
- `m`: the number of neighbors per graph node (optional, default: 16)
|
||||
- `ef_construction`: (optional, default: 128)
|
||||
- `ef_search`: (optional, default: 64)
|
||||
|
||||
Example:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table
|
||||
(
|
||||
id Int64,
|
||||
vectors Array(Float32),
|
||||
INDEX idx vectors TYPE vector_similarity('hnsw', 'L2Distance') -- Alternative syntax: TYPE vector_similarity(hnsw, L2Distance)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
```
|
||||
|
||||
Vector similarity indexes are based on the [USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW
|
||||
algorithm](https://arxiv.org/abs/1603.09320), i.e., a hierarchical graph where each point represents a vector and the edges represent
|
||||
similarity. Such hierarchical structures can be very efficient on large collections. They may often fetch 0.05% or less data from the
|
||||
overall dataset, while still providing 99% recall. This is especially useful when working with high-dimensional vectors, that are expensive
|
||||
to load and compare. The library also has several hardware-specific SIMD optimizations to accelerate further distance computations on modern
|
||||
Arm (NEON and SVE) and x86 (AVX2 and AVX-512) CPUs and OS-specific optimizations to allow efficient navigation around immutable persistent
|
||||
files, without loading them into RAM.
|
||||
|
||||
USearch indexes are currently experimental, to use them you first need to `SET allow_experimental_vector_similarity_index = 1`.
|
||||
|
||||
Vector similarity indexes currently support two distance functions:
|
||||
- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space
|
||||
([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)).
|
||||
- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
|
||||
([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
|
||||
|
||||
Vector similarity indexes allows storing the vectors in reduced precision formats. Supported scalar kinds are `f64`, `f32`, `f16` or `i8`.
|
||||
If no scalar kind was specified during index creation, `f16` is used as default.
|
||||
|
||||
For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
|
||||
distance function was specified during index creation, `L2Distance` is used as default.
|
||||
|
||||
:::note
|
||||
All arrays must have same length. To avoid errors, you can use a
|
||||
[CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints), for example, `CONSTRAINT constraint_name_1 CHECK
|
||||
length(vectors) = 256`. Also, empty `Arrays` and unspecified `Array` values in INSERT statements (i.e. default values) are not supported.
|
||||
:::
|
||||
|
||||
:::note
|
||||
The vector similarity index currently does not work with per-table, non-default `index_granularity` settings (see
|
||||
[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
|
||||
:::
|
||||
|
||||
ANN indexes are built during column insertion and merge. As a result, `INSERT` and `OPTIMIZE` statements will be slower than for ordinary
|
||||
tables. ANNIndexes are ideally used only with immutable or rarely changed data, respectively when are far more read requests than write
|
||||
requests.
|
||||
|
||||
ANN indexes support two types of queries:
|
||||
|
||||
- ORDER BY queries:
|
||||
ANN indexes support these queries:
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM table_with_ann_index
|
||||
FROM table
|
||||
[WHERE ...]
|
||||
ORDER BY Distance(vectors, Point)
|
||||
LIMIT N
|
||||
```
|
||||
|
||||
- WHERE queries:
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM table_with_ann_index
|
||||
WHERE Distance(vectors, Point) < MaxDistance
|
||||
LIMIT N
|
||||
```
|
||||
|
||||
:::tip
|
||||
To avoid writing out large vectors, you can use [query
|
||||
parameters](/docs/en/interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters), e.g.
|
||||
|
||||
```bash
|
||||
clickhouse-client --param_vec='hello' --query="SELECT * FROM table_with_ann_index WHERE L2Distance(vectors, {vec: Array(Float32)}) < 1.0"
|
||||
clickhouse-client --param_vec='hello' --query="SELECT * FROM table WHERE L2Distance(vectors, {vec: Array(Float32)}) < 1.0"
|
||||
```
|
||||
:::
|
||||
|
||||
**Restrictions**: Queries that contain both a `WHERE Distance(vectors, Point) < MaxDistance` and an `ORDER BY Distance(vectors, Point)`
|
||||
clause cannot use ANN indexes. Also, the approximate algorithms used to determine the nearest neighbors require a limit, hence queries
|
||||
without `LIMIT` clause cannot utilize ANN indexes. Also, ANN indexes are only used if the query has a `LIMIT` value smaller than setting
|
||||
**Restrictions**: Approximate algorithms used to determine the nearest neighbors require a limit, hence queries without `LIMIT` clause
|
||||
cannot utilize ANN indexes. Also, ANN indexes are only used if the query has a `LIMIT` value smaller than setting
|
||||
`max_limit_for_ann_queries` (default: 1 million rows). This is a safeguard to prevent large memory allocations by external libraries for
|
||||
approximate neighbor search.
|
||||
|
||||
@ -122,128 +156,3 @@ brute-force distance calculation over all rows of the granules. With a small `GR
|
||||
equally good, only the processing performance differs. It is generally recommended to use a large `GRANULARITY` for ANN indexes and fall
|
||||
back to a smaller `GRANULARITY` values only in case of problems like excessive memory consumption of the ANN structures. If no `GRANULARITY`
|
||||
was specified for ANN indexes, the default value is 100 million.
|
||||
|
||||
|
||||
# Available ANN Indexes {#available_ann_indexes}
|
||||
|
||||
- [Annoy](/docs/en/engines/table-engines/mergetree-family/annindexes.md#annoy-annoy)
|
||||
|
||||
- [USearch](/docs/en/engines/table-engines/mergetree-family/annindexes.md#usearch-usearch)
|
||||
|
||||
## Annoy {#annoy}
|
||||
|
||||
Annoy indexes are currently experimental, to use them you first need to `SET allow_experimental_annoy_index = 1`. They are also currently
|
||||
disabled on ARM due to memory safety problems with the algorithm.
|
||||
|
||||
This type of ANN index is based on the [Annoy library](https://github.com/spotify/annoy) which recursively divides the space into random
|
||||
linear surfaces (lines in 2D, planes in 3D etc.).
|
||||
|
||||
<div class='vimeo-container'>
|
||||
<iframe src="//www.youtube.com/embed/QkCCyLW0ehU"
|
||||
width="640"
|
||||
height="360"
|
||||
frameborder="0"
|
||||
allow="autoplay;
|
||||
fullscreen;
|
||||
picture-in-picture"
|
||||
allowfullscreen>
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
Syntax to create an Annoy index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table_with_annoy_index
|
||||
(
|
||||
id Int64,
|
||||
vectors Array(Float32),
|
||||
INDEX [ann_index_name] vectors TYPE annoy([Distance[, NumTrees]]) [GRANULARITY N]
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
```
|
||||
|
||||
Annoy currently supports two distance functions:
|
||||
- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space
|
||||
([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)).
|
||||
- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
|
||||
([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
|
||||
|
||||
For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
|
||||
distance function was specified during index creation, `L2Distance` is used as default.
|
||||
|
||||
Parameter `NumTrees` is the number of trees which the algorithm creates (default if not specified: 100). Higher values of `NumTree` mean
|
||||
more accurate search results but slower index creation / query times (approximately linearly) as well as larger index sizes.
|
||||
|
||||
:::note
|
||||
All arrays must have same length. To avoid errors, you can use a
|
||||
[CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints), for example, `CONSTRAINT constraint_name_1 CHECK
|
||||
length(vectors) = 256`. Also, empty `Arrays` and unspecified `Array` values in INSERT statements (i.e. default values) are not supported.
|
||||
:::
|
||||
|
||||
The creation of Annoy indexes (whenever a new part is build, e.g. at the end of a merge) is a relatively slow process. You can increase
|
||||
setting `max_threads_for_annoy_index_creation` (default: 4) which controls how many threads are used to create an Annoy index. Please be
|
||||
careful with this setting, it is possible that multiple indexes are created in parallel in which case there can be overparallelization.
|
||||
|
||||
Setting `annoy_index_search_k_nodes` (default: `NumTrees * LIMIT`) determines how many tree nodes are inspected during SELECTs. Larger
|
||||
values mean more accurate results at the cost of longer query runtime:
|
||||
|
||||
```sql
|
||||
SELECT *
|
||||
FROM table_name
|
||||
ORDER BY L2Distance(vectors, Point)
|
||||
LIMIT N
|
||||
SETTINGS annoy_index_search_k_nodes=100;
|
||||
```
|
||||
|
||||
:::note
|
||||
The Annoy index currently does not work with per-table, non-default `index_granularity` settings (see
|
||||
[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
|
||||
:::
|
||||
|
||||
## USearch {#usearch}
|
||||
|
||||
This type of ANN index is based on the [USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW
|
||||
algorithm](https://arxiv.org/abs/1603.09320), i.e., builds a hierarchical graph where each point represents a vector and the edges represent
|
||||
similarity. Such hierarchical structures can be very efficient on large collections. They may often fetch 0.05% or less data from the
|
||||
overall dataset, while still providing 99% recall. This is especially useful when working with high-dimensional vectors,
|
||||
that are expensive to load and compare. The library also has several hardware-specific SIMD optimizations to accelerate further
|
||||
distance computations on modern Arm (NEON and SVE) and x86 (AVX2 and AVX-512) CPUs and OS-specific optimizations to allow efficient
|
||||
navigation around immutable persistent files, without loading them into RAM.
|
||||
|
||||
<div class='vimeo-container'>
|
||||
<iframe src="//www.youtube.com/embed/UMrhB3icP9w"
|
||||
width="640"
|
||||
height="360"
|
||||
frameborder="0"
|
||||
allow="autoplay;
|
||||
fullscreen;
|
||||
picture-in-picture"
|
||||
allowfullscreen>
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
Syntax to create an USearch index over an [Array](../../../sql-reference/data-types/array.md) column:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table_with_usearch_index
|
||||
(
|
||||
id Int64,
|
||||
vectors Array(Float32),
|
||||
INDEX [ann_index_name] vectors TYPE usearch([Distance[, ScalarKind]]) [GRANULARITY N]
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
```
|
||||
|
||||
USearch currently supports two distance functions:
|
||||
- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space
|
||||
([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)).
|
||||
- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
|
||||
([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
|
||||
|
||||
USearch allows storing the vectors in reduced precision formats. Supported scalar kinds are `f64`, `f32`, `f16` or `i8`. If no scalar kind
|
||||
was specified during index creation, `f16` is used as default.
|
||||
|
||||
For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
|
||||
distance function was specified during index creation, `L2Distance` is used as default.
|
||||
|
@ -143,6 +143,18 @@ value can be specified at session, profile or query level using setting [query_c
|
||||
Entries in the query cache are compressed by default. This reduces the overall memory consumption at the cost of slower writes into / reads
|
||||
from the query cache. To disable compression, use setting [query_cache_compress_entries](settings/settings.md#query-cache-compress-entries).
|
||||
|
||||
Sometimes it is useful to keep multiple results for the same query cached. This can be achieved using setting
|
||||
[query_cache_tag](settings/settings.md#query-cache-tag) that acts as as a label (or namespace) for a query cache entries. The query cache
|
||||
considers results of the same query with different tags different.
|
||||
|
||||
Example for creating three different query cache entries for the same query:
|
||||
|
||||
```sql
|
||||
SELECT 1 SETTINGS use_query_cache = true; -- query_cache_tag is implicitly '' (empty string)
|
||||
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 1';
|
||||
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 2';
|
||||
```
|
||||
|
||||
ClickHouse reads table data in blocks of [max_block_size](settings/settings.md#setting-max_block_size) rows. Due to filtering, aggregation,
|
||||
etc., result blocks are typically much smaller than 'max_block_size' but there are also cases where they are much bigger. Setting
|
||||
[query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results) (enabled by default) controls if result blocks
|
||||
|
@ -1400,6 +1400,16 @@ The number of seconds that ClickHouse waits for incoming requests before closing
|
||||
<keep_alive_timeout>10</keep_alive_timeout>
|
||||
```
|
||||
|
||||
## max_keep_alive_requests {#max-keep-alive-requests}
|
||||
|
||||
Maximal number of requests through a single keep-alive connection until it will be closed by ClickHouse server. Default to 10000.
|
||||
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<max_keep_alive_requests>10</max_keep_alive_requests>
|
||||
```
|
||||
|
||||
## listen_host {#listen_host}
|
||||
|
||||
Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`.
|
||||
|
@ -1800,6 +1800,17 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## query_cache_tag {#query-cache-tag}
|
||||
|
||||
A string which acts as a label for [query cache](../query-cache.md) entries.
|
||||
The same queries with different tags are considered different by the query cache.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Any string
|
||||
|
||||
Default value: `''`
|
||||
|
||||
## query_cache_max_size_in_bytes {#query-cache-max-size-in-bytes}
|
||||
|
||||
The maximum amount of memory (in bytes) the current user may allocate in the [query cache](../query-cache.md). 0 means unlimited.
|
||||
@ -5627,6 +5638,12 @@ Disable all insert and mutations (alter table update / alter table delete / alte
|
||||
|
||||
Default value: `false`.
|
||||
|
||||
## use_hive_partitioning
|
||||
|
||||
When enabled, ClickHouse will detect Hive-style partitioning in path (`/name=value/`) in file-like table engines [File](../../engines/table-engines/special/file.md#hive-style-partitioning)/[S3](../../engines/table-engines/integrations/s3.md#hive-style-partitioning)/[URL](../../engines/table-engines/special/url.md#hive-style-partitioning)/[HDFS](../../engines/table-engines/integrations/hdfs.md#hive-style-partitioning)/[AzureBlobStorage](../../engines/table-engines/integrations/azureBlobStorage.md#hive-style-partitioning) and will allow to use partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`.
|
||||
|
||||
Default value: `false`.
|
||||
|
||||
## allow_experimental_time_series_table {#allow-experimental-time-series-table}
|
||||
|
||||
Allows creation of tables with the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine.
|
||||
|
@ -24,6 +24,7 @@ Columns:
|
||||
- `num_rebalance_revocations`, (UInt64) - number of times the consumer was revoked its partitions
|
||||
- `num_rebalance_assignments`, (UInt64) - number of times the consumer was assigned to Kafka cluster
|
||||
- `is_currently_used`, (UInt8) - consumer is in use
|
||||
- `last_used`, (UInt64) - last time this consumer was in use, unix time in microseconds
|
||||
- `rdkafka_stat` (String) - library internal statistic. See https://github.com/ClickHouse/librdkafka/blob/master/STATISTICS.md . Set `statistics_interval_ms` to 0 disable, default is 3000 (once in three seconds).
|
||||
|
||||
Example:
|
||||
|
@ -9,6 +9,7 @@ Columns:
|
||||
|
||||
- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
|
||||
- `result_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Size of the query cache entry.
|
||||
- `tag` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Tag of the query cache entry.
|
||||
- `stale` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is stale.
|
||||
- `shared` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is shared between multiple users.
|
||||
- `compressed` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is compressed.
|
||||
@ -26,6 +27,7 @@ Row 1:
|
||||
──────
|
||||
query: SELECT 1 SETTINGS use_query_cache = 1
|
||||
result_size: 128
|
||||
tag:
|
||||
stale: 0
|
||||
shared: 0
|
||||
compressed: 1
|
||||
|
@ -4189,3 +4189,94 @@ Result:
|
||||
│ 32 │
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
|
||||
## getSubcolumn
|
||||
|
||||
Takes a table expression or identifier and constant string with the name of the sub-column, and returns the requested sub-column extracted from the expression.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
getSubcolumn(col_name, subcol_name)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `col_name` — Table expression or identifier. [Expression](../syntax.md/#expressions), [Identifier](../syntax.md/#identifiers).
|
||||
- `subcol_name` — The name of the sub-column. [String](../data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns the extracted sub-column.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE t_arr (arr Array(Tuple(subcolumn1 UInt32, subcolumn2 String))) ENGINE = MergeTree ORDER BY tuple();
|
||||
INSERT INTO t_arr VALUES ([(1, 'Hello'), (2, 'World')]), ([(3, 'This'), (4, 'is'), (5, 'subcolumn')]);
|
||||
SELECT getSubcolumn(arr, 'subcolumn1'), getSubcolumn(arr, 'subcolumn2') FROM t_arr;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─getSubcolumn(arr, 'subcolumn1')─┬─getSubcolumn(arr, 'subcolumn2')─┐
|
||||
1. │ [1,2] │ ['Hello','World'] │
|
||||
2. │ [3,4,5] │ ['This','is','subcolumn'] │
|
||||
└─────────────────────────────────┴─────────────────────────────────┘
|
||||
```
|
||||
|
||||
## getTypeSerializationStreams
|
||||
|
||||
Enumerates stream paths of a data type.
|
||||
|
||||
:::note
|
||||
This function is intended for use by developers.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
getTypeSerializationStreams(col)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `col` — Column or string representation of a data-type from which the data type will be detected.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns an array with all the serialization sub-stream paths.[Array](../data-types/array.md)([String](../data-types/string.md)).
|
||||
|
||||
**Examples**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT getTypeSerializationStreams(tuple('a', 1, 'b', 2));
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─getTypeSerializationStreams(('a', 1, 'b', 2))─────────────────────────────────────────────────────────────────────────┐
|
||||
1. │ ['{TupleElement(1), Regular}','{TupleElement(2), Regular}','{TupleElement(3), Regular}','{TupleElement(4), Regular}'] │
|
||||
└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT getTypeSerializationStreams('Map(String, Int64)');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─getTypeSerializationStreams('Map(String, Int64)')────────────────────────────────────────────────────────────────┐
|
||||
1. │ ['{ArraySizes}','{ArrayElements, TupleElement(keys), Regular}','{ArrayElements, TupleElement(values), Regular}'] │
|
||||
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
|
@ -8,26 +8,28 @@ sidebar_label: STATISTICS
|
||||
|
||||
The following operations are available:
|
||||
|
||||
- `ALTER TABLE [db].table ADD STATISTICS (columns list) TYPE (type list)` - Adds statistic description to tables metadata.
|
||||
- `ALTER TABLE [db].table ADD STATISTICS [IF NOT EXISTS] (column list) TYPE (type list)` - Adds statistic description to tables metadata.
|
||||
|
||||
- `ALTER TABLE [db].table MODIFY STATISTICS (columns list) TYPE (type list)` - Modifies statistic description to tables metadata.
|
||||
- `ALTER TABLE [db].table MODIFY STATISTICS (column list) TYPE (type list)` - Modifies statistic description to tables metadata.
|
||||
|
||||
- `ALTER TABLE [db].table DROP STATISTICS (columns list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns.
|
||||
- `ALTER TABLE [db].table DROP STATISTICS [IF EXISTS] (column list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns.
|
||||
|
||||
- `ALTER TABLE [db].table CLEAR STATISTICS (columns list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuild using `ALTER TABLE MATERIALIZE STATISTICS`.
|
||||
- `ALTER TABLE [db].table CLEAR STATISTICS [IF EXISTS] (column list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuild using `ALTER TABLE MATERIALIZE STATISTICS`.
|
||||
|
||||
- `ALTER TABLE [db.]table MATERIALIZE STATISTICS (columns list)` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
|
||||
- `ALTER TABLE [db.]table MATERIALIZE STATISTICS [IF EXISTS] (column list)` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
|
||||
|
||||
The first two commands are lightweight in a sense that they only change metadata or remove files.
|
||||
|
||||
Also, they are replicated, syncing statistics metadata via ZooKeeper.
|
||||
|
||||
There is an example adding two statistics types to two columns:
|
||||
## Example:
|
||||
|
||||
Adding two statistics types to two columns:
|
||||
|
||||
```
|
||||
ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq;
|
||||
```
|
||||
|
||||
:::note
|
||||
Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
|
||||
Statistic are supported only for [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
|
||||
:::
|
||||
|
@ -77,3 +77,16 @@ SELECT count(*) FROM azureBlobStorage('DefaultEndpointsProtocol=https;AccountNam
|
||||
**See Also**
|
||||
|
||||
- [AzureBlobStorage Table Engine](/docs/en/engines/table-engines/integrations/azureBlobStorage.md)
|
||||
|
||||
## Hive-style partitioning {#hive-style-partitioning}
|
||||
|
||||
When setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow to use partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`.
|
||||
|
||||
**Example**
|
||||
|
||||
Use virtual column, created with Hive-style partitioning
|
||||
|
||||
``` sql
|
||||
SET use_hive_partitioning = 1;
|
||||
SELECT * from azureBlobStorage(config, storage_account_url='...', container='...', blob_path='http://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
|
||||
```
|
||||
|
@ -206,6 +206,19 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3
|
||||
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.
|
||||
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
|
||||
|
||||
## Hive-style partitioning {#hive-style-partitioning}
|
||||
|
||||
When setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow to use partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`.
|
||||
|
||||
**Example**
|
||||
|
||||
Use virtual column, created with Hive-style partitioning
|
||||
|
||||
``` sql
|
||||
SET use_hive_partitioning = 1;
|
||||
SELECT * from file('data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
|
||||
```
|
||||
|
||||
## Settings {#settings}
|
||||
|
||||
- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-empty_if-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default.
|
||||
|
@ -100,6 +100,19 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin
|
||||
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
|
||||
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
|
||||
|
||||
## Hive-style partitioning {#hive-style-partitioning}
|
||||
|
||||
When setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow to use partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`.
|
||||
|
||||
**Example**
|
||||
|
||||
Use virtual column, created with Hive-style partitioning
|
||||
|
||||
``` sql
|
||||
SET use_hive_partitioning = 1;
|
||||
SELECT * from HDFS('hdfs://hdfs1:9000/data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
|
||||
```
|
||||
|
||||
## Storage Settings {#storage-settings}
|
||||
|
||||
- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default.
|
||||
|
@ -274,6 +274,19 @@ FROM s3(
|
||||
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. In case of archive shows uncompressed file size of the file inside the archive.
|
||||
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
|
||||
|
||||
## Hive-style partitioning {#hive-style-partitioning}
|
||||
|
||||
When setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow to use partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`.
|
||||
|
||||
**Example**
|
||||
|
||||
Use virtual column, created with Hive-style partitioning
|
||||
|
||||
``` sql
|
||||
SET use_hive_partitioning = 1;
|
||||
SELECT * from s3('s3://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
|
||||
```
|
||||
|
||||
## Storage Settings {#storage-settings}
|
||||
|
||||
- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default.
|
||||
|
@ -55,6 +55,19 @@ Character `|` inside patterns is used to specify failover addresses. They are it
|
||||
- `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
|
||||
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
|
||||
|
||||
## Hive-style partitioning {#hive-style-partitioning}
|
||||
|
||||
When setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow to use partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`.
|
||||
|
||||
**Example**
|
||||
|
||||
Use virtual column, created with Hive-style partitioning
|
||||
|
||||
``` sql
|
||||
SET use_hive_partitioning = 1;
|
||||
SELECT * from url('http://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
|
||||
```
|
||||
|
||||
## Storage Settings {#storage-settings}
|
||||
|
||||
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
|
||||
|
@ -23,30 +23,30 @@ slug: /zh/operations/external-authenticators/kerberos
|
||||
|
||||
示例 (进入 `config.xml`):
|
||||
```xml
|
||||
<yandex>
|
||||
<clickhouse>
|
||||
<!- ... -->
|
||||
<kerberos />
|
||||
</yandex>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
主体规范:
|
||||
```xml
|
||||
<yandex>
|
||||
<clickhouse>
|
||||
<!- ... -->
|
||||
<kerberos>
|
||||
<principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
|
||||
</kerberos>
|
||||
</yandex>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
按领域过滤:
|
||||
```xml
|
||||
<yandex>
|
||||
<clickhouse>
|
||||
<!- ... -->
|
||||
<kerberos>
|
||||
<realm>EXAMPLE.COM</realm>
|
||||
</kerberos>
|
||||
</yandex>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
!!! warning "注意"
|
||||
@ -74,7 +74,7 @@ Kerberos主体名称格式通常遵循以下模式:
|
||||
|
||||
示例 (进入 `users.xml`):
|
||||
```
|
||||
<yandex>
|
||||
<clickhouse>
|
||||
<!- ... -->
|
||||
<users>
|
||||
<!- ... -->
|
||||
@ -85,7 +85,7 @@ Kerberos主体名称格式通常遵循以下模式:
|
||||
</kerberos>
|
||||
</my_user>
|
||||
</users>
|
||||
</yandex>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
!!! warning "警告"
|
||||
|
@ -1,4 +1,4 @@
|
||||
add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
|
||||
add_compile_options("$<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>")
|
||||
|
||||
if (USE_CLANG_TIDY)
|
||||
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
|
||||
|
@ -223,7 +223,7 @@ std::vector<String> Client::loadWarningMessages()
|
||||
|
||||
size_t rows = packet.block.rows();
|
||||
for (size_t i = 0; i < rows; ++i)
|
||||
messages.emplace_back(column[i].get<String>());
|
||||
messages.emplace_back(column[i].safeGet<String>());
|
||||
}
|
||||
continue;
|
||||
|
||||
|
@ -95,7 +95,7 @@ void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
|
||||
client->zookeeper->set(
|
||||
client->getAbsolutePath(query->args[0].safeGet<String>()),
|
||||
query->args[1].safeGet<String>(),
|
||||
static_cast<Int32>(query->args[2].get<Int32>()));
|
||||
static_cast<Int32>(query->args[2].safeGet<Int32>()));
|
||||
}
|
||||
|
||||
bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
|
||||
@ -494,7 +494,7 @@ void RMCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con
|
||||
{
|
||||
Int32 version{-1};
|
||||
if (query->args.size() == 2)
|
||||
version = static_cast<Int32>(query->args[1].get<Int32>());
|
||||
version = static_cast<Int32>(query->args[1].safeGet<Int32>());
|
||||
|
||||
client->zookeeper->remove(client->getAbsolutePath(query->args[0].safeGet<String>()), version);
|
||||
}
|
||||
@ -549,7 +549,7 @@ void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient
|
||||
String leaving;
|
||||
String new_members;
|
||||
|
||||
auto operation = query->args[0].get<ReconfigCommand::Operation>();
|
||||
auto operation = query->args[0].safeGet<ReconfigCommand::Operation>();
|
||||
switch (operation)
|
||||
{
|
||||
case static_cast<UInt8>(ReconfigCommand::Operation::ADD):
|
||||
|
@ -27,7 +27,7 @@ std::string LibraryBridge::bridgeName() const
|
||||
|
||||
LibraryBridge::HandlerFactoryPtr LibraryBridge::getHandlerFactoryPtr(ContextPtr context) const
|
||||
{
|
||||
return std::make_shared<LibraryBridgeHandlerFactory>("LibraryRequestHandlerFactory", keep_alive_timeout, context);
|
||||
return std::make_shared<LibraryBridgeHandlerFactory>("LibraryRequestHandlerFactory", context);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -9,12 +9,10 @@ namespace DB
|
||||
{
|
||||
LibraryBridgeHandlerFactory::LibraryBridgeHandlerFactory(
|
||||
const std::string & name_,
|
||||
size_t keep_alive_timeout_,
|
||||
ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, log(getLogger(name_))
|
||||
, name(name_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -26,17 +24,17 @@ std::unique_ptr<HTTPRequestHandler> LibraryBridgeHandlerFactory::createRequestHa
|
||||
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
|
||||
{
|
||||
if (uri.getPath() == "/extdict_ping")
|
||||
return std::make_unique<ExternalDictionaryLibraryBridgeExistsHandler>(keep_alive_timeout, getContext());
|
||||
return std::make_unique<ExternalDictionaryLibraryBridgeExistsHandler>(getContext());
|
||||
else if (uri.getPath() == "/catboost_ping")
|
||||
return std::make_unique<CatBoostLibraryBridgeExistsHandler>(keep_alive_timeout, getContext());
|
||||
return std::make_unique<CatBoostLibraryBridgeExistsHandler>(getContext());
|
||||
}
|
||||
|
||||
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
|
||||
{
|
||||
if (uri.getPath() == "/extdict_request")
|
||||
return std::make_unique<ExternalDictionaryLibraryBridgeRequestHandler>(keep_alive_timeout, getContext());
|
||||
return std::make_unique<ExternalDictionaryLibraryBridgeRequestHandler>(getContext());
|
||||
else if (uri.getPath() == "/catboost_request")
|
||||
return std::make_unique<CatBoostLibraryBridgeRequestHandler>(keep_alive_timeout, getContext());
|
||||
return std::make_unique<CatBoostLibraryBridgeRequestHandler>(getContext());
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
|
@ -13,7 +13,6 @@ class LibraryBridgeHandlerFactory : public HTTPRequestHandlerFactory, WithContex
|
||||
public:
|
||||
LibraryBridgeHandlerFactory(
|
||||
const std::string & name_,
|
||||
size_t keep_alive_timeout_,
|
||||
ContextPtr context_);
|
||||
|
||||
std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) override;
|
||||
@ -21,7 +20,6 @@ public:
|
||||
private:
|
||||
LoggerPtr log;
|
||||
const std::string name;
|
||||
const size_t keep_alive_timeout;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -87,10 +87,8 @@ static void writeData(Block data, OutputFormatPtr format)
|
||||
}
|
||||
|
||||
|
||||
ExternalDictionaryLibraryBridgeRequestHandler::ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
, log(getLogger("ExternalDictionaryLibraryBridgeRequestHandler"))
|
||||
ExternalDictionaryLibraryBridgeRequestHandler::ExternalDictionaryLibraryBridgeRequestHandler(ContextPtr context_)
|
||||
: WithContext(context_), log(getLogger("ExternalDictionaryLibraryBridgeRequestHandler"))
|
||||
{
|
||||
}
|
||||
|
||||
@ -137,7 +135,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
|
||||
const String & dictionary_id = params.get("dictionary_id");
|
||||
|
||||
LOG_TRACE(log, "Library method: '{}', dictionary id: {}", method, dictionary_id);
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
|
||||
|
||||
try
|
||||
{
|
||||
@ -374,10 +372,8 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
|
||||
}
|
||||
|
||||
|
||||
ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
, log(getLogger("ExternalDictionaryLibraryBridgeExistsHandler"))
|
||||
ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExistsHandler(ContextPtr context_)
|
||||
: WithContext(context_), log(getLogger("ExternalDictionaryLibraryBridgeExistsHandler"))
|
||||
{
|
||||
}
|
||||
|
||||
@ -401,7 +397,7 @@ void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerReque
|
||||
|
||||
String res = library_handler ? "1" : "0";
|
||||
|
||||
setResponseDefaultHeaders(response, keep_alive_timeout);
|
||||
setResponseDefaultHeaders(response);
|
||||
LOG_TRACE(log, "Sending ping response: {} (dictionary id: {})", res, dictionary_id);
|
||||
response.sendBuffer(res.data(), res.size());
|
||||
}
|
||||
@ -412,11 +408,8 @@ void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerReque
|
||||
}
|
||||
|
||||
|
||||
CatBoostLibraryBridgeRequestHandler::CatBoostLibraryBridgeRequestHandler(
|
||||
size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
, log(getLogger("CatBoostLibraryBridgeRequestHandler"))
|
||||
CatBoostLibraryBridgeRequestHandler::CatBoostLibraryBridgeRequestHandler(ContextPtr context_)
|
||||
: WithContext(context_), log(getLogger("CatBoostLibraryBridgeRequestHandler"))
|
||||
{
|
||||
}
|
||||
|
||||
@ -455,7 +448,7 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ
|
||||
const String & method = params.get("method");
|
||||
|
||||
LOG_TRACE(log, "Library method: '{}'", method);
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
|
||||
|
||||
try
|
||||
{
|
||||
@ -617,10 +610,8 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ
|
||||
}
|
||||
|
||||
|
||||
CatBoostLibraryBridgeExistsHandler::CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
, log(getLogger("CatBoostLibraryBridgeExistsHandler"))
|
||||
CatBoostLibraryBridgeExistsHandler::CatBoostLibraryBridgeExistsHandler(ContextPtr context_)
|
||||
: WithContext(context_), log(getLogger("CatBoostLibraryBridgeExistsHandler"))
|
||||
{
|
||||
}
|
||||
|
||||
@ -634,7 +625,7 @@ void CatBoostLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & reque
|
||||
|
||||
String res = "1";
|
||||
|
||||
setResponseDefaultHeaders(response, keep_alive_timeout);
|
||||
setResponseDefaultHeaders(response);
|
||||
LOG_TRACE(log, "Sending ping response: {}", res);
|
||||
response.sendBuffer(res.data(), res.size());
|
||||
}
|
||||
|
@ -18,14 +18,13 @@ namespace DB
|
||||
class ExternalDictionaryLibraryBridgeRequestHandler : public HTTPRequestHandler, WithContext
|
||||
{
|
||||
public:
|
||||
ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_);
|
||||
explicit ExternalDictionaryLibraryBridgeRequestHandler(ContextPtr context_);
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
static constexpr auto FORMAT = "RowBinary";
|
||||
|
||||
const size_t keep_alive_timeout;
|
||||
LoggerPtr log;
|
||||
};
|
||||
|
||||
@ -34,12 +33,11 @@ private:
|
||||
class ExternalDictionaryLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContext
|
||||
{
|
||||
public:
|
||||
ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_);
|
||||
explicit ExternalDictionaryLibraryBridgeExistsHandler(ContextPtr context_);
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
const size_t keep_alive_timeout;
|
||||
LoggerPtr log;
|
||||
};
|
||||
|
||||
@ -63,12 +61,11 @@ private:
|
||||
class CatBoostLibraryBridgeRequestHandler : public HTTPRequestHandler, WithContext
|
||||
{
|
||||
public:
|
||||
CatBoostLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_);
|
||||
explicit CatBoostLibraryBridgeRequestHandler(ContextPtr context_);
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
const size_t keep_alive_timeout;
|
||||
LoggerPtr log;
|
||||
};
|
||||
|
||||
@ -77,12 +74,11 @@ private:
|
||||
class CatBoostLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContext
|
||||
{
|
||||
public:
|
||||
CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_);
|
||||
explicit CatBoostLibraryBridgeExistsHandler(ContextPtr context_);
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
const size_t keep_alive_timeout;
|
||||
LoggerPtr log;
|
||||
};
|
||||
|
||||
|
@ -1307,6 +1307,7 @@ try
|
||||
throw ErrnoException(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Input must be seekable file (it will be read twice)");
|
||||
|
||||
SingleReadBufferIterator read_buffer_iterator(std::move(file));
|
||||
|
||||
schema_columns = readSchemaFromFormat(input_format, {}, read_buffer_iterator, context_const);
|
||||
}
|
||||
else
|
||||
|
@ -202,10 +202,7 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ
|
||||
if (columns.empty())
|
||||
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Columns definition was not returned");
|
||||
|
||||
WriteBufferFromHTTPServerResponse out(
|
||||
response,
|
||||
request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD,
|
||||
keep_alive_timeout);
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
|
||||
try
|
||||
{
|
||||
writeStringBinary(columns.toString(), out);
|
||||
|
@ -15,18 +15,12 @@ namespace DB
|
||||
class ODBCColumnsInfoHandler : public HTTPRequestHandler, WithContext
|
||||
{
|
||||
public:
|
||||
ODBCColumnsInfoHandler(size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, log(getLogger("ODBCColumnsInfoHandler"))
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
{
|
||||
}
|
||||
explicit ODBCColumnsInfoHandler(ContextPtr context_) : WithContext(context_), log(getLogger("ODBCColumnsInfoHandler")) { }
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
LoggerPtr log;
|
||||
size_t keep_alive_timeout;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -74,7 +74,7 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ
|
||||
|
||||
auto identifier = getIdentifierQuote(std::move(connection));
|
||||
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
|
||||
try
|
||||
{
|
||||
writeStringBinary(identifier, out);
|
||||
|
@ -14,18 +14,12 @@ namespace DB
|
||||
class IdentifierQuoteHandler : public HTTPRequestHandler, WithContext
|
||||
{
|
||||
public:
|
||||
IdentifierQuoteHandler(size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, log(getLogger("IdentifierQuoteHandler"))
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
{
|
||||
}
|
||||
explicit IdentifierQuoteHandler(ContextPtr context_) : WithContext(context_), log(getLogger("IdentifierQuoteHandler")) { }
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
LoggerPtr log;
|
||||
size_t keep_alive_timeout;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -132,7 +132,7 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
|
||||
return;
|
||||
}
|
||||
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
|
||||
|
||||
try
|
||||
{
|
||||
|
@ -20,12 +20,10 @@ class ODBCHandler : public HTTPRequestHandler, WithContext
|
||||
{
|
||||
public:
|
||||
ODBCHandler(
|
||||
size_t keep_alive_timeout_,
|
||||
ContextPtr context_,
|
||||
const String & mode_)
|
||||
: WithContext(context_)
|
||||
, log(getLogger("ODBCHandler"))
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
, mode(mode_)
|
||||
{
|
||||
}
|
||||
@ -35,7 +33,6 @@ public:
|
||||
private:
|
||||
LoggerPtr log;
|
||||
|
||||
size_t keep_alive_timeout;
|
||||
String mode;
|
||||
|
||||
static inline std::mutex mutex;
|
||||
|
@ -27,7 +27,7 @@ std::string ODBCBridge::bridgeName() const
|
||||
|
||||
ODBCBridge::HandlerFactoryPtr ODBCBridge::getHandlerFactoryPtr(ContextPtr context) const
|
||||
{
|
||||
return std::make_shared<ODBCBridgeHandlerFactory>("ODBCRequestHandlerFactory-factory", keep_alive_timeout, context);
|
||||
return std::make_shared<ODBCBridgeHandlerFactory>("ODBCRequestHandlerFactory-factory", context);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -9,11 +9,8 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
ODBCBridgeHandlerFactory::ODBCBridgeHandlerFactory(const std::string & name_, size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, log(getLogger(name_))
|
||||
, name(name_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
ODBCBridgeHandlerFactory::ODBCBridgeHandlerFactory(const std::string & name_, ContextPtr context_)
|
||||
: WithContext(context_), log(getLogger(name_)), name(name_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -23,33 +20,33 @@ std::unique_ptr<HTTPRequestHandler> ODBCBridgeHandlerFactory::createRequestHandl
|
||||
LOG_TRACE(log, "Request URI: {}", uri.toString());
|
||||
|
||||
if (uri.getPath() == "/ping" && request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
|
||||
return std::make_unique<PingHandler>(keep_alive_timeout);
|
||||
return std::make_unique<PingHandler>();
|
||||
|
||||
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
|
||||
{
|
||||
|
||||
if (uri.getPath() == "/columns_info")
|
||||
#if USE_ODBC
|
||||
return std::make_unique<ODBCColumnsInfoHandler>(keep_alive_timeout, getContext());
|
||||
return std::make_unique<ODBCColumnsInfoHandler>(getContext());
|
||||
#else
|
||||
return nullptr;
|
||||
#endif
|
||||
else if (uri.getPath() == "/identifier_quote")
|
||||
#if USE_ODBC
|
||||
return std::make_unique<IdentifierQuoteHandler>(keep_alive_timeout, getContext());
|
||||
return std::make_unique<IdentifierQuoteHandler>(getContext());
|
||||
#else
|
||||
return nullptr;
|
||||
#endif
|
||||
else if (uri.getPath() == "/schema_allowed")
|
||||
#if USE_ODBC
|
||||
return std::make_unique<SchemaAllowedHandler>(keep_alive_timeout, getContext());
|
||||
return std::make_unique<SchemaAllowedHandler>(getContext());
|
||||
#else
|
||||
return nullptr;
|
||||
#endif
|
||||
else if (uri.getPath() == "/write")
|
||||
return std::make_unique<ODBCHandler>(keep_alive_timeout, getContext(), "write");
|
||||
return std::make_unique<ODBCHandler>(getContext(), "write");
|
||||
else
|
||||
return std::make_unique<ODBCHandler>(keep_alive_timeout, getContext(), "read");
|
||||
return std::make_unique<ODBCHandler>(getContext(), "read");
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -17,14 +17,13 @@ namespace DB
|
||||
class ODBCBridgeHandlerFactory : public HTTPRequestHandlerFactory, WithContext
|
||||
{
|
||||
public:
|
||||
ODBCBridgeHandlerFactory(const std::string & name_, size_t keep_alive_timeout_, ContextPtr context_);
|
||||
ODBCBridgeHandlerFactory(const std::string & name_, ContextPtr context_);
|
||||
|
||||
std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) override;
|
||||
|
||||
private:
|
||||
LoggerPtr log;
|
||||
std::string name;
|
||||
size_t keep_alive_timeout;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -10,7 +10,7 @@ void PingHandler::handleRequest(HTTPServerRequest & /* request */, HTTPServerRes
|
||||
{
|
||||
try
|
||||
{
|
||||
setResponseDefaultHeaders(response, keep_alive_timeout);
|
||||
setResponseDefaultHeaders(response);
|
||||
const char * data = "Ok.\n";
|
||||
response.sendBuffer(data, strlen(data));
|
||||
}
|
||||
|
@ -9,11 +9,7 @@ namespace DB
|
||||
class PingHandler : public HTTPRequestHandler
|
||||
{
|
||||
public:
|
||||
explicit PingHandler(size_t keep_alive_timeout_) : keep_alive_timeout(keep_alive_timeout_) {}
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
size_t keep_alive_timeout;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -88,7 +88,7 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer
|
||||
|
||||
bool result = isSchemaAllowed(std::move(connection));
|
||||
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
|
||||
try
|
||||
{
|
||||
writeBoolText(result, out);
|
||||
|
@ -17,18 +17,12 @@ class Context;
|
||||
class SchemaAllowedHandler : public HTTPRequestHandler, WithContext
|
||||
{
|
||||
public:
|
||||
SchemaAllowedHandler(size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, log(getLogger("SchemaAllowedHandler"))
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
{
|
||||
}
|
||||
explicit SchemaAllowedHandler(ContextPtr context_) : WithContext(context_), log(getLogger("SchemaAllowedHandler")) { }
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
LoggerPtr log;
|
||||
size_t keep_alive_timeout;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -2428,6 +2428,7 @@ void Server::createServers(
|
||||
Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams;
|
||||
http_params->setTimeout(settings.http_receive_timeout);
|
||||
http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout);
|
||||
http_params->setMaxKeepAliveRequests(static_cast<int>(global_context->getServerSettings().max_keep_alive_requests));
|
||||
|
||||
Poco::Util::AbstractConfiguration::Keys protocols;
|
||||
config.keys("protocols", protocols);
|
||||
|
1
programs/server/config.d/transactions.xml
Symbolic link
1
programs/server/config.d/transactions.xml
Symbolic link
@ -0,0 +1 @@
|
||||
../../../tests/config/config.d/transactions.xml
|
@ -10,6 +10,7 @@
|
||||
#include <Poco/Net/SocketAddress.h>
|
||||
#include <Poco/Net/StreamSocket.h>
|
||||
|
||||
#include <Daemon/BaseDaemon.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
|
||||
@ -25,6 +26,12 @@ static int64_t port = 9000;
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
void on_exit()
|
||||
{
|
||||
BaseDaemon::terminate();
|
||||
main_app.wait();
|
||||
}
|
||||
|
||||
extern "C"
|
||||
int LLVMFuzzerInitialize(int * argc, char ***argv)
|
||||
{
|
||||
@ -60,6 +67,8 @@ int LLVMFuzzerInitialize(int * argc, char ***argv)
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
atexit(on_exit);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -780,12 +780,12 @@ AggregateFunctionPtr createAggregateFunctionGroupArray(
|
||||
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);
|
||||
|
||||
if ((type == Field::Types::Int64 && parameters[0].get<Int64>() < 0) ||
|
||||
(type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
|
||||
if ((type == Field::Types::Int64 && parameters[0].safeGet<Int64>() < 0) ||
|
||||
(type == Field::Types::UInt64 && parameters[0].safeGet<UInt64>() == 0))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);
|
||||
|
||||
has_limit = true;
|
||||
max_elems = parameters[0].get<UInt64>();
|
||||
max_elems = parameters[0].safeGet<UInt64>();
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
@ -816,11 +816,11 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample(
|
||||
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);
|
||||
|
||||
if ((type == Field::Types::Int64 && parameters[i].get<Int64>() < 0) ||
|
||||
(type == Field::Types::UInt64 && parameters[i].get<UInt64>() == 0))
|
||||
if ((type == Field::Types::Int64 && parameters[i].safeGet<Int64>() < 0) ||
|
||||
(type == Field::Types::UInt64 && parameters[i].safeGet<UInt64>() == 0))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);
|
||||
|
||||
return parameters[i].get<UInt64>();
|
||||
return parameters[i].safeGet<UInt64>();
|
||||
};
|
||||
|
||||
UInt64 max_elems = get_parameter(0);
|
||||
|
@ -83,16 +83,16 @@ public:
|
||||
if (version == 1)
|
||||
{
|
||||
for (size_t i = 0; i < arr_size; ++i)
|
||||
set.insert(static_cast<T>((*data_column)[offset + i].get<T>()));
|
||||
set.insert(static_cast<T>((*data_column)[offset + i].safeGet<T>()));
|
||||
}
|
||||
else if (!set.empty())
|
||||
{
|
||||
typename State::Set new_set;
|
||||
for (size_t i = 0; i < arr_size; ++i)
|
||||
{
|
||||
typename State::Set::LookupResult set_value = set.find(static_cast<T>((*data_column)[offset + i].get<T>()));
|
||||
typename State::Set::LookupResult set_value = set.find(static_cast<T>((*data_column)[offset + i].safeGet<T>()));
|
||||
if (set_value != nullptr)
|
||||
new_set.insert(static_cast<T>((*data_column)[offset + i].get<T>()));
|
||||
new_set.insert(static_cast<T>((*data_column)[offset + i].safeGet<T>()));
|
||||
}
|
||||
set = std::move(new_set);
|
||||
}
|
||||
|
@ -269,12 +269,12 @@ AggregateFunctionPtr createAggregateFunctionMoving(
|
||||
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name);
|
||||
|
||||
if ((type == Field::Types::Int64 && parameters[0].get<Int64>() <= 0) ||
|
||||
(type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
|
||||
if ((type == Field::Types::Int64 && parameters[0].safeGet<Int64>() <= 0) ||
|
||||
(type == Field::Types::UInt64 && parameters[0].safeGet<UInt64>() == 0))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name);
|
||||
|
||||
limit_size = true;
|
||||
max_elems = parameters[0].get<UInt64>();
|
||||
max_elems = parameters[0].safeGet<UInt64>();
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
|
@ -397,11 +397,11 @@ AggregateFunctionPtr createAggregateFunctionGroupArray(
|
||||
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);
|
||||
|
||||
if ((type == Field::Types::Int64 && parameters[0].get<Int64>() < 0) ||
|
||||
(type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
|
||||
if ((type == Field::Types::Int64 && parameters[0].safeGet<Int64>() < 0) ||
|
||||
(type == Field::Types::UInt64 && parameters[0].safeGet<UInt64>() == 0))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);
|
||||
|
||||
max_elems = parameters[0].get<UInt64>();
|
||||
max_elems = parameters[0].safeGet<UInt64>();
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
|
@ -247,7 +247,7 @@ AggregateFunctionPtr createAggregateFunctionGroupConcat(
|
||||
if (type != Field::Types::String)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First parameter for aggregate function {} should be string", name);
|
||||
|
||||
delimiter = parameters[0].get<String>();
|
||||
delimiter = parameters[0].safeGet<String>();
|
||||
}
|
||||
if (parameters.size() == 2)
|
||||
{
|
||||
@ -256,12 +256,12 @@ AggregateFunctionPtr createAggregateFunctionGroupConcat(
|
||||
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number", name);
|
||||
|
||||
if ((type == Field::Types::Int64 && parameters[1].get<Int64>() <= 0) ||
|
||||
(type == Field::Types::UInt64 && parameters[1].get<UInt64>() == 0))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number, got: {}", name, parameters[1].get<Int64>());
|
||||
if ((type == Field::Types::Int64 && parameters[1].safeGet<Int64>() <= 0) ||
|
||||
(type == Field::Types::UInt64 && parameters[1].safeGet<UInt64>() == 0))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number, got: {}", name, parameters[1].safeGet<Int64>());
|
||||
|
||||
has_limit = true;
|
||||
limit = parameters[1].get<UInt64>();
|
||||
limit = parameters[1].safeGet<UInt64>();
|
||||
}
|
||||
|
||||
if (has_limit)
|
||||
|
@ -323,12 +323,12 @@ AggregateFunctionPtr createAggregateFunctionGroupUniqArray(
|
||||
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);
|
||||
|
||||
if ((type == Field::Types::Int64 && parameters[0].get<Int64>() < 0) ||
|
||||
(type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
|
||||
if ((type == Field::Types::Int64 && parameters[0].safeGet<Int64>() < 0) ||
|
||||
(type == Field::Types::UInt64 && parameters[0].safeGet<UInt64>() == 0))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);
|
||||
|
||||
limit_size = true;
|
||||
max_elems = parameters[0].get<UInt64>();
|
||||
max_elems = parameters[0].safeGet<UInt64>();
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
|
@ -238,7 +238,7 @@ public:
|
||||
if (params[0].getType() != Field::Types::String)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a String", getName());
|
||||
|
||||
const auto & param = params[0].get<String>();
|
||||
const auto & param = params[0].safeGet<String>();
|
||||
if (param == "two-sided")
|
||||
alternative = Alternative::TwoSided;
|
||||
else if (param == "less")
|
||||
@ -255,7 +255,7 @@ public:
|
||||
if (params[1].getType() != Field::Types::String)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require second parameter to be a String", getName());
|
||||
|
||||
method = params[1].get<String>();
|
||||
method = params[1].safeGet<String>();
|
||||
if (method != "auto" && method != "exact" && method != "asymp" && method != "asymptotic")
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown method in aggregate function {}. "
|
||||
"It must be one of: 'auto', 'exact', 'asymp' (or 'asymptotic')", getName());
|
||||
|
@ -181,7 +181,7 @@ public:
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a UInt64", getName());
|
||||
|
||||
total_buckets = params[0].get<UInt64>();
|
||||
total_buckets = params[0].safeGet<UInt64>();
|
||||
|
||||
this->x_type = WhichDataType(arguments[0]).idx;
|
||||
this->y_type = WhichDataType(arguments[1]).idx;
|
||||
|
@ -152,7 +152,7 @@ public:
|
||||
if (params[0].getType() != Field::Types::String)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a String", getName());
|
||||
|
||||
const auto & param = params[0].get<String>();
|
||||
const auto & param = params[0].safeGet<String>();
|
||||
if (param == "two-sided")
|
||||
alternative = Alternative::TwoSided;
|
||||
else if (param == "less")
|
||||
@ -169,7 +169,7 @@ public:
|
||||
if (params[1].getType() != Field::Types::UInt64)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require second parameter to be a UInt64", getName());
|
||||
|
||||
continuity_correction = static_cast<bool>(params[1].get<UInt64>());
|
||||
continuity_correction = static_cast<bool>(params[1].safeGet<UInt64>());
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
|
@ -117,7 +117,7 @@ public:
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires relative accuracy parameter with Float64 type", getName());
|
||||
|
||||
relative_accuracy = relative_accuracy_field.get<Float64>();
|
||||
relative_accuracy = relative_accuracy_field.safeGet<Float64>();
|
||||
|
||||
if (relative_accuracy <= 0 || relative_accuracy >= 1 || isNaN(relative_accuracy))
|
||||
throw Exception(
|
||||
@ -147,9 +147,9 @@ public:
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires accuracy parameter with integer type", getName());
|
||||
|
||||
if (accuracy_field.getType() == Field::Types::Int64)
|
||||
accuracy = accuracy_field.get<Int64>();
|
||||
accuracy = accuracy_field.safeGet<Int64>();
|
||||
else
|
||||
accuracy = accuracy_field.get<UInt64>();
|
||||
accuracy = accuracy_field.safeGet<UInt64>();
|
||||
|
||||
if (accuracy <= 0)
|
||||
throw Exception(
|
||||
|
@ -300,12 +300,12 @@ public:
|
||||
/// Compatibility with previous versions.
|
||||
if (value.getType() == Field::Types::Decimal32)
|
||||
{
|
||||
auto source = value.get<DecimalField<Decimal32>>();
|
||||
auto source = value.safeGet<DecimalField<Decimal32>>();
|
||||
value = DecimalField<Decimal128>(source.getValue(), source.getScale());
|
||||
}
|
||||
else if (value.getType() == Field::Types::Decimal64)
|
||||
{
|
||||
auto source = value.get<DecimalField<Decimal64>>();
|
||||
auto source = value.safeGet<DecimalField<Decimal64>>();
|
||||
value = DecimalField<Decimal128>(source.getValue(), source.getScale());
|
||||
}
|
||||
|
||||
@ -355,7 +355,7 @@ public:
|
||||
/// Compatibility with previous versions.
|
||||
if (value.getType() == Field::Types::Decimal128)
|
||||
{
|
||||
auto source = value.get<DecimalField<Decimal128>>();
|
||||
auto source = value.safeGet<DecimalField<Decimal128>>();
|
||||
WhichDataType value_type(values_types[col_idx]);
|
||||
if (value_type.isDecimal32())
|
||||
{
|
||||
@ -560,7 +560,7 @@ private:
|
||||
template <typename FieldType>
|
||||
bool compareImpl(FieldType & x) const
|
||||
{
|
||||
auto val = rhs.get<FieldType>();
|
||||
auto val = rhs.safeGet<FieldType>();
|
||||
if (val > x)
|
||||
{
|
||||
x = val;
|
||||
@ -600,7 +600,7 @@ private:
|
||||
template <typename FieldType>
|
||||
bool compareImpl(FieldType & x) const
|
||||
{
|
||||
auto val = rhs.get<FieldType>();
|
||||
auto val = rhs.safeGet<FieldType>();
|
||||
if (val < x)
|
||||
{
|
||||
x = val;
|
||||
|
@ -1,2 +1,2 @@
|
||||
clickhouse_add_executable(aggregate_function_state_deserialization_fuzzer aggregate_function_state_deserialization_fuzzer.cpp ${SRCS})
|
||||
target_link_libraries(aggregate_function_state_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions clickhouse_functions)
|
||||
target_link_libraries(aggregate_function_state_deserialization_fuzzer PRIVATE clickhouse_functions clickhouse_aggregate_functions)
|
||||
|
@ -137,7 +137,7 @@ private:
|
||||
if (constant_node_value.getType() != Field::Types::Which::Tuple)
|
||||
return {};
|
||||
|
||||
const auto & constant_tuple = constant_node_value.get<const Tuple &>();
|
||||
const auto & constant_tuple = constant_node_value.safeGet<const Tuple &>();
|
||||
|
||||
const auto & function_arguments_nodes = function_node_typed.getArguments().getNodes();
|
||||
size_t function_arguments_nodes_size = function_arguments_nodes.size();
|
||||
|
@ -89,7 +89,7 @@ public:
|
||||
if (!pattern || !isString(pattern->getResultType()))
|
||||
continue;
|
||||
|
||||
auto regexp = likePatternToRegexp(pattern->getValue().get<String>());
|
||||
auto regexp = likePatternToRegexp(pattern->getValue().safeGet<String>());
|
||||
/// Case insensitive. Works with UTF-8 as well.
|
||||
if (is_ilike)
|
||||
regexp = "(?i)" + regexp;
|
||||
|
@ -68,10 +68,10 @@ void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, Col
|
||||
String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple)
|
||||
{
|
||||
if (value.getType() == Field::Types::String)
|
||||
return value.get<const String &>();
|
||||
return value.safeGet<const String &>();
|
||||
|
||||
if (value.getType() == Field::Types::UInt64)
|
||||
return data_type_tuple.getNameByPosition(value.get<UInt64>());
|
||||
return data_type_tuple.getNameByPosition(value.safeGet<UInt64>());
|
||||
|
||||
return "";
|
||||
}
|
||||
@ -79,7 +79,7 @@ String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & dat
|
||||
String getSubcolumnNameForElement(const Field & value, const DataTypeVariant &)
|
||||
{
|
||||
if (value.getType() == Field::Types::String)
|
||||
return value.get<const String &>();
|
||||
return value.safeGet<const String &>();
|
||||
|
||||
return "";
|
||||
}
|
||||
|
@ -187,7 +187,7 @@ FunctionNodePtr createFusedQuantilesNode(std::vector<QueryTreeNodePtr *> & nodes
|
||||
/// Sort nodes and parameters in ascending order of quantile level
|
||||
std::vector<size_t> permutation(nodes.size());
|
||||
iota(permutation.data(), permutation.size(), size_t(0));
|
||||
std::sort(permutation.begin(), permutation.end(), [&](size_t i, size_t j) { return parameters[i].get<Float64>() < parameters[j].get<Float64>(); });
|
||||
std::sort(permutation.begin(), permutation.end(), [&](size_t i, size_t j) { return parameters[i].safeGet<Float64>() < parameters[j].safeGet<Float64>(); });
|
||||
|
||||
std::vector<QueryTreeNodePtr *> new_nodes;
|
||||
new_nodes.reserve(permutation.size());
|
||||
|
@ -134,8 +134,8 @@ public:
|
||||
return;
|
||||
|
||||
std::set<std::string> string_values;
|
||||
string_values.insert(first_literal->getValue().get<std::string>());
|
||||
string_values.insert(second_literal->getValue().get<std::string>());
|
||||
string_values.insert(first_literal->getValue().safeGet<std::string>());
|
||||
string_values.insert(second_literal->getValue().safeGet<std::string>());
|
||||
|
||||
changeIfArguments(*function_if_node, string_values, context);
|
||||
wrapIntoToString(*function_node, std::move(modified_if_node), context);
|
||||
@ -163,7 +163,7 @@ public:
|
||||
if (!isArray(literal_to->getResultType()) || !isString(literal_default->getResultType()))
|
||||
return;
|
||||
|
||||
auto array_to = literal_to->getValue().get<Array>();
|
||||
auto array_to = literal_to->getValue().safeGet<Array>();
|
||||
|
||||
if (array_to.empty())
|
||||
return;
|
||||
@ -178,9 +178,9 @@ public:
|
||||
std::set<std::string> string_values;
|
||||
|
||||
for (const auto & value : array_to)
|
||||
string_values.insert(value.get<std::string>());
|
||||
string_values.insert(value.safeGet<std::string>());
|
||||
|
||||
string_values.insert(literal_default->getValue().get<std::string>());
|
||||
string_values.insert(literal_default->getValue().safeGet<std::string>());
|
||||
|
||||
changeTransformArguments(*function_modified_transform_node, string_values, context);
|
||||
wrapIntoToString(*function_node, std::move(modified_transform_node), context);
|
||||
|
@ -54,7 +54,7 @@ public:
|
||||
}
|
||||
else if (function_node->getFunctionName() == "sum" &&
|
||||
first_argument_constant_literal.getType() == Field::Types::UInt64 &&
|
||||
first_argument_constant_literal.get<UInt64>() == 1)
|
||||
first_argument_constant_literal.safeGet<UInt64>() == 1)
|
||||
{
|
||||
function_node->getArguments().getNodes().clear();
|
||||
resolveAggregateFunctionNodeByName(*function_node, "count");
|
||||
|
@ -143,13 +143,13 @@ private:
|
||||
const auto & column_type = column_node_typed.getColumnType().get();
|
||||
if (isDateOrDate32(column_type))
|
||||
{
|
||||
start_date_or_date_time = date_lut.dateToString(range.first.get<DateLUTImpl::Time>());
|
||||
end_date_or_date_time = date_lut.dateToString(range.second.get<DateLUTImpl::Time>());
|
||||
start_date_or_date_time = date_lut.dateToString(range.first.safeGet<DateLUTImpl::Time>());
|
||||
end_date_or_date_time = date_lut.dateToString(range.second.safeGet<DateLUTImpl::Time>());
|
||||
}
|
||||
else if (isDateTime(column_type) || isDateTime64(column_type))
|
||||
{
|
||||
start_date_or_date_time = date_lut.timeToString(range.first.get<DateLUTImpl::Time>());
|
||||
end_date_or_date_time = date_lut.timeToString(range.second.get<DateLUTImpl::Time>());
|
||||
start_date_or_date_time = date_lut.timeToString(range.first.safeGet<DateLUTImpl::Time>());
|
||||
end_date_or_date_time = date_lut.timeToString(range.second.safeGet<DateLUTImpl::Time>());
|
||||
}
|
||||
else [[unlikely]]
|
||||
return {};
|
||||
|
@ -60,7 +60,7 @@ public:
|
||||
{
|
||||
const auto & second_const_value = second_const_node->getValue();
|
||||
if (second_const_value.isNull()
|
||||
|| (lower_name == "sum" && isInt64OrUInt64FieldType(second_const_value.getType()) && second_const_value.get<UInt64>() == 0
|
||||
|| (lower_name == "sum" && isInt64OrUInt64FieldType(second_const_value.getType()) && second_const_value.safeGet<UInt64>() == 0
|
||||
&& !if_node->getResultType()->isNullable()))
|
||||
{
|
||||
/// avg(if(cond, a, null)) -> avgIf(a::ResultTypeIf, cond)
|
||||
@ -89,7 +89,7 @@ public:
|
||||
{
|
||||
const auto & first_const_value = first_const_node->getValue();
|
||||
if (first_const_value.isNull()
|
||||
|| (lower_name == "sum" && isInt64OrUInt64FieldType(first_const_value.getType()) && first_const_value.get<UInt64>() == 0
|
||||
|| (lower_name == "sum" && isInt64OrUInt64FieldType(first_const_value.getType()) && first_const_value.safeGet<UInt64>() == 0
|
||||
&& !if_node->getResultType()->isNullable()))
|
||||
{
|
||||
/// avg(if(cond, null, a) -> avgIf(a::ResultTypeIf, !cond))
|
||||
|
@ -66,7 +66,7 @@ public:
|
||||
|
||||
resolveAggregateFunctionNodeByName(*function_node, "countIf");
|
||||
|
||||
if (constant_value_literal.get<UInt64>() != 1)
|
||||
if (constant_value_literal.safeGet<UInt64>() != 1)
|
||||
{
|
||||
/// Rewrite `sumIf(123, cond)` into `123 * countIf(cond)`
|
||||
node = getMultiplyFunction(std::move(multiplier_node), node);
|
||||
@ -105,8 +105,8 @@ public:
|
||||
const auto & if_true_condition_constant_value_literal = if_true_condition_constant_node->getValue();
|
||||
const auto & if_false_condition_constant_value_literal = if_false_condition_constant_node->getValue();
|
||||
|
||||
auto if_true_condition_value = if_true_condition_constant_value_literal.get<UInt64>();
|
||||
auto if_false_condition_value = if_false_condition_constant_value_literal.get<UInt64>();
|
||||
auto if_true_condition_value = if_true_condition_constant_value_literal.safeGet<UInt64>();
|
||||
auto if_false_condition_value = if_false_condition_constant_value_literal.safeGet<UInt64>();
|
||||
|
||||
if (if_false_condition_value == 0)
|
||||
{
|
||||
|
@ -471,7 +471,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_express
|
||||
|
||||
std::shared_ptr<Collator> collator;
|
||||
if (order_by_element.getCollation())
|
||||
collator = std::make_shared<Collator>(order_by_element.getCollation()->as<ASTLiteral &>().value.get<String &>());
|
||||
collator = std::make_shared<Collator>(order_by_element.getCollation()->as<ASTLiteral &>().value.safeGet<String &>());
|
||||
|
||||
const auto & sort_expression_ast = order_by_element.children.at(0);
|
||||
auto sort_expression = buildExpression(sort_expression_ast, context);
|
||||
|
@ -1273,7 +1273,7 @@ QueryTreeNodePtr IdentifierResolver::matchArrayJoinSubcolumns(
|
||||
const auto & constant_node_value = constant_node.getValue();
|
||||
if (constant_node_value.getType() == Field::Types::String)
|
||||
{
|
||||
array_join_subcolumn_prefix = constant_node_value.get<String>() + ".";
|
||||
array_join_subcolumn_prefix = constant_node_value.safeGet<String>() + ".";
|
||||
array_join_parent_column = argument_nodes.at(0).get();
|
||||
}
|
||||
}
|
||||
@ -1287,7 +1287,7 @@ QueryTreeNodePtr IdentifierResolver::matchArrayJoinSubcolumns(
|
||||
if (!second_argument || second_argument->getValue().getType() != Field::Types::String)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected constant string as second argument of getSubcolumn function {}", resolved_function->dumpTree());
|
||||
|
||||
const auto & resolved_subcolumn_path = second_argument->getValue().get<String &>();
|
||||
const auto & resolved_subcolumn_path = second_argument->getValue().safeGet<String &>();
|
||||
if (!startsWith(resolved_subcolumn_path, array_join_subcolumn_prefix))
|
||||
return {};
|
||||
|
||||
@ -1331,7 +1331,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveExpressionFromArrayJoinExpression
|
||||
size_t nested_function_arguments_size = nested_function_arguments.size();
|
||||
|
||||
const auto & nested_keys_names_constant_node = nested_function_arguments[0]->as<ConstantNode & >();
|
||||
const auto & nested_keys_names = nested_keys_names_constant_node.getValue().get<Array &>();
|
||||
const auto & nested_keys_names = nested_keys_names_constant_node.getValue().safeGet<Array &>();
|
||||
size_t nested_keys_names_size = nested_keys_names.size();
|
||||
|
||||
if (nested_keys_names_size == nested_function_arguments_size - 1)
|
||||
@ -1344,7 +1344,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveExpressionFromArrayJoinExpression
|
||||
auto array_join_column = std::make_shared<ColumnNode>(array_join_column_expression_typed.getColumn(),
|
||||
array_join_column_expression_typed.getColumnSource());
|
||||
|
||||
const auto & nested_key_name = nested_keys_names[i - 1].get<String &>();
|
||||
const auto & nested_key_name = nested_keys_names[i - 1].safeGet<String &>();
|
||||
Identifier nested_identifier = Identifier(nested_key_name);
|
||||
array_join_resolved_expression = wrapExpressionNodeInTupleElement(array_join_column, nested_identifier, scope.context);
|
||||
break;
|
||||
|
@ -748,11 +748,11 @@ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_
|
||||
UInt64 pos;
|
||||
if (constant_node->getValue().getType() == Field::Types::UInt64)
|
||||
{
|
||||
pos = constant_node->getValue().get<UInt64>();
|
||||
pos = constant_node->getValue().safeGet<UInt64>();
|
||||
}
|
||||
else // Int64
|
||||
{
|
||||
auto value = constant_node->getValue().get<Int64>();
|
||||
auto value = constant_node->getValue().safeGet<Int64>();
|
||||
if (value > 0)
|
||||
pos = value;
|
||||
else
|
||||
|
@ -99,7 +99,7 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes&
|
||||
"Invalid type in set. Expected tuple, got {}",
|
||||
value.getTypeName());
|
||||
|
||||
const auto & tuple = value.template get<const Tuple &>();
|
||||
const auto & tuple = value.template safeGet<const Tuple &>();
|
||||
const DataTypePtr & value_type = value_types[collection_index];
|
||||
const DataTypes & tuple_value_type = typeid_cast<const DataTypeTuple *>(value_type.get())->getElements();
|
||||
|
||||
@ -175,15 +175,15 @@ Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const
|
||||
if (rhs_which_type.isArray())
|
||||
{
|
||||
const DataTypeArray * value_array_type = assert_cast<const DataTypeArray *>(value_type.get());
|
||||
size_t value_array_size = value.get<const Array &>().size();
|
||||
size_t value_array_size = value.safeGet<const Array &>().size();
|
||||
DataTypes value_types(value_array_size, value_array_type->getNestedType());
|
||||
result_block = createBlockFromCollection(value.get<const Array &>(), value_types, set_element_types, transform_null_in);
|
||||
result_block = createBlockFromCollection(value.safeGet<const Array &>(), value_types, set_element_types, transform_null_in);
|
||||
}
|
||||
else if (rhs_which_type.isTuple())
|
||||
{
|
||||
const DataTypeTuple * value_tuple_type = assert_cast<const DataTypeTuple *>(value_type.get());
|
||||
const DataTypes & value_types = value_tuple_type->getElements();
|
||||
result_block = createBlockFromCollection(value.get<const Tuple &>(), value_types, set_element_types, transform_null_in);
|
||||
result_block = createBlockFromCollection(value.safeGet<const Tuple &>(), value_types, set_element_types, transform_null_in);
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
|
@ -126,7 +126,7 @@ std::vector<Strings> BackupSettings::Util::clusterHostIDsFromAST(const IAST & as
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_PARSE_BACKUP_SETTINGS,
|
||||
"Setting cluster_host_ids has wrong format, must be array of arrays of string literals");
|
||||
const auto & replicas = array_of_replicas->value.get<const Array &>();
|
||||
const auto & replicas = array_of_replicas->value.safeGet<const Array &>();
|
||||
res[i].resize(replicas.size());
|
||||
for (size_t j = 0; j != replicas.size(); ++j)
|
||||
{
|
||||
@ -135,7 +135,7 @@ std::vector<Strings> BackupSettings::Util::clusterHostIDsFromAST(const IAST & as
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_PARSE_BACKUP_SETTINGS,
|
||||
"Setting cluster_host_ids has wrong format, must be array of arrays of string literals");
|
||||
res[i][j] = replica.get<const String &>();
|
||||
res[i][j] = replica.safeGet<const String &>();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -490,6 +490,8 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
|
||||
|
||||
/// process_list_element_holder is used to make an element in ProcessList live while BACKUP is working asynchronously.
|
||||
auto process_list_element = context_in_use->getProcessListElement();
|
||||
/// Update context to preserve query information in processlist (settings, current_database)
|
||||
process_list_element->updateContext(context_in_use);
|
||||
|
||||
thread_pool.scheduleOrThrowOnError(
|
||||
[this,
|
||||
@ -853,6 +855,8 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
|
||||
|
||||
/// process_list_element_holder is used to make an element in ProcessList live while RESTORE is working asynchronously.
|
||||
auto process_list_element = context_in_use->getProcessListElement();
|
||||
/// Update context to preserve query information in processlist (settings, current_database)
|
||||
process_list_element->updateContext(context_in_use);
|
||||
|
||||
thread_pool.scheduleOrThrowOnError(
|
||||
[this,
|
||||
|
@ -46,8 +46,8 @@ namespace
|
||||
if (zookeeper_path_ast && (zookeeper_path_ast->value.getType() == Field::Types::String) &&
|
||||
replica_name_ast && (replica_name_ast->value.getType() == Field::Types::String))
|
||||
{
|
||||
String & zookeeper_path_arg = zookeeper_path_ast->value.get<String>();
|
||||
String & replica_name_arg = replica_name_ast->value.get<String>();
|
||||
String & zookeeper_path_arg = zookeeper_path_ast->value.safeGet<String>();
|
||||
String & replica_name_arg = replica_name_ast->value.safeGet<String>();
|
||||
if (create.uuid != UUIDHelpers::Nil)
|
||||
{
|
||||
String table_uuid_str = toString(create.uuid);
|
||||
|
@ -31,7 +31,7 @@ namespace
|
||||
{
|
||||
if (field.getType() == Field::Types::String)
|
||||
{
|
||||
const String & str = field.get<const String &>();
|
||||
const String & str = field.safeGet<const String &>();
|
||||
if (str == "1" || boost::iequals(str, "true") || boost::iequals(str, "create"))
|
||||
{
|
||||
value = RestoreTableCreationMode::kCreate;
|
||||
@ -54,7 +54,7 @@ namespace
|
||||
|
||||
if (field.getType() == Field::Types::UInt64)
|
||||
{
|
||||
UInt64 number = field.get<UInt64>();
|
||||
UInt64 number = field.safeGet<UInt64>();
|
||||
if (number == 1)
|
||||
{
|
||||
value = RestoreTableCreationMode::kCreate;
|
||||
@ -95,7 +95,7 @@ namespace
|
||||
{
|
||||
if (field.getType() == Field::Types::String)
|
||||
{
|
||||
const String & str = field.get<const String &>();
|
||||
const String & str = field.safeGet<const String &>();
|
||||
if (str == "1" || boost::iequals(str, "true") || boost::iequals(str, "create"))
|
||||
{
|
||||
value = RestoreAccessCreationMode::kCreate;
|
||||
@ -118,7 +118,7 @@ namespace
|
||||
|
||||
if (field.getType() == Field::Types::UInt64)
|
||||
{
|
||||
UInt64 number = field.get<UInt64>();
|
||||
UInt64 number = field.safeGet<UInt64>();
|
||||
if (number == 1)
|
||||
{
|
||||
value = RestoreAccessCreationMode::kCreate;
|
||||
|
@ -19,7 +19,7 @@ SettingFieldOptionalString::SettingFieldOptionalString(const Field & field)
|
||||
|
||||
if (field.getType() == Field::Types::String)
|
||||
{
|
||||
value = field.get<const String &>();
|
||||
value = field.safeGet<const String &>();
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -22,7 +22,7 @@ namespace ErrorCodes
|
||||
|
||||
if (field.getType() == Field::Types::String)
|
||||
{
|
||||
const String & str = field.get<const String &>();
|
||||
const String & str = field.safeGet<const String &>();
|
||||
if (str.empty())
|
||||
{
|
||||
value = std::nullopt;
|
||||
|
@ -1,4 +1,4 @@
|
||||
add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
|
||||
add_compile_options("$<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>")
|
||||
|
||||
if (USE_INCLUDE_WHAT_YOU_USE)
|
||||
set (CMAKE_CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})
|
||||
@ -602,10 +602,6 @@ endif()
|
||||
|
||||
dbms_target_link_libraries(PUBLIC ch_contrib::consistent_hashing)
|
||||
|
||||
if (TARGET ch_contrib::annoy)
|
||||
dbms_target_link_libraries(PUBLIC ch_contrib::annoy)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::usearch)
|
||||
dbms_target_link_libraries(PUBLIC ch_contrib::usearch)
|
||||
endif()
|
||||
|
@ -2751,7 +2751,7 @@ void ClientBase::runLibFuzzer()
|
||||
for (auto & arg : fuzzer_args_holder)
|
||||
fuzzer_args.emplace_back(arg.data());
|
||||
|
||||
int fuzzer_argc = fuzzer_args.size();
|
||||
int fuzzer_argc = static_cast<int>(fuzzer_args.size());
|
||||
char ** fuzzer_argv = fuzzer_args.data();
|
||||
|
||||
LLVMFuzzerRunDriver(&fuzzer_argc, &fuzzer_argv, [](const uint8_t * data, size_t size)
|
||||
|
@ -214,7 +214,7 @@ void Suggest::fillWordsFromBlock(const Block & block)
|
||||
Words new_words;
|
||||
new_words.reserve(rows);
|
||||
for (size_t i = 0; i < rows; ++i)
|
||||
new_words.emplace_back(column[i].get<String>());
|
||||
new_words.emplace_back(column[i].safeGet<String>());
|
||||
|
||||
addWords(std::move(new_words));
|
||||
}
|
||||
|
@ -457,9 +457,9 @@ MutableColumnPtr ColumnAggregateFunction::cloneEmpty() const
|
||||
Field ColumnAggregateFunction::operator[](size_t n) const
|
||||
{
|
||||
Field field = AggregateFunctionStateData();
|
||||
field.get<AggregateFunctionStateData &>().name = type_string;
|
||||
field.safeGet<AggregateFunctionStateData &>().name = type_string;
|
||||
{
|
||||
WriteBufferFromString buffer(field.get<AggregateFunctionStateData &>().data);
|
||||
WriteBufferFromString buffer(field.safeGet<AggregateFunctionStateData &>().data);
|
||||
func->serialize(data[n], buffer, version);
|
||||
}
|
||||
return field;
|
||||
@ -467,12 +467,7 @@ Field ColumnAggregateFunction::operator[](size_t n) const
|
||||
|
||||
void ColumnAggregateFunction::get(size_t n, Field & res) const
|
||||
{
|
||||
res = AggregateFunctionStateData();
|
||||
res.get<AggregateFunctionStateData &>().name = type_string;
|
||||
{
|
||||
WriteBufferFromString buffer(res.get<AggregateFunctionStateData &>().data);
|
||||
func->serialize(data[n], buffer, version);
|
||||
}
|
||||
res = operator[](n);
|
||||
}
|
||||
|
||||
StringRef ColumnAggregateFunction::getDataAt(size_t n) const
|
||||
@ -552,7 +547,7 @@ void ColumnAggregateFunction::insert(const Field & x)
|
||||
"Inserting field of type {} into ColumnAggregateFunction. Expected {}",
|
||||
x.getTypeName(), Field::Types::AggregateFunctionState);
|
||||
|
||||
const auto & field_name = x.get<const AggregateFunctionStateData &>().name;
|
||||
const auto & field_name = x.safeGet<const AggregateFunctionStateData &>().name;
|
||||
if (type_string != field_name)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot insert filed with type {} into column with type {}",
|
||||
field_name, type_string);
|
||||
@ -560,7 +555,7 @@ void ColumnAggregateFunction::insert(const Field & x)
|
||||
ensureOwnership();
|
||||
Arena & arena = createOrGetArena();
|
||||
pushBackAndCreateState(data, arena, func.get());
|
||||
ReadBufferFromString read_buffer(x.get<const AggregateFunctionStateData &>().data);
|
||||
ReadBufferFromString read_buffer(x.safeGet<const AggregateFunctionStateData &>().data);
|
||||
func->deserialize(data.back(), read_buffer, version, &arena);
|
||||
}
|
||||
|
||||
@ -569,14 +564,14 @@ bool ColumnAggregateFunction::tryInsert(const DB::Field & x)
|
||||
if (x.getType() != Field::Types::AggregateFunctionState)
|
||||
return false;
|
||||
|
||||
const auto & field_name = x.get<const AggregateFunctionStateData &>().name;
|
||||
const auto & field_name = x.safeGet<const AggregateFunctionStateData &>().name;
|
||||
if (type_string != field_name)
|
||||
return false;
|
||||
|
||||
ensureOwnership();
|
||||
Arena & arena = createOrGetArena();
|
||||
pushBackAndCreateState(data, arena, func.get());
|
||||
ReadBufferFromString read_buffer(x.get<const AggregateFunctionStateData &>().data);
|
||||
ReadBufferFromString read_buffer(x.safeGet<const AggregateFunctionStateData &>().data);
|
||||
func->deserialize(data.back(), read_buffer, version, &arena);
|
||||
return true;
|
||||
}
|
||||
|
@ -141,7 +141,7 @@ void ColumnArray::get(size_t n, Field & res) const
|
||||
size, max_array_size_as_field);
|
||||
|
||||
res = Array();
|
||||
Array & res_arr = res.get<Array &>();
|
||||
Array & res_arr = res.safeGet<Array &>();
|
||||
res_arr.reserve(size);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
@ -309,7 +309,7 @@ void ColumnArray::updateHashFast(SipHash & hash) const
|
||||
|
||||
void ColumnArray::insert(const Field & x)
|
||||
{
|
||||
const Array & array = x.get<const Array &>();
|
||||
const Array & array = x.safeGet<const Array &>();
|
||||
size_t size = array.size();
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
getData().insert(array[i]);
|
||||
@ -321,7 +321,7 @@ bool ColumnArray::tryInsert(const Field & x)
|
||||
if (x.getType() != Field::Types::Which::Array)
|
||||
return false;
|
||||
|
||||
const Array & array = x.get<const Array &>();
|
||||
const Array & array = x.safeGet<const Array &>();
|
||||
size_t size = array.size();
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
@ -452,6 +452,22 @@ void ColumnArray::reserve(size_t n)
|
||||
getData().reserve(n); /// The average size of arrays is not taken into account here. Or it is considered to be no more than 1.
|
||||
}
|
||||
|
||||
void ColumnArray::prepareForSquashing(const Columns & source_columns)
|
||||
{
|
||||
size_t new_size = size();
|
||||
Columns source_data_columns;
|
||||
source_data_columns.reserve(source_columns.size());
|
||||
for (const auto & source_column : source_columns)
|
||||
{
|
||||
const auto & source_array_column = assert_cast<const ColumnArray &>(*source_column);
|
||||
new_size += source_array_column.size();
|
||||
source_data_columns.push_back(source_array_column.getDataPtr());
|
||||
}
|
||||
|
||||
getOffsets().reserve_exact(new_size);
|
||||
data->prepareForSquashing(source_data_columns);
|
||||
}
|
||||
|
||||
void ColumnArray::shrinkToFit()
|
||||
{
|
||||
getOffsets().shrink_to_fit();
|
||||
|
@ -118,6 +118,7 @@ public:
|
||||
void updatePermutationWithCollation(const Collator & collator, PermutationSortDirection direction, PermutationSortStability stability,
|
||||
size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override;
|
||||
void reserve(size_t n) override;
|
||||
void prepareForSquashing(const Columns & source_columns) override;
|
||||
void shrinkToFit() override;
|
||||
void ensureOwnership() override;
|
||||
size_t byteSize() const override;
|
||||
|
@ -74,7 +74,7 @@ public:
|
||||
void insertData(const char * src, size_t /*length*/) override;
|
||||
void insertDefault() override { data.push_back(T()); }
|
||||
void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); }
|
||||
void insert(const Field & x) override { data.push_back(x.get<T>()); }
|
||||
void insert(const Field & x) override { data.push_back(x.safeGet<T>()); }
|
||||
bool tryInsert(const Field & x) override;
|
||||
#if !defined(DEBUG_OR_SANITIZER_BUILD)
|
||||
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||
|
@ -643,6 +643,116 @@ ColumnPtr ColumnDynamic::compress() const
|
||||
});
|
||||
}
|
||||
|
||||
void ColumnDynamic::prepareForSquashing(const Columns & source_columns)
|
||||
{
|
||||
if (source_columns.empty())
|
||||
return;
|
||||
|
||||
/// Internal variants of source dynamic columns may differ.
|
||||
/// We want to preallocate memory for all variants we will have after squashing.
|
||||
/// It may happen that the total number of variants in source columns will
|
||||
/// exceed the limit, in this case we will choose the most frequent variants.
|
||||
|
||||
/// First, preallocate memory for variant discriminators and offsets.
|
||||
size_t new_size = size();
|
||||
for (const auto & source_column : source_columns)
|
||||
new_size += source_column->size();
|
||||
auto & variant_col = getVariantColumn();
|
||||
variant_col.getLocalDiscriminators().reserve_exact(new_size);
|
||||
variant_col.getOffsets().reserve_exact(new_size);
|
||||
|
||||
/// Second, collect all variants and their total sizes.
|
||||
std::unordered_map<String, size_t> total_variant_sizes;
|
||||
DataTypes all_variants;
|
||||
|
||||
auto add_variants = [&](const ColumnDynamic & source_dynamic)
|
||||
{
|
||||
const auto & source_variant_column = source_dynamic.getVariantColumn();
|
||||
const auto & source_variant_info = source_dynamic.getVariantInfo();
|
||||
const auto & source_variants = assert_cast<const DataTypeVariant &>(*source_variant_info.variant_type).getVariants();
|
||||
|
||||
for (size_t i = 0; i != source_variants.size(); ++i)
|
||||
{
|
||||
const auto & variant_name = source_variant_info.variant_names[i];
|
||||
auto it = total_variant_sizes.find(variant_name);
|
||||
/// Add this variant to the list of all variants if we didn't see it yet.
|
||||
if (it == total_variant_sizes.end())
|
||||
{
|
||||
all_variants.push_back(source_variants[i]);
|
||||
it = total_variant_sizes.emplace(variant_name, 0).first;
|
||||
}
|
||||
|
||||
it->second += source_variant_column.getVariantByGlobalDiscriminator(i).size();
|
||||
}
|
||||
};
|
||||
|
||||
for (const auto & source_column : source_columns)
|
||||
add_variants(assert_cast<const ColumnDynamic &>(*source_column));
|
||||
|
||||
/// Add variants from this dynamic column.
|
||||
add_variants(*this);
|
||||
|
||||
DataTypePtr result_variant_type;
|
||||
/// Check if the number of all variants exceeds the limit.
|
||||
if (all_variants.size() > max_dynamic_types || (all_variants.size() == max_dynamic_types && !total_variant_sizes.contains("String")))
|
||||
{
|
||||
/// We want to keep the most frequent variants in the resulting dynamic column.
|
||||
DataTypes result_variants;
|
||||
result_variants.reserve(max_dynamic_types);
|
||||
/// Add variants from current variant column as we will not rewrite it.
|
||||
for (const auto & variant : assert_cast<const DataTypeVariant &>(*variant_info.variant_type).getVariants())
|
||||
result_variants.push_back(variant);
|
||||
/// Add String variant in advance (if we didn't add it yet) as we must have it across variants when we reach the limit.
|
||||
if (!variant_info.variant_name_to_discriminator.contains("String"))
|
||||
result_variants.push_back(std::make_shared<DataTypeString>());
|
||||
|
||||
/// Create list of remaining variants with their sizes and sort it.
|
||||
std::vector<std::pair<size_t, DataTypePtr>> variants_with_sizes;
|
||||
variants_with_sizes.reserve(all_variants.size() - variant_info.variant_names.size());
|
||||
for (const auto & variant : all_variants)
|
||||
{
|
||||
/// Add variant to the list only of we didn't add it yet.
|
||||
auto variant_name = variant->getName();
|
||||
if (variant_name != "String" && !variant_info.variant_name_to_discriminator.contains(variant_name))
|
||||
variants_with_sizes.emplace_back(total_variant_sizes[variant->getName()], variant);
|
||||
}
|
||||
|
||||
std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater());
|
||||
/// Add the most frequent variants until we reach max_dynamic_types.
|
||||
size_t num_new_variants = max_dynamic_types - result_variants.size();
|
||||
for (size_t i = 0; i != num_new_variants; ++i)
|
||||
result_variants.push_back(variants_with_sizes[i].second);
|
||||
|
||||
result_variant_type = std::make_shared<DataTypeVariant>(result_variants);
|
||||
}
|
||||
else
|
||||
{
|
||||
result_variant_type = std::make_shared<DataTypeVariant>(all_variants);
|
||||
}
|
||||
|
||||
if (!result_variant_type->equals(*variant_info.variant_type))
|
||||
updateVariantInfoAndExpandVariantColumn(result_variant_type);
|
||||
|
||||
/// Now current dynamic column has all resulting variants and we can call
|
||||
/// prepareForSquashing on them to preallocate the memory.
|
||||
for (size_t i = 0; i != variant_info.variant_names.size(); ++i)
|
||||
{
|
||||
Columns source_variant_columns;
|
||||
source_variant_columns.reserve(source_columns.size());
|
||||
for (const auto & source_column : source_columns)
|
||||
{
|
||||
const auto & source_dynamic_column = assert_cast<const ColumnDynamic &>(*source_column);
|
||||
const auto & source_variant_info = source_dynamic_column.getVariantInfo();
|
||||
/// Try to find this variant in the current source column.
|
||||
auto it = source_variant_info.variant_name_to_discriminator.find(variant_info.variant_names[i]);
|
||||
if (it != source_variant_info.variant_name_to_discriminator.end())
|
||||
source_variant_columns.push_back(source_dynamic_column.getVariantColumn().getVariantPtrByGlobalDiscriminator(it->second));
|
||||
}
|
||||
|
||||
variant_col.getVariantByGlobalDiscriminator(i).prepareForSquashing(source_variant_columns);
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source_columns)
|
||||
{
|
||||
if (!empty())
|
||||
|
@ -254,6 +254,8 @@ public:
|
||||
variant_column->reserve(n);
|
||||
}
|
||||
|
||||
void prepareForSquashing(const Columns & source_columns) override;
|
||||
|
||||
void ensureOwnership() override
|
||||
{
|
||||
variant_column->ensureOwnership();
|
||||
|
@ -59,7 +59,7 @@ bool ColumnFixedString::isDefaultAt(size_t index) const
|
||||
|
||||
void ColumnFixedString::insert(const Field & x)
|
||||
{
|
||||
const String & s = x.get<const String &>();
|
||||
const String & s = x.safeGet<const String &>();
|
||||
insertData(s.data(), s.size());
|
||||
}
|
||||
|
||||
@ -67,7 +67,7 @@ bool ColumnFixedString::tryInsert(const Field & x)
|
||||
{
|
||||
if (x.getType() != Field::Types::Which::String)
|
||||
return false;
|
||||
const String & s = x.get<const String &>();
|
||||
const String & s = x.safeGet<const String &>();
|
||||
if (s.size() > n)
|
||||
return false;
|
||||
insertData(s.data(), s.size());
|
||||
|
@ -72,7 +72,7 @@ void ColumnMap::get(size_t n, Field & res) const
|
||||
size_t size = offsets[n] - offsets[n - 1];
|
||||
|
||||
res = Map();
|
||||
auto & map = res.get<Map &>();
|
||||
auto & map = res.safeGet<Map &>();
|
||||
map.reserve(size);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
@ -96,7 +96,7 @@ void ColumnMap::insertData(const char *, size_t)
|
||||
|
||||
void ColumnMap::insert(const Field & x)
|
||||
{
|
||||
const auto & map = x.get<const Map &>();
|
||||
const auto & map = x.safeGet<const Map &>();
|
||||
nested->insert(Array(map.begin(), map.end()));
|
||||
}
|
||||
|
||||
@ -105,7 +105,7 @@ bool ColumnMap::tryInsert(const Field & x)
|
||||
if (x.getType() != Field::Types::Which::Map)
|
||||
return false;
|
||||
|
||||
const auto & map = x.get<const Map &>();
|
||||
const auto & map = x.safeGet<const Map &>();
|
||||
return nested->tryInsert(Array(map.begin(), map.end()));
|
||||
}
|
||||
|
||||
@ -249,6 +249,15 @@ void ColumnMap::reserve(size_t n)
|
||||
nested->reserve(n);
|
||||
}
|
||||
|
||||
void ColumnMap::prepareForSquashing(const Columns & source_columns)
|
||||
{
|
||||
Columns nested_source_columns;
|
||||
nested_source_columns.reserve(source_columns.size());
|
||||
for (const auto & source_column : source_columns)
|
||||
nested_source_columns.push_back(assert_cast<const ColumnMap &>(*source_column).getNestedColumnPtr());
|
||||
nested->prepareForSquashing(nested_source_columns);
|
||||
}
|
||||
|
||||
void ColumnMap::shrinkToFit()
|
||||
{
|
||||
nested->shrinkToFit();
|
||||
@ -288,8 +297,8 @@ void ColumnMap::getExtremes(Field & min, Field & max) const
|
||||
|
||||
/// Convert result Array fields to Map fields because client expect min and max field to have type Map
|
||||
|
||||
Array nested_min_value = nested_min.get<Array>();
|
||||
Array nested_max_value = nested_max.get<Array>();
|
||||
Array nested_min_value = nested_min.safeGet<Array>();
|
||||
Array nested_max_value = nested_max.safeGet<Array>();
|
||||
|
||||
Map map_min_value(nested_min_value.begin(), nested_min_value.end());
|
||||
Map map_max_value(nested_max_value.begin(), nested_max_value.end());
|
||||
|
@ -94,6 +94,7 @@ public:
|
||||
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
||||
size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override;
|
||||
void reserve(size_t n) override;
|
||||
void prepareForSquashing(const Columns & source_columns) override;
|
||||
void shrinkToFit() override;
|
||||
void ensureOwnership() override;
|
||||
size_t byteSize() const override;
|
||||
|
@ -706,6 +706,22 @@ void ColumnNullable::reserve(size_t n)
|
||||
getNullMapData().reserve(n);
|
||||
}
|
||||
|
||||
void ColumnNullable::prepareForSquashing(const Columns & source_columns)
|
||||
{
|
||||
size_t new_size = size();
|
||||
Columns nested_source_columns;
|
||||
nested_source_columns.reserve(source_columns.size());
|
||||
for (const auto & source_column : source_columns)
|
||||
{
|
||||
const auto & source_nullable_column = assert_cast<const ColumnNullable &>(*source_column);
|
||||
new_size += source_nullable_column.size();
|
||||
nested_source_columns.push_back(source_nullable_column.getNestedColumnPtr());
|
||||
}
|
||||
|
||||
nested_column->prepareForSquashing(nested_source_columns);
|
||||
getNullMapData().reserve(new_size);
|
||||
}
|
||||
|
||||
void ColumnNullable::shrinkToFit()
|
||||
{
|
||||
getNestedColumn().shrinkToFit();
|
||||
|
@ -125,6 +125,7 @@ public:
|
||||
size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override;
|
||||
size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override;
|
||||
void reserve(size_t n) override;
|
||||
void prepareForSquashing(const Columns & source_columns) override;
|
||||
void shrinkToFit() override;
|
||||
void ensureOwnership() override;
|
||||
size_t byteSize() const override;
|
||||
|
@ -698,7 +698,7 @@ void ColumnObject::forEachSubcolumnRecursively(RecursiveMutableColumnCallback ca
|
||||
|
||||
void ColumnObject::insert(const Field & field)
|
||||
{
|
||||
const auto & object = field.get<const Object &>();
|
||||
const auto & object = field.safeGet<const Object &>();
|
||||
|
||||
HashSet<StringRef, StringRefHash> inserted_paths;
|
||||
size_t old_size = size();
|
||||
@ -754,7 +754,7 @@ void ColumnObject::get(size_t n, Field & res) const
|
||||
{
|
||||
assert(n < size());
|
||||
res = Object();
|
||||
auto & object = res.get<Object &>();
|
||||
auto & object = res.safeGet<Object &>();
|
||||
|
||||
for (const auto & entry : subcolumns)
|
||||
{
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user