Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-29 19:12:03 +00:00)

Merge branch 'master' into fix-test-throttling

This commit is contained in: commit b2b0f9e661

.github/PULL_REQUEST_TEMPLATE.md (vendored, 3 changes)
@@ -59,6 +59,9 @@ At a minimum, the following information should be added (but add more as needed)
 - [ ] <!---ci_exclude_tsan|msan|ubsan|coverage--> Exclude: All with TSAN, MSAN, UBSAN, Coverage
 - [ ] <!---ci_exclude_aarch64|release|debug--> Exclude: All with aarch64, release, debug
 ---
+- [ ] <!---ci_include_fuzzer--> Run only fuzzers related jobs (libFuzzer fuzzers, AST fuzzers, etc.)
+- [ ] <!---ci_exclude_ast--> Exclude: AST fuzzers
+---
 - [ ] <!---do_not_test--> Do not test
 - [ ] <!---woolen_wolfdog--> Woolen Wolfdog
 - [ ] <!---upload_all--> Upload binaries for special builds
.gitmodules (vendored, 3 changes)

@@ -230,9 +230,6 @@
 [submodule "contrib/minizip-ng"]
 	path = contrib/minizip-ng
 	url = https://github.com/zlib-ng/minizip-ng
-[submodule "contrib/annoy"]
-	path = contrib/annoy
-	url = https://github.com/ClickHouse/annoy
 [submodule "contrib/qpl"]
 	path = contrib/qpl
 	url = https://github.com/intel/qpl
@@ -1,4 +1,4 @@
-add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
+add_compile_options("$<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>")
 
 if (USE_CLANG_TIDY)
     set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
@@ -58,6 +58,10 @@ namespace Net
 		void setKeepAliveTimeout(Poco::Timespan keepAliveTimeout);
 
+		size_t getKeepAliveTimeout() const { return _keepAliveTimeout.totalSeconds(); }
+
+		size_t getMaxKeepAliveRequests() const { return _maxKeepAliveRequests; }
+
 	private:
 		bool _firstRequest;
 		Poco::Timespan _keepAliveTimeout;
@@ -19,11 +19,11 @@ namespace Poco {
 namespace Net {
 
 
-HTTPServerSession::HTTPServerSession(const StreamSocket& socket, HTTPServerParams::Ptr pParams):
-	HTTPSession(socket, pParams->getKeepAlive()),
-	_firstRequest(true),
-	_keepAliveTimeout(pParams->getKeepAliveTimeout()),
-	_maxKeepAliveRequests(pParams->getMaxKeepAliveRequests())
+HTTPServerSession::HTTPServerSession(const StreamSocket & socket, HTTPServerParams::Ptr pParams)
+    : HTTPSession(socket, pParams->getKeepAlive())
+    , _firstRequest(true)
+    , _keepAliveTimeout(pParams->getKeepAliveTimeout())
+    , _maxKeepAliveRequests(pParams->getMaxKeepAliveRequests())
 {
 	setTimeout(pParams->getTimeout());
 }

@@ -56,7 +56,8 @@ bool HTTPServerSession::hasMoreRequests()
         --_maxKeepAliveRequests;
         return buffered() > 0 || socket().poll(_keepAliveTimeout, Socket::SELECT_READ);
     }
-    else return false;
+    else
+        return false;
 }
@@ -57,7 +57,7 @@ option(WITH_COVERAGE "Instrumentation for code coverage with default implementat
 
 if (WITH_COVERAGE)
     message (STATUS "Enabled instrumentation for code coverage")
-    set(COVERAGE_FLAGS "SHELL:-fprofile-instr-generate -fcoverage-mapping")
+    set (COVERAGE_FLAGS -fprofile-instr-generate -fcoverage-mapping)
     set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
 endif()
contrib/CMakeLists.txt (vendored, 3 changes)

@@ -205,9 +205,8 @@ add_contrib (morton-nd-cmake morton-nd)
 if (ARCH_S390X)
     add_contrib(crc32-s390x-cmake crc32-s390x)
 endif()
-add_contrib (annoy-cmake annoy)
 
-option(ENABLE_USEARCH "Enable USearch (Approximate Neighborhood Search, HNSW) support" ${ENABLE_LIBRARIES})
+option(ENABLE_USEARCH "Enable USearch" ${ENABLE_LIBRARIES})
 if (ENABLE_USEARCH)
     add_contrib (FP16-cmake FP16)
     add_contrib (robin-map-cmake robin-map)
contrib/annoy (vendored, 1 change)

@@ -1 +0,0 @@
-Subproject commit f2ac8e7b48f9a9cf676d3b58286e5455aba8e956

@@ -1,24 +0,0 @@
-option(ENABLE_ANNOY "Enable Annoy index support" ${ENABLE_LIBRARIES})
-
-# Annoy index should be disabled with undefined sanitizer. Because of memory storage optimizations
-# (https://github.com/ClickHouse/annoy/blob/9d8a603a4cd252448589e84c9846f94368d5a289/src/annoylib.h#L442-L463)
-# UBSan fails and leads to crash. Simmilar issue is already opened in Annoy repo
-# https://github.com/spotify/annoy/issues/456
-# Problem with aligment can lead to errors like
-# (https://stackoverflow.com/questions/46790550/c-undefined-behavior-strict-aliasing-rule-or-incorrect-alignment)
-# or will lead to crash on arm https://developer.arm.com/documentation/ka003038/latest
-# This issues should be resolved before annoy became non-experimental (--> setting "allow_experimental_annoy_index")
-if ((NOT ENABLE_ANNOY) OR (SANITIZE STREQUAL "undefined") OR (ARCH_AARCH64))
-    message (STATUS "Not using annoy")
-    return()
-endif()
-
-set(ANNOY_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/annoy")
-set(ANNOY_SOURCE_DIR "${ANNOY_PROJECT_DIR}/src")
-
-add_library(_annoy INTERFACE)
-target_include_directories(_annoy SYSTEM INTERFACE ${ANNOY_SOURCE_DIR})
-
-add_library(ch_contrib::annoy ALIAS _annoy)
-target_compile_definitions(_annoy INTERFACE ENABLE_ANNOY)
-target_compile_definitions(_annoy INTERFACE ANNOYLIB_MULTITHREADED_BUILD)
contrib/libprotobuf-mutator (vendored, 2 changes)

@@ -1 +1 @@
-Subproject commit 1f95f8083066f5b38fd2db172e7e7f9aa7c49d2d
+Subproject commit b922c8ab9004ef9944982e4f165e2747b13223fa
@@ -1,9 +1,7 @@
-set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch")
-set(USEARCH_SOURCE_DIR "${USEARCH_PROJECT_DIR}/include")
-
 set(FP16_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/FP16")
 set(ROBIN_MAP_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/robin-map")
-set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD-map")
+set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD")
+set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch")
 
 add_library(_usearch INTERFACE)

@@ -11,7 +9,6 @@ target_include_directories(_usearch SYSTEM INTERFACE
     ${FP16_PROJECT_DIR}/include
     ${ROBIN_MAP_PROJECT_DIR}/include
     ${SIMSIMD_PROJECT_DIR}/include
-    ${USEARCH_SOURCE_DIR})
+    ${USEARCH_PROJECT_DIR}/include)
 
 add_library(ch_contrib::usearch ALIAS _usearch)
 target_compile_definitions(_usearch INTERFACE ENABLE_USEARCH)
@@ -108,7 +108,8 @@ if [ -n "$MAKE_DEB" ]; then
     bash -x /build/packages/build
 fi
 
-mv ./programs/clickhouse* /output || mv ./programs/*_fuzzer /output
+mv ./programs/clickhouse* /output ||:
+mv ./programs/*_fuzzer /output ||:
 [ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output
 [ -x ./programs/self-extracting/clickhouse-stripped ] && mv ./programs/self-extracting/clickhouse-stripped /output
 [ -x ./programs/self-extracting/clickhouse-keeper ] && mv ./programs/self-extracting/clickhouse-keeper /output
@@ -99,10 +99,9 @@ upload_data() {
-    # iterating over globs will cause redundant file variable to be
-    # a path to a file, not a filename
-    # shellcheck disable=SC2045
-    for file in $(ls "${data_path}"); do
-        echo "${file}";
-        ./mc cp "${data_path}"/"${file}" clickminio/test/"${file}";
-    done
+    if [ -d "${data_path}" ]; then
+        ./mc cp --recursive "${data_path}"/ clickminio/test/
+    fi
 }
 
 setup_aws_credentials() {
@@ -17,7 +17,7 @@ In terms of SQL, the nearest neighborhood problem can be expressed as follows:
 
 ``` sql
 SELECT *
-FROM table_with_ann_index
+FROM table
 ORDER BY Distance(vectors, Point)
 LIMIT N
 ```
@@ -27,75 +27,109 @@ Function `Distance` computes the distance between two vectors. Often, the Euclid
 distance functions](/docs/en/sql-reference/functions/distance-functions.md) are also possible. `Point` is the reference point, e.g. `(0.17,
 0.33, ...)`, and `N` limits the number of search results.
 
-An alternative formulation of the nearest neighborhood search problem looks as follows:
+This query returns the top-`N` closest points to the reference point. Parameter `N` limits the number of returned values which is useful for
+situations where `MaxDistance` is difficult to determine in advance.
 
-``` sql
-SELECT *
-FROM table_with_ann_index
-WHERE Distance(vectors, Point) < MaxDistance
-LIMIT N
-```
-
-While the first query returns the top-`N` closest points to the reference point, the second query returns all points closer to the reference
-point than a maximally allowed radius `MaxDistance`. Parameter `N` limits the number of returned values which is useful for situations where
-`MaxDistance` is difficult to determine in advance.
-
-With brute force search, both queries are expensive (linear in the number of points) because the distance between all points in `vectors` and
+With brute force search, the query is expensive (linear in the number of points) because the distance between all points in `vectors` and
 `Point` must be computed. To speed this process up, Approximate Nearest Neighbor Search Indexes (ANN indexes) store a compact representation
 of the search space (using clustering, search trees, etc.) which allows to compute an approximate answer much quicker (in sub-linear time).
 
-# Creating and Using ANN Indexes {#creating_using_ann_indexes}
+# Creating and Using Vector Similarity Indexes
 
-Syntax to create an ANN index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column:
+Syntax to create a vector similarity index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column:
 
 ```sql
-CREATE TABLE table_with_ann_index
+CREATE TABLE table
 (
-  `id` Int64,
-  `vectors` Array(Float32),
-  INDEX [ann_index_name vectors TYPE [ann_index_type]([ann_index_parameters]) [GRANULARITY [N]]
+  id Int64,
+  vectors Array(Float32),
+  INDEX index_name vectors TYPE vector_similarity(method, distance_function[, quantization, connectivity, expansion_add, expansion_search]) [GRANULARITY N]
 )
 ENGINE = MergeTree
 ORDER BY id;
 ```
 
+Parameters:
+- `method`: currently only `hnsw` is supported.
+- `distance_function`: either `L2Distance` (the [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance) - the length of a
+  line between two points in Euclidean space), or `cosineDistance` (the [cosine
+  distance](https://en.wikipedia.org/wiki/Cosine_similarity#Cosine_distance) - the angle between two non-zero vectors).
+- `quantization`: either `f32`, `f16`, or `i8` for storing the vector with reduced precision (optional, default: `f32`)
+- `m`: the number of neighbors per graph node (optional, default: 16)
+- `ef_construction`: (optional, default: 128)
+- `ef_search`: (optional, default: 64)
+
+Example:
+
+```sql
+CREATE TABLE table
+(
+  id Int64,
+  vectors Array(Float32),
+  INDEX idx vectors TYPE vector_similarity('hnsw', 'L2Distance') -- Alternative syntax: TYPE vector_similarity(hnsw, L2Distance)
+)
+ENGINE = MergeTree
+ORDER BY id;
+```
+Vector similarity indexes are based on the [USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW
+algorithm](https://arxiv.org/abs/1603.09320), i.e., a hierarchical graph where each point represents a vector and the edges represent
+similarity. Such hierarchical structures can be very efficient on large collections. They may often fetch 0.05% or less data from the
+overall dataset, while still providing 99% recall. This is especially useful when working with high-dimensional vectors that are expensive
+to load and compare. The library also has several hardware-specific SIMD optimizations to accelerate further distance computations on modern
+Arm (NEON and SVE) and x86 (AVX2 and AVX-512) CPUs and OS-specific optimizations to allow efficient navigation around immutable persistent
+files, without loading them into RAM.
+
+USearch indexes are currently experimental; to use them, you first need to `SET allow_experimental_vector_similarity_index = 1`.
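For a quick end-to-end check of the new syntax, a minimal sketch (reusing the `table` and `idx` definitions from the example above; the inserted vectors and the reference point are made up):

```sql
SET allow_experimental_vector_similarity_index = 1;

-- Assumes the CREATE TABLE example above; two-dimensional vectors for brevity.
INSERT INTO table VALUES (1, [0.0, 0.5]), (2, [0.9, 0.1]), (3, [0.2, 0.8]);

-- Top-2 nearest neighbors to a made-up reference point.
SELECT id
FROM table
ORDER BY L2Distance(vectors, [0.1, 0.6])
LIMIT 2;
```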
+
+Vector similarity indexes currently support two distance functions:
+- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space
+  ([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)).
+- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
+  ([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
+
+Vector similarity indexes allow storing the vectors in reduced precision formats. Supported scalar kinds are `f64`, `f32`, `f16` or `i8`.
+If no scalar kind was specified during index creation, `f16` is used as default.
+
+For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
+distance function was specified during index creation, `L2Distance` is used as default.
+
+:::note
+All arrays must have the same length. To avoid errors, you can use a
+[CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints), for example, `CONSTRAINT constraint_name_1 CHECK
+length(vectors) = 256`. Also, empty `Arrays` and unspecified `Array` values in INSERT statements (i.e. default values) are not supported.
+:::
+
+:::note
+The vector similarity index currently does not work with per-table, non-default `index_granularity` settings (see
+[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
+:::
 
 ANN indexes are built during column insertion and merge. As a result, `INSERT` and `OPTIMIZE` statements will be slower than for ordinary
 tables. ANN indexes are ideally used only with immutable or rarely changed data, i.e. when there are far more read requests than write
 requests.
 
-ANN indexes support two types of queries:
-
-- ORDER BY queries:
+ANN indexes support these queries:
 
 ``` sql
 SELECT *
-FROM table_with_ann_index
+FROM table
 [WHERE ...]
 ORDER BY Distance(vectors, Point)
 LIMIT N
 ```
 
-- WHERE queries:
-
-``` sql
-SELECT *
-FROM table_with_ann_index
-WHERE Distance(vectors, Point) < MaxDistance
-LIMIT N
-```
-
 :::tip
 To avoid writing out large vectors, you can use [query
 parameters](/docs/en/interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters), e.g.
 
 ```bash
-clickhouse-client --param_vec='hello' --query="SELECT * FROM table_with_ann_index WHERE L2Distance(vectors, {vec: Array(Float32)}) < 1.0"
+clickhouse-client --param_vec='hello' --query="SELECT * FROM table WHERE L2Distance(vectors, {vec: Array(Float32)}) < 1.0"
 ```
 :::
 
-**Restrictions**: Queries that contain both a `WHERE Distance(vectors, Point) < MaxDistance` and an `ORDER BY Distance(vectors, Point)`
-clause cannot use ANN indexes. Also, the approximate algorithms used to determine the nearest neighbors require a limit, hence queries
-without `LIMIT` clause cannot utilize ANN indexes. Also, ANN indexes are only used if the query has a `LIMIT` value smaller than setting
+**Restrictions**: Approximate algorithms used to determine the nearest neighbors require a limit, hence queries without a `LIMIT` clause
+cannot utilize ANN indexes. Also, ANN indexes are only used if the query has a `LIMIT` value smaller than setting
 `max_limit_for_ann_queries` (default: 1 million rows). This is a safeguard to prevent large memory allocations by external libraries for
 approximate neighbor search.
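To illustrate the `LIMIT` interplay, a sketch (names come from the examples above; the lowered threshold value is purely illustrative):

```sql
-- Can use the ANN index: LIMIT 10 is far below the default threshold of 1 million rows.
SELECT id FROM table ORDER BY L2Distance(vectors, [0.1, 0.6]) LIMIT 10;

-- Falls back to brute force: LIMIT 10 now exceeds the artificially lowered threshold.
SELECT id FROM table ORDER BY L2Distance(vectors, [0.1, 0.6]) LIMIT 10
SETTINGS max_limit_for_ann_queries = 5;
```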
@@ -122,128 +156,3 @@ brute-force distance calculation over all rows of the granules. With a small `GR
 equally good, only the processing performance differs. It is generally recommended to use a large `GRANULARITY` for ANN indexes and fall
 back to a smaller `GRANULARITY` values only in case of problems like excessive memory consumption of the ANN structures. If no `GRANULARITY`
 was specified for ANN indexes, the default value is 100 million.
-
-
-# Available ANN Indexes {#available_ann_indexes}
-
-- [Annoy](/docs/en/engines/table-engines/mergetree-family/annindexes.md#annoy-annoy)
-
-- [USearch](/docs/en/engines/table-engines/mergetree-family/annindexes.md#usearch-usearch)
-
-## Annoy {#annoy}
-
-Annoy indexes are currently experimental, to use them you first need to `SET allow_experimental_annoy_index = 1`. They are also currently
-disabled on ARM due to memory safety problems with the algorithm.
-
-This type of ANN index is based on the [Annoy library](https://github.com/spotify/annoy) which recursively divides the space into random
-linear surfaces (lines in 2D, planes in 3D etc.).
-
-<div class='vimeo-container'>
-  <iframe src="//www.youtube.com/embed/QkCCyLW0ehU"
-          width="640"
-          height="360"
-          frameborder="0"
-          allow="autoplay; fullscreen; picture-in-picture"
-          allowfullscreen>
-  </iframe>
-</div>
-
-Syntax to create an Annoy index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column:
-
-```sql
-CREATE TABLE table_with_annoy_index
-(
-  id Int64,
-  vectors Array(Float32),
-  INDEX [ann_index_name] vectors TYPE annoy([Distance[, NumTrees]]) [GRANULARITY N]
-)
-ENGINE = MergeTree
-ORDER BY id;
-```
-
-Annoy currently supports two distance functions:
-- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space
-  ([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)).
-- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
-  ([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
-
-For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
-distance function was specified during index creation, `L2Distance` is used as default.
-
-Parameter `NumTrees` is the number of trees which the algorithm creates (default if not specified: 100). Higher values of `NumTree` mean
-more accurate search results but slower index creation / query times (approximately linearly) as well as larger index sizes.
-
-:::note
-All arrays must have same length. To avoid errors, you can use a
-[CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints), for example, `CONSTRAINT constraint_name_1 CHECK
-length(vectors) = 256`. Also, empty `Arrays` and unspecified `Array` values in INSERT statements (i.e. default values) are not supported.
-:::
-
-The creation of Annoy indexes (whenever a new part is build, e.g. at the end of a merge) is a relatively slow process. You can increase
-setting `max_threads_for_annoy_index_creation` (default: 4) which controls how many threads are used to create an Annoy index. Please be
-careful with this setting, it is possible that multiple indexes are created in parallel in which case there can be overparallelization.
-
-Setting `annoy_index_search_k_nodes` (default: `NumTrees * LIMIT`) determines how many tree nodes are inspected during SELECTs. Larger
-values mean more accurate results at the cost of longer query runtime:
-
-```sql
-SELECT *
-FROM table_name
-ORDER BY L2Distance(vectors, Point)
-LIMIT N
-SETTINGS annoy_index_search_k_nodes=100;
-```
-
-:::note
-The Annoy index currently does not work with per-table, non-default `index_granularity` settings (see
-[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
-:::
-
-## USearch {#usearch}
-
-This type of ANN index is based on the [USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW
-algorithm](https://arxiv.org/abs/1603.09320), i.e., builds a hierarchical graph where each point represents a vector and the edges represent
-similarity. Such hierarchical structures can be very efficient on large collections. They may often fetch 0.05% or less data from the
-overall dataset, while still providing 99% recall. This is especially useful when working with high-dimensional vectors,
-that are expensive to load and compare. The library also has several hardware-specific SIMD optimizations to accelerate further
-distance computations on modern Arm (NEON and SVE) and x86 (AVX2 and AVX-512) CPUs and OS-specific optimizations to allow efficient
-navigation around immutable persistent files, without loading them into RAM.
-
-<div class='vimeo-container'>
-  <iframe src="//www.youtube.com/embed/UMrhB3icP9w"
-          width="640"
-          height="360"
-          frameborder="0"
-          allow="autoplay; fullscreen; picture-in-picture"
-          allowfullscreen>
-  </iframe>
-</div>
-
-Syntax to create an USearch index over an [Array](../../../sql-reference/data-types/array.md) column:
-
-```sql
-CREATE TABLE table_with_usearch_index
-(
-  id Int64,
-  vectors Array(Float32),
-  INDEX [ann_index_name] vectors TYPE usearch([Distance[, ScalarKind]]) [GRANULARITY N]
-)
-ENGINE = MergeTree
-ORDER BY id;
-```
-
-USearch currently supports two distance functions:
-- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space
-  ([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)).
-- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
-  ([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
-
-USearch allows storing the vectors in reduced precision formats. Supported scalar kinds are `f64`, `f32`, `f16` or `i8`. If no scalar kind
-was specified during index creation, `f16` is used as default.
-
-For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
-distance function was specified during index creation, `L2Distance` is used as default.
|
||||
Entries in the query cache are compressed by default. This reduces the overall memory consumption at the cost of slower writes into / reads
|
||||
from the query cache. To disable compression, use setting [query_cache_compress_entries](settings/settings.md#query-cache-compress-entries).
|
||||
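For instance, a sketch of toggling compression per query with the setting linked above:

```sql
-- Cache this entry uncompressed: faster writes/reads at the cost of more memory.
SELECT 1 SETTINGS use_query_cache = true, query_cache_compress_entries = false;
```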
|
||||
Sometimes it is useful to keep multiple results for the same query cached. This can be achieved using setting
|
||||
[query_cache_tag](settings/settings.md#query-cache-tag) that acts as as a label (or namespace) for a query cache entries. The query cache
|
||||
considers results of the same query with different tags different.
|
||||
|
||||
Example for creating three different query cache entries for the same query:
|
||||
|
||||
```sql
|
||||
SELECT 1 SETTINGS use_query_cache = true; -- query_cache_tag is implicitly '' (empty string)
|
||||
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 1';
|
||||
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 2';
|
||||
```
|
||||
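The tag of each cached entry can then be inspected through the `system.query_cache` table, whose `tag` column is added elsewhere in this commit; a minimal sketch:

```sql
SELECT query, tag FROM system.query_cache;
```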
 
 ClickHouse reads table data in blocks of [max_block_size](settings/settings.md#setting-max_block_size) rows. Due to filtering, aggregation,
 etc., result blocks are typically much smaller than 'max_block_size' but there are also cases where they are much bigger. Setting
 [query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results) (enabled by default) controls if result blocks
@@ -1400,6 +1400,16 @@ The number of seconds that ClickHouse waits for incoming requests before closing
 <keep_alive_timeout>10</keep_alive_timeout>
 ```
 
+## max_keep_alive_requests {#max-keep-alive-requests}
+
+The maximal number of requests served over a single keep-alive connection before it is closed by the ClickHouse server. Defaults to 10000.
+
+**Example**
+
+``` xml
+<max_keep_alive_requests>10</max_keep_alive_requests>
+```
+
 ## listen_host {#listen_host}
 
 Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`.
@@ -1041,3 +1041,14 @@ Compression rates of LZ4 or ZSTD improve on average by 20-40%.
 
 This setting works best for tables with no primary key or a low-cardinality primary key, i.e. a table with only few distinct primary key values.
 High-cardinality primary keys, e.g. involving timestamp columns of type `DateTime64`, are not expected to benefit from this setting.
 
+### deduplicate_merge_projection_mode
+
+Whether to allow creating projections for tables with a non-classic MergeTree engine, that is, not a (Replicated, Shared) MergeTree. If allowed, controls the action taken when projections are merged: either drop or rebuild. Classic MergeTree ignores this setting.
+It also controls `OPTIMIZE DEDUPLICATE`, in which case it has an effect on all MergeTree family members.
+
+Possible values:
+
+- throw, drop, rebuild
+
+Default value: throw
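As a sketch of how the setting might be applied at table creation (the table, projection, and ZooKeeper path names are made up):

```sql
CREATE TABLE events
(
    id UInt64,
    val String,
    PROJECTION p_val (SELECT val, count() GROUP BY val)
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/events', '{replica}')
ORDER BY id
SETTINGS deduplicate_merge_projection_mode = 'rebuild'; -- rebuild projections when parts are merged
```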
@@ -1800,6 +1800,17 @@ Possible values:
 
 Default value: `0`.
 
+## query_cache_tag {#query-cache-tag}
+
+A string which acts as a label for [query cache](../query-cache.md) entries.
+The same queries with different tags are considered different by the query cache.
+
+Possible values:
+
+- Any string
+
+Default value: `''`
+
 ## query_cache_max_size_in_bytes {#query-cache-max-size-in-bytes}
 
 The maximum amount of memory (in bytes) the current user may allocate in the [query cache](../query-cache.md). 0 means unlimited.
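For instance, a per-session cap of roughly 100 MiB (the value is illustrative, and the setting may be constrained by the user profile):

```sql
SET query_cache_max_size_in_bytes = 104857600; -- 100 MiB
```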
@@ -5627,6 +5638,12 @@ Disable all insert and mutations (alter table update / alter table delete / alte
 
 Default value: `false`.
 
+## use_hive_partitioning
+
+When enabled, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) in file-like table engines [File](../../engines/table-engines/special/file.md#hive-style-partitioning)/[S3](../../engines/table-engines/integrations/s3.md#hive-style-partitioning)/[URL](../../engines/table-engines/special/url.md#hive-style-partitioning)/[HDFS](../../engines/table-engines/integrations/hdfs.md#hive-style-partitioning)/[AzureBlobStorage](../../engines/table-engines/integrations/azureBlobStorage.md#hive-style-partitioning) and allow the use of partition columns as virtual columns in the query. These virtual columns have the same names as in the partitioned path, but start with `_`.
+
+Default value: `false`.
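A sketch mirroring the examples added to the table-function docs in this commit (the path is made up):

```sql
SET use_hive_partitioning = 1;
SELECT _country, count() FROM file('data/date=*/country=*/*.parquet') GROUP BY _country;
```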
 
 ## allow_experimental_time_series_table {#allow-experimental-time-series-table}
 
 Allows creation of tables with the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine.
@@ -9,6 +9,7 @@ Columns:
 
 - `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
 - `result_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Size of the query cache entry.
+- `tag` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Tag of the query cache entry.
 - `stale` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is stale.
 - `shared` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is shared between multiple users.
 - `compressed` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is compressed.

@@ -26,6 +27,7 @@ Row 1:
 ──────
 query:       SELECT 1 SETTINGS use_query_cache = 1
 result_size: 128
+tag:
 stale:       0
 shared:      0
 compressed:  1
@@ -14,7 +14,7 @@ To declare a column of `Dynamic` type, use the following syntax:
 <column_name> Dynamic(max_types=N)
 ```
 
-Where `N` is an optional parameter between `1` and `255` indicating how many different data types can be stored inside a column with type `Dynamic` across single block of data that is stored separately (for example across single data part for MergeTree table). If this limit is exceeded, all new types will be converted to type `String`. Default value of `max_types` is `32`.
+Where `N` is an optional parameter between `0` and `254` indicating how many different data types can be stored as separate subcolumns inside a column with type `Dynamic` across a single block of data that is stored separately (for example across a single data part for a MergeTree table). If this limit is exceeded, all values with new types will be stored together in a special shared data structure in binary form. Default value of `max_types` is `32`.
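For instance, a column that keeps at most 8 distinct types as separate subcolumns (a minimal sketch; it assumes the experimental flag mentioned in the note below):

```sql
SET allow_experimental_dynamic_type = 1;
CREATE TABLE t (d Dynamic(max_types=8)) ENGINE = Memory;
INSERT INTO t VALUES (42), ('Hello'), ([1, 2, 3]);
SELECT d, dynamicType(d) FROM t;
```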
 
 :::note
 The Dynamic data type is an experimental feature. To use it, set `allow_experimental_dynamic_type = 1`.

@@ -224,41 +224,43 @@ SELECT d::Dynamic(max_types=5) as d2, dynamicType(d2) FROM test;
 └───────┴────────────────┘
 ```
 
-If `K < N`, then the values with the rarest types are converted to `String`:
+If `K < N`, then the values with the rarest types will be inserted into a single special subcolumn, but will still be accessible:
 ```text
 CREATE TABLE test (d Dynamic(max_types=4)) ENGINE = Memory;
 INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true), ([1, 2, 3]);
-SELECT d, dynamicType(d), d::Dynamic(max_types=2) as d2, dynamicType(d2) FROM test;
+SELECT d, dynamicType(d), d::Dynamic(max_types=2) as d2, dynamicType(d2), isDynamicElementInSharedData(d2) FROM test;
 ```
 
 ```text
-┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┐
-│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │
-│ 42 │ Int64 │ 42 │ Int64 │
-│ 43 │ Int64 │ 43 │ Int64 │
-│ 42.42 │ String │ 42.42 │ String │
-│ true │ Bool │ true │ String │
-│ [1,2,3] │ Array(Int64) │ [1,2,3] │ String │
-└─────────┴────────────────┴─────────┴─────────────────┘
+┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┬─isDynamicElementInSharedData(d2)─┐
+│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │ false │
+│ 42 │ Int64 │ 42 │ Int64 │ false │
+│ 43 │ Int64 │ 43 │ Int64 │ false │
+│ 42.42 │ String │ 42.42 │ String │ false │
+│ true │ Bool │ true │ Bool │ true │
+│ [1,2,3] │ Array(Int64) │ [1,2,3] │ Array(Int64) │ true │
+└─────────┴────────────────┴─────────┴─────────────────┴──────────────────────────────────┘
 ```
 
-If `K=1`, all types are converted to `String`:
+Function `isDynamicElementInSharedData` returns `true` for rows that are stored in the special shared data structure inside `Dynamic`, and as we can see, the resulting column contains only 2 types that are not stored in the shared data structure.
+
+If `K=0`, all types will be inserted into a single special subcolumn:
 
 ```text
 CREATE TABLE test (d Dynamic(max_types=4)) ENGINE = Memory;
 INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true), ([1, 2, 3]);
-SELECT d, dynamicType(d), d::Dynamic(max_types=1) as d2, dynamicType(d2) FROM test;
+SELECT d, dynamicType(d), d::Dynamic(max_types=0) as d2, dynamicType(d2), isDynamicElementInSharedData(d2) FROM test;
 ```
 
 ```text
-┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┐
-│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │
-│ 42 │ Int64 │ 42 │ String │
-│ 43 │ Int64 │ 43 │ String │
-│ 42.42 │ String │ 42.42 │ String │
-│ true │ Bool │ true │ String │
-│ [1,2,3] │ Array(Int64) │ [1,2,3] │ String │
-└─────────┴────────────────┴─────────┴─────────────────┘
+┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┬─isDynamicElementInSharedData(d2)─┐
+│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │ false │
+│ 42 │ Int64 │ 42 │ Int64 │ true │
+│ 43 │ Int64 │ 43 │ Int64 │ true │
+│ 42.42 │ String │ 42.42 │ String │ true │
+│ true │ Bool │ true │ Bool │ true │
+│ [1,2,3] │ Array(Int64) │ [1,2,3] │ Array(Int64) │ true │
+└─────────┴────────────────┴─────────┴─────────────────┴──────────────────────────────────┘
 ```
 
 ## Reading Dynamic type from the data
@@ -411,17 +413,17 @@ SELECT d, dynamicType(d) FROM test ORDER by d;
 
 ## Reaching the limit in number of different data types stored inside Dynamic
 
-`Dynamic` data type can store only limited number of different data types inside. By default, this limit is 32, but you can change it in type declaration using syntax `Dynamic(max_types=N)` where N is between 1 and 255 (due to implementation details, it's impossible to have more than 255 different data types inside Dynamic).
-When the limit is reached, all new data types inserted to `Dynamic` column will be casted to `String` and stored as `String` values.
+The `Dynamic` data type can store only a limited number of different data types as separate subcolumns. By default, this limit is 32, but you can change it in the type declaration using the syntax `Dynamic(max_types=N)` where N is between 0 and 254 (due to implementation details, it's impossible to have more than 254 different data types stored as separate subcolumns inside Dynamic).
+When the limit is reached, all new data types inserted into a `Dynamic` column will be inserted into a single shared data structure that stores values with different data types in binary form.
 
 Let's see what happens when the limit is reached in different scenarios.
 
 ### Reaching the limit during data parsing
 
-During parsing of `Dynamic` values from the data, when the limit is reached for current block of data, all new values will be inserted as `String` values:
+During parsing of `Dynamic` values from the data, when the limit is reached for the current block of data, all new values will be inserted into the shared data structure:
 
 ```sql
-SELECT d, dynamicType(d) FROM format(JSONEachRow, 'd Dynamic(max_types=3)', '
+SELECT d, dynamicType(d), isDynamicElementInSharedData(d) FROM format(JSONEachRow, 'd Dynamic(max_types=3)', '
 {"d" : 42}
 {"d" : [1, 2, 3]}
 {"d" : "Hello, World!"}
@@ -432,22 +434,22 @@ SELECT d, dynamicType(d) FROM format(JSONEachRow, 'd Dynamic(max_types=3)', '
 ```
 
 ```text
-┌─d──────────────────────────┬─dynamicType(d)─┐
-│ 42 │ Int64 │
-│ [1,2,3] │ Array(Int64) │
-│ Hello, World! │ String │
-│ 2020-01-01 │ String │
-│ ["str1", "str2", "str3"] │ String │
-│ {"a" : 1, "b" : [1, 2, 3]} │ String │
-└────────────────────────────┴────────────────┘
+┌─d──────────────────────┬─dynamicType(d)─────────────────┬─isDynamicElementInSharedData(d)─┐
+│ 42 │ Int64 │ false │
+│ [1,2,3] │ Array(Int64) │ false │
+│ Hello, World! │ String │ false │
+│ 2020-01-01 │ Date │ true │
+│ ['str1','str2','str3'] │ Array(String) │ true │
+│ (1,[1,2,3]) │ Tuple(a Int64, b Array(Int64)) │ true │
+└────────────────────────┴────────────────────────────────┴─────────────────────────────────┘
 ```
 
-As we can see, after inserting 3 different data types `Int64`, `Array(Int64)` and `String` all new types were converted to `String`.
+As we can see, after inserting 3 different data types `Int64`, `Array(Int64)` and `String`, all new types were inserted into the special shared data structure.
 
 ### During merges of data parts in MergeTree table engines
 
-During merge of several data parts in MergeTree table the `Dynamic` column in the resulting data part can reach the limit of different data types inside and won't be able to store all types from source parts.
-In this case ClickHouse chooses what types will remain after merge and what types will be casted to `String`. In most cases ClickHouse tries to keep the most frequent types and cast the rarest types to `String`, but it depends on the implementation.
+During a merge of several data parts in a MergeTree table, the `Dynamic` column in the resulting data part can reach the limit of different data types that can be stored in separate subcolumns, and won't be able to store all types as subcolumns from the source parts.
+In this case ClickHouse chooses what types will remain as separate subcolumns after the merge and what types will be inserted into the shared data structure. In most cases ClickHouse tries to keep the most frequent types and store the rarest types in the shared data structure, but it depends on the implementation.
 
 Let's see an example of such a merge. First, let's create a table with a `Dynamic` column, set the limit of different data types to `3` and insert values with `5` different types:
@@ -463,17 +465,17 @@ INSERT INTO test SELECT number, 'str_' || toString(number) FROM numbers(1);
 
 Each insert will create a separate data part with a `Dynamic` column containing a single type:
 ```sql
-SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) ORDER BY _part;
+SELECT count(), dynamicType(d), isDynamicElementInSharedData(d), _part FROM test GROUP BY _part, dynamicType(d), isDynamicElementInSharedData(d) ORDER BY _part, count();
 ```
 
 ```text
-┌─count()─┬─dynamicType(d)──────┬─_part─────┐
-│ 5 │ UInt64 │ all_1_1_0 │
-│ 4 │ Array(UInt64) │ all_2_2_0 │
-│ 3 │ Date │ all_3_3_0 │
-│ 2 │ Map(UInt64, UInt64) │ all_4_4_0 │
-│ 1 │ String │ all_5_5_0 │
-└─────────┴─────────────────────┴───────────┘
+┌─count()─┬─dynamicType(d)──────┬─isDynamicElementInSharedData(d)─┬─_part─────┐
+│ 5 │ UInt64 │ false │ all_1_1_0 │
+│ 4 │ Array(UInt64) │ false │ all_2_2_0 │
+│ 3 │ Date │ false │ all_3_3_0 │
+│ 2 │ Map(UInt64, UInt64) │ false │ all_4_4_0 │
+│ 1 │ String │ false │ all_5_5_0 │
+└─────────┴─────────────────────┴─────────────────────────────────┴───────────┘
 ```
 
 Now, let's merge all parts into one and see what will happen:
@@ -481,18 +483,20 @@ Now, let's merge all parts into one and see what will happen:
 ```sql
 SYSTEM START MERGES test;
 OPTIMIZE TABLE test FINAL;
-SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) ORDER BY _part;
+SELECT count(), dynamicType(d), isDynamicElementInSharedData(d), _part FROM test GROUP BY _part, dynamicType(d), isDynamicElementInSharedData(d) ORDER BY _part, count() desc;
 ```
 
 ```text
-┌─count()─┬─dynamicType(d)─┬─_part─────┐
-│ 5 │ UInt64 │ all_1_5_2 │
-│ 6 │ String │ all_1_5_2 │
-│ 4 │ Array(UInt64) │ all_1_5_2 │
-└─────────┴────────────────┴───────────┘
+┌─count()─┬─dynamicType(d)──────┬─isDynamicElementInSharedData(d)─┬─_part─────┐
+│ 5 │ UInt64 │ false │ all_1_5_2 │
+│ 4 │ Array(UInt64) │ false │ all_1_5_2 │
+│ 3 │ Date │ false │ all_1_5_2 │
+│ 2 │ Map(UInt64, UInt64) │ true │ all_1_5_2 │
+│ 1 │ String │ true │ all_1_5_2 │
+└─────────┴─────────────────────┴─────────────────────────────────┴───────────┘
 ```
 
-As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` and casted all other types to `String`.
+As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` as subcolumns and inserted all other types into the shared data.
 
 ## JSONExtract functions with Dynamic
@@ -509,22 +513,23 @@ SELECT JSONExtract('{"a" : [1, 2, 3]}', 'a', 'Dynamic') AS dynamic, dynamicType(
 ```
 
 ```sql
-SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Variant(UInt32, String, Array(UInt32)))') AS map_of_dynamics, mapApply((k, v) -> (k, variantType(v)), map_of_dynamics) AS map_of_dynamic_types```
+SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Dynamic)') AS map_of_dynamics, mapApply((k, v) -> (k, dynamicType(v)), map_of_dynamics) AS map_of_dynamic_types
 ```
 
 ```text
-┌─map_of_dynamics──────────────────┬─map_of_dynamic_types────────────────────────────┐
-│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'UInt32','b':'String','c':'Array(UInt32)'} │
-└──────────────────────────────────┴─────────────────────────────────────────────────┘
+┌─map_of_dynamics──────────────────┬─map_of_dynamic_types────────────────────────────────────┐
+│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'Int64','b':'String','c':'Array(Nullable(Int64))'} │
+└──────────────────────────────────┴─────────────────────────────────────────────────────────┘
 ```
 
 ```sql
-SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Variant(UInt32, String, Array(UInt32))') AS dynamics, arrayMap(x -> (x.1, variantType(x.2)), dynamics) AS dynamic_types```
+SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Dynamic') AS dynamics, arrayMap(x -> (x.1, dynamicType(x.2)), dynamics) AS dynamic_types
 ```
 
 ```text
-┌─dynamics───────────────────────────────┬─dynamic_types─────────────────────────────────────────┐
-│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','UInt32'),('b','String'),('c','Array(UInt32)')] │
-└────────────────────────────────────────┴───────────────────────────────────────────────────────┘
+┌─dynamics───────────────────────────────┬─dynamic_types─────────────────────────────────────────────────┐
+│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','Int64'),('b','String'),('c','Array(Nullable(Int64))')] │
+└────────────────────────────────────────┴───────────────────────────────────────────────────────────────┘
 ```
 
 ### Binary output format
@@ -4189,3 +4189,94 @@ Result:
 │ 32 │
 └─────────────────────────────┘
 ```
 
+## getSubcolumn
+
+Takes a table expression or identifier and a constant string with the name of the sub-column, and returns the requested sub-column extracted from the expression.
+
+**Syntax**
+
+```sql
+getSubcolumn(col_name, subcol_name)
+```
+
+**Arguments**
+
+- `col_name` — Table expression or identifier. [Expression](../syntax.md/#expressions), [Identifier](../syntax.md/#identifiers).
+- `subcol_name` — The name of the sub-column. [String](../data-types/string.md).
+
+**Returned value**
+
+- Returns the extracted sub-column.
+
+**Example**
+
+Query:
+
+```sql
+CREATE TABLE t_arr (arr Array(Tuple(subcolumn1 UInt32, subcolumn2 String))) ENGINE = MergeTree ORDER BY tuple();
+INSERT INTO t_arr VALUES ([(1, 'Hello'), (2, 'World')]), ([(3, 'This'), (4, 'is'), (5, 'subcolumn')]);
+SELECT getSubcolumn(arr, 'subcolumn1'), getSubcolumn(arr, 'subcolumn2') FROM t_arr;
+```
+
+Result:
+
+```response
+   ┌─getSubcolumn(arr, 'subcolumn1')─┬─getSubcolumn(arr, 'subcolumn2')─┐
+1. │ [1,2] │ ['Hello','World'] │
+2. │ [3,4,5] │ ['This','is','subcolumn'] │
+   └─────────────────────────────────┴─────────────────────────────────┘
+```
+
+## getTypeSerializationStreams
+
+Enumerates stream paths of a data type.
+
+:::note
+This function is intended for use by developers.
+:::
+
+**Syntax**
+
+```sql
+getTypeSerializationStreams(col)
+```
+
+**Arguments**
+
+- `col` — Column or string representation of a data type from which the data type will be detected.
+
+**Returned value**
+
+- Returns an array with all the serialization sub-stream paths. [Array](../data-types/array.md)([String](../data-types/string.md)).
+
+**Examples**
+
+Query:
+
+```sql
+SELECT getTypeSerializationStreams(tuple('a', 1, 'b', 2));
+```
+
+Result:
+
+```response
+   ┌─getTypeSerializationStreams(('a', 1, 'b', 2))─────────────────────────────────────────────────────────────────────────┐
+1. │ ['{TupleElement(1), Regular}','{TupleElement(2), Regular}','{TupleElement(3), Regular}','{TupleElement(4), Regular}'] │
+   └────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+Query:
+
+```sql
+SELECT getTypeSerializationStreams('Map(String, Int64)');
+```
+
+Result:
+
+```response
+   ┌─getTypeSerializationStreams('Map(String, Int64)')────────────────────────────────────────────────────────────────┐
+1. │ ['{ArraySizes}','{ArrayElements, TupleElement(keys), Regular}','{ArrayElements, TupleElement(values), Regular}'] │
+   └───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
+```
@@ -77,3 +77,16 @@ SELECT count(*) FROM azureBlobStorage('DefaultEndpointsProtocol=https;AccountNam
 **See Also**
 
 - [AzureBlobStorage Table Engine](/docs/en/engines/table-engines/integrations/azureBlobStorage.md)
+
+## Hive-style partitioning {#hive-style-partitioning}
+
+When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow the use of partition columns as virtual columns in the query. These virtual columns have the same names as in the partitioned path, but start with `_`.
+
+**Example**
+
+Use a virtual column created with Hive-style partitioning:
+
+``` sql
+SET use_hive_partitioning = 1;
+SELECT * from azureBlobStorage(config, storage_account_url='...', container='...', blob_path='http://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
+```
@@ -103,7 +103,7 @@ LIMIT 2;
 └─────────┴─────────┴─────────┘
 ```
 
-### Inserting data from a file into a table:
+### Inserting data from a file into a table
 
 ``` sql
 INSERT INTO FUNCTION

@@ -206,6 +206,19 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3
 - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.
 - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
 
+## Hive-style partitioning {#hive-style-partitioning}
+
+When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow the use of partition columns as virtual columns in the query. These virtual columns have the same names as in the partitioned path, but start with `_`.
+
+**Example**
+
+Use a virtual column created with Hive-style partitioning:
+
+``` sql
+SET use_hive_partitioning = 1;
+SELECT * from file('data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
+```
+
 ## Settings {#settings}
 
 - [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-empty_if-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default.
@@ -100,6 +100,19 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin
 - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
 - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
 
+## Hive-style partitioning {#hive-style-partitioning}
+
+When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow the use of partition columns as virtual columns in the query. These virtual columns have the same names as in the partitioned path, but start with `_`.
+
+**Example**
+
+Use a virtual column created with Hive-style partitioning:
+
+``` sql
+SET use_hive_partitioning = 1;
+SELECT * from HDFS('hdfs://hdfs1:9000/data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
+```
+
 ## Storage Settings {#storage-settings}
 
 - [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default.
@@ -274,6 +274,19 @@ FROM s3(
 - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. In case of archive shows uncompressed file size of the file inside the archive.
 - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
 
+## Hive-style partitioning {#hive-style-partitioning}
+
+When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow the use of partition columns as virtual columns in the query. These virtual columns have the same names as in the partitioned path, but start with `_`.
+
+**Example**
+
+Use a virtual column created with Hive-style partitioning:
+
+``` sql
+SET use_hive_partitioning = 1;
+SELECT * from s3('s3://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
+```
+
 ## Storage Settings {#storage-settings}
 
 - [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default.
@@ -55,6 +55,19 @@ Character `|` inside patterns is used to specify failover addresses. They are it
 - `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
 - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
 
+## Hive-style partitioning {#hive-style-partitioning}
+
+When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow the use of partition columns as virtual columns in the query. These virtual columns have the same names as in the partitioned path, but start with `_`.
+
+**Example**
+
+Use a virtual column created with Hive-style partitioning:
+
+``` sql
+SET use_hive_partitioning = 1;
+SELECT * from url('http://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
+```
+
 ## Storage Settings {#storage-settings}
 
 - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
@@ -1,4 +1,4 @@
-add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
+add_compile_options("$<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>")
 
 if (USE_CLANG_TIDY)
     set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
@@ -27,7 +27,7 @@ std::string LibraryBridge::bridgeName() const
 
 LibraryBridge::HandlerFactoryPtr LibraryBridge::getHandlerFactoryPtr(ContextPtr context) const
 {
-    return std::make_shared<LibraryBridgeHandlerFactory>("LibraryRequestHandlerFactory", keep_alive_timeout, context);
+    return std::make_shared<LibraryBridgeHandlerFactory>("LibraryRequestHandlerFactory", context);
 }
 
 }
@@ -9,12 +9,10 @@ namespace DB
 {
 LibraryBridgeHandlerFactory::LibraryBridgeHandlerFactory(
     const std::string & name_,
-    size_t keep_alive_timeout_,
     ContextPtr context_)
     : WithContext(context_)
     , log(getLogger(name_))
     , name(name_)
-    , keep_alive_timeout(keep_alive_timeout_)
 {
 }
@@ -26,17 +24,17 @@ std::unique_ptr<HTTPRequestHandler> LibraryBridgeHandlerFactory::createRequestHa
     if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
     {
         if (uri.getPath() == "/extdict_ping")
-            return std::make_unique<ExternalDictionaryLibraryBridgeExistsHandler>(keep_alive_timeout, getContext());
+            return std::make_unique<ExternalDictionaryLibraryBridgeExistsHandler>(getContext());
         else if (uri.getPath() == "/catboost_ping")
-            return std::make_unique<CatBoostLibraryBridgeExistsHandler>(keep_alive_timeout, getContext());
+            return std::make_unique<CatBoostLibraryBridgeExistsHandler>(getContext());
     }
 
     if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
     {
         if (uri.getPath() == "/extdict_request")
-            return std::make_unique<ExternalDictionaryLibraryBridgeRequestHandler>(keep_alive_timeout, getContext());
+            return std::make_unique<ExternalDictionaryLibraryBridgeRequestHandler>(getContext());
         else if (uri.getPath() == "/catboost_request")
-            return std::make_unique<CatBoostLibraryBridgeRequestHandler>(keep_alive_timeout, getContext());
+            return std::make_unique<CatBoostLibraryBridgeRequestHandler>(getContext());
     }
 
     return nullptr;
@@ -13,7 +13,6 @@ class LibraryBridgeHandlerFactory : public HTTPRequestHandlerFactory, WithContex
 public:
     LibraryBridgeHandlerFactory(
         const std::string & name_,
-        size_t keep_alive_timeout_,
        ContextPtr context_);
 
     std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) override;

@@ -21,7 +20,6 @@ public:
 private:
     LoggerPtr log;
     const std::string name;
-    const size_t keep_alive_timeout;
 };
 
 }
@ -87,10 +87,8 @@ static void writeData(Block data, OutputFormatPtr format)
}


ExternalDictionaryLibraryBridgeRequestHandler::ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, keep_alive_timeout(keep_alive_timeout_)
, log(getLogger("ExternalDictionaryLibraryBridgeRequestHandler"))
ExternalDictionaryLibraryBridgeRequestHandler::ExternalDictionaryLibraryBridgeRequestHandler(ContextPtr context_)
: WithContext(context_), log(getLogger("ExternalDictionaryLibraryBridgeRequestHandler"))
{
}

@ -137,7 +135,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
const String & dictionary_id = params.get("dictionary_id");

LOG_TRACE(log, "Library method: '{}', dictionary id: {}", method, dictionary_id);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);

try
{
@ -374,10 +372,8 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
}


ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, keep_alive_timeout(keep_alive_timeout_)
, log(getLogger("ExternalDictionaryLibraryBridgeExistsHandler"))
ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExistsHandler(ContextPtr context_)
: WithContext(context_), log(getLogger("ExternalDictionaryLibraryBridgeExistsHandler"))
{
}

@ -401,7 +397,7 @@ void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerReque

String res = library_handler ? "1" : "0";

setResponseDefaultHeaders(response, keep_alive_timeout);
setResponseDefaultHeaders(response);
LOG_TRACE(log, "Sending ping response: {} (dictionary id: {})", res, dictionary_id);
response.sendBuffer(res.data(), res.size());
}
@ -412,11 +408,8 @@ void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerReque
}


CatBoostLibraryBridgeRequestHandler::CatBoostLibraryBridgeRequestHandler(
size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, keep_alive_timeout(keep_alive_timeout_)
, log(getLogger("CatBoostLibraryBridgeRequestHandler"))
CatBoostLibraryBridgeRequestHandler::CatBoostLibraryBridgeRequestHandler(ContextPtr context_)
: WithContext(context_), log(getLogger("CatBoostLibraryBridgeRequestHandler"))
{
}

@ -455,7 +448,7 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ
const String & method = params.get("method");

LOG_TRACE(log, "Library method: '{}'", method);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);

try
{
@ -617,10 +610,8 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ
}


CatBoostLibraryBridgeExistsHandler::CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, keep_alive_timeout(keep_alive_timeout_)
, log(getLogger("CatBoostLibraryBridgeExistsHandler"))
CatBoostLibraryBridgeExistsHandler::CatBoostLibraryBridgeExistsHandler(ContextPtr context_)
: WithContext(context_), log(getLogger("CatBoostLibraryBridgeExistsHandler"))
{
}

@ -634,7 +625,7 @@ void CatBoostLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & reque

String res = "1";

setResponseDefaultHeaders(response, keep_alive_timeout);
setResponseDefaultHeaders(response);
LOG_TRACE(log, "Sending ping response: {}", res);
response.sendBuffer(res.data(), res.size());
}
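The same change repeats across every bridge handler above: the `keep_alive_timeout` constructor parameter and member disappear, and both `setResponseDefaultHeaders(response)` and `WriteBufferFromHTTPServerResponse` lose their timeout argument, because keep-alive is now configured once on the HTTP server rather than copied into each handler. A minimal standalone sketch of the resulting shape (plain C++, not ClickHouse code; all names here are illustrative):

```cpp
#include <cstdio>

// Illustrative only: one shared params object owns the keep-alive timeout,
// so handlers no longer store or forward it.
struct ServerParams
{
    unsigned keep_alive_timeout_sec = 30;
};

struct Response
{
    // Default headers are derived from the shared params instead of a
    // per-handler copy of the timeout.
    void setDefaultHeaders(const ServerParams & params)
    {
        std::printf("Connection: Keep-Alive\nKeep-Alive: timeout=%u\n\n", params.keep_alive_timeout_sec);
    }
};

class PingHandler  // before the refactoring: PingHandler(size_t keep_alive_timeout_) kept a copy
{
public:
    void handleRequest(Response & response, const ServerParams & params) const
    {
        response.setDefaultHeaders(params); // timeout comes from shared state
        std::puts("Ok.");
    }
};

int main()
{
    ServerParams params;
    Response response;
    PingHandler{}.handleRequest(response, params);
}
```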
@ -18,14 +18,13 @@ namespace DB
class ExternalDictionaryLibraryBridgeRequestHandler : public HTTPRequestHandler, WithContext
{
public:
ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_);
explicit ExternalDictionaryLibraryBridgeRequestHandler(ContextPtr context_);

void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;

private:
static constexpr auto FORMAT = "RowBinary";

const size_t keep_alive_timeout;
LoggerPtr log;
};

@ -34,12 +33,11 @@ private:
class ExternalDictionaryLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContext
{
public:
ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_);
explicit ExternalDictionaryLibraryBridgeExistsHandler(ContextPtr context_);

void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;

private:
const size_t keep_alive_timeout;
LoggerPtr log;
};

@ -63,12 +61,11 @@ private:
class CatBoostLibraryBridgeRequestHandler : public HTTPRequestHandler, WithContext
{
public:
CatBoostLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_);
explicit CatBoostLibraryBridgeRequestHandler(ContextPtr context_);

void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;

private:
const size_t keep_alive_timeout;
LoggerPtr log;
};

@ -77,12 +74,11 @@ private:
class CatBoostLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContext
{
public:
CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_);
explicit CatBoostLibraryBridgeExistsHandler(ContextPtr context_);

void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;

private:
const size_t keep_alive_timeout;
LoggerPtr log;
};
@ -1307,6 +1307,7 @@ try
throw ErrnoException(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Input must be seekable file (it will be read twice)");

SingleReadBufferIterator read_buffer_iterator(std::move(file));

schema_columns = readSchemaFromFormat(input_format, {}, read_buffer_iterator, context_const);
}
else
@ -202,10 +202,7 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ
if (columns.empty())
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Columns definition was not returned");

WriteBufferFromHTTPServerResponse out(
response,
request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD,
keep_alive_timeout);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
try
{
writeStringBinary(columns.toString(), out);

@ -15,18 +15,12 @@ namespace DB
class ODBCColumnsInfoHandler : public HTTPRequestHandler, WithContext
{
public:
ODBCColumnsInfoHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(getLogger("ODBCColumnsInfoHandler"))
, keep_alive_timeout(keep_alive_timeout_)
{
}
explicit ODBCColumnsInfoHandler(ContextPtr context_) : WithContext(context_), log(getLogger("ODBCColumnsInfoHandler")) { }

void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;

private:
LoggerPtr log;
size_t keep_alive_timeout;
};

}

@ -74,7 +74,7 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ

auto identifier = getIdentifierQuote(std::move(connection));

WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
try
{
writeStringBinary(identifier, out);

@ -14,18 +14,12 @@ namespace DB
class IdentifierQuoteHandler : public HTTPRequestHandler, WithContext
{
public:
IdentifierQuoteHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(getLogger("IdentifierQuoteHandler"))
, keep_alive_timeout(keep_alive_timeout_)
{
}
explicit IdentifierQuoteHandler(ContextPtr context_) : WithContext(context_), log(getLogger("IdentifierQuoteHandler")) { }

void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;

private:
LoggerPtr log;
size_t keep_alive_timeout;
};

}

@ -132,7 +132,7 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
return;
}

WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);

try
{

@ -20,12 +20,10 @@ class ODBCHandler : public HTTPRequestHandler, WithContext
{
public:
ODBCHandler(
size_t keep_alive_timeout_,
ContextPtr context_,
const String & mode_)
: WithContext(context_)
, log(getLogger("ODBCHandler"))
, keep_alive_timeout(keep_alive_timeout_)
, mode(mode_)
{
}
@ -35,7 +33,6 @@ public:
private:
LoggerPtr log;

size_t keep_alive_timeout;
String mode;

static inline std::mutex mutex;

@ -27,7 +27,7 @@ std::string ODBCBridge::bridgeName() const

ODBCBridge::HandlerFactoryPtr ODBCBridge::getHandlerFactoryPtr(ContextPtr context) const
{
return std::make_shared<ODBCBridgeHandlerFactory>("ODBCRequestHandlerFactory-factory", keep_alive_timeout, context);
return std::make_shared<ODBCBridgeHandlerFactory>("ODBCRequestHandlerFactory-factory", context);
}

}

@ -9,11 +9,8 @@
namespace DB
{

ODBCBridgeHandlerFactory::ODBCBridgeHandlerFactory(const std::string & name_, size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(getLogger(name_))
, name(name_)
, keep_alive_timeout(keep_alive_timeout_)
ODBCBridgeHandlerFactory::ODBCBridgeHandlerFactory(const std::string & name_, ContextPtr context_)
: WithContext(context_), log(getLogger(name_)), name(name_)
{
}

@ -23,33 +20,33 @@ std::unique_ptr<HTTPRequestHandler> ODBCBridgeHandlerFactory::createRequestHandl
LOG_TRACE(log, "Request URI: {}", uri.toString());

if (uri.getPath() == "/ping" && request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
return std::make_unique<PingHandler>(keep_alive_timeout);
return std::make_unique<PingHandler>();

if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
{

if (uri.getPath() == "/columns_info")
#if USE_ODBC
return std::make_unique<ODBCColumnsInfoHandler>(keep_alive_timeout, getContext());
return std::make_unique<ODBCColumnsInfoHandler>(getContext());
#else
return nullptr;
#endif
else if (uri.getPath() == "/identifier_quote")
#if USE_ODBC
return std::make_unique<IdentifierQuoteHandler>(keep_alive_timeout, getContext());
return std::make_unique<IdentifierQuoteHandler>(getContext());
#else
return nullptr;
#endif
else if (uri.getPath() == "/schema_allowed")
#if USE_ODBC
return std::make_unique<SchemaAllowedHandler>(keep_alive_timeout, getContext());
return std::make_unique<SchemaAllowedHandler>(getContext());
#else
return nullptr;
#endif
else if (uri.getPath() == "/write")
return std::make_unique<ODBCHandler>(keep_alive_timeout, getContext(), "write");
return std::make_unique<ODBCHandler>(getContext(), "write");
else
return std::make_unique<ODBCHandler>(keep_alive_timeout, getContext(), "read");
return std::make_unique<ODBCHandler>(getContext(), "read");
}
return nullptr;
}

@ -17,14 +17,13 @@ namespace DB
class ODBCBridgeHandlerFactory : public HTTPRequestHandlerFactory, WithContext
{
public:
ODBCBridgeHandlerFactory(const std::string & name_, size_t keep_alive_timeout_, ContextPtr context_);
ODBCBridgeHandlerFactory(const std::string & name_, ContextPtr context_);

std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) override;

private:
LoggerPtr log;
std::string name;
size_t keep_alive_timeout;
};

}

@ -10,7 +10,7 @@ void PingHandler::handleRequest(HTTPServerRequest & /* request */, HTTPServerRes
{
try
{
setResponseDefaultHeaders(response, keep_alive_timeout);
setResponseDefaultHeaders(response);
const char * data = "Ok.\n";
response.sendBuffer(data, strlen(data));
}

@ -9,11 +9,7 @@ namespace DB
class PingHandler : public HTTPRequestHandler
{
public:
explicit PingHandler(size_t keep_alive_timeout_) : keep_alive_timeout(keep_alive_timeout_) {}
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;

private:
size_t keep_alive_timeout;
};

}

@ -88,7 +88,7 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer

bool result = isSchemaAllowed(std::move(connection));

WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
try
{
writeBoolText(result, out);

@ -17,18 +17,12 @@ class Context;
class SchemaAllowedHandler : public HTTPRequestHandler, WithContext
{
public:
SchemaAllowedHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(getLogger("SchemaAllowedHandler"))
, keep_alive_timeout(keep_alive_timeout_)
{
}
explicit SchemaAllowedHandler(ContextPtr context_) : WithContext(context_), log(getLogger("SchemaAllowedHandler")) { }

void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;

private:
LoggerPtr log;
size_t keep_alive_timeout;
};

}
@ -2428,6 +2428,7 @@ void Server::createServers(
Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams;
http_params->setTimeout(settings.http_receive_timeout);
http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout);
http_params->setMaxKeepAliveRequests(static_cast<int>(global_context->getServerSettings().max_keep_alive_requests));

Poco::Util::AbstractConfiguration::Keys protocols;
config.keys("protocols", protocols);
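This is where keep-alive becomes a server-level concern: the Poco `HTTPServerParams` object carries both the timeout and, newly, the maximum number of requests per connection. A minimal standalone sketch using only the Poco calls visible in this hunk (the concrete values are made up):

```cpp
#include <Poco/Net/HTTPServerParams.h>
#include <Poco/Timespan.h>

int main()
{
    // Hypothetical values standing in for the server settings used above.
    Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams;
    http_params->setTimeout(Poco::Timespan(300, 0));         // receive timeout: 300 s
    http_params->setKeepAliveTimeout(Poco::Timespan(10, 0)); // keep an idle connection for 10 s
    http_params->setMaxKeepAliveRequests(1000);              // then force clients to reconnect
    return 0;
}
```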
@ -10,6 +10,7 @@
#include <Poco/Net/SocketAddress.h>
#include <Poco/Net/StreamSocket.h>

#include <Daemon/BaseDaemon.h>
#include <Interpreters/Context.h>


@ -25,6 +26,12 @@ static int64_t port = 9000;

using namespace std::chrono_literals;

void on_exit()
{
BaseDaemon::terminate();
main_app.wait();
}

extern "C"
int LLVMFuzzerInitialize(int * argc, char ***argv)
{
@ -60,6 +67,8 @@ int LLVMFuzzerInitialize(int * argc, char ***argv)
exit(-1);
}

atexit(on_exit);

return 0;
}
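The hunk above uses the standard libFuzzer hook pair: optional one-time setup in `LLVMFuzzerInitialize`, with teardown registered via `atexit`. A minimal self-contained harness with the same structure (illustrative; the payload logic is invented, build with `clang++ -fsanitize=fuzzer harness.cpp`):

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>

// Cleanup runs when the fuzzer process exits; in the patch above this slot
// holds BaseDaemon::terminate() and waiting for the server thread.
static void on_exit()
{
    std::puts("fuzzer is shutting down");
}

// Called once by libFuzzer before fuzzing starts.
extern "C" int LLVMFuzzerInitialize(int * /*argc*/, char *** /*argv*/)
{
    // One-time initialization (e.g. starting an embedded server) goes here.
    atexit(on_exit);
    return 0;
}

// Called for every generated input; return 0 to keep fuzzing.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
{
    if (size > 0 && data[0] == 0x42)
        std::printf("interesting first byte in a %zu-byte input\n", size);
    return 0;
}
```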
@ -1,2 +1,2 @@
clickhouse_add_executable(aggregate_function_state_deserialization_fuzzer aggregate_function_state_deserialization_fuzzer.cpp ${SRCS})
target_link_libraries(aggregate_function_state_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions clickhouse_functions)
target_link_libraries(aggregate_function_state_deserialization_fuzzer PRIVATE clickhouse_functions clickhouse_aggregate_functions)
@ -1,4 +1,4 @@
add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
add_compile_options("$<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>")

if (USE_INCLUDE_WHAT_YOU_USE)
set (CMAKE_CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})
@ -601,10 +601,6 @@ endif()

dbms_target_link_libraries(PUBLIC ch_contrib::consistent_hashing)

if (TARGET ch_contrib::annoy)
dbms_target_link_libraries(PUBLIC ch_contrib::annoy)
endif()

if (TARGET ch_contrib::usearch)
dbms_target_link_libraries(PUBLIC ch_contrib::usearch)
endif()
@ -2751,7 +2751,7 @@ void ClientBase::runLibFuzzer()
for (auto & arg : fuzzer_args_holder)
fuzzer_args.emplace_back(arg.data());

int fuzzer_argc = fuzzer_args.size();
int fuzzer_argc = static_cast<int>(fuzzer_args.size());
char ** fuzzer_argv = fuzzer_args.data();

LLVMFuzzerRunDriver(&fuzzer_argc, &fuzzer_argv, [](const uint8_t * data, size_t size)

File diff suppressed because it is too large
@ -3,6 +3,7 @@
#include <Columns/IColumn.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnVariant.h>
#include <Columns/ColumnString.h>
#include <DataTypes/IDataType.h>
#include <Common/WeakHash.h>

@ -19,11 +20,19 @@ namespace DB
*
* When new values are inserted into Dynamic column, the internal Variant
* type and column are extended if the inserted value has a new type.
* When the limit on the number of dynamic types is exceeded, all values
* with new types are inserted into a special shared variant with type String
* that contains values and their types in binary format.
*/
class ColumnDynamic final : public COWHelper<IColumnHelper<ColumnDynamic>, ColumnDynamic>
{
public:
///
/// Maximum limit on dynamic types. We use ColumnVariant to store all the types,
/// so the limit cannot be greater than ColumnVariant::MAX_NESTED_COLUMNS.
/// We also always have a reserved variant for the shared variant.
static constexpr size_t MAX_DYNAMIC_TYPES_LIMIT = ColumnVariant::MAX_NESTED_COLUMNS - 1;
static constexpr const char * SHARED_VARIANT_TYPE_NAME = "SharedVariant";

struct Statistics
{
enum class Source
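The comment block above describes the core mechanism of this diff. A toy model of the "shared variant" fallback (illustrative C++ only, not ClickHouse code): once the number of distinct types reaches the limit, further values are stored as (type name, encoded value) pairs in a single fallback store rather than getting a dedicated subcolumn.

```cpp
#include <cstdio>
#include <set>
#include <string>
#include <utility>
#include <vector>

struct ToyDynamicColumn
{
    size_t max_types;
    std::set<std::string> variant_types;                      // types with dedicated subcolumns
    std::vector<std::pair<std::string, std::string>> shared;  // overflow: (type name, encoded value)

    void insert(const std::string & type, const std::string & encoded_value)
    {
        if (variant_types.count(type) || variant_types.size() < max_types)
            variant_types.insert(type);                       // value would go to its own subcolumn
        else
            shared.emplace_back(type, encoded_value);         // limit hit: keep type and value together
    }
};

int main()
{
    ToyDynamicColumn col{2};
    col.insert("Int8", "42");
    col.insert("String", "hello");
    col.insert("Float64", "3.14"); // third distinct type -> goes to the shared store
    std::printf("dedicated types: %zu, shared rows: %zu\n", col.variant_types.size(), col.shared.size());
}
```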
@ -32,12 +41,27 @@ public:
MERGE, /// Statistics were calculated during merge of several MergeTree parts.
};

explicit Statistics(Source source_) : source(source_) {}

/// Source of the statistics.
Source source;
/// Statistics data: (variant name) -> (total variant size in data part).
std::unordered_map<String, size_t> data;
/// Statistics data for usual variants: (variant name) -> (total variant size in data part).
std::unordered_map<String, size_t> variants_statistics;
/// Statistics data for variants from the shared variant: (variant name) -> (total variant size in data part).
/// For the shared variant we store statistics only for the first 256 variants (should cover almost all cases and it's not expensive).
static constexpr const size_t MAX_SHARED_VARIANT_STATISTICS_SIZE = 256;
std::unordered_map<String, size_t> shared_variants_statistics;
};

using StatisticsPtr = std::shared_ptr<const Statistics>;

struct ComparatorBase;
using ComparatorAscendingUnstable = ComparatorAscendingUnstableImpl<ComparatorBase>;
using ComparatorAscendingStable = ComparatorAscendingStableImpl<ComparatorBase>;
using ComparatorDescendingUnstable = ComparatorDescendingUnstableImpl<ComparatorBase>;
using ComparatorDescendingStable = ComparatorDescendingStableImpl<ComparatorBase>;
using ComparatorEqual = ComparatorEqualImpl<ComparatorBase>;

private:
friend class COWHelper<IColumnHelper<ColumnDynamic>, ColumnDynamic>;

@ -54,28 +78,32 @@ private:
};

explicit ColumnDynamic(size_t max_dynamic_types_);
ColumnDynamic(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {});
ColumnDynamic(MutableColumnPtr variant_column_, const DataTypePtr & variant_type_, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_ = {});
ColumnDynamic(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_ = {});

public:
/** Create immutable column using immutable arguments. These arguments may be shared with other columns.
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
*/
using Base = COWHelper<IColumnHelper<ColumnDynamic>, ColumnDynamic>;
static Ptr create(const ColumnPtr & variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {})
static Ptr create(const ColumnPtr & variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_ = {})
{
return ColumnDynamic::create(variant_column_->assumeMutable(), variant_info_, max_dynamic_types_, statistics_);
return ColumnDynamic::create(variant_column_->assumeMutable(), variant_info_, max_dynamic_types_, global_max_dynamic_types_, statistics_);
}

static MutablePtr create(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {})
static MutablePtr create(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_ = {})
{
return Base::create(std::move(variant_column_), variant_info_, max_dynamic_types_, statistics_);
return Base::create(std::move(variant_column_), variant_info_, max_dynamic_types_, global_max_dynamic_types_, statistics_);
}

static MutablePtr create(MutableColumnPtr variant_column_, const DataTypePtr & variant_type, size_t max_dynamic_types_, const Statistics & statistics_ = {});

static ColumnPtr create(ColumnPtr variant_column_, const DataTypePtr & variant_type, size_t max_dynamic_types_, const Statistics & statistics_ = {})
static MutablePtr create(MutableColumnPtr variant_column_, const DataTypePtr & variant_type_, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_ = {})
{
return create(variant_column_->assumeMutable(), variant_type, max_dynamic_types_, statistics_);
return Base::create(std::move(variant_column_), variant_type_, max_dynamic_types_, global_max_dynamic_types_, statistics_);
}

static ColumnPtr create(ColumnPtr variant_column_, const DataTypePtr & variant_type, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_ = {})
{
return create(variant_column_->assumeMutable(), variant_type, max_dynamic_types_, global_max_dynamic_types_, statistics_);
}

static MutablePtr create(size_t max_dynamic_types_)
@ -83,7 +111,7 @@ public:
return Base::create(max_dynamic_types_);
}

std::string getName() const override { return "Dynamic(max_types=" + std::to_string(max_dynamic_types) + ")"; }
std::string getName() const override { return "Dynamic(max_types=" + std::to_string(global_max_dynamic_types) + ")"; }

const char * getFamilyName() const override
{
@ -98,12 +126,12 @@ public:
MutableColumnPtr cloneEmpty() const override
{
/// Keep current dynamic structure
return Base::create(variant_column->cloneEmpty(), variant_info, max_dynamic_types, statistics);
return Base::create(variant_column->cloneEmpty(), variant_info, max_dynamic_types, global_max_dynamic_types, statistics);
}

MutableColumnPtr cloneResized(size_t size) const override
{
return Base::create(variant_column->cloneResized(size), variant_info, max_dynamic_types, statistics);
return Base::create(variant_column->cloneResized(size), variant_info, max_dynamic_types, global_max_dynamic_types, statistics);
}

size_t size() const override
@ -111,15 +139,9 @@ public:
return variant_column->size();
}

Field operator[](size_t n) const override
{
return (*variant_column)[n];
}
Field operator[](size_t n) const override;

void get(size_t n, Field & res) const override
{
variant_column->get(n, res);
}
void get(size_t n, Field & res) const override;

bool isDefaultAt(size_t n) const override
{
@ -187,7 +209,7 @@ public:

ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override
{
return create(variant_column->filter(filt, result_size_hint), variant_info, max_dynamic_types);
return create(variant_column->filter(filt, result_size_hint), variant_info, max_dynamic_types, global_max_dynamic_types);
}

void expand(const Filter & mask, bool inverted) override
@ -197,17 +219,17 @@ public:

ColumnPtr permute(const Permutation & perm, size_t limit) const override
{
return create(variant_column->permute(perm, limit), variant_info, max_dynamic_types);
return create(variant_column->permute(perm, limit), variant_info, max_dynamic_types, global_max_dynamic_types);
}

ColumnPtr index(const IColumn & indexes, size_t limit) const override
{
return create(variant_column->index(indexes, limit), variant_info, max_dynamic_types);
return create(variant_column->index(indexes, limit), variant_info, max_dynamic_types, global_max_dynamic_types);
}

ColumnPtr replicate(const Offsets & replicate_offsets) const override
{
return create(variant_column->replicate(replicate_offsets), variant_info, max_dynamic_types);
return create(variant_column->replicate(replicate_offsets), variant_info, max_dynamic_types, global_max_dynamic_types);
}

MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override
@ -216,7 +238,7 @@ public:
MutableColumns scattered_columns;
scattered_columns.reserve(num_columns);
for (auto & scattered_variant_column : scattered_variant_columns)
scattered_columns.emplace_back(create(std::move(scattered_variant_column), variant_info, max_dynamic_types));
scattered_columns.emplace_back(create(std::move(scattered_variant_column), variant_info, max_dynamic_types, global_max_dynamic_types));

return scattered_columns;
}
@ -238,16 +260,10 @@ public:
}

void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override
{
variant_column->getPermutation(direction, stability, limit, nan_direction_hint, res);
}
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;

void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override
{
variant_column->updatePermutation(direction, stability, limit, nan_direction_hint, res, equal_ranges);
}
size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override;

void reserve(size_t n) override
{
@ -295,7 +311,7 @@ public:
bool structureEquals(const IColumn & rhs) const override
{
if (const auto * rhs_concrete = typeid_cast<const ColumnDynamic *>(&rhs))
return max_dynamic_types == rhs_concrete->max_dynamic_types;
return global_max_dynamic_types == rhs_concrete->global_max_dynamic_types;
return false;
}

@ -338,17 +354,75 @@ public:
const ColumnVariant & getVariantColumn() const { return assert_cast<const ColumnVariant &>(*variant_column); }
ColumnVariant & getVariantColumn() { return assert_cast<ColumnVariant &>(*variant_column); }

bool addNewVariant(const DataTypePtr & new_variant);
void addStringVariant();
bool addNewVariant(const DataTypePtr & new_variant, const String & new_variant_name);
bool addNewVariant(const DataTypePtr & new_variant) { return addNewVariant(new_variant, new_variant->getName()); }

bool hasDynamicStructure() const override { return true; }
void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override;

const Statistics & getStatistics() const { return statistics; }
const StatisticsPtr & getStatistics() const { return statistics; }
void setStatistics(const StatisticsPtr & statistics_) { statistics = statistics_; }

size_t getMaxDynamicTypes() const { return max_dynamic_types; }

/// Check if we can add new variant types.
/// The shared variant doesn't count against the limit but is always present,
/// so we should subtract 1 from the total types count.
bool canAddNewVariants(size_t current_variants_count, size_t new_variants_count) const { return current_variants_count + new_variants_count - 1 <= max_dynamic_types; }
bool canAddNewVariant(size_t current_variants_count) const { return canAddNewVariants(current_variants_count, 1); }
bool canAddNewVariants(size_t new_variants_count) const { return canAddNewVariants(variant_info.variant_names.size(), new_variants_count); }
bool canAddNewVariant() const { return canAddNewVariants(variant_info.variant_names.size(), 1); }

void setVariantType(const DataTypePtr & variant_type);
void setMaxDynamicPaths(size_t max_dynamic_type_);

static const String & getSharedVariantTypeName()
{
static const String name = SHARED_VARIANT_TYPE_NAME;
return name;
}

static DataTypePtr getSharedVariantDataType();

ColumnVariant::Discriminator getSharedVariantDiscriminator() const
{
return variant_info.variant_name_to_discriminator.at(getSharedVariantTypeName());
}

ColumnString & getSharedVariant()
{
return assert_cast<ColumnString &>(getVariantColumn().getVariantByGlobalDiscriminator(getSharedVariantDiscriminator()));
}

const ColumnString & getSharedVariant() const
{
return assert_cast<const ColumnString &>(getVariantColumn().getVariantByGlobalDiscriminator(getSharedVariantDiscriminator()));
}

/// Serializes type and value in binary format into the provided shared variant. Doesn't update Variant discriminators and offsets.
static void serializeValueIntoSharedVariant(ColumnString & shared_variant, const IColumn & src, const DataTypePtr & type, const SerializationPtr & serialization, size_t n);

/// Insert value into the shared variant. Also updates Variant discriminators and offsets.
void insertValueIntoSharedVariant(const IColumn & src, const DataTypePtr & type, const String & type_name, size_t n);

const SerializationPtr & getVariantSerialization(const DataTypePtr & variant_type, const String & variant_name) const
{
/// Get serialization for the provided data type.
/// To avoid calling type->getDefaultSerialization() every time, we use a simple cache with a max size.
/// When the max size is reached, just clear the cache.
if (serialization_cache.size() == SERIALIZATION_CACHE_MAX_SIZE)
serialization_cache.clear();

if (auto it = serialization_cache.find(variant_name); it != serialization_cache.end())
return it->second;

return serialization_cache.emplace(variant_name, variant_type->getDefaultSerialization()).first->second;
}

const SerializationPtr & getVariantSerialization(const DataTypePtr & variant_type) const { return getVariantSerialization(variant_type, variant_type->getName()); }
private:
void createVariantInfo(const DataTypePtr & variant_type);

/// Combine current variant with the other variant and return global discriminators mapping
/// from other variant to the combined one. It's used for inserting from
/// different variants.
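The `canAddNewVariants` arithmetic above is easy to get wrong by one, since the shared variant occupies a slot in the count but is excluded from the limit. A standalone restatement of the check (illustrative only):

```cpp
#include <cassert>
#include <cstddef>

// Restates canAddNewVariants(): current_variants_count includes the
// always-present shared variant, which does not count against the limit,
// hence the "- 1". Callers always have at least the shared variant,
// so current_variants_count >= 1 and the subtraction cannot wrap.
bool can_add_new_variants(size_t current_variants_count, size_t new_variants_count, size_t max_dynamic_types)
{
    return current_variants_count + new_variants_count - 1 <= max_dynamic_types;
}

int main()
{
    // With max_dynamic_types = 2 and only the shared variant present (count = 1),
    // two real variants still fit; a third does not.
    assert(can_add_new_variants(1, 2, 2));
    assert(!can_add_new_variants(1, 3, 2));
}
```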
@ -361,12 +435,19 @@ private:
/// Store the type of current variant with some additional information.
VariantInfo variant_info;
/// The maximum number of different types that can be stored in this Dynamic column.
/// If exceeded, all new variants will be converted to String.
/// If exceeded, all new variants will be added to a special shared variant with type String
/// in binary format. This limit can be different for different instances of Dynamic column.
/// When max_dynamic_types = 0, we will have only the shared variant and insert all values into it.
size_t max_dynamic_types;
/// The types limit specified in the data type by the user: Dynamic(max_types=N).
/// max_dynamic_types in all column instances of this Dynamic type can only be smaller
/// (for example, max_dynamic_types can be reduced in takeDynamicStructureFromSourceColumns
/// before merge of different Dynamic columns).
size_t global_max_dynamic_types;

/// Size statistics of each variant from a MergeTree data part.
/// Used in takeDynamicStructureFromSourceColumns and set during deserialization.
Statistics statistics;
StatisticsPtr statistics;

/// Cache (Variant name) -> (global discriminators mapping from this variant to current variant in Dynamic column).
/// Used to avoid mappings recalculation in combineVariants for the same Variant types.
@ -374,6 +455,17 @@ private:
/// Cache of Variant types that couldn't be combined with current variant in Dynamic column.
/// Used to avoid checking if combination is possible for the same Variant types.
std::unordered_set<String> variants_with_failed_combination;

/// We can use serializations of different data types to serialize values into the shared variant.
/// To avoid creating the same serialization multiple times, use a simple cache.
static const size_t SERIALIZATION_CACHE_MAX_SIZE = 256;
mutable std::unordered_map<String, SerializationPtr> serialization_cache;
};

void extendVariantColumn(
IColumn & variant_column,
const DataTypePtr & old_variant_type,
const DataTypePtr & new_variant_type,
std::unordered_map<String, UInt8> old_variant_name_to_discriminator);

}
@ -476,7 +476,7 @@ void ColumnVariant::insertFromImpl(const DB::IColumn & src_, size_t n, const std
}
}

void ColumnVariant::insertRangeFromImpl(const DB::IColumn & src_, size_t start, size_t length, const std::vector<ColumnVariant::Discriminator> * global_discriminators_mapping)
void ColumnVariant::insertRangeFromImpl(const DB::IColumn & src_, size_t start, size_t length, const std::vector<ColumnVariant::Discriminator> * global_discriminators_mapping, const Discriminator * skip_discriminator)
{
const size_t num_variants = variants.size();
const auto & src = assert_cast<const ColumnVariant &>(src_);
@ -557,11 +557,14 @@ void ColumnVariant::insertRangeFromImpl(const DB::IColumn & src_, size_t start,
Discriminator global_discr = src_global_discr;
if (global_discriminators_mapping && src_global_discr != NULL_DISCRIMINATOR)
global_discr = (*global_discriminators_mapping)[src_global_discr];
if (!skip_discriminator || global_discr != *skip_discriminator)
{
Discriminator local_discr = localDiscriminatorByGlobal(global_discr);
if (nested_length)
variants[local_discr]->insertRangeFrom(*src.variants[src_local_discr], nested_start, nested_length);
}
}
}

void ColumnVariant::insertManyFromImpl(const DB::IColumn & src_, size_t position, size_t length, const std::vector<ColumnVariant::Discriminator> * global_discriminators_mapping)
{
@ -610,7 +613,7 @@ void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t l
void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t length)
#endif
{
insertRangeFromImpl(src_, start, length, nullptr);
insertRangeFromImpl(src_, start, length, nullptr, nullptr);
}

#if !defined(DEBUG_OR_SANITIZER_BUILD)
@ -627,9 +630,9 @@ void ColumnVariant::insertFrom(const DB::IColumn & src_, size_t n, const std::ve
insertFromImpl(src_, n, &global_discriminators_mapping);
}

void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector<ColumnVariant::Discriminator> & global_discriminators_mapping)
void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector<ColumnVariant::Discriminator> & global_discriminators_mapping, Discriminator skip_discriminator)
{
insertRangeFromImpl(src_, start, length, &global_discriminators_mapping);
insertRangeFromImpl(src_, start, length, &global_discriminators_mapping, &skip_discriminator);
}

void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length, const std::vector<ColumnVariant::Discriminator> & global_discriminators_mapping)
@ -673,6 +676,14 @@ void ColumnVariant::insertManyIntoVariantFrom(DB::ColumnVariant::Discriminator g
variants[local_discr]->insertManyFrom(src_, position, length);
}

void ColumnVariant::deserializeBinaryIntoVariant(ColumnVariant::Discriminator global_discr, const SerializationPtr & serialization, ReadBuffer & buf, const FormatSettings & format_settings)
{
auto local_discr = localDiscriminatorByGlobal(global_discr);
serialization->deserializeBinary(*variants[local_discr], buf, format_settings);
getLocalDiscriminators().push_back(local_discr);
getOffsets().push_back(variants[local_discr]->size() - 1);
}

void ColumnVariant::insertDefault()
{
getLocalDiscriminators().push_back(NULL_DISCRIMINATOR);
@ -1213,9 +1224,7 @@ struct ColumnVariant::ComparatorBase

ALWAYS_INLINE int compare(size_t lhs, size_t rhs) const
{
int res = parent.compareAt(lhs, rhs, parent, nan_direction_hint);

return res;
return parent.compareAt(lhs, rhs, parent, nan_direction_hint);
}
};

@ -2,6 +2,8 @@

#include <Columns/IColumn.h>
#include <Columns/ColumnVector.h>
#include <Formats/FormatSettings.h>
#include <DataTypes/Serializations/ISerialization.h>


namespace DB
@ -196,13 +198,15 @@ public:

/// Methods for insertion from another Variant but with known mapping between global discriminators.
void insertFrom(const IColumn & src_, size_t n, const std::vector<ColumnVariant::Discriminator> & global_discriminators_mapping);
void insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector<ColumnVariant::Discriminator> & global_discriminators_mapping);
/// Don't insert data into the variant with the skip_discriminator global discriminator; it will be processed separately.
void insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector<ColumnVariant::Discriminator> & global_discriminators_mapping, Discriminator skip_discriminator);
void insertManyFrom(const IColumn & src_, size_t position, size_t length, const std::vector<ColumnVariant::Discriminator> & global_discriminators_mapping);

/// Methods for insertion into a specific variant.
void insertIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t n);
void insertRangeIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t start, size_t length);
void insertManyIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t position, size_t length);
void deserializeBinaryIntoVariant(Discriminator global_discr, const SerializationPtr & serialization, ReadBuffer & buf, const FormatSettings & format_settings);

void insertDefault() override;
void insertManyDefaults(size_t length) override;
@ -264,6 +268,7 @@ public:
ColumnPtr & getVariantPtrByGlobalDiscriminator(size_t discr) { return variants[global_to_local_discriminators.at(discr)]; }

const NestedColumns & getVariants() const { return variants; }
NestedColumns & getVariants() { return variants; }

const IColumn & getLocalDiscriminatorsColumn() const { return *local_discriminators; }
IColumn & getLocalDiscriminatorsColumn() { return *local_discriminators; }
@ -303,6 +308,8 @@ public:
return true;
}

std::vector<Discriminator> getLocalToGlobalDiscriminatorsMapping() const { return local_to_global_discriminators; }

/// Check if we have only 1 non-empty variant and no NULL values,
/// and if so, return the discriminator of this non-empty column.
std::optional<Discriminator> getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls() const;
@ -323,7 +330,7 @@ public:

private:
void insertFromImpl(const IColumn & src_, size_t n, const std::vector<ColumnVariant::Discriminator> * global_discriminators_mapping);
void insertRangeFromImpl(const IColumn & src_, size_t start, size_t length, const std::vector<ColumnVariant::Discriminator> * global_discriminators_mapping);
void insertRangeFromImpl(const IColumn & src_, size_t start, size_t length, const std::vector<ColumnVariant::Discriminator> * global_discriminators_mapping, const Discriminator * skip_discriminator);
void insertManyFromImpl(const IColumn & src_, size_t position, size_t length, const std::vector<ColumnVariant::Discriminator> * global_discriminators_mapping);

void initIdentityGlobalToLocalDiscriminatorsMapping();
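A toy model of the new `skip_discriminator` parameter (illustrative C++ only, not ClickHouse code): when copying a range between two tagged-union columns, rows carrying the skipped tag are left out because the caller processes them separately, e.g. shared-variant rows that need re-encoding rather than a plain range copy.

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

using Discriminator = uint8_t;

// Copy src[start, start + length), skipping rows whose tag equals *skip
// (skip == nullptr means "copy everything", mirroring the nullptr case above).
std::vector<Discriminator> copy_range_skipping(
    const std::vector<Discriminator> & src, size_t start, size_t length, const Discriminator * skip)
{
    std::vector<Discriminator> dst;
    for (size_t i = start; i < start + length; ++i)
        if (!skip || src[i] != *skip)
            dst.push_back(src[i]);
    return dst;
}

int main()
{
    std::vector<Discriminator> discrs = {0, 1, 2, 1, 0};
    Discriminator skip = 1;
    auto copied = copy_range_skipping(discrs, 0, discrs.size(), &skip);
    std::printf("copied %zu of %zu rows\n", copied.size(), discrs.size()); // 3 of 5
}
```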
@ -7,28 +7,34 @@ using namespace DB;

TEST(ColumnDynamic, CreateEmpty)
{
auto column = ColumnDynamic::create(255);
auto column = ColumnDynamic::create(254);
ASSERT_TRUE(column->empty());
ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant()");
ASSERT_TRUE(column->getVariantInfo().variant_names.empty());
ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.empty());
ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant(SharedVariant)");
ASSERT_EQ(column->getVariantInfo().variant_names.size(), 1);
ASSERT_EQ(column->getVariantInfo().variant_names[0], "SharedVariant");
ASSERT_EQ(column->getVariantInfo().variant_name_to_discriminator.size(), 1);
ASSERT_EQ(column->getVariantInfo().variant_name_to_discriminator.at("SharedVariant"), 0);
ASSERT_TRUE(column->getVariantColumn().getVariantByGlobalDiscriminator(0).empty());
}

TEST(ColumnDynamic, InsertDefault)
{
auto column = ColumnDynamic::create(255);
auto column = ColumnDynamic::create(254);
column->insertDefault();
ASSERT_TRUE(column->size() == 1);
ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant()");
ASSERT_TRUE(column->getVariantInfo().variant_names.empty());
ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.empty());
ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant(SharedVariant)");
ASSERT_EQ(column->getVariantInfo().variant_names.size(), 1);
ASSERT_EQ(column->getVariantInfo().variant_names[0], "SharedVariant");
ASSERT_EQ(column->getVariantInfo().variant_name_to_discriminator.size(), 1);
ASSERT_EQ(column->getVariantInfo().variant_name_to_discriminator.at("SharedVariant"), 0);
ASSERT_TRUE(column->getVariantColumn().getVariantByGlobalDiscriminator(0).empty());
ASSERT_TRUE(column->isNullAt(0));
ASSERT_EQ((*column)[0], Field(Null()));
}

TEST(ColumnDynamic, InsertFields)
{
auto column = ColumnDynamic::create(255);
auto column = ColumnDynamic::create(254);
column->insert(Field(42));
column->insert(Field(-42));
column->insert(Field("str1"));
@ -41,16 +47,16 @@ TEST(ColumnDynamic, InsertFields)
column->insert(Field(43.43));
ASSERT_TRUE(column->size() == 10);

ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant(Float64, Int8, String)");
std::vector<String> expected_names = {"Float64", "Int8", "String"};
ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant(Float64, Int8, SharedVariant, String)");
std::vector<String> expected_names = {"Float64", "Int8", "SharedVariant", "String"};
ASSERT_EQ(column->getVariantInfo().variant_names, expected_names);
std::unordered_map<String, UInt8> expected_variant_name_to_discriminator = {{"Float64", 0}, {"Int8", 1}, {"String", 2}};
std::unordered_map<String, UInt8> expected_variant_name_to_discriminator = {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}};
ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator);
}

ColumnDynamic::MutablePtr getDynamicWithManyVariants(size_t num_variants, Field tuple_element = Field(42))
{
auto column = ColumnDynamic::create(255);
auto column = ColumnDynamic::create(254);
for (size_t i = 0; i != num_variants; ++i)
{
Tuple tuple;
@ -66,61 +72,71 @@ TEST(ColumnDynamic, InsertFieldsOverflow1)
{
auto column = getDynamicWithManyVariants(253);

ASSERT_EQ(column->getVariantInfo().variant_names.size(), 253);
ASSERT_EQ(column->getVariantInfo().variant_names.size(), 254);

column->insert(Field(42.42));
ASSERT_EQ(column->getVariantInfo().variant_names.size(), 254);
ASSERT_EQ(column->size(), 254);
ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255);
ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("Float64"));

column->insert(Field(42));
ASSERT_EQ(column->size(), 255);
ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255);
ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
ASSERT_EQ(column->getSharedVariant().size(), 1);
Field field = (*column)[column->size() - 1];
ASSERT_EQ(field, "42");
ASSERT_EQ(field, 42);

column->insert(Field(43));
ASSERT_EQ(column->size(), 256);
ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255);
ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
ASSERT_EQ(column->getSharedVariant().size(), 2);
field = (*column)[column->size() - 1];
ASSERT_EQ(field, "43");
ASSERT_EQ(field, 43);

column->insert(Field("str1"));
ASSERT_EQ(column->size(), 257);
ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255);
ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
ASSERT_EQ(column->getSharedVariant().size(), 3);
field = (*column)[column->size() - 1];
ASSERT_EQ(field, "str1");

column->insert(Field(Array({Field(42), Field(43)})));
ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255);
ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)"));
ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
ASSERT_EQ(column->getSharedVariant().size(), 4);
field = (*column)[column->size() - 1];
ASSERT_EQ(field, "[42, 43]");
ASSERT_EQ(field, Field(Array({Field(42), Field(43)})));
}

TEST(ColumnDynamic, InsertFieldsOverflow2)
{
auto column = getDynamicWithManyVariants(254);
ASSERT_EQ(column->getVariantInfo().variant_names.size(), 254);
ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255);

column->insert(Field("str1"));
ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255);
ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
ASSERT_EQ(column->getSharedVariant().size(), 1);
Field field = (*column)[column->size() - 1];
ASSERT_EQ(field, "str1");

column->insert(Field(42));
ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255);
ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
Field field = (*column)[column->size() - 1];
ASSERT_EQ(field, "42");
ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
ASSERT_EQ(column->getSharedVariant().size(), 2);
field = (*column)[column->size() - 1];
ASSERT_EQ(field, 42);
}

ColumnDynamic::MutablePtr getInsertFromColumn(size_t num = 1)
{
auto column_from = ColumnDynamic::create(255);
auto column_from = ColumnDynamic::create(254);
for (size_t i = 0; i != num; ++i)
{
column_from->insert(Field(42));
@ -154,41 +170,41 @@ void checkInsertFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynami

TEST(ColumnDynamic, InsertFrom1)
{
auto column_to = ColumnDynamic::create(255);
checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}});
auto column_to = ColumnDynamic::create(254);
checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}});
}

TEST(ColumnDynamic, InsertFrom2)
{
auto column_to = ColumnDynamic::create(255);
auto column_to = ColumnDynamic::create(254);
column_to->insert(Field(42));
column_to->insert(Field(42.42));
column_to->insert(Field("str"));

checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}});
checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}});
}

TEST(ColumnDynamic, InsertFrom3)
{
auto column_to = ColumnDynamic::create(255);
auto column_to = ColumnDynamic::create(254);
column_to->insert(Field(42));
column_to->insert(Field(42.42));
column_to->insert(Field("str"));
column_to->insert(Array({Field(42)}));

checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Array(Int8), Float64, Int8, String)", {"Array(Int8)", "Float64", "Int8", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"String", 3}});
checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Array(Int8), Float64, Int8, SharedVariant, String)", {"Array(Int8)", "Float64", "Int8", "SharedVariant", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"SharedVariant", 3}, {"String", 4}});
}

TEST(ColumnDynamic, InsertFromOverflow1)
{
auto column_from = ColumnDynamic::create(255);
auto column_from = ColumnDynamic::create(254);
column_from->insert(Field(42));
column_from->insert(Field(42.42));
column_from->insert(Field("str"));

auto column_to = getDynamicWithManyVariants(253);
column_to->insertFrom(*column_from, 0);
ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 254);
ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
auto field = (*column_to)[column_to->size() - 1];
ASSERT_EQ(field, 42);
@ -196,20 +212,22 @@ TEST(ColumnDynamic, InsertFromOverflow1)
column_to->insertFrom(*column_from, 1);
ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
ASSERT_EQ(column_to->getSharedVariant().size(), 1);
field = (*column_to)[column_to->size() - 1];
ASSERT_EQ(field, "42.42");
ASSERT_EQ(field, 42.42);

column_to->insertFrom(*column_from, 2);
ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
ASSERT_EQ(column_to->getSharedVariant().size(), 2);
field = (*column_to)[column_to->size() - 1];
ASSERT_EQ(field, "str");
}

TEST(ColumnDynamic, InsertFromOverflow2)
|
||||
{
|
||||
auto column_from = ColumnDynamic::create(255);
|
||||
auto column_from = ColumnDynamic::create(254);
|
||||
column_from->insert(Field(42));
|
||||
column_from->insert(Field(42.42));
|
||||
|
||||
@ -221,9 +239,32 @@ TEST(ColumnDynamic, InsertFromOverflow2)
|
||||
|
||||
column_to->insertFrom(*column_from, 1);
|
||||
ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
|
||||
ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
|
||||
ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
|
||||
ASSERT_EQ(column_to->getSharedVariant().size(), 1);
|
||||
field = (*column_to)[column_to->size() - 1];
|
||||
ASSERT_EQ(field, "42.42");
|
||||
ASSERT_EQ(field, 42.42);
|
||||
}
|
||||
|
||||
TEST(ColumnDynamic, InsertFromOverflow3)
|
||||
{
|
||||
auto column_from = ColumnDynamic::create(1);
|
||||
column_from->insert(Field(42));
|
||||
column_from->insert(Field(42.42));
|
||||
|
||||
auto column_to = ColumnDynamic::create(254);
|
||||
column_to->insert(Field(41));
|
||||
|
||||
column_to->insertFrom(*column_from, 0);
|
||||
ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
|
||||
ASSERT_EQ(column_to->getSharedVariant().size(), 0);
|
||||
auto field = (*column_to)[column_to->size() - 1];
|
||||
ASSERT_EQ(field, 42);
|
||||
|
||||
column_to->insertFrom(*column_from, 1);
|
||||
ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
|
||||
ASSERT_EQ(column_to->getSharedVariant().size(), 1);
|
||||
field = (*column_to)[column_to->size() - 1];
|
||||
ASSERT_EQ(field, 42.42);
|
||||
}
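
The overflow tests above all exercise the same new behaviour: once a Dynamic column has used up its max_dynamic_types budget, values of further types no longer get their own variant discriminator and are instead binary-encoded into the SharedVariant. A minimal standalone sketch of that behaviour, mirroring InsertFromOverflow3 (it assumes the ClickHouse ColumnDynamic API exactly as used in the tests above):

#include <Columns/ColumnDynamic.h>
#include <Core/Field.h>

using namespace DB;

void sharedVariantOverflowSketch()
{
    /// A limit of 1 dynamic type: the first inserted type becomes a real variant.
    auto column = ColumnDynamic::create(1);
    column->insert(Field(42));     /// Int8 gets its own discriminator.
    column->insert(Field(42.42));  /// Float64 overflows into the shared variant.

    const auto & info = column->getVariantInfo();
    /// No Float64 discriminator was created for the overflowed value...
    [[maybe_unused]] bool has_float64 = info.variant_name_to_discriminator.contains("Float64"); /// false
    /// ...but the value is still stored: it lives as one row of the shared variant.
    [[maybe_unused]] size_t shared_rows = column->getSharedVariant().size(); /// 1
}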

void checkInsertManyFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynamic::MutablePtr & column_to, const std::string & expected_variant, const std::vector<String> & expected_names, const std::unordered_map<String, UInt8> & expected_variant_name_to_discriminator)
@ -256,42 +297,43 @@ void checkInsertManyFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDy

TEST(ColumnDynamic, InsertManyFrom1)
{
    auto column_to = ColumnDynamic::create(255);
    checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}});
    auto column_to = ColumnDynamic::create(254);
    checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}});
}

TEST(ColumnDynamic, InsertManyFrom2)
{
    auto column_to = ColumnDynamic::create(255);
    auto column_to = ColumnDynamic::create(254);
    column_to->insert(Field(42));
    column_to->insert(Field(42.42));
    column_to->insert(Field("str"));

    checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}});
    checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}});
}

TEST(ColumnDynamic, InsertManyFrom3)
{
    auto column_to = ColumnDynamic::create(255);
    auto column_to = ColumnDynamic::create(254);
    column_to->insert(Field(42));
    column_to->insert(Field(42.42));
    column_to->insert(Field("str"));
    column_to->insert(Array({Field(42)}));

    checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Array(Int8), Float64, Int8, String)", {"Array(Int8)", "Float64", "Int8", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"String", 3}});
    checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Array(Int8), Float64, Int8, SharedVariant, String)", {"Array(Int8)", "Float64", "Int8", "SharedVariant", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"SharedVariant", 3}, {"String", 4}});
}

TEST(ColumnDynamic, InsertManyFromOverflow1)
{
    auto column_from = ColumnDynamic::create(255);
    auto column_from = ColumnDynamic::create(254);
    column_from->insert(Field(42));
    column_from->insert(Field(42.42));
    column_from->insert(Field("str"));

    auto column_to = getDynamicWithManyVariants(253);
    column_to->insertManyFrom(*column_from, 0, 2);
    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 254);
    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
    ASSERT_EQ(column_to->getSharedVariant().size(), 0);
    auto field = (*column_to)[column_to->size() - 2];
    ASSERT_EQ(field, 42);
    field = (*column_to)[column_to->size() - 1];
@ -300,15 +342,17 @@ TEST(ColumnDynamic, InsertManyFromOverflow1)

    column_to->insertManyFrom(*column_from, 1, 2);
    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_EQ(column_to->getSharedVariant().size(), 2);
    field = (*column_to)[column_to->size() - 2];
    ASSERT_EQ(field, "42.42");
    ASSERT_EQ(field, 42.42);
    field = (*column_to)[column_to->size() - 1];
    ASSERT_EQ(field, "42.42");
    ASSERT_EQ(field, 42.42);

    column_to->insertManyFrom(*column_from, 2, 2);
    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_EQ(column_to->getSharedVariant().size(), 4);
    field = (*column_to)[column_to->size() - 1];
    ASSERT_EQ(field, "str");
    field = (*column_to)[column_to->size() - 2];
@ -317,14 +361,15 @@ TEST(ColumnDynamic, InsertManyFromOverflow1)

TEST(ColumnDynamic, InsertManyFromOverflow2)
{
    auto column_from = ColumnDynamic::create(255);
    auto column_from = ColumnDynamic::create(254);
    column_from->insert(Field(42));
    column_from->insert(Field(42.42));

    auto column_to = getDynamicWithManyVariants(253);
    column_to->insertManyFrom(*column_from, 0, 2);
    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 254);
    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
    ASSERT_EQ(column_to->getSharedVariant().size(), 0);
    auto field = (*column_to)[column_to->size() - 2];
    ASSERT_EQ(field, 42);
    field = (*column_to)[column_to->size() - 1];
@ -333,11 +378,39 @@ TEST(ColumnDynamic, InsertManyFromOverflow2)

    column_to->insertManyFrom(*column_from, 1, 2);
    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_EQ(column_to->getSharedVariant().size(), 2);
    field = (*column_to)[column_to->size() - 2];
    ASSERT_EQ(field, "42.42");
    ASSERT_EQ(field, 42.42);
    field = (*column_to)[column_to->size() - 1];
    ASSERT_EQ(field, "42.42");
    ASSERT_EQ(field, 42.42);
}

TEST(ColumnDynamic, InsertManyFromOverflow3)
{
    auto column_from = ColumnDynamic::create(1);
    column_from->insert(Field(42));
    column_from->insert(Field(42.42));

    auto column_to = ColumnDynamic::create(254);
    column_to->insert(Field(41));

    column_to->insertManyFrom(*column_from, 0, 2);
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
    ASSERT_EQ(column_to->getSharedVariant().size(), 0);
    auto field = (*column_to)[column_to->size() - 2];
    ASSERT_EQ(field, 42);
    field = (*column_to)[column_to->size() - 1];
    ASSERT_EQ(field, 42);

    column_to->insertManyFrom(*column_from, 1, 2);
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
    ASSERT_EQ(column_to->getSharedVariant().size(), 2);
    field = (*column_to)[column_to->size() - 2];
    ASSERT_EQ(field, 42.42);
    field = (*column_to)[column_to->size() - 1];
    ASSERT_EQ(field, 42.42);
}

void checkInsertRangeFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynamic::MutablePtr & column_to, const std::string & expected_variant, const std::vector<String> & expected_names, const std::unordered_map<String, UInt8> & expected_variant_name_to_discriminator)
@ -368,34 +441,34 @@ void checkInsertRangeFrom(const ColumnDynamic::MutablePtr & column_from, ColumnD

TEST(ColumnDynamic, InsertRangeFrom1)
{
    auto column_to = ColumnDynamic::create(255);
    checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}});
    auto column_to = ColumnDynamic::create(254);
    checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}});
}

TEST(ColumnDynamic, InsertRangeFrom2)
{
    auto column_to = ColumnDynamic::create(255);
    auto column_to = ColumnDynamic::create(254);
    column_to->insert(Field(42));
    column_to->insert(Field(42.42));
    column_to->insert(Field("str1"));

    checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}});
    checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}});
}

TEST(ColumnDynamic, InsertRangeFrom3)
{
    auto column_to = ColumnDynamic::create(255);
    auto column_to = ColumnDynamic::create(254);
    column_to->insert(Field(42));
    column_to->insert(Field(42.42));
    column_to->insert(Field("str1"));
    column_to->insert(Array({Field(42)}));

    checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Array(Int8), Float64, Int8, String)", {"Array(Int8)", "Float64", "Int8", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"String", 3}});
    checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Array(Int8), Float64, Int8, SharedVariant, String)", {"Array(Int8)", "Float64", "Int8", "SharedVariant", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"SharedVariant", 3}, {"String", 4}});
}

TEST(ColumnDynamic, InsertRangeFromOverflow1)
{
    auto column_from = ColumnDynamic::create(255);
    auto column_from = ColumnDynamic::create(254);
    column_from->insert(Field(42));
    column_from->insert(Field(43));
    column_from->insert(Field(42.42));
@ -403,23 +476,25 @@ TEST(ColumnDynamic, InsertRangeFromOverflow1)

    auto column_to = getDynamicWithManyVariants(253);
    column_to->insertRangeFrom(*column_from, 0, 4);
    ASSERT_EQ(column_to->size(), 257);
    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
    ASSERT_EQ(column_to->getSharedVariant().size(), 2);
    auto field = (*column_to)[column_to->size() - 4];
    ASSERT_EQ(field, Field(42));
    field = (*column_to)[column_to->size() - 3];
    ASSERT_EQ(field, Field(43));
    field = (*column_to)[column_to->size() - 2];
    ASSERT_EQ(field, Field("42.42"));
    ASSERT_EQ(field, Field(42.42));
    field = (*column_to)[column_to->size() - 1];
    ASSERT_EQ(field, Field("str"));
}

TEST(ColumnDynamic, InsertRangeFromOverflow2)
{
    auto column_from = ColumnDynamic::create(255);
    auto column_from = ColumnDynamic::create(254);
    column_from->insert(Field(42));
    column_from->insert(Field(43));
    column_from->insert(Field(42.42));
@ -428,19 +503,20 @@ TEST(ColumnDynamic, InsertRangeFromOverflow2)

    column_to->insertRangeFrom(*column_from, 0, 3);
    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
    ASSERT_EQ(column_to->getSharedVariant().size(), 1);
    auto field = (*column_to)[column_to->size() - 3];
    ASSERT_EQ(field, Field(42));
    field = (*column_to)[column_to->size() - 2];
    ASSERT_EQ(field, Field(43));
    field = (*column_to)[column_to->size() - 1];
    ASSERT_EQ(field, Field("42.42"));
    ASSERT_EQ(field, Field(42.42));
}

TEST(ColumnDynamic, InsertRangeFromOverflow3)
{
    auto column_from = ColumnDynamic::create(255);
    auto column_from = ColumnDynamic::create(254);
    column_from->insert(Field(42));
    column_from->insert(Field(43));
    column_from->insert(Field(42.42));
@ -449,20 +525,21 @@ TEST(ColumnDynamic, InsertRangeFromOverflow3)

    column_to->insert(Field("Str"));
    column_to->insertRangeFrom(*column_from, 0, 3);
    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
    ASSERT_EQ(column_to->getSharedVariant().size(), 3);
    auto field = (*column_to)[column_to->size() - 3];
    ASSERT_EQ(field, Field(42));
    field = (*column_to)[column_to->size() - 2];
    ASSERT_EQ(field, Field(43));
    field = (*column_to)[column_to->size() - 1];
    ASSERT_EQ(field, Field("42.42"));
    ASSERT_EQ(field, Field(42.42));
}

TEST(ColumnDynamic, InsertRangeFromOverflow4)
{
    auto column_from = ColumnDynamic::create(255);
    auto column_from = ColumnDynamic::create(254);
    column_from->insert(Field(42));
    column_from->insert(Field(42.42));
    column_from->insert(Field("str"));
@ -471,19 +548,20 @@ TEST(ColumnDynamic, InsertRangeFromOverflow4)

    column_to->insertRangeFrom(*column_from, 0, 3);
    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
    ASSERT_EQ(column_to->getSharedVariant().size(), 3);
    auto field = (*column_to)[column_to->size() - 3];
    ASSERT_EQ(field, Field("42"));
    ASSERT_EQ(field, Field(42));
    field = (*column_to)[column_to->size() - 2];
    ASSERT_EQ(field, Field("42.42"));
    ASSERT_EQ(field, Field(42.42));
    field = (*column_to)[column_to->size() - 1];
    ASSERT_EQ(field, Field("str"));
}

TEST(ColumnDynamic, InsertRangeFromOverflow5)
{
    auto column_from = ColumnDynamic::create(255);
    auto column_from = ColumnDynamic::create(254);
    column_from->insert(Field(42));
    column_from->insert(Field(43));
    column_from->insert(Field(42.42));
@ -493,22 +571,23 @@ TEST(ColumnDynamic, InsertRangeFromOverflow5)

    column_to->insert(Field("str"));
    column_to->insertRangeFrom(*column_from, 0, 4);
    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
    ASSERT_EQ(column_to->getSharedVariant().size(), 3);
    auto field = (*column_to)[column_to->size() - 4];
    ASSERT_EQ(field, Field(42));
    field = (*column_to)[column_to->size() - 3];
    ASSERT_EQ(field, Field(43));
    field = (*column_to)[column_to->size() - 2];
    ASSERT_EQ(field, Field("42.42"));
    ASSERT_EQ(field, Field(42.42));
    field = (*column_to)[column_to->size() - 1];
    ASSERT_EQ(field, Field("str"));
}

TEST(ColumnDynamic, InsertRangeFromOverflow6)
{
    auto column_from = ColumnDynamic::create(255);
    auto column_from = ColumnDynamic::create(254);
    column_from->insert(Field(42));
    column_from->insert(Field(43));
    column_from->insert(Field(44));
@ -520,13 +599,14 @@ TEST(ColumnDynamic, InsertRangeFromOverflow6)

    auto column_to = getDynamicWithManyVariants(253);
    column_to->insertRangeFrom(*column_from, 2, 5);
    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)"));
    ASSERT_EQ(column_to->getSharedVariant().size(), 4);
    auto field = (*column_to)[column_to->size() - 5];
    ASSERT_EQ(field, Field("44"));
    ASSERT_EQ(field, Field(44));
    field = (*column_to)[column_to->size() - 4];
    ASSERT_EQ(field, Field(42.42));
    field = (*column_to)[column_to->size() - 3];
@ -534,12 +614,136 @@ TEST(ColumnDynamic, InsertRangeFromOverflow6)

    field = (*column_to)[column_to->size() - 2];
    ASSERT_EQ(field, Field("str"));
    field = (*column_to)[column_to->size() - 1];
    ASSERT_EQ(field, Field("[42]"));
    ASSERT_EQ(field, Field(Array({Field(42)})));
}

TEST(ColumnDynamic, InsertRangeFromOverflow7)
{
    auto column_from = ColumnDynamic::create(2);
    column_from->insert(Field(42.42));
    column_from->insert(Field("str1"));
    column_from->insert(Field(42));
    column_from->insert(Field(43.43));
    column_from->insert(Field(Array({Field(41)})));
    column_from->insert(Field(43));
    column_from->insert(Field("str2"));
    column_from->insert(Field(Array({Field(42)})));

    auto column_to = ColumnDynamic::create(254);
    column_to->insert(Field(42));

    column_to->insertRangeFrom(*column_from, 0, 8);
    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 4);
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)"));
    ASSERT_EQ(column_to->getSharedVariant().size(), 2);
    auto field = (*column_to)[column_to->size() - 8];
    ASSERT_EQ(field, Field(42.42));
    field = (*column_to)[column_to->size() - 7];
    ASSERT_EQ(field, Field("str1"));
    field = (*column_to)[column_to->size() - 6];
    ASSERT_EQ(field, Field(42));
    field = (*column_to)[column_to->size() - 5];
    ASSERT_EQ(field, Field(43.43));
    field = (*column_to)[column_to->size() - 4];
    ASSERT_EQ(field, Field(Array({Field(41)})));
    field = (*column_to)[column_to->size() - 3];
    ASSERT_EQ(field, Field(43));
    field = (*column_to)[column_to->size() - 2];
    ASSERT_EQ(field, Field("str2"));
    field = (*column_to)[column_to->size() - 1];
    ASSERT_EQ(field, Field(Array({Field(42)})));
}

TEST(ColumnDynamic, InsertRangeFromOverflow8)
{
    auto column_from = ColumnDynamic::create(2);
    column_from->insert(Field(42.42));
    column_from->insert(Field("str1"));
    column_from->insert(Field(42));
    column_from->insert(Field(43.43));
    column_from->insert(Field(Array({Field(41)})));
    column_from->insert(Field(43));
    column_from->insert(Field("str2"));
    column_from->insert(Field(Array({Field(42)})));

    auto column_to = ColumnDynamic::create(2);
    column_to->insert(Field(42));
    column_from->insert(Field("str1"));

    column_to->insertRangeFrom(*column_from, 0, 8);
    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 3);
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)"));
    ASSERT_EQ(column_to->getSharedVariant().size(), 4);
    auto field = (*column_to)[column_to->size() - 8];
    ASSERT_EQ(field, Field(42.42));
    field = (*column_to)[column_to->size() - 7];
    ASSERT_EQ(field, Field("str1"));
    field = (*column_to)[column_to->size() - 6];
    ASSERT_EQ(field, Field(42));
    field = (*column_to)[column_to->size() - 5];
    ASSERT_EQ(field, Field(43.43));
    field = (*column_to)[column_to->size() - 4];
    ASSERT_EQ(field, Field(Array({Field(41)})));
    field = (*column_to)[column_to->size() - 3];
    ASSERT_EQ(field, Field(43));
    field = (*column_to)[column_to->size() - 2];
    ASSERT_EQ(field, Field("str2"));
    field = (*column_to)[column_to->size() - 1];
    ASSERT_EQ(field, Field(Array({Field(42)})));
}

TEST(ColumnDynamic, InsertRangeFromOverflow9)
{
    auto column_from = ColumnDynamic::create(3);
    column_from->insert(Field("str1"));
    column_from->insert(Field(42.42));
    column_from->insert(Field("str2"));
    column_from->insert(Field(42));
    column_from->insert(Field(43.43));
    column_from->insert(Field(Array({Field(41)})));
    column_from->insert(Field(43));
    column_from->insert(Field("str2"));
    column_from->insert(Field(Array({Field(42)})));

    auto column_to = ColumnDynamic::create(2);
    column_to->insert(Field(42));

    column_to->insertRangeFrom(*column_from, 0, 9);
    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 3);
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)"));
    ASSERT_EQ(column_to->getSharedVariant().size(), 4);
    auto field = (*column_to)[column_to->size() - 9];
    ASSERT_EQ(field, Field("str1"));
    field = (*column_to)[column_to->size() - 8];
    ASSERT_EQ(field, Field(42.42));
    field = (*column_to)[column_to->size() - 7];
    ASSERT_EQ(field, Field("str2"));
    field = (*column_to)[column_to->size() - 6];
    ASSERT_EQ(field, Field(42));
    field = (*column_to)[column_to->size() - 5];
    ASSERT_EQ(field, Field(43.43));
    field = (*column_to)[column_to->size() - 4];
    ASSERT_EQ(field, Field(Array({Field(41)})));
    field = (*column_to)[column_to->size() - 3];
    ASSERT_EQ(field, Field(43));
    field = (*column_to)[column_to->size() - 2];
    ASSERT_EQ(field, Field("str2"));
    field = (*column_to)[column_to->size() - 1];
    ASSERT_EQ(field, Field(Array({Field(42)})));
}

TEST(ColumnDynamic, SerializeDeserializeFromArena1)
{
    auto column = ColumnDynamic::create(255);
    auto column = ColumnDynamic::create(254);
    column->insert(Field(42));
    column->insert(Field(42.42));
    column->insert(Field("str"));
@ -564,7 +768,7 @@ TEST(ColumnDynamic, SerializeDeserializeFromArena1)

TEST(ColumnDynamic, SerializeDeserializeFromArena2)
{
    auto column_from = ColumnDynamic::create(255);
    auto column_from = ColumnDynamic::create(254);
    column_from->insert(Field(42));
    column_from->insert(Field(42.42));
    column_from->insert(Field("str"));
@ -577,26 +781,26 @@ TEST(ColumnDynamic, SerializeDeserializeFromArena2)

    column_from->serializeValueIntoArena(2, arena, pos);
    column_from->serializeValueIntoArena(3, arena, pos);

    auto column_to = ColumnDynamic::create(255);
    auto column_to = ColumnDynamic::create(254);
    pos = column_to->deserializeAndInsertFromArena(ref1.data);
    pos = column_to->deserializeAndInsertFromArena(pos);
    pos = column_to->deserializeAndInsertFromArena(pos);
    column_to->deserializeAndInsertFromArena(pos);

    ASSERT_EQ((*column_from)[column_from->size() - 4], 42);
    ASSERT_EQ((*column_from)[column_from->size() - 3], 42.42);
    ASSERT_EQ((*column_from)[column_from->size() - 2], "str");
    ASSERT_EQ((*column_from)[column_from->size() - 1], Null());
    ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), "Variant(Float64, Int8, String)");
    std::vector<String> expected_names = {"Float64", "Int8", "String"};
    ASSERT_EQ((*column_to)[column_to->size() - 4], 42);
    ASSERT_EQ((*column_to)[column_to->size() - 3], 42.42);
    ASSERT_EQ((*column_to)[column_to->size() - 2], "str");
    ASSERT_EQ((*column_to)[column_to->size() - 1], Null());
    ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), "Variant(Float64, Int8, SharedVariant, String)");
    std::vector<String> expected_names = {"Float64", "Int8", "SharedVariant", "String"};
    ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names);
    std::unordered_map<String, UInt8> expected_variant_name_to_discriminator = {{"Float64", 0}, {"Int8", 1}, {"String", 2}};
    std::unordered_map<String, UInt8> expected_variant_name_to_discriminator = {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}};
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator);
}

TEST(ColumnDynamic, SerializeDeserializeFromArenaOverflow)
TEST(ColumnDynamic, SerializeDeserializeFromArenaOverflow1)
{
    auto column_from = ColumnDynamic::create(255);
    auto column_from = ColumnDynamic::create(254);
    column_from->insert(Field(42));
    column_from->insert(Field(42.42));
    column_from->insert(Field("str"));
@ -615,18 +819,56 @@ TEST(ColumnDynamic, SerializeDeserializeFromArenaOverflow)

    pos = column_to->deserializeAndInsertFromArena(pos);
    column_to->deserializeAndInsertFromArena(pos);

    ASSERT_EQ((*column_from)[column_from->size() - 4], 42);
    ASSERT_EQ((*column_from)[column_from->size() - 3], 42.42);
    ASSERT_EQ((*column_from)[column_from->size() - 2], "str");
    ASSERT_EQ((*column_from)[column_from->size() - 1], Null());
    ASSERT_EQ((*column_to)[column_to->size() - 4], 42);
    ASSERT_EQ((*column_to)[column_to->size() - 3], 42.42);
    ASSERT_EQ((*column_to)[column_to->size() - 2], "str");
    ASSERT_EQ((*column_to)[column_to->size() - 1], Null());
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_EQ(column_to->getSharedVariant().size(), 2);
}

TEST(ColumnDynamic, SerializeDeserializeFromArenaOverflow2)
{
    auto column_from = ColumnDynamic::create(2);
    column_from->insert(Field(42));
    column_from->insert(Field(42.42));
    column_from->insert(Field("str"));
    column_from->insert(Field(Null()));
    column_from->insert(Field(Array({Field(42)})));

    Arena arena;
    const char * pos = nullptr;
    auto ref1 = column_from->serializeValueIntoArena(0, arena, pos);
    column_from->serializeValueIntoArena(1, arena, pos);
    column_from->serializeValueIntoArena(2, arena, pos);
    column_from->serializeValueIntoArena(3, arena, pos);
    column_from->serializeValueIntoArena(4, arena, pos);

    auto column_to = ColumnDynamic::create(2);
    column_to->insert(Field(42.42));
    pos = column_to->deserializeAndInsertFromArena(ref1.data);
    pos = column_to->deserializeAndInsertFromArena(pos);
    pos = column_to->deserializeAndInsertFromArena(pos);
    pos = column_to->deserializeAndInsertFromArena(pos);
    column_to->deserializeAndInsertFromArena(pos);

    ASSERT_EQ((*column_to)[column_to->size() - 5], 42);
    ASSERT_EQ((*column_to)[column_to->size() - 4], 42.42);
    ASSERT_EQ((*column_to)[column_to->size() - 3], "str");
    ASSERT_EQ((*column_to)[column_to->size() - 2], Null());
    ASSERT_EQ((*column_to)[column_to->size() - 1], Field(Array({Field(42)})));
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)"));
    ASSERT_EQ(column_to->getSharedVariant().size(), 2);
}
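
Both arena tests rely on the same serialization contract of IColumn that the assertions exercise: serializeValueIntoArena appends one self-describing value and returns a StringRef to it, while deserializeAndInsertFromArena consumes one value and returns the position just past it, so calls can be chained. A condensed sketch of that round trip (API usage taken from the tests above, not new code from the diff):

#include <Columns/ColumnDynamic.h>
#include <Common/Arena.h>
#include <Core/Field.h>

using namespace DB;

void arenaRoundTripSketch()
{
    auto column_from = ColumnDynamic::create(2);
    column_from->insert(Field(42));
    column_from->insert(Field("str"));

    Arena arena;
    const char * pos = nullptr;
    /// Each call appends one self-describing value; ref points at the first one.
    auto ref = column_from->serializeValueIntoArena(0, arena, pos);
    column_from->serializeValueIntoArena(1, arena, pos);

    /// Deserialization chains: every call returns the position after the value it read.
    auto column_to = ColumnDynamic::create(2);
    const char * next = column_to->deserializeAndInsertFromArena(ref.data);
    column_to->deserializeAndInsertFromArena(next);
}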

TEST(ColumnDynamic, skipSerializedInArena)
{
    auto column_from = ColumnDynamic::create(255);
    auto column_from = ColumnDynamic::create(3);
    column_from->insert(Field(42));
    column_from->insert(Field(42.42));
    column_from->insert(Field("str"));
@ -640,13 +882,41 @@ TEST(ColumnDynamic, skipSerializedInArena)

    auto ref4 = column_from->serializeValueIntoArena(3, arena, pos);

    const char * end = ref4.data + ref4.size;
    auto column_to = ColumnDynamic::create(255);
    auto column_to = ColumnDynamic::create(254);
    pos = column_to->skipSerializedInArena(ref1.data);
    pos = column_to->skipSerializedInArena(pos);
    pos = column_to->skipSerializedInArena(pos);
    pos = column_to->skipSerializedInArena(pos);

    ASSERT_EQ(pos, end);
    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.empty());
    ASSERT_TRUE(column_to->getVariantInfo().variant_names.empty());
    ASSERT_EQ(column_to->getVariantInfo().variant_name_to_discriminator.at("SharedVariant"), 0);
    ASSERT_EQ(column_to->getVariantInfo().variant_names, Names{"SharedVariant"});
}

TEST(ColumnDynamic, compare)
{
    auto column_from = ColumnDynamic::create(3);
    column_from->insert(Field(42));
    column_from->insert(Field(42.42));
    column_from->insert(Field("str"));
    column_from->insert(Field(Null()));
    column_from->insert(Field(Array({Field(42)})));

    ASSERT_EQ(column_from->compareAt(0, 0, *column_from, -1), 0);
    ASSERT_EQ(column_from->compareAt(0, 1, *column_from, -1), 1);
    ASSERT_EQ(column_from->compareAt(1, 1, *column_from, -1), 0);
    ASSERT_EQ(column_from->compareAt(0, 2, *column_from, -1), -1);
    ASSERT_EQ(column_from->compareAt(2, 0, *column_from, -1), 1);
    ASSERT_EQ(column_from->compareAt(2, 4, *column_from, -1), 1);
    ASSERT_EQ(column_from->compareAt(4, 2, *column_from, -1), -1);
    ASSERT_EQ(column_from->compareAt(4, 4, *column_from, -1), 0);
    ASSERT_EQ(column_from->compareAt(0, 3, *column_from, -1), 1);
    ASSERT_EQ(column_from->compareAt(1, 3, *column_from, -1), 1);
    ASSERT_EQ(column_from->compareAt(2, 3, *column_from, -1), 1);
    ASSERT_EQ(column_from->compareAt(3, 3, *column_from, -1), 0);
    ASSERT_EQ(column_from->compareAt(4, 3, *column_from, -1), 1);
    ASSERT_EQ(column_from->compareAt(3, 0, *column_from, -1), -1);
    ASSERT_EQ(column_from->compareAt(3, 1, *column_from, -1), -1);
    ASSERT_EQ(column_from->compareAt(3, 2, *column_from, -1), -1);
    ASSERT_EQ(column_from->compareAt(3, 4, *column_from, -1), -1);
}
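
A total order can be read off the compare assertions, although the diff never states it: NULL compares below any value, values of different dynamic types compare by their type name ("Array(Int8)" < "Float64" < "Int8" < "String"), and only values of the same type compare by value. A small sketch restating that inference:

#include <Columns/ColumnDynamic.h>
#include <Core/Field.h>
#include <cassert>

using namespace DB;

void compareOrderingSketch()
{
    auto col = ColumnDynamic::create(3);
    col->insert(Field(Null()));
    col->insert(Field(42));
    col->insert(Field("str"));

    /// NULL (row 0) sorts below any value; nan_direction_hint is -1 as in the test.
    assert(col->compareAt(0, 1, *col, -1) < 0);
    /// Across types the order follows the type name: "Int8" < "String".
    assert(col->compareAt(1, 2, *col, -1) < 0);
}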

@ -58,6 +58,7 @@
#cmakedefine01 USE_FILELOG
#cmakedefine01 USE_ODBC
#cmakedefine01 USE_BLAKE3
#cmakedefine01 USE_USEARCH
#cmakedefine01 USE_SKIM
#cmakedefine01 USE_PRQL
#cmakedefine01 USE_ULID

@ -134,6 +134,7 @@ namespace DB
    M(Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0) \
    M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \
    M(Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0) \
    M(UInt64, max_keep_alive_requests, 10000, "The maximum number of requests handled via a single http keepalive connection before the server closes this connection.", 0) \
    M(Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0) \
    M(Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0) \
    M(Seconds, replicated_fetches_http_receive_timeout, 0, "HTTP receive timeout for fetch part requests. Inherited from default profile `http_receive_timeout` if not set explicitly.", 0) \

@ -676,6 +676,7 @@ class IColumn;
    M(Bool, query_cache_squash_partial_results, true, "Squash partial result blocks to blocks of size 'max_block_size'. Reduces performance of inserts into the query cache but improves the compressability of cache entries.", 0) \
    M(Seconds, query_cache_ttl, 60, "After this time in seconds entries in the query cache become stale", 0) \
    M(Bool, query_cache_share_between_users, false, "Allow other users to read entry in the query cache", 0) \
    M(String, query_cache_tag, "", "A string which acts as a label for query cache entries. The same queries with different tags are considered different by the query cache.", 0) \
    M(Bool, enable_sharing_sets_for_mutations, true, "Allow sharing set objects build for IN subqueries between different tasks of the same mutation. This reduces memory usage and CPU consumption", 0) \
    \
    M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
@ -907,20 +908,18 @@ class IColumn;
    M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \
    M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
    M(Bool, allow_experimental_time_series_table, false, "Allows experimental TimeSeries table engine", 0) \
    M(Bool, allow_experimental_vector_similarity_index, false, "Allow experimental vector similarity index", 0) \
    M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \
    M(Bool, allow_experimental_dynamic_type, false, "Allow Dynamic data type", 0) \
    M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \
    M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \
    M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \
    M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \
    M(UInt64, max_threads_for_annoy_index_creation, 4, "Number of threads used to build Annoy indexes (0 means all cores, not recommended)", 0) \
    M(Int64, annoy_index_search_k_nodes, -1, "SELECT queries search up to this many nodes in Annoy indexes.", 0) \
    M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \
    M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \
    M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \
    M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \
    M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \
    M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \
    M(Bool, use_hive_partitioning, false, "Allows to use hive partitioning for File, URL, S3, AzureBlobStorage and HDFS engines.", 0)\
    \
    M(Bool, allow_statistics_optimize, false, "Allows using statistics to optimize queries", 0) ALIAS(allow_statistic_optimize) \
    M(Bool, allow_experimental_statistics, false, "Allows using statistics", 0) ALIAS(allow_experimental_statistic) \
@ -1036,6 +1035,10 @@ class IColumn;
    MAKE_OBSOLETE(M, UInt64, parallel_replicas_min_number_of_granules_to_enable, 0) \
    MAKE_OBSOLETE(M, Bool, query_plan_optimize_projection, true) \
    MAKE_OBSOLETE(M, Bool, query_cache_store_results_of_queries_with_nondeterministic_functions, false) \
    MAKE_OBSOLETE(M, Bool, allow_experimental_annoy_index, false) \
    MAKE_OBSOLETE(M, UInt64, max_threads_for_annoy_index_creation, 4) \
    MAKE_OBSOLETE(M, Int64, annoy_index_search_k_nodes, -1) \
    MAKE_OBSOLETE(M, Bool, allow_experimental_usearch_index, false) \
    MAKE_OBSOLETE(M, Bool, optimize_move_functions_out_of_any, false) \
    MAKE_OBSOLETE(M, Bool, allow_experimental_undrop_table_query, true) \
    MAKE_OBSOLETE(M, Bool, allow_experimental_s3queue, true) \

@ -80,11 +80,14 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
            {"restore_replace_external_engines_to_null", false, false, "New setting."},
            {"input_format_json_max_depth", 1000000, 1000, "It was unlimited in previous versions, but that was unsafe."},
            {"merge_tree_min_bytes_per_task_for_remote_reading", 4194304, 2097152, "Value is unified with `filesystem_prefetch_min_bytes_for_single_read_task`"},
            {"use_hive_partitioning", false, false, "Allows to use hive partitioning for File, URL, S3, AzureBlobStorage and HDFS engines."},
            {"allow_experimental_kafka_offsets_storage_in_keeper", false, false, "Allow the usage of experimental Kafka storage engine that stores the committed offsets in ClickHouse Keeper"},
            {"allow_archive_path_syntax", true, true, "Added new setting to allow disabling archive path syntax."},
            {"query_cache_tag", "", "", "New setting for labeling query cache settings."},
            {"allow_experimental_time_series_table", false, false, "Added new setting to allow the TimeSeries table engine"},
            {"enable_analyzer", 1, 1, "Added an alias to a setting `allow_experimental_analyzer`."},
            {"optimize_functions_to_subcolumns", false, true, "Enabled settings by default"},
            {"allow_experimental_vector_similarity_index", false, false, "Added new setting to allow experimental vector similarity indexes"},
        }
    },
    {"24.7",

@ -1,2 +1,2 @@
clickhouse_add_executable (names_and_types_fuzzer names_and_types_fuzzer.cpp)
target_link_libraries (names_and_types_fuzzer PRIVATE dbms clickhouse_functions)
target_link_libraries (names_and_types_fuzzer PRIVATE clickhouse_functions)

@ -7,6 +7,7 @@
#include <DataTypes/NestedUtils.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesBinaryEncoding.h>
#include <Columns/ColumnDynamic.h>
#include <Columns/ColumnVariant.h>
#include <Core/Field.h>
@ -14,6 +15,7 @@
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <IO/ReadBufferFromMemory.h>

namespace DB
{
@ -71,8 +73,8 @@ static DataTypePtr create(const ASTPtr & arguments)

    auto * literal = argument->arguments->children[1]->as<ASTLiteral>();

    if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.safeGet<UInt64>() == 0 || literal->value.safeGet<UInt64>() > ColumnVariant::MAX_NESTED_COLUMNS)
        throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "'max_types' argument for Dynamic type should be a positive integer between 1 and 255");
    if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.safeGet<UInt64>() > ColumnDynamic::MAX_DYNAMIC_TYPES_LIMIT)
        throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "'max_types' argument for Dynamic type should be a positive integer between 0 and {}", ColumnDynamic::MAX_DYNAMIC_TYPES_LIMIT);

    return std::make_shared<DataTypeDynamic>(literal->value.safeGet<UInt64>());
}
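
The hunk above loosens the 'max_types' validation: 0 is now legal (every value then goes through the shared variant) and the upper bound becomes ColumnDynamic::MAX_DYNAMIC_TYPES_LIMIT, which the test changes elsewhere in this commit put at 254. A condensed restatement of the new check as a standalone predicate (a sketch, not the committed code):

#include <Columns/ColumnDynamic.h>
#include <Core/Field.h>

using namespace DB;

/// The value is valid iff it is a UInt64 literal not exceeding the limit.
/// Note that the committed error text still says "positive integer" even though 0 is accepted.
bool isValidMaxTypes(const Field & value)
{
    return value.getType() == Field::Types::UInt64
        && value.safeGet<UInt64>() <= ColumnDynamic::MAX_DYNAMIC_TYPES_LIMIT; /// 254
}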

@ -84,30 +86,72 @@ void registerDataTypeDynamic(DataTypeFactory & factory)

std::unique_ptr<IDataType::SubstreamData> DataTypeDynamic::getDynamicSubcolumnData(std::string_view subcolumn_name, const DB::IDataType::SubstreamData & data, bool throw_if_null) const
{
    auto [subcolumn_type_name, subcolumn_nested_name] = Nested::splitName(subcolumn_name);
    auto [type_subcolumn_name, subcolumn_nested_name] = Nested::splitName(subcolumn_name);
    /// Check if requested subcolumn is a valid data type.
    auto subcolumn_type = DataTypeFactory::instance().tryGet(String(subcolumn_type_name));
    auto subcolumn_type = DataTypeFactory::instance().tryGet(String(type_subcolumn_name));
    if (!subcolumn_type)
    {
        if (throw_if_null)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Dynamic type doesn't have subcolumn '{}'", subcolumn_type_name);
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Dynamic type doesn't have subcolumn '{}'", type_subcolumn_name);
        return nullptr;
    }

    std::unique_ptr<SubstreamData> res = std::make_unique<SubstreamData>(subcolumn_type->getDefaultSerialization());
    res->type = subcolumn_type;
    std::optional<ColumnVariant::Discriminator> discriminator;
    ColumnPtr null_map_for_variant_from_shared_variant;
    if (data.column)
    {
        /// If column was provided, we should extract subcolumn from Dynamic column.
        const auto & dynamic_column = assert_cast<const ColumnDynamic &>(*data.column);
        const auto & variant_info = dynamic_column.getVariantInfo();
        const auto & variant_column = dynamic_column.getVariantColumn();
        const auto & shared_variant = dynamic_column.getSharedVariant();
        /// Check if provided Dynamic column has subcolumn of this type.
        auto it = variant_info.variant_name_to_discriminator.find(subcolumn_type->getName());
        String subcolumn_type_name = subcolumn_type->getName();
        auto it = variant_info.variant_name_to_discriminator.find(subcolumn_type_name);
        if (it != variant_info.variant_name_to_discriminator.end())
        {
            discriminator = it->second;
            res->column = dynamic_column.getVariantColumn().getVariantPtrByGlobalDiscriminator(*discriminator);
            res->column = variant_column.getVariantPtrByGlobalDiscriminator(*discriminator);
        }
        /// Otherwise if there is data in shared variant try to find requested type there.
        else if (!shared_variant.empty())
        {
            /// Create null map for resulting subcolumn to make it Nullable.
            auto null_map_column = ColumnUInt8::create();
            NullMap & null_map = assert_cast<ColumnUInt8 &>(*null_map_column).getData();
            null_map.reserve(variant_column.size());
            auto subcolumn = subcolumn_type->createColumn();
            auto shared_variant_local_discr = variant_column.localDiscriminatorByGlobal(dynamic_column.getSharedVariantDiscriminator());
            const auto & local_discriminators = variant_column.getLocalDiscriminators();
            const auto & offsets = variant_column.getOffsets();
            const FormatSettings format_settings;
            for (size_t i = 0; i != local_discriminators.size(); ++i)
            {
                if (local_discriminators[i] == shared_variant_local_discr)
                {
                    auto value = shared_variant.getDataAt(offsets[i]);
                    ReadBufferFromMemory buf(value.data, value.size);
                    auto type = decodeDataType(buf);
                    if (type->getName() == subcolumn_type_name)
                    {
                        dynamic_column.getVariantSerialization(subcolumn_type, subcolumn_type_name)->deserializeBinary(*subcolumn, buf, format_settings);
                        null_map.push_back(0);
                    }
                    else
                    {
                        null_map.push_back(1);
                    }
                }
                else
                {
                    null_map.push_back(1);
                }
            }

            res->column = std::move(subcolumn);
            null_map_for_variant_from_shared_variant = std::move(null_map_column);
        }
    }
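
The new branch above is what makes typed subcolumn reads see values that overflowed into the shared variant: each shared-variant row is decoded, rows whose encoded type matches the request are deserialized into the result, and every other row is marked NULL. A standalone sketch of just the null-map rule from that loop (plain C++, with hypothetical inputs standing in for the discriminators and encoded type names):

#include <cstdint>
#include <string>
#include <vector>

/// For each row: the row is non-NULL in the requested subcolumn only when it is
/// a shared-variant row whose encoded type name matches the requested type.
std::vector<uint8_t> buildNullMapSketch(
    const std::vector<bool> & is_shared_variant_row,
    const std::vector<std::string> & encoded_type_names,
    const std::string & requested_type)
{
    std::vector<uint8_t> null_map;
    null_map.reserve(is_shared_variant_row.size());
    for (size_t i = 0; i != is_shared_variant_row.size(); ++i)
    {
        bool matches = is_shared_variant_row[i] && encoded_type_names[i] == requested_type;
        null_map.push_back(matches ? 0 : 1); /// 0 = value present, 1 = NULL
    }
    return null_map;
}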

@ -125,7 +169,7 @@ std::unique_ptr<IDataType::SubstreamData> DataTypeDynamic::getDynamicSubcolumnDa
        return nullptr;
    }

    res->serialization = std::make_shared<SerializationDynamicElement>(res->serialization, subcolumn_type->getName(), is_null_map_subcolumn);
    res->serialization = std::make_shared<SerializationDynamicElement>(res->serialization, subcolumn_type->getName(), String(subcolumn_nested_name), is_null_map_subcolumn);
    /// Make resulting subcolumn Nullable only if type subcolumn can be inside Nullable or can be LowCardinality(Nullable()).
    bool make_subcolumn_nullable = subcolumn_type->canBeInsideNullable() || subcolumn_type->lowCardinality();
    if (!is_null_map_subcolumn && make_subcolumn_nullable)
@ -133,10 +177,10 @@ std::unique_ptr<IDataType::SubstreamData> DataTypeDynamic::getDynamicSubcolumnDa

    if (data.column)
    {
        /// Check if provided Dynamic column has subcolumn of this type. In this case we should use VariantSubcolumnCreator/VariantNullMapSubcolumnCreator to
        /// create full subcolumn from variant according to discriminators.
        if (discriminator)
        {
            /// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator/VariantNullMapSubcolumnCreator to
            /// create full subcolumn from variant according to discriminators.
            const auto & variant_column = assert_cast<const ColumnDynamic &>(*data.column).getVariantColumn();
            std::unique_ptr<ISerialization::ISubcolumnCreator> creator;
            if (is_null_map_subcolumn)
@ -154,6 +198,21 @@ std::unique_ptr<IDataType::SubstreamData> DataTypeDynamic::getDynamicSubcolumnDa
                make_subcolumn_nullable);
            res->column = creator->create(res->column);
        }
        /// Check if requested type was extracted from shared variant. In this case we should use
        /// VariantSubcolumnCreator to create full subcolumn from variant according to created null map.
        else if (null_map_for_variant_from_shared_variant)
        {
            if (is_null_map_subcolumn)
            {
                res->column = null_map_for_variant_from_shared_variant;
            }
            else
            {
                SerializationVariantElement::VariantSubcolumnCreator creator(
                    null_map_for_variant_from_shared_variant, "", 0, 0, make_subcolumn_nullable, null_map_for_variant_from_shared_variant);
                res->column = creator.create(res->column);
            }
        }
        /// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values.
        else if (is_null_map_subcolumn)
        {

@ -150,6 +150,12 @@ DataTypePtr DataTypeFactory::getCustom(DataTypeCustomDescPtr customization) cons
    return type;
}

DataTypePtr DataTypeFactory::getCustom(const String & base_name, DataTypeCustomDescPtr customization) const
{
    auto type = get(base_name);
    type->setCustomization(std::move(customization));
    return type;
}
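
The new getCustom overload lets a caller attach a customization to an already-registered base type by name instead of first building the base DataTypePtr manually. A hypothetical usage sketch (DataTypeCustomFixedName comes from DataTypeCustom.h; the alias name below is invented):

#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeCustom.h>

using namespace DB;

DataTypePtr makeAliasedUInt8Sketch()
{
    /// Wrap the registered UInt8 type under a custom display name.
    auto desc = std::make_unique<DataTypeCustomDesc>(
        std::make_unique<DataTypeCustomFixedName>("MyBoolLike")); /// hypothetical name
    return DataTypeFactory::instance().getCustom("UInt8", std::move(desc));
}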
|
||||
|
||||
void DataTypeFactory::registerDataType(const String & family_name, Value creator, Case case_sensitiveness)
|
||||
{
|
||||
|
@ -34,6 +34,7 @@ public:
|
||||
DataTypePtr get(const String & family_name, const ASTPtr & parameters) const;
|
||||
DataTypePtr get(const ASTPtr & ast) const;
|
||||
DataTypePtr getCustom(DataTypeCustomDescPtr customization) const;
|
||||
DataTypePtr getCustom(const String & base_name, DataTypeCustomDescPtr customization) const;
|
||||
|
||||
/// Return nullptr in case of error.
|
||||
DataTypePtr tryGet(const String & full_name) const;
|
||||
|
@ -192,17 +192,12 @@ MutableColumnPtr DataTypeTuple::createColumn() const
|
||||
|
||||
MutableColumnPtr DataTypeTuple::createColumn(const ISerialization & serialization) const
|
||||
{
|
||||
/// If we read Tuple as Variant subcolumn, it may be wrapped to SerializationVariantElement.
|
||||
/// Here we don't need it, so we drop this wrapper.
|
||||
const auto * current_serialization = &serialization;
|
||||
while (const auto * serialization_variant_element = typeid_cast<const SerializationVariantElement *>(current_serialization))
|
||||
current_serialization = serialization_variant_element->getNested().get();
|
||||
|
||||
/// If we read subcolumn of nested Tuple, it may be wrapped to SerializationNamed
|
||||
/// If we read subcolumn of nested Tuple or this Tuple is a subcolumn, it may be wrapped to SerializationWrapper
|
||||
/// several times to allow to reconstruct the substream path name.
|
||||
/// Here we don't need substream path name, so we drop first several wrapper serializations.
|
||||
while (const auto * serialization_named = typeid_cast<const SerializationNamed *>(current_serialization))
|
||||
current_serialization = serialization_named->getNested().get();
|
||||
const auto * current_serialization = &serialization;
|
||||
while (const auto * serialization_wrapper = dynamic_cast<const SerializationWrapper *>(current_serialization))
|
||||
current_serialization = serialization_wrapper->getNested().get();
|
||||
|
||||
const auto * serialization_tuple = typeid_cast<const SerializationTuple *>(current_serialization);
|
||||
if (!serialization_tuple)
|
||||
|
@ -117,7 +117,7 @@ bool DataTypeVariant::equals(const IDataType & rhs) const
|
||||
|
||||
/// The same data types with different custom names considered different.
|
||||
/// For example, UInt8 and Bool.
|
||||
if ((variants[i]->hasCustomName() || rhs_variant.variants[i]) && variants[i]->getName() != rhs_variant.variants[i]->getName())
|
||||
if ((variants[i]->hasCustomName() || rhs_variant.variants[i]->hasCustomName()) && variants[i]->getName() != rhs_variant.variants[i]->getName())
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -444,7 +444,7 @@ void encodeDataType(const DataTypePtr & type, WriteBuffer & buf)
|
||||
case BinaryTypeIndex::Dynamic:
|
||||
{
|
||||
const auto & dynamic_type = assert_cast<const DataTypeDynamic &>(*type);
|
||||
/// Maximum number of dynamic types is 255, we can write it as 1 byte.
|
||||
/// Maximum number of dynamic types is 254, we can write it as 1 byte.
|
||||
writeBinary(UInt8(dynamic_type.getMaxDynamicTypes()), buf);
|
||||
break;
|
||||
}
|
||||
|
@ -27,15 +27,21 @@ namespace ErrorCodes
struct SerializeBinaryBulkStateDynamic : public ISerialization::SerializeBinaryBulkState
{
    SerializationDynamic::DynamicStructureSerializationVersion structure_version;
    size_t max_dynamic_types;
    DataTypePtr variant_type;
    Names variant_names;
    SerializationPtr variant_serialization;
    ISerialization::SerializeBinaryBulkStatePtr variant_state;

    /// Variants statistics. Map (Variant name) -> (Variant size).
    ColumnDynamic::Statistics statistics = { .source = ColumnDynamic::Statistics::Source::READ, .data = {} };
    /// Variants statistics.
    ColumnDynamic::Statistics statistics;
    /// If true, statistics will be recalculated during serialization.
    bool recalculate_statistics = false;

    explicit SerializeBinaryBulkStateDynamic(UInt64 structure_version_) : structure_version(structure_version_) {}
    explicit SerializeBinaryBulkStateDynamic(UInt64 structure_version_)
        : structure_version(structure_version_), statistics(ColumnDynamic::Statistics::Source::READ)
    {
    }
};

struct DeserializeBinaryBulkStateDynamic : public ISerialization::DeserializeBinaryBulkState

@ -106,20 +112,41 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix(
    writeBinaryLittleEndian(structure_version, *stream);
    auto dynamic_state = std::make_shared<SerializeBinaryBulkStateDynamic>(structure_version);

    dynamic_state->max_dynamic_types = column_dynamic.getMaxDynamicTypes();
    /// Write max_dynamic_types parameter, because it can differ from the max_dynamic_types
    /// that is specified in the Dynamic type (we could decrease it before merge).
    writeBinaryLittleEndian(dynamic_state->max_dynamic_types, *stream);

    dynamic_state->variant_type = variant_info.variant_type;
    dynamic_state->variant_names = variant_info.variant_names;
    const auto & variant_column = column_dynamic.getVariantColumn();

    /// Write internal Variant type name.
    /// Write information about variants.
    size_t num_variants = dynamic_state->variant_names.size() - 1; /// Don't write shared variant, Dynamic column should always have it.
    writeBinaryLittleEndian(num_variants, *stream);
    if (settings.data_types_binary_encoding)
        encodeDataType(dynamic_state->variant_type, *stream);
    {
        const auto & variants = assert_cast<const DataTypeVariant &>(*dynamic_state->variant_type).getVariants();
        for (const auto & variant : variants)
        {
            if (variant->getName() != ColumnDynamic::getSharedVariantTypeName())
                encodeDataType(variant, *stream);
        }
    }
    else
        writeStringBinary(dynamic_state->variant_type->getName(), *stream);
    {
        for (const auto & name : dynamic_state->variant_names)
        {
            if (name != ColumnDynamic::getSharedVariantTypeName())
                writeStringBinary(name, *stream);
        }
    }

    /// Write statistics in prefix if needed.
    if (settings.dynamic_write_statistics == SerializeBinaryBulkSettings::DynamicStatisticsMode::PREFIX)
    {
        const auto & statistics = column_dynamic.getStatistics();
        /// First, write statistics for usual variants.
        for (size_t i = 0; i != variant_info.variant_names.size(); ++i)
        {
            size_t size = 0;

@ -129,13 +156,55 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix(
            /// - statistics read from the data part during deserialization of Dynamic column (Statistics::Source::READ).
            /// We can rely only on statistics calculated during the merge, because column with statistics that was read
            /// during deserialization from some data part could be filtered/limited/transformed/etc and so the statistics can be outdated.
            if (!statistics.data.empty() && statistics.source == ColumnDynamic::Statistics::Source::MERGE)
                size = statistics.data.at(variant_info.variant_names[i]);
            if (statistics && statistics->source == ColumnDynamic::Statistics::Source::MERGE)
                size = statistics->variants_statistics.at(variant_info.variant_names[i]);
            /// Otherwise we can use only variant sizes from current column.
            else
                size = variant_column.getVariantByGlobalDiscriminator(i).size();
            writeVarUInt(size, *stream);
        }

        /// Second, write statistics for variants in shared variant.
        /// Check if we have statistics calculated during merge of some data parts (Statistics::Source::MERGE).
        if (statistics && statistics->source == ColumnDynamic::Statistics::Source::MERGE)
        {
            writeVarUInt(statistics->shared_variants_statistics.size(), *stream);
            for (const auto & [variant_name, size] : statistics->shared_variants_statistics)
            {
                writeStringBinary(variant_name, *stream);
                writeVarUInt(size, *stream);
            }
        }
        /// If we don't have statistics for shared variants from merge, calculate it from the column.
        else
        {
            std::unordered_map<String, size_t> shared_variants_statistics;
            const auto & shared_variant = column_dynamic.getSharedVariant();
            for (size_t i = 0; i != shared_variant.size(); ++i)
            {
                auto value = shared_variant.getDataAt(i);
                ReadBufferFromMemory buf(value.data, value.size);
                auto type = decodeDataType(buf);
                auto type_name = type->getName();
                if (auto it = shared_variants_statistics.find(type_name); it != shared_variants_statistics.end())
                    ++it->second;
                else if (shared_variants_statistics.size() < ColumnDynamic::Statistics::MAX_SHARED_VARIANT_STATISTICS_SIZE)
                    shared_variants_statistics.emplace(type_name, 1);
            }

            writeVarUInt(shared_variants_statistics.size(), *stream);
            for (const auto & [variant_name, size] : shared_variants_statistics)
            {
                writeStringBinary(variant_name, *stream);
                writeVarUInt(size, *stream);
            }
        }
    }
    /// Otherwise statistics will be written in the suffix, in this case we will recalculate
    /// statistics during serialization to make it more precise.
    else
    {
        dynamic_state->recalculate_statistics = true;
    }

    dynamic_state->variant_serialization = dynamic_state->variant_type->getDefaultSerialization();
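Taken together, the hunks above define the bulk-serialization prefix for Dynamic: structure version, max_dynamic_types, the number and names (or encoded types) of the variants with the shared variant implied, per-variant sizes, and finally the shared-variant statistics map. A simplified standalone writer for roughly that layout; the helpers below are toy stand-ins for writeBinaryLittleEndian/writeVarUInt/writeStringBinary, and the exact framing (binary type encoding, statistics mode) is deliberately ignored:

#include <cstdint>
#include <map>
#include <string>
#include <utility>
#include <vector>

void writeLE64(std::string & out, std::uint64_t v)
{
    for (int i = 0; i < 8; ++i)
        out.push_back(static_cast<char>((v >> (8 * i)) & 0xFF));
}

void writeVarUInt(std::string & out, std::uint64_t v)
{
    while (v >= 0x80) { out.push_back(static_cast<char>(v | 0x80)); v >>= 7; }
    out.push_back(static_cast<char>(v));
}

void writeStringBinary(std::string & out, const std::string & s)
{
    writeVarUInt(out, s.size());
    out += s;
}

/// Order mirrors serializeBinaryBulkStatePrefix above (PREFIX statistics mode):
/// version, max_dynamic_types, variant count and names, per-variant row counts,
/// then the (name -> count) statistics for values stored in the shared variant.
std::string writeDynamicPrefix(
    std::uint64_t structure_version,
    std::uint64_t max_dynamic_types,
    const std::vector<std::pair<std::string, std::uint64_t>> & variants,
    const std::map<std::string, std::uint64_t> & shared_variant_statistics)
{
    std::string out;
    writeLE64(out, structure_version);
    writeLE64(out, max_dynamic_types);
    writeLE64(out, variants.size());
    for (const auto & v : variants)
        writeStringBinary(out, v.first);
    for (const auto & v : variants)
        writeVarUInt(out, v.second);
    writeVarUInt(out, shared_variant_statistics.size());
    for (const auto & [name, count] : shared_variant_statistics)
    {
        writeStringBinary(out, name);
        writeVarUInt(out, count);
    }
    return out;
}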
@ -182,33 +251,58 @@ ISerialization::DeserializeBinaryBulkStatePtr SerializationDynamic::deserializeD
    UInt64 structure_version;
    readBinaryLittleEndian(structure_version, *structure_stream);
    auto structure_state = std::make_shared<DeserializeBinaryBulkStateDynamicStructure>(structure_version);
    /// Read internal Variant type name.
    /// Read max_dynamic_types parameter.
    readBinaryLittleEndian(structure_state->max_dynamic_types, *structure_stream);
    /// Read information about variants.
    DataTypes variants;
    size_t num_variants;
    readBinaryLittleEndian(num_variants, *structure_stream);
    variants.reserve(num_variants + 1); /// +1 for shared variant.
    if (settings.data_types_binary_encoding)
    {
        structure_state->variant_type = decodeDataType(*structure_stream);
        for (size_t i = 0; i != num_variants; ++i)
            variants.push_back(decodeDataType(*structure_stream));
    }
    else
    {
        String data_type_name;
        for (size_t i = 0; i != num_variants; ++i)
        {
            readStringBinary(data_type_name, *structure_stream);
            structure_state->variant_type = DataTypeFactory::instance().get(data_type_name);
            variants.push_back(DataTypeFactory::instance().get(data_type_name));
        }
        const auto * variant_type = typeid_cast<const DataTypeVariant *>(structure_state->variant_type.get());
        if (!variant_type)
            throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type of Dynamic nested column, expected Variant, got {}", structure_state->variant_type->getName());
    }
    /// Add shared variant, Dynamic column should always have it.
    variants.push_back(ColumnDynamic::getSharedVariantDataType());
    auto variant_type = std::make_shared<DataTypeVariant>(variants);

    /// Read statistics.
    if (settings.dynamic_read_statistics)
    {
        const auto & variants = variant_type->getVariants();
        ColumnDynamic::Statistics statistics(ColumnDynamic::Statistics::Source::READ);
        /// First, read statistics for usual variants.
        size_t variant_size;
        for (const auto & variant : variants)
        for (const auto & variant : variant_type->getVariants())
        {
            readVarUInt(variant_size, *structure_stream);
            structure_state->statistics.data[variant->getName()] = variant_size;
        }
            statistics.variants_statistics[variant->getName()] = variant_size;
        }

        /// Second, read statistics for shared variants.
        size_t statistics_size;
        readVarUInt(statistics_size, *structure_stream);
        String variant_name;
        for (size_t i = 0; i != statistics_size; ++i)
        {
            readStringBinary(variant_name, *structure_stream);
            readVarUInt(variant_size, *structure_stream);
            statistics.shared_variants_statistics[variant_name] = variant_size;
        }

        structure_state->statistics = std::make_shared<const ColumnDynamic::Statistics>(std::move(statistics));
    }

    structure_state->variant_type = std::move(variant_type);
    state = structure_state;
    addToSubstreamsDeserializeStatesCache(cache, settings.path, state);
}

@ -231,8 +325,16 @@ void SerializationDynamic::serializeBinaryBulkStateSuffix(
    /// Write statistics in suffix if needed.
    if (settings.dynamic_write_statistics == SerializeBinaryBulkSettings::DynamicStatisticsMode::SUFFIX)
    {
        /// First, write statistics for usual variants.
        for (const auto & variant_name : dynamic_state->variant_names)
            writeVarUInt(dynamic_state->statistics.data[variant_name], *stream);
            writeVarUInt(dynamic_state->statistics.variants_statistics[variant_name], *stream);
        /// Second, write statistics for shared variants.
        writeVarUInt(dynamic_state->statistics.shared_variants_statistics.size(), *stream);
        for (const auto & [variant_name, size] : dynamic_state->statistics.shared_variants_statistics)
        {
            writeStringBinary(variant_name, *stream);
            writeVarUInt(size, *stream);
        }
    }

    settings.path.push_back(Substream::DynamicData);

@ -255,9 +357,42 @@ void SerializationDynamic::serializeBinaryBulkWithMultipleStreams(
    if (!variant_info.variant_type->equals(*dynamic_state->variant_type))
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of internal columns of Dynamic. Expected: {}, Got: {}", dynamic_state->variant_type->getName(), variant_info.variant_type->getName());

    if (column_dynamic.getMaxDynamicTypes() != dynamic_state->max_dynamic_types)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of max_dynamic_types parameter of Dynamic. Expected: {}, Got: {}", dynamic_state->max_dynamic_types, column_dynamic.getMaxDynamicTypes());

    settings.path.push_back(Substream::DynamicData);
    if (dynamic_state->recalculate_statistics)
    {
        assert_cast<const SerializationVariant &>(*dynamic_state->variant_serialization)
            .serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics(*variant_column, offset, limit, settings, dynamic_state->variant_state, dynamic_state->statistics.data);
            .serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics(*variant_column, offset, limit, settings, dynamic_state->variant_state, dynamic_state->statistics.variants_statistics);
        /// Calculate statistics for shared variants.
        const auto & shared_variant = column_dynamic.getSharedVariant();
        if (!shared_variant.empty())
        {
            const auto & local_discriminators = variant_column->getLocalDiscriminators();
            const auto & offsets = variant_column->getOffsets();
            const auto shared_variant_discr = variant_column->localDiscriminatorByGlobal(column_dynamic.getSharedVariantDiscriminator());
            size_t end = limit == 0 || offset + limit > local_discriminators.size() ? local_discriminators.size() : offset + limit;
            for (size_t i = offset; i != end; ++i)
            {
                if (local_discriminators[i] == shared_variant_discr)
                {
                    auto value = shared_variant.getDataAt(offsets[i]);
                    ReadBufferFromMemory buf(value.data, value.size);
                    auto type = decodeDataType(buf);
                    auto type_name = type->getName();
                    if (auto it = dynamic_state->statistics.shared_variants_statistics.find(type_name); it != dynamic_state->statistics.shared_variants_statistics.end())
                        ++it->second;
                    else if (dynamic_state->statistics.shared_variants_statistics.size() < ColumnDynamic::Statistics::MAX_SHARED_VARIANT_STATISTICS_SIZE)
                        dynamic_state->statistics.shared_variants_statistics.emplace(type_name, 1);
                }
            }
        }
    }
    else
    {
        assert_cast<const SerializationVariant &>(*dynamic_state->variant_serialization).serializeBinaryBulkWithMultipleStreams(*variant_column, offset, limit, settings, dynamic_state->variant_state);
    }
    settings.path.pop_back();
}
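The recalculation branch above is, at its core, a capped frequency count over the type names found in the shared-variant rows of the serialized range. The same pattern in isolation; the cap corresponds to ColumnDynamic::Statistics::MAX_SHARED_VARIANT_STATISTICS_SIZE and is taken as a parameter here:

#include <cstddef>
#include <string>
#include <unordered_map>
#include <vector>

/// Count occurrences of each type name, but never let the map grow past the cap;
/// names first seen after the cap is reached are simply not tracked, exactly as
/// in the loop above.
std::unordered_map<std::string, std::size_t> countWithCap(
    const std::vector<std::string> & type_names, std::size_t max_entries)
{
    std::unordered_map<std::string, std::size_t> stats;
    for (const auto & name : type_names)
    {
        if (auto it = stats.find(name); it != stats.end())
            ++it->second;
        else if (stats.size() < max_entries)
            stats.emplace(name, 1);
    }
    return stats;
}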
@ -272,13 +407,17 @@ void SerializationDynamic::deserializeBinaryBulkWithMultipleStreams(
        return;

    auto mutable_column = column->assumeMutable();
    auto & column_dynamic = assert_cast<ColumnDynamic &>(*mutable_column);
    auto * dynamic_state = checkAndGetState<DeserializeBinaryBulkStateDynamic>(state);
    auto * structure_state = checkAndGetState<DeserializeBinaryBulkStateDynamicStructure>(dynamic_state->structure_state);

    if (mutable_column->empty())
        mutable_column = ColumnDynamic::create(structure_state->variant_type->createColumn(), structure_state->variant_type, max_dynamic_types, structure_state->statistics);
    {
        column_dynamic.setMaxDynamicPaths(structure_state->max_dynamic_types);
        column_dynamic.setVariantType(structure_state->variant_type);
        column_dynamic.setStatistics(structure_state->statistics);
    }

    auto & column_dynamic = assert_cast<ColumnDynamic &>(*mutable_column);
    const auto & variant_info = column_dynamic.getVariantInfo();
    if (!variant_info.variant_type->equals(*structure_state->variant_type))
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of internal columns of Dynamic. Expected: {}, Got: {}", structure_state->variant_type->getName(), variant_info.variant_type->getName());

@ -329,24 +468,42 @@ void SerializationDynamic::serializeBinary(const IColumn & column, size_t row_nu
        encodeDataType(std::make_shared<DataTypeNothing>(), ostr);
        return;
    }

    const auto & variant_type = assert_cast<const DataTypeVariant &>(*variant_info.variant_type).getVariant(global_discr);
    encodeDataType(variant_type, ostr);
    variant_type->getDefaultSerialization()->serializeBinary(variant_column.getVariantByGlobalDiscriminator(global_discr), variant_column.offsetAt(row_num), ostr, settings);
    /// Check if this value is in shared variant. In this case it's already
    /// in desired binary format.
    else if (global_discr == dynamic_column.getSharedVariantDiscriminator())
    {
        auto value = dynamic_column.getSharedVariant().getDataAt(variant_column.offsetAt(row_num));
        ostr.write(value.data, value.size);
        return;
    }

template <typename DeserializeFunc>
static void deserializeVariant(
    const auto & variant_type = assert_cast<const DataTypeVariant &>(*variant_info.variant_type).getVariant(global_discr);
    const auto & variant_type_name = variant_info.variant_names[global_discr];
    encodeDataType(variant_type, ostr);
    dynamic_column.getVariantSerialization(variant_type, variant_type_name)->serializeBinary(variant_column.getVariantByGlobalDiscriminator(global_discr), variant_column.offsetAt(row_num), ostr, settings);
}

template <typename ReturnType = void, typename DeserializeFunc>
static ReturnType deserializeVariant(
    ColumnVariant & variant_column,
    const DataTypePtr & variant_type,
    const SerializationPtr & variant_serialization,
    ColumnVariant::Discriminator global_discr,
    ReadBuffer & istr,
    DeserializeFunc deserialize)
{
    auto & variant = variant_column.getVariantByGlobalDiscriminator(global_discr);
    deserialize(*variant_type->getDefaultSerialization(), variant, istr);
    if constexpr (std::is_same_v<ReturnType, bool>)
    {
        if (!deserialize(*variant_serialization, variant, istr))
            return ReturnType(false);
    }
    else
    {
        deserialize(*variant_serialization, variant, istr);
    }
    variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(global_discr));
    variant_column.getOffsets().push_back(variant.size() - 1);
    return ReturnType(true);
}

void SerializationDynamic::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
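serializeBinary above can copy a shared-variant row to the output verbatim because each value in the shared variant is stored as the encoded data type followed by the value's binary serialization, which is the same framing serializeBinary produces for ordinary variants. A toy model of that framing; a one-byte tag stands in for ClickHouse's full encodeDataType/decodeDataType:

#include <cstdint>
#include <stdexcept>
#include <string>
#include <utility>

enum class TypeTag : std::uint8_t { Int64 = 0, String = 1 };

/// [type header][binary payload]; the whole blob is what a shared-variant row holds,
/// so re-serializing it is a plain byte copy.
std::string encodeSharedVariantValue(TypeTag tag, const std::string & payload)
{
    std::string row;
    row.push_back(static_cast<char>(tag));
    row += payload;
    return row;
}

std::pair<TypeTag, std::string> decodeSharedVariantValue(const std::string & row)
{
    if (row.empty())
        throw std::runtime_error("empty shared variant value");
    return {static_cast<TypeTag>(row[0]), row.substr(1)};
}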
@ -360,11 +517,12 @@ void SerializationDynamic::deserializeBinary(IColumn & column, ReadBuffer & istr
    }

    auto variant_type_name = variant_type->getName();
    const auto & variant_serialization = dynamic_column.getVariantSerialization(variant_type, variant_type_name);
    const auto & variant_info = dynamic_column.getVariantInfo();
    auto it = variant_info.variant_name_to_discriminator.find(variant_type_name);
    if (it != variant_info.variant_name_to_discriminator.end())
    {
        deserializeVariant(dynamic_column.getVariantColumn(), variant_type, it->second, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); });
        deserializeVariant(dynamic_column.getVariantColumn(), variant_serialization, it->second, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); });
        return;
    }

@ -372,25 +530,15 @@ void SerializationDynamic::deserializeBinary(IColumn & column, ReadBuffer & istr
    if (dynamic_column.addNewVariant(variant_type))
    {
        auto discr = variant_info.variant_name_to_discriminator.at(variant_type_name);
        deserializeVariant(dynamic_column.getVariantColumn(), variant_type, discr, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); });
        deserializeVariant(dynamic_column.getVariantColumn(), variant_serialization, discr, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); });
        return;
    }

    /// We reached maximum number of variants and couldn't add new variant.
    /// This case should be really rare in real use cases.
    /// We should always be able to add String variant and insert value as String.
    dynamic_column.addStringVariant();
    /// In this case we insert this value into shared variant in binary form.
    auto tmp_variant_column = variant_type->createColumn();
    variant_type->getDefaultSerialization()->deserializeBinary(*tmp_variant_column, istr, settings);
    auto string_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared<DataTypeString>());
    auto & variant_column = dynamic_column.getVariantColumn();
    variant_column.insertIntoVariantFrom(variant_info.variant_name_to_discriminator.at("String"), *string_column, 0);
}

void SerializationDynamic::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
    const auto & dynamic_column = assert_cast<const ColumnDynamic &>(column);
    dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextCSV(dynamic_column.getVariantColumn(), row_num, ostr, settings);
    variant_serialization->deserializeBinary(*tmp_variant_column, istr, settings);
    dynamic_column.insertValueIntoSharedVariant(*tmp_variant_column, variant_type, variant_type_name, 0);
}

template <typename ReadFieldFunc, typename TryDeserializeVariantFunc, typename DeserializeVariant>

@ -406,6 +554,7 @@ static void deserializeTextImpl(
    auto & dynamic_column = assert_cast<ColumnDynamic &>(column);
    auto & variant_column = dynamic_column.getVariantColumn();
    const auto & variant_info = dynamic_column.getVariantInfo();
    const auto & variant_types = assert_cast<const DataTypeVariant &>(*variant_info.variant_type).getVariants();
    String field = read_field(istr);
    auto field_buf = std::make_unique<ReadBufferFromString>(field);
    JSONInferenceInfo json_info;

@ -413,27 +562,81 @@ static void deserializeTextImpl(
    if (escaping_rule == FormatSettings::EscapingRule::JSON)
        transformFinalInferredJSONTypeIfNeeded(variant_type, settings, &json_info);

    if (checkIfTypeIsComplete(variant_type) && dynamic_column.addNewVariant(variant_type))
    /// If inferred type is not complete, we cannot add it as a new variant.
    /// Let's try to deserialize this field into existing variants.
    /// If failed, insert this value as String.
    if (!checkIfTypeIsComplete(variant_type))
    {
        auto discr = variant_info.variant_name_to_discriminator.at(variant_type->getName());
        deserializeVariant(dynamic_column.getVariantColumn(), variant_type, discr, *field_buf, deserialize_variant);
        size_t shared_variant_discr = dynamic_column.getSharedVariantDiscriminator();
        for (size_t i = 0; i != variant_types.size(); ++i)
        {
            field_buf = std::make_unique<ReadBufferFromString>(field);
            if (i != shared_variant_discr
                && deserializeVariant<bool>(
                    variant_column,
                    dynamic_column.getVariantSerialization(variant_types[i], variant_info.variant_names[i]),
                    i,
                    *field_buf,
                    try_deserialize_variant))
                return;
        }

        /// We couldn't infer type or add new variant. Try to insert field into current variants.
        field_buf = std::make_unique<ReadBufferFromString>(field);
        if (try_deserialize_variant(*variant_info.variant_type->getDefaultSerialization(), variant_column, *field_buf))
            return;

        /// We couldn't insert field into any existing variant, add String variant and read value as String.
        dynamic_column.addStringVariant();

        variant_type = std::make_shared<DataTypeString>();
        /// To be able to deserialize field as String with Quoted escaping rule, it should be quoted.
        if (escaping_rule == FormatSettings::EscapingRule::Quoted && (field.size() < 2 || field.front() != '\'' || field.back() != '\''))
            field = "'" + field + "'";
    }
    else if (dynamic_column.addNewVariant(variant_type, variant_type->getName()))
    {
        auto discr = variant_info.variant_name_to_discriminator.at(variant_type->getName());
        deserializeVariant(dynamic_column.getVariantColumn(), dynamic_column.getVariantSerialization(variant_type), discr, *field_buf, deserialize_variant);
        return;
    }

    /// We couldn't infer type or add new variant. Insert it into shared variant.
    auto tmp_variant_column = variant_type->createColumn();
    field_buf = std::make_unique<ReadBufferFromString>(field);
    auto string_discr = variant_info.variant_name_to_discriminator.at("String");
    deserializeVariant(dynamic_column.getVariantColumn(), std::make_shared<DataTypeString>(), string_discr, *field_buf, deserialize_variant);
    auto variant_type_name = variant_type->getName();
    deserialize_variant(*dynamic_column.getVariantSerialization(variant_type, variant_type_name), *tmp_variant_column, *field_buf);
    dynamic_column.insertValueIntoSharedVariant(*tmp_variant_column, variant_type, variant_type_name, 0);
}
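The rewritten deserializeTextImpl above is best read as a chain of fallbacks: an incomplete inferred type triggers a try-every-existing-variant loop with a String fallback, a complete type is added as a new variant when the limit allows, and whatever remains goes into the shared variant. A schematic, std-only version of that order; the callback shapes are assumptions, and the real code skips the per-variant retry when a complete type merely hit the variant limit:

#include <functional>
#include <optional>
#include <string>
#include <vector>

struct Outcome
{
    std::string variant;      /// type the field was stored as
    bool via_shared_variant;  /// true if it had to go into the shared variant
};

Outcome insertTextField(
    const std::string & field,
    const std::optional<std::string> & inferred_type, /// nullopt: inference gave an incomplete type
    const std::function<bool(const std::string &)> & can_add_variant,
    const std::vector<std::string> & existing_variants,
    const std::function<bool(const std::string &, const std::string &)> & try_parse)
{
    /// 1. A complete inferred type that still fits as a new variant is used directly.
    if (inferred_type && can_add_variant(*inferred_type))
        return {*inferred_type, false};
    /// 2. Otherwise try to parse the field into each existing variant in turn.
    for (const auto & variant : existing_variants)
        if (try_parse(variant, field))
            return {variant, false};
    /// 3. Nothing matched: store the value in the shared variant (as String if inference failed).
    return {inferred_type.value_or("String"), true};
}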
template <typename NestedSerialize>
static void serializeTextImpl(
    const IColumn & column,
    size_t row_num,
    WriteBuffer & ostr,
    const FormatSettings & settings,
    NestedSerialize nested_serialize)
{
    const auto & dynamic_column = assert_cast<const ColumnDynamic &>(column);
    const auto & variant_column = dynamic_column.getVariantColumn();
    /// Check if this row has value in shared variant. In this case we should first deserialize it from binary format.
    if (variant_column.globalDiscriminatorAt(row_num) == dynamic_column.getSharedVariantDiscriminator())
    {
        auto value = dynamic_column.getSharedVariant().getDataAt(variant_column.offsetAt(row_num));
        ReadBufferFromMemory buf(value.data, value.size);
        auto variant_type = decodeDataType(buf);
        auto tmp_variant_column = variant_type->createColumn();
        auto variant_serialization = dynamic_column.getVariantSerialization(variant_type);
        variant_serialization->deserializeBinary(*tmp_variant_column, buf, settings);
        nested_serialize(*variant_serialization, *tmp_variant_column, 0, ostr);
    }
    /// Otherwise just use serialization for Variant.
    else
    {
        nested_serialize(*dynamic_column.getVariantInfo().variant_type->getDefaultSerialization(), variant_column, row_num, ostr);
    }
}

void SerializationDynamic::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
    auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf)
    {
        serialization.serializeTextCSV(col, row, buf, settings);
    };

    serializeTextImpl(column, row_num, ostr, settings, nested_serialize);
}

void SerializationDynamic::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const

@ -466,8 +669,12 @@ bool SerializationDynamic::tryDeserializeTextCSV(DB::IColumn & column, DB::ReadB

void SerializationDynamic::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
    const auto & dynamic_column = assert_cast<const ColumnDynamic &>(column);
    dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextEscaped(dynamic_column.getVariantColumn(), row_num, ostr, settings);
    auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf)
    {
        serialization.serializeTextEscaped(col, row, buf, settings);
    };

    serializeTextImpl(column, row_num, ostr, settings, nested_serialize);
}

void SerializationDynamic::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const

@ -500,8 +707,12 @@ bool SerializationDynamic::tryDeserializeTextEscaped(DB::IColumn & column, DB::R

void SerializationDynamic::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
    const auto & dynamic_column = assert_cast<const ColumnDynamic &>(column);
    dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextQuoted(dynamic_column.getVariantColumn(), row_num, ostr, settings);
    auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf)
    {
        serialization.serializeTextQuoted(col, row, buf, settings);
    };

    serializeTextImpl(column, row_num, ostr, settings, nested_serialize);
}

void SerializationDynamic::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const

@ -534,8 +745,12 @@ bool SerializationDynamic::tryDeserializeTextQuoted(DB::IColumn & column, DB::Re

void SerializationDynamic::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
    const auto & dynamic_column = assert_cast<const ColumnDynamic &>(column);
    dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextJSON(dynamic_column.getVariantColumn(), row_num, ostr, settings);
    auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf)
    {
        serialization.serializeTextJSON(col, row, buf, settings);
    };

    serializeTextImpl(column, row_num, ostr, settings, nested_serialize);
}

void SerializationDynamic::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const

@ -568,8 +783,12 @@ bool SerializationDynamic::tryDeserializeTextJSON(DB::IColumn & column, DB::Read

void SerializationDynamic::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
    const auto & dynamic_column = assert_cast<const ColumnDynamic &>(column);
    dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextRaw(dynamic_column.getVariantColumn(), row_num, ostr, settings);
    auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf)
    {
        serialization.serializeTextRaw(col, row, buf, settings);
    };

    serializeTextImpl(column, row_num, ostr, settings, nested_serialize);
}

void SerializationDynamic::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const

@ -602,8 +821,12 @@ bool SerializationDynamic::tryDeserializeTextRaw(DB::IColumn & column, DB::ReadB

void SerializationDynamic::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
    const auto & dynamic_column = assert_cast<const ColumnDynamic &>(column);
    dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeText(dynamic_column.getVariantColumn(), row_num, ostr, settings);
    auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf)
    {
        serialization.serializeText(col, row, buf, settings);
    };

    serializeTextImpl(column, row_num, ostr, settings, nested_serialize);
}

void SerializationDynamic::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const

@ -636,8 +859,12 @@ bool SerializationDynamic::tryDeserializeWholeText(DB::IColumn & column, DB::Rea

void SerializationDynamic::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
    const auto & dynamic_column = assert_cast<const ColumnDynamic &>(column);
    dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextXML(dynamic_column.getVariantColumn(), row_num, ostr, settings);
    auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf)
    {
        serialization.serializeTextXML(col, row, buf, settings);
    };

    serializeTextImpl(column, row_num, ostr, settings, nested_serialize);
}

}

@ -105,9 +105,13 @@ private:
    {
        DynamicStructureSerializationVersion structure_version;
        DataTypePtr variant_type;
        ColumnDynamic::Statistics statistics = {.source = ColumnDynamic::Statistics::Source::READ, .data = {}};
        size_t max_dynamic_types;
        ColumnDynamic::StatisticsPtr statistics;

        explicit DeserializeBinaryBulkStateDynamicStructure(UInt64 structure_version_) : structure_version(structure_version_) {}
        explicit DeserializeBinaryBulkStateDynamicStructure(UInt64 structure_version_)
            : structure_version(structure_version_)
        {
        }
    };

    size_t max_dynamic_types;

@ -4,7 +4,10 @@
#include <DataTypes/Serializations/SerializationDynamic.h>
#include <DataTypes/DataTypeVariant.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypesBinaryEncoding.h>
#include <Columns/ColumnDynamic.h>
#include <Columns/ColumnLowCardinality.h>
#include <Formats/FormatSettings.h>
#include <IO/ReadHelpers.h>

namespace DB

@ -21,6 +24,8 @@ struct DeserializeBinaryBulkStateDynamicElement : public ISerialization::Deseria
    ISerialization::DeserializeBinaryBulkStatePtr structure_state;
    SerializationPtr variant_serialization;
    ISerialization::DeserializeBinaryBulkStatePtr variant_element_state;
    bool read_from_shared_variant;
    ColumnPtr shared_variant;
};

void SerializationDynamicElement::enumerateStreams(

@ -73,9 +78,10 @@ void SerializationDynamicElement::deserializeBinaryBulkStatePrefix(

    auto dynamic_element_state = std::make_shared<DeserializeBinaryBulkStateDynamicElement>();
    dynamic_element_state->structure_state = std::move(structure_state);
    const auto & variant_type = checkAndGetState<SerializationDynamic::DeserializeBinaryBulkStateDynamicStructure>(dynamic_element_state->structure_state)->variant_type;
    const auto & variant_type = assert_cast<const DataTypeVariant &>(
        *checkAndGetState<SerializationDynamic::DeserializeBinaryBulkStateDynamicStructure>(dynamic_element_state->structure_state)->variant_type);
    /// Check if we actually have required element in the Variant.
    if (auto global_discr = assert_cast<const DataTypeVariant &>(*variant_type).tryGetVariantDiscriminator(dynamic_element_name))
    if (auto global_discr = variant_type.tryGetVariantDiscriminator(dynamic_element_name))
    {
        settings.path.push_back(Substream::DynamicData);
        if (is_null_map_subcolumn)

@ -83,6 +89,21 @@ void SerializationDynamicElement::deserializeBinaryBulkStatePrefix(
        else
            dynamic_element_state->variant_serialization = std::make_shared<SerializationVariantElement>(nested_serialization, dynamic_element_name, *global_discr);
        dynamic_element_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_element_state->variant_element_state, cache);
        dynamic_element_state->read_from_shared_variant = false;
        settings.path.pop_back();
    }
    /// If we don't have this element in the Variant, we will read shared variant and try to find it there.
    else
    {
        auto shared_variant_global_discr = variant_type.tryGetVariantDiscriminator(ColumnDynamic::getSharedVariantTypeName());
        chassert(shared_variant_global_discr.has_value());
        settings.path.push_back(Substream::DynamicData);
        dynamic_element_state->variant_serialization = std::make_shared<SerializationVariantElement>(
            ColumnDynamic::getSharedVariantDataType()->getDefaultSerialization(),
            ColumnDynamic::getSharedVariantTypeName(),
            *shared_variant_global_discr);
        dynamic_element_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_element_state->variant_element_state, cache);
        dynamic_element_state->read_from_shared_variant = true;
        settings.path.pop_back();
    }

@ -115,23 +136,103 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams(

    auto * dynamic_element_state = checkAndGetState<DeserializeBinaryBulkStateDynamicElement>(state);

    if (dynamic_element_state->variant_serialization)
    /// Check if this subcolumn should not be read from shared variant.
    /// In this case just read data from the corresponding variant.
    if (!dynamic_element_state->read_from_shared_variant)
    {
        settings.path.push_back(Substream::DynamicData);
        dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(result_column, limit, settings, dynamic_element_state->variant_element_state, cache);
        dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(
            result_column, limit, settings, dynamic_element_state->variant_element_state, cache);
        settings.path.pop_back();
    }
    /// Otherwise, read the shared variant column and extract requested type from it.
    else
    {
        settings.path.push_back(Substream::DynamicData);
        /// Initialize shared_variant column if needed.
        if (result_column->empty())
            dynamic_element_state->shared_variant = makeNullable(ColumnDynamic::getSharedVariantDataType()->createColumn());
        size_t prev_size = result_column->size();
        dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(
            dynamic_element_state->shared_variant, limit, settings, dynamic_element_state->variant_element_state, cache);
        settings.path.pop_back();

        /// If we need to read a subcolumn from variant column, create an empty variant column, fill it and extract subcolumn.
        auto variant_type = DataTypeFactory::instance().get(dynamic_element_name);
        auto result_type = makeNullableOrLowCardinalityNullableSafe(variant_type);
        MutableColumnPtr variant_column = nested_subcolumn.empty() || is_null_map_subcolumn ? result_column->assumeMutable() : result_type->createColumn();
        variant_column->reserve(variant_column->size() + limit);
        MutableColumnPtr non_nullable_variant_column = variant_column->assumeMutable();
        NullMap * null_map = nullptr;
        bool is_low_cardinality_nullable = isColumnLowCardinalityNullable(*variant_column);
        /// Resulting subolumn can be Nullable, but value is serialized in shared variant as non-Nullable.
        /// Extract non-nullable column and remember the null map to fill it during deserialization.
        if (isColumnNullable(*variant_column))
        {
            auto & nullable_variant_column = assert_cast<ColumnNullable &>(*variant_column);
            non_nullable_variant_column = nullable_variant_column.getNestedColumnPtr()->assumeMutable();
            null_map = &nullable_variant_column.getNullMapData();
        }
        else if (is_null_map_subcolumn)
        {
            auto mutable_column = result_column->assumeMutable();
            auto & data = assert_cast<ColumnUInt8 &>(*mutable_column).getData();
            data.resize_fill(data.size() + limit, 1);
            null_map = &assert_cast<ColumnUInt8 &>(*variant_column).getData();
        }

        auto variant_serialization = variant_type->getDefaultSerialization();

        const auto & nullable_shared_variant = assert_cast<const ColumnNullable &>(*dynamic_element_state->shared_variant);
        const auto & shared_null_map = nullable_shared_variant.getNullMapData();
        const auto & shared_variant = assert_cast<const ColumnString &>(nullable_shared_variant.getNestedColumn());
        const FormatSettings format_settings;
        for (size_t i = prev_size; i != shared_variant.size(); ++i)
        {
            if (!shared_null_map[i])
            {
                auto value = shared_variant.getDataAt(i);
                ReadBufferFromMemory buf(value.data, value.size);
                auto type = decodeDataType(buf);
                if (type->getName() == dynamic_element_name)
                {
                    /// When requested type is LowCardinality the subcolumn type name will be LowCardinality(Nullable).
                    /// Value in shared variant is serialized as LowCardinality and we cannot simply deserialize it
                    /// inside LowCardinality(Nullable) column (it will try to deserialize null bit). In this case we
                    /// have to create temporary LowCardinality column, deserialize value into it and insert it into
                    /// resulting LowCardinality(Nullable) (insertion from LowCardinality column to LowCardinality(Nullable)
                    /// column is allowed).
                    if (is_low_cardinality_nullable)
                    {
                        auto tmp_column = variant_type->createColumn();
                        variant_serialization->deserializeBinary(*tmp_column, buf, format_settings);
                        non_nullable_variant_column->insertFrom(*tmp_column, 0);
                    }
                    else if (is_null_map_subcolumn)
                    {
                        null_map->push_back(0);
                    }
                    else
                    {
        auto mutable_column = result_column->assumeMutable();
        mutable_column->insertManyDefaults(limit);
        result_column = std::move(mutable_column);
                        variant_serialization->deserializeBinary(*non_nullable_variant_column, buf, format_settings);
                        if (null_map)
                            null_map->push_back(0);
                    }
                }
                else
                {
                    variant_column->insertDefault();
                }
            }
            else
            {
                variant_column->insertDefault();
            }
        }

        /// Extract nested subcolumn if needed.
        if (!nested_subcolumn.empty() && !is_null_map_subcolumn)
        {
            auto subcolumn = result_type->getSubcolumn(nested_subcolumn, variant_column->getPtr());
            result_column->assumeMutable()->insertRangeFrom(*subcolumn, 0, subcolumn->size());
        }
    }
}
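Reading a typed subcolumn out of the shared variant, as implemented above, amounts to scanning the serialized rows, keeping the ones whose decoded type matches the requested name, and emitting NULLs for everything else. A standalone model of that scan; payloads stay as opaque strings here instead of being deserialized into a real column:

#include <cstdint>
#include <optional>
#include <string>
#include <vector>

struct SharedRow
{
    std::optional<std::string> type_name; /// nullopt: the row itself is NULL
    std::string payload;                  /// binary value, opaque in this sketch
};

struct ExtractedColumn
{
    std::vector<std::string> values;
    std::vector<std::uint8_t> null_map; /// 1 = NULL in the resulting subcolumn
};

ExtractedColumn extractSubcolumn(const std::vector<SharedRow> & rows, const std::string & requested_type)
{
    ExtractedColumn res;
    for (const auto & row : rows)
    {
        if (row.type_name && *row.type_name == requested_type)
        {
            res.values.push_back(row.payload); /// the real code deserializes the payload here
            res.null_map.push_back(0);
        }
        else
        {
            res.values.emplace_back(); /// default value
            res.null_map.push_back(1);
        }
    }
    return res;
}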
@ -13,11 +13,15 @@ private:
    /// To be able to deserialize Dynamic element as a subcolumn
    /// we need its type name and global discriminator.
    String dynamic_element_name;
    /// Nested subcolumn of a type dynamic type. For example, for `Tuple(a UInt32)`.a
    /// subcolumn dynamic_element_name = 'Tuple(a UInt32)' and nested_subcolumn = 'a'.
    /// Needed to extract nested subcolumn from values in shared variant.
    String nested_subcolumn;
    bool is_null_map_subcolumn;

public:
    SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_, bool is_null_map_subcolumn_ = false)
        : SerializationWrapper(nested_), dynamic_element_name(dynamic_element_name_), is_null_map_subcolumn(is_null_map_subcolumn_)
    SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_, const String & nested_subcolumn_, bool is_null_map_subcolumn_ = false)
        : SerializationWrapper(nested_), dynamic_element_name(dynamic_element_name_), nested_subcolumn(nested_subcolumn_), is_null_map_subcolumn(is_null_map_subcolumn_)
    {
    }

@ -305,8 +305,10 @@ SerializationVariantElement::VariantSubcolumnCreator::VariantSubcolumnCreator(
    const String & variant_element_name_,
    ColumnVariant::Discriminator global_variant_discriminator_,
    ColumnVariant::Discriminator local_variant_discriminator_,
    bool make_nullable_)
    bool make_nullable_,
    const ColumnPtr & null_map_)
    : local_discriminators(local_discriminators_)
    , null_map(null_map_)
    , variant_element_name(variant_element_name_)
    , global_variant_discriminator(global_variant_discriminator_)
    , local_variant_discriminator(local_variant_discriminator_)

@ -314,12 +316,13 @@ SerializationVariantElement::VariantSubcolumnCreator::VariantSubcolumnCreator(
{
}

DataTypePtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::DataTypePtr & prev) const

DataTypePtr SerializationVariantElement::VariantSubcolumnCreator::create(const DataTypePtr & prev) const
{
    return make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev) : prev;
}

SerializationPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::SerializationPtr & prev) const
SerializationPtr SerializationVariantElement::VariantSubcolumnCreator::create(const SerializationPtr & prev) const
{
    return std::make_shared<SerializationVariantElement>(prev, variant_element_name, global_variant_discriminator);
}

@ -339,12 +342,16 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB:
        return res;
    }

    /// In general case we should iterate through discriminators and create null-map for our variant.
    NullMap null_map;
    null_map.reserve(local_discriminators->size());
    /// In general case we should iterate through discriminators and create null-map for our variant if we don't already have it.
    std::optional<NullMap> null_map_from_discriminators;
    if (!null_map)
    {
        null_map_from_discriminators = NullMap();
        null_map_from_discriminators->reserve(local_discriminators->size());
        const auto & local_discriminators_data = assert_cast<const ColumnVariant::ColumnDiscriminators &>(*local_discriminators).getData();
        for (auto local_discr : local_discriminators_data)
            null_map.push_back(local_discr != local_variant_discriminator);
            null_map_from_discriminators->push_back(local_discr != local_variant_discriminator);
    }

    /// Now we can create new column from null-map and variant column using IColumn::expand.
    auto res_column = IColumn::mutate(prev);

@ -356,15 +363,23 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB:
    if (make_nullable && prev->lowCardinality())
        res_column = assert_cast<ColumnLowCardinality &>(*res_column).cloneNullable();

    res_column->expand(null_map, /*inverted = */ true);
    if (null_map_from_discriminators)
        res_column->expand(*null_map_from_discriminators, /*inverted = */ true);
    else
        res_column->expand(assert_cast<const ColumnUInt8 &>(*null_map).getData(), /*inverted = */ true);

    if (make_nullable && prev->canBeInsideNullable())
    {
        if (null_map_from_discriminators)
        {
            auto null_map_col = ColumnUInt8::create();
            null_map_col->getData() = std::move(null_map);
            null_map_col->getData() = std::move(*null_map_from_discriminators);
            return ColumnNullable::create(std::move(res_column), std::move(null_map_col));
        }

        return ColumnNullable::create(std::move(res_column), null_map->assumeMutable());
    }

    return res_column;
}
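The key operation in the hunk above is IColumn::expand over an optionally precomputed null map: the compact per-variant values are spread back out to full column length. A standalone model of expand(null_map, /*inverted=*/true) as it is used here:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

/// null_map[i] == 0 means "row i belongs to our variant": the next compact value
/// is placed there; null_map[i] == 1 (another variant or NULL) yields a default.
std::vector<std::string> expandByNullMap(
    const std::vector<std::string> & compact, const std::vector<std::uint8_t> & null_map)
{
    std::vector<std::string> full(null_map.size());
    std::size_t next = 0;
    for (std::size_t i = 0; i != null_map.size(); ++i)
        if (null_map[i] == 0)
            full[i] = compact[next++];
    assert(next == compact.size()); /// every compact value is placed exactly once
    return full;
}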
@ -63,18 +63,22 @@ public:
|
||||
|
||||
struct VariantSubcolumnCreator : public ISubcolumnCreator
|
||||
{
|
||||
private:
|
||||
const ColumnPtr local_discriminators;
|
||||
const ColumnPtr null_map; /// optional
|
||||
const String variant_element_name;
|
||||
const ColumnVariant::Discriminator global_variant_discriminator;
|
||||
const ColumnVariant::Discriminator local_variant_discriminator;
|
||||
bool make_nullable;
|
||||
|
||||
public:
|
||||
VariantSubcolumnCreator(
|
||||
const ColumnPtr & local_discriminators_,
|
||||
const String & variant_element_name_,
|
||||
ColumnVariant::Discriminator global_variant_discriminator_,
|
||||
ColumnVariant::Discriminator local_variant_discriminator_,
|
||||
bool make_nullable_);
|
||||
bool make_nullable_,
|
||||
const ColumnPtr & null_map_ = nullptr);
|
||||
|
||||
DataTypePtr create(const DataTypePtr & prev) const override;
|
||||
ColumnPtr create(const ColumnPtr & prev) const override;
|
||||
|
@ -1,2 +1,2 @@
|
||||
clickhouse_add_executable(data_type_deserialization_fuzzer data_type_deserialization_fuzzer.cpp ${SRCS})
|
||||
target_link_libraries(data_type_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions clickhouse_functions)
|
||||
target_link_libraries(data_type_deserialization_fuzzer PRIVATE clickhouse_functions clickhouse_aggregate_functions)
|
||||
|
@ -1153,8 +1153,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
|
||||
query_context->setSetting("allow_experimental_object_type", 1);
|
||||
query_context->setSetting("allow_experimental_variant_type", 1);
|
||||
query_context->setSetting("allow_experimental_dynamic_type", 1);
|
||||
query_context->setSetting("allow_experimental_annoy_index", 1);
|
||||
query_context->setSetting("allow_experimental_usearch_index", 1);
|
||||
query_context->setSetting("allow_experimental_vector_similarity_index", 1);
|
||||
query_context->setSetting("allow_experimental_bigint_types", 1);
|
||||
query_context->setSetting("allow_experimental_window_functions", 1);
|
||||
query_context->setSetting("allow_experimental_geo_types", 1);
|
||||
@ -1584,6 +1583,8 @@ void DatabaseReplicated::dropTable(ContextPtr local_context, const String & tabl
|
||||
}
|
||||
|
||||
auto table = tryGetTable(table_name, getContext());
|
||||
if (!table)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} doesn't exist", table_name);
|
||||
if (table->getName() == "MaterializedView" || table->getName() == "WindowView")
|
||||
{
|
||||
/// Avoid recursive locking of metadata_mutex
|
||||
|
@ -1362,13 +1362,14 @@ public:
|
||||
}
|
||||
|
||||
auto & variant_column = column_dynamic.getVariantColumn();
|
||||
auto variant_info = column_dynamic.getVariantInfo();
|
||||
const auto & variant_info = column_dynamic.getVariantInfo();
|
||||
/// Second, infer ClickHouse type for this element and add it as a new variant.
|
||||
auto element_type = elementToDataType(element, format_settings);
|
||||
if (column_dynamic.addNewVariant(element_type))
|
||||
auto element_type_name = element_type->getName();
|
||||
if (column_dynamic.addNewVariant(element_type, element_type_name))
|
||||
{
|
||||
auto node = buildJSONExtractTree<JSONParser>(element_type, "Dynamic inference");
|
||||
auto global_discriminator = variant_info.variant_name_to_discriminator[element_type->getName()];
|
||||
auto global_discriminator = variant_info.variant_name_to_discriminator.at(element_type_name);
|
||||
auto & variant = variant_column.getVariantByGlobalDiscriminator(global_discriminator);
|
||||
if (!node->insertResultToColumn(variant, element, insert_settings, format_settings, error))
|
||||
return false;
|
||||
@ -1377,29 +1378,15 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
/// We couldn't add new variant. Try to insert element into current variants.
|
||||
auto variant_node = buildJSONExtractTree<JSONParser>(variant_info.variant_type, "Dynamic inference");
|
||||
if (variant_node->insertResultToColumn(variant_column, element, insert_settings, format_settings, error))
|
||||
return true;
|
||||
|
||||
/// We couldn't insert element into any existing variant, add String variant and read value as String.
|
||||
column_dynamic.addStringVariant();
|
||||
auto string_global_discriminator = variant_info.variant_name_to_discriminator["String"];
|
||||
auto & string_column = variant_column.getVariantByGlobalDiscriminator(string_global_discriminator);
|
||||
if (!getStringNode()->insertResultToColumn(string_column, element, insert_settings, format_settings, error))
|
||||
/// We couldn't add this variant, insert it into shared variant.
|
||||
auto tmp_variant_column = element_type->createColumn();
|
||||
auto node = buildJSONExtractTree<JSONParser>(element_type, "Dynamic inference");
|
||||
if (!node->insertResultToColumn(*tmp_variant_column, element, insert_settings, format_settings, error))
|
||||
return false;
|
||||
variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(string_global_discriminator));
|
||||
variant_column.getOffsets().push_back(string_column.size() - 1);
|
||||
column_dynamic.insertValueIntoSharedVariant(*tmp_variant_column, element_type, element_type_name, 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
static const std::unique_ptr<JSONExtractTreeNode<JSONParser>> & getStringNode()
|
||||
{
|
||||
static const std::unique_ptr<JSONExtractTreeNode<JSONParser>> string_node
|
||||
= buildJSONExtractTree<JSONParser>(std::make_shared<DataTypeString>(), "Dynamic inference");
|
||||
return string_node;
|
||||
}
|
||||
|
||||
static DataTypePtr elementToDataType(const typename JSONParser::Element & element, const FormatSettings & format_settings)
|
||||
{
|
||||
JSONInferenceInfo json_inference_info;
|
||||
|
@ -164,7 +164,7 @@ try
|
||||
return {*iterator_data.cached_columns, *format_name};
|
||||
}
|
||||
|
||||
schemas_for_union_mode.emplace_back(iterator_data.cached_columns->getAll(), read_buffer_iterator.getLastFileName());
|
||||
schemas_for_union_mode.emplace_back(iterator_data.cached_columns->getAll(), read_buffer_iterator.getLastFilePath());
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -250,7 +250,7 @@ try
|
||||
|
||||
if (!names_and_types.empty())
|
||||
read_buffer_iterator.setSchemaToLastFile(ColumnsDescription(names_and_types));
|
||||
schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFileName());
|
||||
schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFilePath());
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -411,7 +411,7 @@ try
|
||||
throw Exception(ErrorCodes::CANNOT_DETECT_FORMAT, "The data format cannot be detected by the contents of the files. You can specify the format manually");
|
||||
|
||||
read_buffer_iterator.setSchemaToLastFile(ColumnsDescription(names_and_types));
|
||||
schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFileName());
|
||||
schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFilePath());
|
||||
}
|
||||
|
||||
if (format_name && mode == SchemaInferenceMode::DEFAULT)
|
||||
@ -527,9 +527,9 @@ try
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
auto file_name = read_buffer_iterator.getLastFileName();
|
||||
if (!file_name.empty())
|
||||
e.addMessage(fmt::format("(in file/uri {})", file_name));
|
||||
auto file_path = read_buffer_iterator.getLastFilePath();
|
||||
if (!file_path.empty())
|
||||
e.addMessage(fmt::format("(in file/uri {})", file_path));
|
||||
throw;
|
||||
}
|
||||
|
||||
|
@ -56,8 +56,8 @@ struct IReadBufferIterator
|
||||
/// Set auto detected format name.
|
||||
virtual void setFormatName(const String & /*format_name*/) {}
|
||||
|
||||
/// Get last processed file name for better exception messages.
|
||||
virtual String getLastFileName() const { return ""; }
|
||||
/// Get last processed file path for better exception messages.
|
||||
virtual String getLastFilePath() const { return ""; }
|
||||
|
||||
/// Return true if method recreateLastReadBuffer is implemented.
|
||||
virtual bool supportsLastReadBufferRecreation() const { return false; }
|
||||
|
@ -1,2 +1,2 @@
|
||||
clickhouse_add_executable(format_fuzzer format_fuzzer.cpp ${SRCS})
|
||||
target_link_libraries(format_fuzzer PRIVATE dbms clickhouse_aggregate_functions clickhouse_functions)
|
||||
target_link_libraries(format_fuzzer PRIVATE clickhouse_functions clickhouse_aggregate_functions)
|
||||
|
@ -3,7 +3,6 @@
|
||||
#include <IO/ReadBufferFromMemory.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Formats/registerFormats.h>
|
||||
|
||||
#include <QueryPipeline/Pipe.h>
|
||||
|
@ -43,6 +43,7 @@
|
||||
#include <DataTypes/DataTypeDynamic.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypesBinaryEncoding.h>
|
||||
#include <DataTypes/ObjectUtils.h>
|
||||
#include <DataTypes/Serializations/SerializationDecimal.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
@ -4287,13 +4288,98 @@ private:
|
||||
WrapperType createDynamicToColumnWrapper(const DataTypePtr &) const
|
||||
{
|
||||
return [this]
|
||||
(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr
|
||||
(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr
|
||||
{
|
||||
/// When casting Dynamic to regular column we should cast all variants from current Dynamic column
|
||||
/// and construct the result based on discriminators.
|
||||
const auto & column_dynamic = assert_cast<const ColumnDynamic &>(*arguments.front().column.get());
|
||||
const auto & variant_column = column_dynamic.getVariantColumn();
|
||||
const auto & variant_info = column_dynamic.getVariantInfo();
|
||||
auto variant_wrapper = createVariantToColumnWrapper(assert_cast<const DataTypeVariant &>(*variant_info.variant_type), result_type);
|
||||
ColumnsWithTypeAndName args = {ColumnWithTypeAndName(column_dynamic.getVariantColumnPtr(), variant_info.variant_type, "")};
|
||||
return variant_wrapper(args, result_type, col_nullable, input_rows_count);
|
||||
|
||||
/// First, cast usual variants to result type.
|
||||
const auto & variant_types = assert_cast<const DataTypeVariant &>(*variant_info.variant_type).getVariants();
|
||||
std::vector<ColumnPtr> casted_variant_columns;
|
||||
casted_variant_columns.reserve(variant_types.size());
|
||||
for (size_t i = 0; i != variant_types.size(); ++i)
|
||||
{
|
||||
const auto & variant_col = variant_column.getVariantPtrByGlobalDiscriminator(i);
|
||||
ColumnsWithTypeAndName variant = {{variant_col, variant_types[i], ""}};
|
||||
auto variant_wrapper = prepareUnpackDictionaries(variant_types[i], result_type);
|
||||
casted_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_col->size()));
|
||||
}
|
||||
|
||||
/// Second, collect all variants stored in shared variant and cast them to result type.
|
||||
std::vector<MutableColumnPtr> variant_columns_from_shared_variant;
|
||||
DataTypes variant_types_from_shared_variant;
|
||||
/// We will need to know what variant to use when we see discriminator of a shared variant.
|
||||
/// To do it, we remember what variant was extracted from each row and what was it's offset.
|
||||
PaddedPODArray<UInt64> shared_variant_indexes;
PaddedPODArray<UInt64> shared_variant_offsets;
std::unordered_map<String, UInt64> shared_variant_to_index;
const auto & shared_variant = column_dynamic.getSharedVariant();
const auto shared_variant_discr = column_dynamic.getSharedVariantDiscriminator();
const auto & local_discriminators = variant_column.getLocalDiscriminators();
const auto & offsets = variant_column.getOffsets();
if (!shared_variant.empty())
{
shared_variant_indexes.reserve(input_rows_count);
shared_variant_offsets.reserve(input_rows_count);
FormatSettings format_settings;
const auto shared_variant_local_discr = variant_column.localDiscriminatorByGlobal(shared_variant_discr);
for (size_t i = 0; i != input_rows_count; ++i)
{
if (local_discriminators[i] == shared_variant_local_discr)
{
auto value = shared_variant.getDataAt(offsets[i]);
ReadBufferFromMemory buf(value.data, value.size);
auto type = decodeDataType(buf);
auto type_name = type->getName();
auto it = shared_variant_to_index.find(type_name);
/// Check if we didn't create column for this variant yet.
if (it == shared_variant_to_index.end())
{
it = shared_variant_to_index.emplace(type_name, variant_columns_from_shared_variant.size()).first;
variant_columns_from_shared_variant.push_back(type->createColumn());
variant_types_from_shared_variant.push_back(type);
}

shared_variant_indexes.push_back(it->second);
shared_variant_offsets.push_back(variant_columns_from_shared_variant[it->second]->size());
type->getDefaultSerialization()->deserializeBinary(*variant_columns_from_shared_variant[it->second], buf, format_settings);
}
else
{
shared_variant_indexes.emplace_back();
shared_variant_offsets.emplace_back();
}
}
}

/// Cast all extracted variants into result type.
std::vector<ColumnPtr> casted_shared_variant_columns;
casted_shared_variant_columns.reserve(variant_types_from_shared_variant.size());
for (size_t i = 0; i != variant_types_from_shared_variant.size(); ++i)
{
ColumnsWithTypeAndName variant = {{variant_columns_from_shared_variant[i]->getPtr(), variant_types_from_shared_variant[i], ""}};
auto variant_wrapper = prepareUnpackDictionaries(variant_types_from_shared_variant[i], result_type);
casted_shared_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_columns_from_shared_variant[i]->size()));
}

/// Construct result column from all casted variants.
auto res = result_type->createColumn();
res->reserve(input_rows_count);
for (size_t i = 0; i != input_rows_count; ++i)
{
auto global_discr = variant_column.globalDiscriminatorByLocal(local_discriminators[i]);
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
res->insertDefault();
else if (global_discr == shared_variant_discr)
res->insertFrom(*casted_shared_variant_columns[shared_variant_indexes[i]], shared_variant_offsets[i]);
else
res->insertFrom(*casted_variant_columns[global_discr], offsets[i]);
}

return res;
};
}
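Each shared-variant row handled above is self-describing: the stored value begins with a binary-encoded data type, followed by the value serialized with that type's default serialization, which is why the cast first calls decodeDataType(buf) and only then deserializeBinary(...). A minimal standalone sketch of that layout, with a toy one-byte tag standing in for ClickHouse's real binary type encoding from DataTypesBinaryEncoding.h:

#include <cstdint>
#include <cstring>
#include <string>
#include <variant>

enum class Tag : uint8_t { Int64 = 0, String = 1 };
using Value = std::variant<int64_t, std::string>;

/// Encode: the type header goes first, then the payload.
std::string encodeShared(const Value & v)
{
    std::string out;
    if (const auto * i = std::get_if<int64_t>(&v))
    {
        out.push_back(static_cast<char>(Tag::Int64));
        out.append(reinterpret_cast<const char *>(i), sizeof(*i));
    }
    else
    {
        out.push_back(static_cast<char>(Tag::String));
        out.append(std::get<std::string>(v));
    }
    return out;
}

/// Decode: read the type header, then deserialize the rest with it.
Value decodeShared(const std::string & buf)
{
    if (static_cast<Tag>(buf[0]) == Tag::Int64)
    {
        int64_t i;
        std::memcpy(&i, buf.data() + 1, sizeof(i));
        return Value{i};
    }
    return Value{buf.substr(1)};
}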
@ -4320,200 +4406,51 @@ private:
};
}

std::pair<ColumnPtr, DataTypePtr> getReducedVariant(
const ColumnVariant & variant_column,
const DataTypePtr & variant_type,
const std::unordered_map<String, ColumnVariant::Discriminator> & variant_name_to_discriminator,
size_t max_result_num_variants,
const ColumnDynamic::Statistics & statistics = {}) const
WrapperType createVariantToDynamicWrapper(const DataTypeVariant & from_variant_type, const DataTypeDynamic & dynamic_type) const
{
const auto & variant_types = assert_cast<const DataTypeVariant &>(*variant_type).getVariants();
/// First check if we don't exceed the limit in current Variant column.
if (variant_types.size() < max_result_num_variants || (variant_types.size() == max_result_num_variants && variant_name_to_discriminator.contains("String")))
return {variant_column.getPtr(), variant_type};
/// First create extended Variant with shared variant type and cast this Variant to it.
auto variants_for_dynamic = from_variant_type.getVariants();
size_t number_of_variants = variants_for_dynamic.size();
variants_for_dynamic.push_back(ColumnDynamic::getSharedVariantDataType());
const auto & variant_type_for_dynamic = std::make_shared<DataTypeVariant>(variants_for_dynamic);
auto old_to_new_variant_wrapper = createVariantToVariantWrapper(from_variant_type, *variant_type_for_dynamic);
auto max_dynamic_types = dynamic_type.getMaxDynamicTypes();
return [old_to_new_variant_wrapper, variant_type_for_dynamic, number_of_variants, max_dynamic_types]
(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr
{
auto variant_column_for_dynamic = old_to_new_variant_wrapper(arguments, result_type, col_nullable, input_rows_count);
/// If resulting Dynamic column can contain all variants from this Variant column, just create Dynamic column from it.
if (max_dynamic_types >= number_of_variants)
return ColumnDynamic::create(variant_column_for_dynamic, variant_type_for_dynamic, max_dynamic_types, max_dynamic_types);

/// We want to keep the most frequent variants and convert the rarest ones to String.
std::vector<std::pair<size_t, ColumnVariant::Discriminator>> variant_sizes;
variant_sizes.reserve(variant_types.size());
std::optional<ColumnVariant::Discriminator> old_string_discriminator;
/// List of variants that should be converted to a single String variant.
std::vector<ColumnVariant::Discriminator> variants_to_convert_to_string;
for (size_t i = 0; i != variant_types.size(); ++i)
{
/// String variant won't be removed.
String variant_name = variant_types[i]->getName();

if (variant_name == "String")
{
old_string_discriminator = i;
/// For simplicity, add this variant to the list that will be converted to string,
/// so we will process it with other variants when constructing the new String variant.
variants_to_convert_to_string.push_back(i);
}
else
{
size_t size = 0;
if (statistics.data.empty())
size = variant_column.getVariantByGlobalDiscriminator(i).size();
else
size = statistics.data.at(variant_name);
variant_sizes.emplace_back(size, i);
}
}

/// Sort variants by sizes, so we will keep the most frequent.
std::sort(variant_sizes.begin(), variant_sizes.end(), std::greater());

DataTypes remaining_variants;
remaining_variants.reserve(max_result_num_variants);
/// Add String variant in advance.
remaining_variants.push_back(std::make_shared<DataTypeString>());
for (auto [_, discr] : variant_sizes)
{
if (remaining_variants.size() != max_result_num_variants)
remaining_variants.push_back(variant_types[discr]);
else
variants_to_convert_to_string.push_back(discr);
}

auto reduced_variant = std::make_shared<DataTypeVariant>(remaining_variants);
const auto & new_variants = reduced_variant->getVariants();
/// To construct reduced variant column we will need mapping from old to new discriminators.
std::vector<ColumnVariant::Discriminator> old_to_new_discriminators_mapping;
old_to_new_discriminators_mapping.resize(variant_types.size());
ColumnVariant::Discriminator string_variant_discriminator = 0;
for (size_t i = 0; i != new_variants.size(); ++i)
{
String variant_name = new_variants[i]->getName();
if (variant_name == "String")
{
string_variant_discriminator = i;
for (auto discr : variants_to_convert_to_string)
old_to_new_discriminators_mapping[discr] = i;
}
else
{
auto old_discr = variant_name_to_discriminator.at(variant_name);
old_to_new_discriminators_mapping[old_discr] = i;
}
}

/// Convert all reduced variants to String.
std::unordered_map<ColumnVariant::Discriminator, ColumnPtr> variants_converted_to_string;
variants_converted_to_string.reserve(variants_to_convert_to_string.size());
size_t string_variant_size = 0;
for (auto discr : variants_to_convert_to_string)
{
auto string_type = std::make_shared<DataTypeString>();
auto string_wrapper = prepareUnpackDictionaries(variant_types[discr], string_type);
auto column_to_convert = ColumnWithTypeAndName(variant_column.getVariantPtrByGlobalDiscriminator(discr), variant_types[discr], "");
ColumnsWithTypeAndName args = {column_to_convert};
auto variant_string_column = string_wrapper(args, string_type, nullptr, column_to_convert.column->size());
string_variant_size += variant_string_column->size();
variants_converted_to_string[discr] = variant_string_column;
}

/// Create new discriminators and offsets and fill new String variant according to old discriminators.
auto string_variant = ColumnString::create();
string_variant->reserve(string_variant_size);
auto new_discriminators_column = variant_column.getLocalDiscriminatorsPtr()->cloneEmpty();
auto & new_discriminators_data = assert_cast<ColumnVariant::ColumnDiscriminators &>(*new_discriminators_column).getData();
new_discriminators_data.reserve(variant_column.size());
auto new_offsets = variant_column.getOffsetsPtr()->cloneEmpty();
auto & new_offsets_data = assert_cast<ColumnVariant::ColumnOffsets &>(*new_offsets).getData();
new_offsets_data.reserve(variant_column.size());
const auto & old_local_discriminators = variant_column.getLocalDiscriminators();
const auto & old_offsets = variant_column.getOffsets();
for (size_t i = 0; i != old_local_discriminators.size(); ++i)
{
auto old_discr = variant_column.globalDiscriminatorByLocal(old_local_discriminators[i]);

if (old_discr == ColumnVariant::NULL_DISCRIMINATOR)
{
new_discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR);
new_offsets_data.push_back(0);
continue;
}

auto new_discr = old_to_new_discriminators_mapping[old_discr];
new_discriminators_data.push_back(new_discr);
if (new_discr != string_variant_discriminator)
{
new_offsets_data.push_back(old_offsets[i]);
}
else
{
new_offsets_data.push_back(string_variant->size());
string_variant->insertFrom(*variants_converted_to_string[old_discr], old_offsets[i]);
}
}

/// Create new list of variant columns.
Columns new_variant_columns;
new_variant_columns.resize(new_variants.size());
for (size_t i = 0; i != variant_types.size(); ++i)
{
auto new_discr = old_to_new_discriminators_mapping[i];
if (new_discr != string_variant_discriminator)
new_variant_columns[new_discr] = variant_column.getVariantPtrByGlobalDiscriminator(i);
}
new_variant_columns[string_variant_discriminator] = std::move(string_variant);
return {ColumnVariant::create(std::move(new_discriminators_column), std::move(new_offsets), new_variant_columns), reduced_variant};
}

WrapperType createVariantToDynamicWrapper(const DataTypePtr & from_type, const DataTypeDynamic & dynamic_type) const
{
const auto & from_variant_type = assert_cast<const DataTypeVariant &>(*from_type);
size_t max_dynamic_types = dynamic_type.getMaxDynamicTypes();
const auto & variants = from_variant_type.getVariants();
std::unordered_map<String, ColumnVariant::Discriminator> variant_name_to_discriminator;
variant_name_to_discriminator.reserve(variants.size());
for (size_t i = 0; i != variants.size(); ++i)
variant_name_to_discriminator[variants[i]->getName()] = i;

return [from_type, max_dynamic_types, variant_name_to_discriminator, this]
(ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr
{
const auto & variant_column = assert_cast<const ColumnVariant &>(*arguments.front().column);
auto [reduced_variant_column, reduced_variant_type] = getReducedVariant(variant_column, from_type, variant_name_to_discriminator, max_dynamic_types);
return ColumnDynamic::create(reduced_variant_column, reduced_variant_type, max_dynamic_types);
/// Otherwise some variants should go to the shared variant. Create a temporary Dynamic column from this Variant and insert
/// all its data into the resulting Dynamic column; this insertion will do all the logic with the shared variant.
auto tmp_dynamic_column = ColumnDynamic::create(variant_column_for_dynamic, variant_type_for_dynamic, number_of_variants, number_of_variants);
auto result_dynamic_column = ColumnDynamic::create(max_dynamic_types);
result_dynamic_column->insertRangeFrom(*tmp_dynamic_column, 0, tmp_dynamic_column->size());
return result_dynamic_column;
};
}

WrapperType createColumnToDynamicWrapper(const DataTypePtr & from_type, const DataTypeDynamic & dynamic_type) const
{
if (const auto * variant_type = typeid_cast<const DataTypeVariant *>(from_type.get()))
return createVariantToDynamicWrapper(from_type, dynamic_type);

if (dynamic_type.getMaxDynamicTypes() == 1)
{
DataTypePtr string_type = std::make_shared<DataTypeString>();
if (from_type->isNullable())
string_type = makeNullable(string_type);
auto string_wrapper = prepareUnpackDictionaries(from_type, string_type);
auto variant_type = std::make_shared<DataTypeVariant>(DataTypes{removeNullable(string_type)});
auto variant_wrapper = createColumnToVariantWrapper(string_type, *variant_type);
return [string_wrapper, variant_wrapper, string_type, variant_type, max_dynamic_types=dynamic_type.getMaxDynamicTypes()]
(ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr
{
auto string_column = string_wrapper(arguments, string_type, col_nullable, input_rows_count);
auto column = ColumnWithTypeAndName(string_column, string_type, "");
ColumnsWithTypeAndName args = {column};
auto variant_column = variant_wrapper(args, variant_type, nullptr, string_column->size());
return ColumnDynamic::create(variant_column, variant_type, max_dynamic_types);
};
}
return createVariantToDynamicWrapper(*variant_type, dynamic_type);

if (context && context->getSettingsRef().cast_string_to_dynamic_use_inference && isStringOrFixedString(removeNullable(removeLowCardinality(from_type))))
return createStringToDynamicThroughParsingWrapper();

/// First, cast column to Variant with 2 variants - the type of the column we cast and shared variant type.
auto variant_type = std::make_shared<DataTypeVariant>(DataTypes{removeNullableOrLowCardinalityNullable(from_type)});
auto variant_wrapper = createColumnToVariantWrapper(from_type, *variant_type);
return [variant_wrapper, variant_type, max_dynamic_types=dynamic_type.getMaxDynamicTypes()]
(ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr
auto column_to_variant_wrapper = createColumnToVariantWrapper(from_type, *variant_type);
/// Second, cast this Variant to Dynamic.
auto variant_to_dynamic_wrapper = createVariantToDynamicWrapper(*variant_type, dynamic_type);
return [column_to_variant_wrapper, variant_to_dynamic_wrapper, variant_type]
(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr
{
auto variant_res = variant_wrapper(arguments, variant_type, col_nullable, input_rows_count);
return ColumnDynamic::create(variant_res, variant_type, max_dynamic_types);
auto variant_res = column_to_variant_wrapper(arguments, variant_type, col_nullable, input_rows_count);
ColumnsWithTypeAndName args = {{variant_res, variant_type, ""}};
return variant_to_dynamic_wrapper(args, result_type, nullptr, input_rows_count);
};
}

@ -4530,21 +4467,26 @@ private:
(ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr
{
const auto & column_dynamic = assert_cast<const ColumnDynamic &>(*arguments[0].column);
return ColumnDynamic::create(column_dynamic.getVariantColumnPtr(), column_dynamic.getVariantInfo(), to_max_types);
/// We should use the same limit as already used in the column and change only the global limit.
/// This is needed because the shared variant should contain values only when the limit is exceeded,
/// so if the column already contains some data, we cannot increase the limit.
return ColumnDynamic::create(column_dynamic.getVariantColumnPtr(), column_dynamic.getVariantInfo(), column_dynamic.getMaxDynamicTypes(), to_max_types);
};
}

return [to_max_types, this]
return [to_max_types]
(ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr
{
const auto & column_dynamic = assert_cast<const ColumnDynamic &>(*arguments[0].column);
auto [reduced_variant_column, reduced_variant_type] = getReducedVariant(
column_dynamic.getVariantColumn(),
column_dynamic.getVariantInfo().variant_type,
column_dynamic.getVariantInfo().variant_name_to_discriminator,
to_max_types,
column_dynamic.getStatistics());
return ColumnDynamic::create(reduced_variant_column, reduced_variant_type, to_max_types);
/// If the real limit in the column is not greater than the desired one, just use the same variant column.
if (column_dynamic.getMaxDynamicTypes() <= to_max_types)
return ColumnDynamic::create(column_dynamic.getVariantColumnPtr(), column_dynamic.getVariantInfo(), column_dynamic.getMaxDynamicTypes(), to_max_types);

/// Otherwise some variants should go to the shared variant. In this case we can just insert all
/// the data into the resulting column, and it will do all the logic with the shared variant.
auto result_dynamic_column = ColumnDynamic::create(to_max_types);
result_dynamic_column->insertRangeFrom(column_dynamic, 0, column_dynamic.size());
return result_dynamic_column;
};
}
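Lowering max_types cannot keep the old variant column as-is: types beyond the new cap have to move into the shared variant, which is exactly what the insertRangeFrom call above delegates to the resulting column. A toy model of that routing, illustrative only and not ColumnDynamic's real layout:

#include <cstddef>
#include <map>
#include <string>
#include <utility>
#include <vector>

/// Toy column: at most max_types distinct typed buckets; overflow goes to a
/// shared bucket that stores the type name together with the value.
struct ToyDynamicColumn
{
    size_t max_types;
    std::map<std::string, std::vector<std::string>> typed;
    std::vector<std::pair<std::string, std::string>> shared;

    void insert(const std::string & type, const std::string & value)
    {
        if (typed.contains(type) || typed.size() < max_types)
            typed[type].push_back(value);
        else
            shared.emplace_back(type, value);
    }
};

int main()
{
    ToyDynamicColumn narrowed{2, {}, {}};
    /// Reinserting rows of a wider column into a narrower one:
    narrowed.insert("Int64", "42");
    narrowed.insert("String", "Hello");
    narrowed.insert("Array(Int64)", "[1,2,3]"); /// third distinct type lands in `shared`
}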
@ -2,10 +2,14 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypesBinaryEncoding.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Columns/ColumnVariant.h>
#include <Columns/ColumnDynamic.h>
#include <Columns/ColumnsNumber.h>
#include <IO/ReadBufferFromMemory.h>
#include <Common/assert_cast.h>

@ -65,11 +69,15 @@ public:
const auto & variant_column = dynamic_column->getVariantColumn();
auto res = result_type->createColumn();
String element_type;
auto shared_variant_discr = dynamic_column->getSharedVariantDiscriminator();
const auto & shared_variant = dynamic_column->getSharedVariant();
for (size_t i = 0; i != input_rows_count; ++i)
{
auto global_discr = variant_column.globalDiscriminatorAt(i);
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
element_type = name_for_null;
else if (global_discr == shared_variant_discr)
element_type = getTypeNameFromSharedVariantValue(shared_variant.getDataAt(variant_column.offsetAt(i)));
else
element_type = variant_info.variant_names[global_discr];

@ -78,6 +86,63 @@ public:

return res;
}

String getTypeNameFromSharedVariantValue(StringRef value) const
{
ReadBufferFromMemory buf(value.data, value.size);
return decodeDataType(buf)->getName();
}
};

class FunctionIsDynamicElementInSharedData : public IFunction
{
public:
static constexpr auto name = "isDynamicElementInSharedData";

static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionIsDynamicElementInSharedData>(); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }
bool useDefaultImplementationForConstants() const override { return true; }
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }

DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.empty() || arguments.size() > 1)
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Number of arguments for function {} doesn't match: passed {}, should be 1",
getName(), arguments.size());

if (!isDynamic(arguments[0].type.get()))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"First argument for function {} must be Dynamic, got {} instead",
getName(), arguments[0].type->getName());

return DataTypeFactory::instance().get("Bool");
}

ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
const ColumnDynamic * dynamic_column = checkAndGetColumn<ColumnDynamic>(arguments[0].column.get());
if (!dynamic_column)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"First argument for function {} must be Dynamic, got {} instead",
getName(), arguments[0].type->getName());

const auto & variant_column = dynamic_column->getVariantColumn();
const auto & local_discriminators = variant_column.getLocalDiscriminators();
auto res = result_type->createColumn();
auto & res_data = assert_cast<ColumnUInt8 &>(*res).getData();
res_data.reserve(dynamic_column->size());
auto shared_variant_local_discr = variant_column.localDiscriminatorByGlobal(dynamic_column->getSharedVariantDiscriminator());
for (size_t i = 0; i != input_rows_count; ++i)
res_data.push_back(local_discriminators[i] == shared_variant_local_discr);

return res;
}
};

}

@ -88,7 +153,7 @@ REGISTER_FUNCTION(DynamicType)
.description = R"(
Returns the variant type name for each row of a `Dynamic` column. If a row contains NULL, it returns 'None' for it.
)",
.syntax = {"dynamicType(variant)"},
.syntax = {"dynamicType(dynamic)"},
.arguments = {{"dynamic", "Dynamic column"}},
.examples = {{{
"Example",

@ -104,6 +169,30 @@ SELECT d, dynamicType(d) FROM test;
│ Hello, World! │ String         │
│ [1,2,3]       │ Array(Int64)   │
└───────────────┴────────────────┘
)"}}},
.categories{"Variant"},
});

factory.registerFunction<FunctionIsDynamicElementInSharedData>(FunctionDocumentation{
.description = R"(
Returns true for rows in a Dynamic column that are not separated into subcolumns and are stored inside the shared variant in binary form.
)",
.syntax = {"isDynamicElementInSharedData(dynamic)"},
.arguments = {{"dynamic", "Dynamic column"}},
.examples = {{{
"Example",
R"(
CREATE TABLE test (d Dynamic(max_types=2)) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
SELECT d, isDynamicElementInSharedData(d) FROM test;
)",
R"(
┌─d─────────────┬─isDynamicElementInSharedData(d)─┐
│ ᴺᵁᴸᴸ          │ false                           │
│ 42            │ false                           │
│ Hello, World! │ true                            │
│ [1,2,3]       │ true                            │
└───────────────┴─────────────────────────────────┘
)"}}},
.categories{"Variant"},
});

@ -34,14 +34,20 @@ namespace ErrorCodes
extern const int RECEIVED_ERROR_TOO_MANY_REQUESTS;
}

void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout)
void setResponseDefaultHeaders(HTTPServerResponse & response)
{
if (!response.getKeepAlive())
return;

Poco::Timespan timeout(keep_alive_timeout, 0);
if (timeout.totalSeconds())
response.set("Keep-Alive", "timeout=" + std::to_string(timeout.totalSeconds()));
const size_t keep_alive_timeout = response.getSession().getKeepAliveTimeout();
const size_t keep_alive_max_requests = response.getSession().getMaxKeepAliveRequests();
if (keep_alive_timeout)
{
if (keep_alive_max_requests)
response.set("Keep-Alive", fmt::format("timeout={}, max={}", keep_alive_timeout, keep_alive_max_requests));
else
response.set("Keep-Alive", fmt::format("timeout={}", keep_alive_timeout));
}
}
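With a session keep-alive timeout of 10 seconds and a request cap of 1000 (hypothetical configuration values), the branch above emits the standard header form Keep-Alive: timeout=10, max=1000; with no cap configured it emits just Keep-Alive: timeout=10. A minimal check of the formatting calls, using only fmt:

#include <cassert>
#include <cstddef>
#include <fmt/format.h>

int main()
{
    const size_t keep_alive_timeout = 10;        /// hypothetical values
    const size_t keep_alive_max_requests = 1000;
    assert(fmt::format("timeout={}, max={}", keep_alive_timeout, keep_alive_max_requests) == "timeout=10, max=1000");
    assert(fmt::format("timeout={}", keep_alive_timeout) == "timeout=10");
}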

HTTPSessionPtr makeHTTPSession(

@ -54,7 +54,7 @@ private:

using HTTPSessionPtr = std::shared_ptr<Poco::Net::HTTPClientSession>;

void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout);
void setResponseDefaultHeaders(HTTPServerResponse & response);

/// Create session object to perform requests and set required parameters.
HTTPSessionPtr makeHTTPSession(

@ -128,7 +128,7 @@ namespace

bool isQueryCacheRelatedSetting(const String & setting_name)
{
return setting_name.starts_with("query_cache_") || setting_name.ends_with("_query_cache");
return (setting_name.starts_with("query_cache_") || setting_name.ends_with("_query_cache")) && setting_name != "query_cache_tag";
}
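Since query_cache_tag is now excluded from this predicate, the tag is no longer stripped from the query AST before hashing, so two otherwise identical queries with different tags get different cache keys. A standalone re-implementation of the predicate showing the expected behavior (not the ClickHouse function itself):

#include <cassert>
#include <string>

static bool isQueryCacheRelatedSetting(const std::string & setting_name)
{
    return (setting_name.starts_with("query_cache_") || setting_name.ends_with("_query_cache"))
        && setting_name != "query_cache_tag";
}

int main()
{
    assert(isQueryCacheRelatedSetting("query_cache_ttl"));  /// stripped from the AST
    assert(isQueryCacheRelatedSetting("use_query_cache"));  /// stripped from the AST
    assert(!isQueryCacheRelatedSetting("query_cache_tag")); /// kept, so it can distinguish entries
}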

class RemoveQueryCacheSettingsMatcher

@ -242,11 +242,18 @@ QueryCache::Key::Key(
, expires_at(expires_at_)
, is_compressed(is_compressed_)
, query_string(queryStringFromAST(ast_))
, tag(settings.query_cache_tag)
{
}

QueryCache::Key::Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_)
: QueryCache::Key(ast_, current_database, settings, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST, current database, user name/roles
QueryCache::Key::Key(
ASTPtr ast_,
const String & current_database,
const Settings & settings,
std::optional<UUID> user_id_,
const std::vector<UUID> & current_user_roles_)
: QueryCache::Key(ast_, current_database, settings, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false)
/// ^^ dummy values for everything != AST, current database, user name/roles
{
}

@ -88,6 +88,11 @@ public:
/// SYSTEM.QUERY_CACHE.
const String query_string;

/// A tag (namespace) to distinguish multiple entries of the same query.
/// This member currently has no use besides letting SYSTEM.QUERY_CACHE populate the 'tag' column conveniently without having to
/// compute the tag from the query AST.
const String tag;

/// Ctor to construct a Key for writing into query cache.
Key(ASTPtr ast_,
const String & current_database,

@ -99,7 +104,10 @@ public:
bool is_compressed);

/// Ctor to construct a Key for reading from query cache (this operation only needs the AST + user name).
Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_);
Key(ASTPtr ast_,
const String & current_database,
const Settings & settings,
std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_);

bool operator==(const Key & other) const;
};

@ -787,10 +787,8 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
if (index_desc.type == INVERTED_INDEX_NAME && !settings.allow_experimental_inverted_index)
throw Exception(ErrorCodes::ILLEGAL_INDEX, "Please use index type 'full_text' instead of 'inverted'");
if (index_desc.type == "annoy" && !settings.allow_experimental_annoy_index)
throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index is disabled. Turn on allow_experimental_annoy_index");
if (index_desc.type == "usearch" && !settings.allow_experimental_usearch_index)
throw Exception(ErrorCodes::INCORRECT_QUERY, "USearch index is disabled. Turn on allow_experimental_usearch_index");
if (index_desc.type == "vector_similarity" && !settings.allow_experimental_vector_similarity_index)
throw Exception(ErrorCodes::INCORRECT_QUERY, "Vector similarity index is disabled. Turn on allow_experimental_vector_similarity_index");

properties.indices.push_back(index_desc);
}

@ -663,13 +663,16 @@ BlockIO InterpreterSystemQuery::execute()
startStopAction(ActionLocks::ViewRefresh, false);
break;
case Type::REFRESH_VIEW:
getRefreshTask()->run();
for (const auto & task : getRefreshTasks())
task->run();
break;
case Type::CANCEL_VIEW:
getRefreshTask()->cancel();
for (const auto & task : getRefreshTasks())
task->cancel();
break;
case Type::TEST_VIEW:
getRefreshTask()->setFakeTime(query.fake_time_for_view);
for (const auto & task : getRefreshTasks())
task->setFakeTime(query.fake_time_for_view);
break;
case Type::DROP_REPLICA:
dropReplica(query);

@ -1242,15 +1245,15 @@ void InterpreterSystemQuery::flushDistributed(ASTSystemQuery & query)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "SYSTEM RESTART DISK is not supported");
}

RefreshTaskHolder InterpreterSystemQuery::getRefreshTask()
RefreshTaskList InterpreterSystemQuery::getRefreshTasks()
{
auto ctx = getContext();
ctx->checkAccess(AccessType::SYSTEM_VIEWS);
auto task = ctx->getRefreshSet().getTask(table_id);
if (!task)
auto tasks = ctx->getRefreshSet().findTasks(table_id);
if (tasks.empty())
throw Exception(
ErrorCodes::BAD_ARGUMENTS, "Refreshable view {} doesn't exist", table_id.getNameForLogs());
return task;
return tasks;
}

@ -74,7 +74,7 @@ private:
void flushDistributed(ASTSystemQuery & query);
[[noreturn]] void restartDisk(String & name);

RefreshTaskHolder getRefreshTask();
RefreshTaskList getRefreshTasks();

AccessRightsElements getRequiredAccessForDDLOnCluster() const;
void startStopAction(StorageActionBlockType action_type, bool start);

@ -13,8 +13,7 @@ class ASTIndexDeclaration : public IAST
{
public:
static const auto DEFAULT_INDEX_GRANULARITY = 1uz;
static const auto DEFAULT_ANNOY_INDEX_GRANULARITY = 100'000'000uz;
static const auto DEFAULT_USEARCH_INDEX_GRANULARITY = 100'000'000uz;
static const auto DEFAULT_VECTOR_SIMILARITY_INDEX_GRANULARITY = 100'000'000uz;

ASTIndexDeclaration(ASTPtr expression, ASTPtr type, const String & name_);

@ -97,24 +97,9 @@ namespace

void formatCurrentGrantsElements(const AccessRightsElements & elements, const IAST::FormatSettings & settings)
{
for (size_t i = 0; i != elements.size(); ++i)
{
const auto & element = elements[i];

bool next_element_on_same_db_and_table = false;
if (i != elements.size() - 1)
{
const auto & next_element = elements[i + 1];
if (element.sameDatabaseAndTableAndParameter(next_element))
next_element_on_same_db_and_table = true;
}

if (!next_element_on_same_db_and_table)
{
settings.ostr << " ";
formatONClause(element, settings);
}
}
settings.ostr << "(";
formatElementsWithoutOptions(elements, settings);
settings.ostr << ")";
}
}

@ -89,10 +89,8 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected
else
{
auto index_type = index->getType();
if (index_type && index_type->name == "annoy")
index->granularity = ASTIndexDeclaration::DEFAULT_ANNOY_INDEX_GRANULARITY;
else if (index_type && index_type->name == "usearch")
index->granularity = ASTIndexDeclaration::DEFAULT_USEARCH_INDEX_GRANULARITY;
if (index_type && index_type->name == "vector_similarity")
index->granularity = ASTIndexDeclaration::DEFAULT_VECTOR_SIMILARITY_INDEX_GRANULARITY;
else
index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY;
}

@ -214,10 +214,8 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
else
{
auto index_type = index->getType();
if (index_type->name == "annoy")
index->granularity = ASTIndexDeclaration::DEFAULT_ANNOY_INDEX_GRANULARITY;
else if (index_type->name == "usearch")
index->granularity = ASTIndexDeclaration::DEFAULT_USEARCH_INDEX_GRANULARITY;
if (index_type->name == "vector_similarity")
index->granularity = ASTIndexDeclaration::DEFAULT_VECTOR_SIMILARITY_INDEX_GRANULARITY;
else
index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY;
}

@ -39,7 +39,7 @@ set(CMAKE_INCLUDE_CURRENT_DIR TRUE)

clickhouse_add_executable(codegen_select_fuzzer ${FUZZER_SRCS})

set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier")
set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier -Wno-extra-semi-stmt -Wno-used-but-marked-unused")

# contrib/libprotobuf-mutator/src/libfuzzer/libfuzzer_macro.h:143:44: error: no newline at end of file [-Werror,-Wnewline-eof]
target_compile_options (codegen_select_fuzzer PRIVATE -Wno-newline-eof)

@ -12,6 +12,7 @@
#include <Columns/ColumnString.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnsCommon.h>

#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>

@ -203,33 +204,23 @@ template <typename NumberType, typename NumberVectorBatch, typename ConvertFunc>
void ORCBlockOutputFormat::writeNumbers(
orc::ColumnVectorBatch & orc_column,
const IColumn & column,
const PaddedPODArray<UInt8> * null_bytemap,
const PaddedPODArray<UInt8> * /*null_bytemap*/,
ConvertFunc convert)
{
NumberVectorBatch & number_orc_column = dynamic_cast<NumberVectorBatch &>(orc_column);
const auto & number_column = assert_cast<const ColumnVector<NumberType> &>(column);
number_orc_column.resize(number_column.size());

number_orc_column.data.resize(number_column.size());
for (size_t i = 0; i != number_column.size(); ++i)
{
if (null_bytemap && (*null_bytemap)[i])
{
number_orc_column.notNull[i] = 0;
continue;
}

number_orc_column.notNull[i] = 1;
number_orc_column.data[i] = convert(number_column.getElement(i));
}
number_orc_column.numElements = number_column.size();
}

template <typename Decimal, typename DecimalVectorBatch, typename ConvertFunc>
void ORCBlockOutputFormat::writeDecimals(
orc::ColumnVectorBatch & orc_column,
const IColumn & column,
DataTypePtr & type,
const PaddedPODArray<UInt8> * null_bytemap,
const PaddedPODArray<UInt8> * /*null_bytemap*/,
ConvertFunc convert)
{
DecimalVectorBatch & decimal_orc_column = dynamic_cast<DecimalVectorBatch &>(orc_column);

@ -238,71 +229,49 @@ void ORCBlockOutputFormat::writeDecimals(
decimal_orc_column.precision = decimal_type->getPrecision();
decimal_orc_column.scale = decimal_type->getScale();
decimal_orc_column.resize(decimal_column.size());
for (size_t i = 0; i != decimal_column.size(); ++i)
{
if (null_bytemap && (*null_bytemap)[i])
{
decimal_orc_column.notNull[i] = 0;
continue;
}

decimal_orc_column.notNull[i] = 1;
decimal_orc_column.values.resize(decimal_column.size());
for (size_t i = 0; i != decimal_column.size(); ++i)
decimal_orc_column.values[i] = convert(decimal_column.getElement(i).value);
}
decimal_orc_column.numElements = decimal_column.size();
}

template <typename ColumnType>
void ORCBlockOutputFormat::writeStrings(
orc::ColumnVectorBatch & orc_column,
const IColumn & column,
const PaddedPODArray<UInt8> * null_bytemap)
const PaddedPODArray<UInt8> * /*null_bytemap*/)
{
orc::StringVectorBatch & string_orc_column = dynamic_cast<orc::StringVectorBatch &>(orc_column);
const auto & string_column = assert_cast<const ColumnType &>(column);
string_orc_column.resize(string_column.size());

string_orc_column.data.resize(string_column.size());
string_orc_column.length.resize(string_column.size());
for (size_t i = 0; i != string_column.size(); ++i)
{
if (null_bytemap && (*null_bytemap)[i])
{
string_orc_column.notNull[i] = 0;
continue;
}

string_orc_column.notNull[i] = 1;
const std::string_view & string = string_column.getDataAt(i).toView();
string_orc_column.data[i] = const_cast<char *>(string.data());
string_orc_column.length[i] = string.size();
}
string_orc_column.numElements = string_column.size();
}

template <typename ColumnType, typename GetSecondsFunc, typename GetNanosecondsFunc>
void ORCBlockOutputFormat::writeDateTimes(
orc::ColumnVectorBatch & orc_column,
const IColumn & column,
const PaddedPODArray<UInt8> * null_bytemap,
const PaddedPODArray<UInt8> * /*null_bytemap*/,
GetSecondsFunc get_seconds,
GetNanosecondsFunc get_nanoseconds)
{
orc::TimestampVectorBatch & timestamp_orc_column = dynamic_cast<orc::TimestampVectorBatch &>(orc_column);
const auto & timestamp_column = assert_cast<const ColumnType &>(column);
timestamp_orc_column.resize(timestamp_column.size());

timestamp_orc_column.data.resize(timestamp_column.size());
timestamp_orc_column.nanoseconds.resize(timestamp_column.size());
for (size_t i = 0; i != timestamp_column.size(); ++i)
{
if (null_bytemap && (*null_bytemap)[i])
{
timestamp_orc_column.notNull[i] = 0;
continue;
}

timestamp_orc_column.notNull[i] = 1;
timestamp_orc_column.data[i] = static_cast<int64_t>(get_seconds(timestamp_column.getElement(i)));
timestamp_orc_column.nanoseconds[i] = static_cast<int64_t>(get_nanoseconds(timestamp_column.getElement(i)));
}
timestamp_orc_column.numElements = timestamp_column.size();
}

void ORCBlockOutputFormat::writeColumn(

@ -311,9 +280,27 @@ void ORCBlockOutputFormat::writeColumn(
DataTypePtr & type,
const PaddedPODArray<UInt8> * null_bytemap)
{
orc_column.notNull.resize(column.size());
size_t rows = column.size();
orc_column.resize(rows);
orc_column.numElements = rows;

/// Calculate orc_column.hasNulls
if (null_bytemap)
orc_column.hasNulls = true;
orc_column.hasNulls = !memoryIsZero(null_bytemap->data(), 0, null_bytemap->size());
else
orc_column.hasNulls = false;

/// Fill orc_column.notNull
if (orc_column.hasNulls)
{
for (size_t i = 0; i < rows; ++i)
orc_column.notNull[i] = !(*null_bytemap)[i];
}
else
{
for (size_t i = 0; i < rows; ++i)
orc_column.notNull[i] = 1;
}

/// ORC doesn't have unsigned types, so cast everything to signed and sign-extend to Int64 to
/// make the ORC library calculate min and max correctly.
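The rewritten writeColumn centralizes null bookkeeping: hasNulls is set only if the bytemap has at least one non-zero byte (the memoryIsZero check above), and notNull is the bytemap's inverse. A sketch with plain vectors standing in for PaddedPODArray and the ORC batch:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

int main()
{
    const std::vector<uint8_t> null_bytemap = {0, 1, 0}; /// 1 = row is NULL
    std::vector<uint8_t> not_null(null_bytemap.size());

    /// hasNulls: any non-zero byte in the map.
    const bool has_nulls = std::any_of(null_bytemap.begin(), null_bytemap.end(),
                                       [](uint8_t b) { return b != 0; });

    /// notNull is the inverse of the bytemap; with no nulls every row is valid.
    for (size_t i = 0; i < null_bytemap.size(); ++i)
        not_null[i] = has_nulls ? !null_bytemap[i] : 1;
}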
@ -471,6 +458,7 @@ void ORCBlockOutputFormat::writeColumn(
}
case TypeIndex::Nullable:
{
chassert(!null_bytemap);
const auto & nullable_column = assert_cast<const ColumnNullable &>(column);
const PaddedPODArray<UInt8> & new_null_bytemap = assert_cast<const ColumnVector<UInt8> &>(*nullable_column.getNullMapColumnPtr()).getData();
auto nested_type = removeNullable(type);

@ -485,19 +473,15 @@ void ORCBlockOutputFormat::writeColumn(
const ColumnArray::Offsets & offsets = list_column.getOffsets();

size_t column_size = list_column.size();
list_orc_column.resize(column_size);
list_orc_column.offsets.resize(column_size + 1);

/// The length of list i in ListVectorBatch is offsets[i+1] - offsets[i].
list_orc_column.offsets[0] = 0;
for (size_t i = 0; i != column_size; ++i)
{
list_orc_column.offsets[i + 1] = offsets[i];
list_orc_column.notNull[i] = 1;
}

orc::ColumnVectorBatch & nested_orc_column = *list_orc_column.elements;
writeColumn(nested_orc_column, list_column.getData(), nested_type, null_bytemap);
list_orc_column.numElements = column_size;
writeColumn(nested_orc_column, list_column.getData(), nested_type, nullptr);
break;
}
case TypeIndex::Tuple:

@ -505,10 +489,8 @@ void ORCBlockOutputFormat::writeColumn(
orc::StructVectorBatch & struct_orc_column = dynamic_cast<orc::StructVectorBatch &>(orc_column);
const auto & tuple_column = assert_cast<const ColumnTuple &>(column);
auto nested_types = assert_cast<const DataTypeTuple *>(type.get())->getElements();
for (size_t i = 0; i != tuple_column.size(); ++i)
struct_orc_column.notNull[i] = 1;
for (size_t i = 0; i != tuple_column.tupleSize(); ++i)
writeColumn(*struct_orc_column.fields[i], tuple_column.getColumn(i), nested_types[i], null_bytemap);
writeColumn(*struct_orc_column.fields[i], tuple_column.getColumn(i), nested_types[i], nullptr);
break;
}
case TypeIndex::Map:

@ -520,25 +502,21 @@ void ORCBlockOutputFormat::writeColumn(

size_t column_size = list_column.size();

map_orc_column.resize(list_column.size());
map_orc_column.offsets.resize(column_size + 1);
/// The length of list i in ListVectorBatch is offsets[i+1] - offsets[i].
map_orc_column.offsets[0] = 0;
for (size_t i = 0; i != column_size; ++i)
{
map_orc_column.offsets[i + 1] = offsets[i];
map_orc_column.notNull[i] = 1;
}

const auto nested_columns = assert_cast<const ColumnTuple *>(list_column.getDataPtr().get())->getColumns();

orc::ColumnVectorBatch & keys_orc_column = *map_orc_column.keys;
auto key_type = map_type.getKeyType();
writeColumn(keys_orc_column, *nested_columns[0], key_type, null_bytemap);
writeColumn(keys_orc_column, *nested_columns[0], key_type, nullptr);

orc::ColumnVectorBatch & values_orc_column = *map_orc_column.elements;
auto value_type = map_type.getValueType();
writeColumn(values_orc_column, *nested_columns[1], value_type, null_bytemap);

map_orc_column.numElements = column_size;
writeColumn(values_orc_column, *nested_columns[1], value_type, nullptr);
break;
}
default:

@ -546,27 +524,6 @@ void ORCBlockOutputFormat::writeColumn(
}
}

size_t ORCBlockOutputFormat::getColumnSize(const IColumn & column, DataTypePtr & type)
{
if (type->getTypeId() == TypeIndex::Array)
{
auto nested_type = assert_cast<const DataTypeArray &>(*type).getNestedType();
const IColumn & nested_column = assert_cast<const ColumnArray &>(column).getData();
return std::max(column.size(), getColumnSize(nested_column, nested_type));
}

return column.size();
}

size_t ORCBlockOutputFormat::getMaxColumnSize(Chunk & chunk)
{
size_t columns_num = chunk.getNumColumns();
size_t max_column_size = 0;
for (size_t i = 0; i != columns_num; ++i)
max_column_size = std::max(max_column_size, getColumnSize(*chunk.getColumns()[i], data_types[i]));
return max_column_size;
}

void ORCBlockOutputFormat::consume(Chunk chunk)
{
if (!writer)

@ -575,10 +532,7 @@ void ORCBlockOutputFormat::consume(Chunk chunk)
size_t columns_num = chunk.getNumColumns();
size_t rows_num = chunk.getNumRows();

/// getMaxColumnSize is needed to write arrays.
/// The size of the batch must be no less than total amount of array elements
/// and no less than the number of rows (ORC writes a null bit for every row).
std::unique_ptr<orc::ColumnVectorBatch> batch = writer->createRowBatch(getMaxColumnSize(chunk));
std::unique_ptr<orc::ColumnVectorBatch> batch = writer->createRowBatch(chunk.getNumRows());
orc::StructVectorBatch & root = dynamic_cast<orc::StructVectorBatch &>(*batch);

auto columns = chunk.detachColumns();

@ -69,11 +69,6 @@ private:

void writeColumn(orc::ColumnVectorBatch & orc_column, const IColumn & column, DataTypePtr & type, const PaddedPODArray<UInt8> * null_bytemap);

/// These two functions are needed to know maximum nested size of arrays to
/// create an ORC Batch with the appropriate size
size_t getColumnSize(const IColumn & column, DataTypePtr & type);
size_t getMaxColumnSize(Chunk & chunk);

void prepareWriter();

const FormatSettings format_settings;

@ -24,8 +24,8 @@
#include <Processors/Transforms/SelectByIndicesTransform.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <Storages/MergeTree/MergeTreeDataSelectExecutor.h>
#include <Storages/MergeTree/MergeTreeIndexAnnoy.h>
#include <Storages/MergeTree/MergeTreeIndexUSearch.h>
#include <Storages/MergeTree/MergeTreeIndexVectorSimilarity.h>
#include <Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.h>
#include <Storages/MergeTree/MergeTreeReadPool.h>
#include <Storages/MergeTree/MergeTreePrefetchedReadPool.h>
#include <Storages/MergeTree/MergeTreeReadPoolInOrder.h>

@ -52,6 +52,8 @@
#include <memory>
#include <unordered_map>

#include "config.h"

using namespace DB;

namespace

@ -1474,16 +1476,14 @@ static void buildIndexes(
else
{
MergeTreeIndexConditionPtr condition;
if (index_helper->isVectorSearch())
if (index_helper->isVectorSimilarityIndex())
{
#ifdef ENABLE_ANNOY
if (const auto * annoy = typeid_cast<const MergeTreeIndexAnnoy *>(index_helper.get()))
condition = annoy->createIndexCondition(query_info, context);
#endif
#ifdef ENABLE_USEARCH
if (const auto * usearch = typeid_cast<const MergeTreeIndexUSearch *>(index_helper.get()))
condition = usearch->createIndexCondition(query_info, context);
#if USE_USEARCH
if (const auto * vector_similarity_index = typeid_cast<const MergeTreeIndexVectorSimilarity *>(index_helper.get()))
condition = vector_similarity_index->createIndexCondition(query_info, context);
#endif
if (const auto * legacy_vector_similarity_index = typeid_cast<const MergeTreeIndexLegacyVectorSimilarity *>(index_helper.get()))
condition = legacy_vector_similarity_index->createIndexCondition(query_info, context);
if (!condition)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown vector search index {}", index_helper->index.name);
}

@ -248,6 +248,8 @@ public:

void attachRequest(HTTPServerRequest * request_) { request = request_; }

const Poco::Net::HTTPServerSession & getSession() const { return session; }

private:
Poco::Net::HTTPServerSession & session;
HTTPServerRequest * request = nullptr;

@ -30,7 +30,7 @@ void WriteBufferFromHTTPServerResponse::startSendHeaders()
if (add_cors_header)
response.set("Access-Control-Allow-Origin", "*");

setResponseDefaultHeaders(response, keep_alive_timeout);
setResponseDefaultHeaders(response);

std::stringstream header; //STYLE_CHECK_ALLOW_STD_STRING_STREAM
response.beginWrite(header);

@ -119,12 +119,10 @@ void WriteBufferFromHTTPServerResponse::nextImpl()
WriteBufferFromHTTPServerResponse::WriteBufferFromHTTPServerResponse(
HTTPServerResponse & response_,
bool is_http_method_head_,
UInt64 keep_alive_timeout_,
const ProfileEvents::Event & write_event_)
: HTTPWriteBuffer(response_.getSocket(), write_event_)
, response(response_)
, is_http_method_head(is_http_method_head_)
, keep_alive_timeout(keep_alive_timeout_)
{
}

@ -29,7 +29,6 @@ public:
WriteBufferFromHTTPServerResponse(
HTTPServerResponse & response_,
bool is_http_method_head_,
UInt64 keep_alive_timeout_,
const ProfileEvents::Event & write_event_ = ProfileEvents::end());

~WriteBufferFromHTTPServerResponse() override;

@ -91,7 +90,6 @@ private:

bool is_http_method_head;
bool add_cors_header = false;
size_t keep_alive_timeout = 0;

bool initialized = false;
Some files were not shown because too many files have changed in this diff