Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-26 17:41:59 +00:00)
Merge branch 'master' into hanfei/refine-52451
commit 40992de98d
.gitignore (vendored): 2 lines changed
@@ -69,6 +69,7 @@ cmake-build-*
*.pyc
__pycache__
*.pytest_cache
.mypy_cache

test.cpp
CPackConfig.cmake

@@ -167,3 +168,4 @@ tests/integration/**/_gen
/rust/**/target
# It is autogenerated from *.in
/rust/**/.cargo/config.toml
/rust/**/vendor

.gitmodules (vendored): 3 lines changed
@@ -258,9 +258,6 @@
[submodule "contrib/wyhash"]
    path = contrib/wyhash
    url = https://github.com/wangyi-fudan/wyhash
[submodule "contrib/hashidsxx"]
    path = contrib/hashidsxx
    url = https://github.com/schoentoon/hashidsxx
[submodule "contrib/nats-io"]
    path = contrib/nats-io
    url = https://github.com/ClickHouse/nats.c

contrib/CMakeLists.txt (vendored): 1 line changed
@@ -164,7 +164,6 @@ add_contrib (libpq-cmake libpq)
add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float)
add_contrib (datasketches-cpp-cmake datasketches-cpp)
add_contrib (hashidsxx-cmake hashidsxx)

option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
if (ENABLE_NLP)

contrib/hashidsxx (vendored submodule): 1 line changed
@@ -1 +0,0 @@
Subproject commit 783f6911ccfdaca83e3cfac084c4aad888a80cee

@@ -1,14 +0,0 @@
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hashidsxx")

set (SRCS
    "${LIBRARY_DIR}/hashids.cpp"
)

set (HDRS
    "${LIBRARY_DIR}/hashids.h"
)

add_library(_hashidsxx ${SRCS} ${HDRS})
target_include_directories(_hashidsxx SYSTEM PUBLIC "${LIBRARY_DIR}")

add_library(ch_contrib::hashidsxx ALIAS _hashidsxx)

@@ -58,6 +58,33 @@ RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \
    rustup target add aarch64-apple-darwin && \
    rustup target add powerpc64le-unknown-linux-gnu

# Create vendor cache for cargo.
#
# Note that the config.toml for the root is used; you will not be able to
# install any other crates except those which have been vendored (since if
# there is "replace-with" for some source, then cargo will not look at other
# remotes except this one).
#
# Notes for the command itself:
# - --chown is required to preserve the rights
# - unstable-options is required for -C
# - chmod is required to fix the permissions, since builds run as a different user
# - the copy of Cargo.lock is required for proper dependency versions
# - cargo vendor --sync is required to overcome bug [1]
#
# [1]: https://github.com/rust-lang/wg-cargo-std-aware/issues/23
COPY --chown=root:root /rust /rust/packages
RUN cargo -Z unstable-options -C /rust/packages vendor > $CARGO_HOME/config.toml && \
    cp "$(rustc --print=sysroot)"/lib/rustlib/src/rust/Cargo.lock "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/ && \
    cargo -Z unstable-options -C /rust/packages vendor --sync "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/Cargo.toml && \
    rm "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/Cargo.lock && \
    sed -i "s#\"vendor\"#\"/rust/vendor\"#" $CARGO_HOME/config.toml && \
    cat $CARGO_HOME/config.toml && \
    mv /rust/packages/vendor /rust/vendor && \
    chmod -R o=r+X /rust/vendor && \
    ls -R -l /rust/packages && \
    rm -r /rust/packages

# NOTE: Seems like gcc-11 is too new for the ubuntu20 repository
# A cross-linker for RISC-V 64 (we need it because LLVM's LLD does not work):
RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \

docker/packager/binary/rust (symbolic link): 1 line changed
@@ -0,0 +1 @@
../../../rust

@@ -141,7 +141,6 @@ function clone_submodules
    contrib/jemalloc
    contrib/replxx
    contrib/wyhash
    contrib/hashidsxx
    contrib/c-ares
    contrib/morton-nd
    contrib/xxHash

@@ -7,11 +7,17 @@ Contains information about settings for `MergeTree` tables.

Columns:

- `name` (String) — Setting name.
- `value` (String) — Setting value.
- `description` (String) — Setting description.
- `type` (String) — Setting type (implementation specific string value).
- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed.
- `name` ([String](../../sql-reference/data-types/string.md)) — Setting name.
- `value` ([String](../../sql-reference/data-types/string.md)) — Setting value.
- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Whether the setting was explicitly defined in the config or explicitly changed.
- `description` ([String](../../sql-reference/data-types/string.md)) — Setting description.
- `min` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Minimum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no minimum value, contains [NULL](../../sql-reference/syntax.md#null-literal).
- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Maximum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no maximum value, contains [NULL](../../sql-reference/syntax.md#null-literal).
- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting:
    - `0` — Current user can change the setting.
    - `1` — Current user can’t change the setting.
- `type` ([String](../../sql-reference/data-types/string.md)) — Setting type (implementation specific string value).
- `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is obsolete.

**Example**

```sql
SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical;
```

@@ -21,35 +27,51 @@

```response
Row 1:
──────
name: min_compress_block_size
value: 0
changed: 0
description: When granule is written, compress the data in buffer if the size of pending uncompressed data is larger or equal than the specified threshold. If this setting is not set, the corresponding global setting is used.
min: ᴺᵁᴸᴸ
max: ᴺᵁᴸᴸ
readonly: 0
type: UInt64
is_obsolete: 0

Row 2:
──────
name: max_compress_block_size
value: 0
changed: 0
description: Compress the pending uncompressed data in buffer if its size is larger or equal than the specified threshold. Block of data will be compressed even if the current granule is not finished. If this setting is not set, the corresponding global setting is used.
min: ᴺᵁᴸᴸ
max: ᴺᵁᴸᴸ
readonly: 0
type: UInt64
is_obsolete: 0

Row 3:
──────
name: index_granularity
value: 8192
changed: 0
description: How many rows correspond to one primary key value.
type: SettingUInt64

Row 2:
──────
name: min_bytes_for_wide_part
value: 0
changed: 0
description: Minimal uncompressed size in bytes to create part in wide format instead of compact
type: SettingUInt64

Row 3:
──────
name: min_rows_for_wide_part
value: 0
changed: 0
description: Minimal number of rows to create part in wide format instead of compact
type: SettingUInt64
min: ᴺᵁᴸᴸ
max: ᴺᵁᴸᴸ
readonly: 0
type: UInt64
is_obsolete: 0

Row 4:
──────
name: merge_max_block_size
value: 8192
name: max_digestion_size_per_segment
value: 268435456
changed: 0
description: How many rows in blocks should be formed for merge operations.
type: SettingUInt64
description: Max number of bytes to digest per segment to build GIN index.
min: ᴺᵁᴸᴸ
max: ᴺᵁᴸᴸ
readonly: 0
type: UInt64
is_obsolete: 0

4 rows in set. Elapsed: 0.001 sec.
4 rows in set. Elapsed: 0.009 sec.
```

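As a side note (not part of this diff), the newly documented `is_obsolete` column can be used to list only obsolete MergeTree settings; a minimal sketch using the columns described above:

```sql
SELECT name, value, description
FROM system.merge_tree_settings
WHERE is_obsolete = 1;
```
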
@@ -14,6 +14,7 @@ Columns:
- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting was specified in `config.xml`
- `description` ([String](../../sql-reference/data-types/string.md)) — Short server setting description.
- `type` ([String](../../sql-reference/data-types/string.md)) — Server setting value type.
- `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is obsolete.

**Example**

@@ -26,14 +27,22 @@ WHERE name LIKE '%thread_pool%'
```

``` text
┌─name─────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┐
│ max_thread_pool_size │ 5000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │
│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │
│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │
│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │
│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │
│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │
└──────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┘
┌─name────────────────────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┬─is_obsolete─┐
│ max_thread_pool_size │ 10000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │ 0 │
│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │ 0 │
│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │ 0 │
│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │ 0 │
│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │ 0 │
│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │ 0 │
│ max_active_parts_loading_thread_pool_size │ 64 │ 64 │ 0 │ The number of threads to load active set of data parts (Active ones) at startup. │ UInt64 │ 0 │
│ max_outdated_parts_loading_thread_pool_size │ 32 │ 32 │ 0 │ The number of threads to load inactive set of data parts (Outdated ones) at startup. │ UInt64 │ 0 │
│ max_parts_cleaning_thread_pool_size │ 128 │ 128 │ 0 │ The number of threads for concurrent removal of inactive data parts. │ UInt64 │ 0 │
│ max_backups_io_thread_pool_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that would be used for IO operations for BACKUP queries │ UInt64 │ 0 │
│ max_backups_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for backups IO thread pool. │ UInt64 │ 0 │
│ backups_io_thread_pool_queue_size │ 0 │ 0 │ 0 │ Queue size for backups IO thread pool. │ UInt64 │ 0 │
└─────────────────────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┴─────────────┘
```

Using `WHERE changed` can be useful, for example, when you want to check

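For instance, a minimal sketch of such a check (not part of this diff, using only the columns documented above):

``` sql
SELECT name, value
FROM system.server_settings
WHERE changed;
```
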
@@ -17,6 +17,7 @@ Columns:
    - `0` — Current user can change the setting.
    - `1` — Current user can’t change the setting.
- `default` ([String](../../sql-reference/data-types/string.md)) — Setting default value.
- `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is obsolete.

**Example**

@@ -29,11 +30,14 @@ WHERE name LIKE '%min_i%'
```

``` text
┌─name────────────────────────────────────────┬─value─────┬─changed─┬─description───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─min──┬─max──┬─readonly─┐
│ min_insert_block_size_rows │ 1048576 │ 0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
│ min_insert_block_size_bytes │ 268435456 │ 0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
│ read_backoff_min_interval_between_events_ms │ 1000 │ 0 │ Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
└─────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴──────┴──────────┘
┌─name───────────────────────────────────────────────┬─value─────┬─changed─┬─description───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─min──┬─max──┬─readonly─┬─type─────────┬─default───┬─alias_for─┬─is_obsolete─┐
│ min_insert_block_size_rows │ 1048449 │ 0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ UInt64 │ 1048449 │ │ 0 │
│ min_insert_block_size_bytes │ 268402944 │ 0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ UInt64 │ 268402944 │ │ 0 │
│ min_insert_block_size_rows_for_materialized_views │ 0 │ 0 │ Like min_insert_block_size_rows, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_rows) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ UInt64 │ 0 │ │ 0 │
│ min_insert_block_size_bytes_for_materialized_views │ 0 │ 0 │ Like min_insert_block_size_bytes, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_bytes) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ UInt64 │ 0 │ │ 0 │
│ read_backoff_min_interval_between_events_ms │ 1000 │ 0 │ Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ Milliseconds │ 1000 │ │ 0 │
└────────────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴──────┴──────────┴──────────────┴───────────┴───────────┴─────────────┘
```

Using `WHERE changed` can be useful, for example, when you want to check:

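Similarly (not part of this diff), a small illustrative query that uses the documented `min`, `max` and `readonly` columns to show only constrained or read-only settings:

``` sql
SELECT name, value, min, max, readonly
FROM system.settings
WHERE min IS NOT NULL OR max IS NOT NULL OR readonly = 1;
```
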
@@ -51,7 +51,7 @@ Calculates the MD5 from a string and returns the resulting set of bytes as Fixed
If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead.
If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))).

## sipHash64 {#hash_functions-siphash64}
## sipHash64 (#hash_functions-siphash64)

Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.

@@ -63,9 +63,9 @@ This is a cryptographic hash function. It works at least three times faster than

The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm:

1. The first and the second hash value are concatenated to an array which is hashed.
2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
3. This calculation is repeated for all remaining hash values of the original input.
1. The first and the second hash value are concatenated to an array which is hashed.
2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
3. This calculation is repeated for all remaining hash values of the original input.

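As an illustration (not part of the diff) of the combining scheme described above, several values of different types can be passed in a single call:

``` sql
SELECT sipHash64('ClickHouse', 1, '2023-07-21') AS combined_hash;
```
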
**Arguments**

|
@ -631,3 +631,53 @@ Result:
|
||||
│ 100 │ 200 │ 100-200 │ 100 │
|
||||
└──────────────────────────────────────────────┴──────────────────────────────────────────────┴──────────────────────────────────────────────┴───────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## hasSubsequence
|
||||
|
||||
Returns 1 if needle is a subsequence of haystack, or 0 otherwise.
|
||||
A subsequence of a string is a sequence that can be derived from the given string by deleting zero or more elements without changing the order of the remaining elements.
|
||||
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
hasSubsequence(haystack, needle)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
|
||||
- `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
|
||||
|
||||
**Returned values**
|
||||
|
||||
- 1, if needle is a subsequence of haystack.
|
||||
- 0, otherwise.
|
||||
|
||||
Type: `UInt8`.
|
||||
|
||||
**Examples**
|
||||
|
||||
``` sql
|
||||
SELECT hasSubsequence('garbage', 'arg') ;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─hasSubsequence('garbage', 'arg')─┐
|
||||
│ 1 │
|
||||
└──────────────────────────────────┘
|
||||
```
|
||||
|
||||
## hasSubsequenceCaseInsensitive
|
||||
|
||||
Like [hasSubsequence](#hasSubsequence) but searches case-insensitively.
|
||||
|
||||
## hasSubsequenceUTF8
|
||||
|
||||
Like [hasSubsequence](#hasSubsequence) but assumes `haystack` and `needle` are UTF-8 encoded strings.
|
||||
|
||||
## hasSubsequenceCaseInsensitiveUTF8
|
||||
|
||||
Like [hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively.
|
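As an added illustration (not part of this diff) of the case-insensitive variant: based on the definitions above, the first call below would return 0 and the second would return 1.

``` sql
SELECT
    hasSubsequence('garbage', 'ARG')                AS case_sensitive,
    hasSubsequenceCaseInsensitive('garbage', 'ARG') AS case_insensitive;
```
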
@@ -801,3 +801,55 @@ SELECT countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв');
│ 3 │
└────────────────────────────────────────────────────────────┘
```

## hasSubsequence(haystack, needle) {#hasSubsequence}

Returns 1 if needle is a subsequence of haystack, otherwise 0.

**Syntax**

``` sql
hasSubsequence(haystack, needle)
```

**Arguments**

- `haystack` — the string in which the search is performed. [String](../syntax.md#syntax-string-literal).
- `needle` — the subsequence to search for. [String](../syntax.md#syntax-string-literal).

**Returned values**

- 1, if needle is a subsequence of haystack.
- 0, if the subsequence is not found.

Type: `UInt8`.

**Examples**

Query:

``` sql
SELECT hasSubsequence('garbage', 'arg');
```

Result:

``` text
┌─hasSubsequence('garbage', 'arg')─┐
│                                1 │
└──────────────────────────────────┘
```

## hasSubsequenceCaseInsensitive

Same as [hasSubsequence](#hasSubsequence), but case-insensitive.

## hasSubsequenceUTF8

Same as [hasSubsequence](#hasSubsequence), assuming that `haystack` and `needle` contain a set of code points representing UTF-8 encoded text.

## hasSubsequenceCaseInsensitiveUTF8

Same as [hasSubsequenceUTF8](#hasSubsequenceUTF8), but case-insensitive.

rust/.dockerignore (new file): 4 lines
@@ -0,0 +1,4 @@
# Just in case ignore any cargo stuff (and just in case someone will run this
# docker build locally with build context using folder root):
target
vendor

rust/.gitignore (vendored, new file): 4 lines
@@ -0,0 +1,4 @@
# This is for tar --exclude-vcs-ignores (and just in case someone will run
# docker build locally with build context created via tar):
target
vendor

rust/BLAKE3/Cargo.lock (generated): 92 lines deleted

@@ -1,92 +0,0 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "_ch_rust_blake3"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"blake3",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrayref"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
|
||||
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
version = "0.7.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
|
||||
|
||||
[[package]]
|
||||
name = "blake3"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "526c210b4520e416420759af363083471656e819a75e831b8d2c9d5a584f2413"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"arrayvec",
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"constant_time_eq",
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.73"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "constant_time_eq"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "generic-array"
|
||||
version = "0.14.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9"
|
||||
dependencies = [
|
||||
"typenum",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.132"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5"
|
||||
|
||||
[[package]]
|
||||
name = "typenum"
|
||||
version = "1.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
@@ -55,6 +55,8 @@ function(clickhouse_import_crate)
        endif()
    endif()

    # Note: --offline is not used here, since the vendor archive is used on CI,
    # and passing --offline here would be inconvenient for local development.
    corrosion_import_crate(NO_STD ${ARGN} PROFILE ${profile})
endfunction()

rust/skim/Cargo.lock → rust/Cargo.lock (generated): 519 lines changed

@@ -2,6 +2,22 @@
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "_ch_rust_blake3"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"blake3",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "_ch_rust_prql"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"prql-compiler",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "_ch_rust_skim_rust"
|
||||
version = "0.1.0"
|
||||
@ -12,6 +28,32 @@ dependencies = [
|
||||
"term",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "addr2line"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3"
|
||||
dependencies = [
|
||||
"gimli",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "adler"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
||||
|
||||
[[package]]
|
||||
name = "ahash"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"once_cell",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.0.2"
|
||||
@ -36,6 +78,31 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.72"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ariadne"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "367fd0ad87307588d087544707bc5fbf4805ded96c7db922b70d368fa1cb5702"
|
||||
dependencies = [
|
||||
"unicode-width",
|
||||
"yansi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrayref"
|
||||
version = "0.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545"
|
||||
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
version = "0.7.4"
|
||||
@ -48,6 +115,21 @@ version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "backtrace"
|
||||
version = "0.3.68"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12"
|
||||
dependencies = [
|
||||
"addr2line",
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"miniz_oxide",
|
||||
"object",
|
||||
"rustc-demangle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "beef"
|
||||
version = "0.5.2"
|
||||
@ -60,6 +142,29 @@ version = "1.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||
|
||||
[[package]]
|
||||
name = "blake3"
|
||||
version = "1.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"arrayvec",
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"constant_time_eq",
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "block-buffer"
|
||||
version = "0.10.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bumpalo"
|
||||
version = "3.13.0"
|
||||
@ -93,6 +198,16 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "chumsky"
|
||||
version = "0.9.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d"
|
||||
dependencies = [
|
||||
"hashbrown 0.12.3",
|
||||
"stacker",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codespan-reporting"
|
||||
version = "0.11.1"
|
||||
@ -103,6 +218,12 @@ dependencies = [
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "constant_time_eq"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2"
|
||||
|
||||
[[package]]
|
||||
name = "core-foundation-sys"
|
||||
version = "0.8.4"
|
||||
@ -177,10 +298,41 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cxx"
|
||||
version = "1.0.101"
|
||||
name = "crypto-common"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5032837c1384de3708043de9d4e97bb91290faca6c16529a28aa340592a78166"
|
||||
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
"typenum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "csv"
|
||||
version = "1.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086"
|
||||
dependencies = [
|
||||
"csv-core",
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "csv-core"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cxx"
|
||||
version = "1.0.102"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f68e12e817cb19eaab81aaec582b4052d07debd3c3c6b083b9d361db47c7dc9d"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"cxxbridge-flags",
|
||||
@ -190,9 +342,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cxx-build"
|
||||
version = "1.0.101"
|
||||
version = "1.0.102"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51368b3d0dbf356e10fcbfd455a038503a105ee556f7ee79b6bb8c53a7247456"
|
||||
checksum = "e789217e4ab7cf8cc9ce82253180a9fe331f35f5d339f0ccfe0270b39433f397"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"codespan-reporting",
|
||||
@ -200,24 +352,24 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"scratch",
|
||||
"syn 2.0.26",
|
||||
"syn 2.0.27",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cxxbridge-flags"
|
||||
version = "1.0.101"
|
||||
version = "1.0.102"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0d9062157072e4aafc8e56ceaf8325ce850c5ae37578c852a0d4de2cecdded13"
|
||||
checksum = "78a19f4c80fd9ab6c882286fa865e92e07688f4387370a209508014ead8751d0"
|
||||
|
||||
[[package]]
|
||||
name = "cxxbridge-macro"
|
||||
version = "1.0.101"
|
||||
version = "1.0.102"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf01e8a540f5a4e0f284595834f81cf88572f244b768f051724537afa99a2545"
|
||||
checksum = "b8fcfa71f66c8563c4fa9dd2bb68368d50267856f831ac5d85367e0805f9606c"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.26",
|
||||
"syn 2.0.27",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -296,6 +448,17 @@ dependencies = [
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.10.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
|
||||
dependencies = [
|
||||
"block-buffer",
|
||||
"crypto-common",
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dirs-next"
|
||||
version = "2.0.0"
|
||||
@ -319,9 +482,27 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.8.1"
|
||||
version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
|
||||
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
|
||||
|
||||
[[package]]
|
||||
name = "enum-as-inner"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
@ -338,6 +519,16 @@ dependencies = [
|
||||
"thread_local",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "generic-array"
|
||||
version = "0.14.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
|
||||
dependencies = [
|
||||
"typenum",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.10"
|
||||
@ -349,6 +540,33 @@ dependencies = [
|
||||
"wasi 0.11.0+wasi-snapshot-preview1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gimli"
|
||||
version = "0.27.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.3.2"
|
||||
@ -384,6 +602,31 @@ version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"hashbrown 0.14.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.10.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.64"
|
||||
@ -444,6 +687,21 @@ dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "minimal-lexical"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
|
||||
dependencies = [
|
||||
"adler",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nix"
|
||||
version = "0.24.3"
|
||||
@ -470,10 +728,20 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.15"
|
||||
name = "nom"
|
||||
version = "7.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
|
||||
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"minimal-lexical",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
@ -488,6 +756,15 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.31.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.18.0"
|
||||
@ -509,6 +786,41 @@ dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prql-compiler"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c99b52154002ac7f286dd2293c2f8d4e30526c1d396b14deef5ada1deef3c9ff"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"ariadne",
|
||||
"chumsky",
|
||||
"csv",
|
||||
"enum-as-inner",
|
||||
"itertools",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"semver",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_yaml",
|
||||
"sqlformat",
|
||||
"sqlparser",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "psm"
|
||||
version = "0.1.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.31"
|
||||
@ -589,12 +901,24 @@ version = "0.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.2.0"
|
||||
@ -608,10 +932,57 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a3cf7c11c38cb994f3d40e8a8cde3bbd1f72a435e4c49e85d6553d8312306152"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.171"
|
||||
name = "semver"
|
||||
version = "1.0.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9"
|
||||
checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.174"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3b88756493a5bd5e5395d53baa70b194b05764ab85b59e43e4b8f4e1192fa9b1"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.174"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e5c3a298c7f978e53536f95a63bdc4c4a64550582f31a0359a9afda6aede62e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.27",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.103"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_yaml"
|
||||
version = "0.9.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a49e178e4452f45cb61d0cd8cebc1b0fafd3e41929e996cef79aa3aca91f574"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
"unsafe-libyaml",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "skim"
|
||||
@ -638,12 +1009,74 @@ dependencies = [
|
||||
"vte",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sqlformat"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e"
|
||||
dependencies = [
|
||||
"itertools",
|
||||
"nom",
|
||||
"unicode_categories",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sqlparser"
|
||||
version = "0.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a"
|
||||
dependencies = [
|
||||
"log",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stacker"
|
||||
version = "0.1.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"psm",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
|
||||
dependencies = [
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.24.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "subtle"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.109"
|
||||
@ -657,9 +1090,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.26"
|
||||
version = "2.0.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970"
|
||||
checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@ -688,22 +1121,22 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.43"
|
||||
version = "1.0.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42"
|
||||
checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.43"
|
||||
version = "1.0.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f"
|
||||
checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.26",
|
||||
"syn 2.0.27",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -766,6 +1199,12 @@ dependencies = [
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typenum"
|
||||
version = "1.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.11"
|
||||
@ -778,12 +1217,30 @@ version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
|
||||
|
||||
[[package]]
|
||||
name = "unicode_categories"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
|
||||
|
||||
[[package]]
|
||||
name = "unsafe-libyaml"
|
||||
version = "0.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f28467d3e1d3c6586d8f25fa243f544f5800fec42d97032474e17222c2b75cfa"
|
||||
|
||||
[[package]]
|
||||
name = "utf8parse"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "vte"
|
||||
version = "0.11.1"
|
||||
@ -838,7 +1295,7 @@ dependencies = [
|
||||
"once_cell",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.26",
|
||||
"syn 2.0.27",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
@ -860,7 +1317,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.26",
|
||||
"syn 2.0.27",
|
||||
"wasm-bindgen-backend",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
@ -967,3 +1424,9 @@ name = "windows_x86_64_msvc"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
|
||||
|
||||
[[package]]
|
||||
name = "yansi"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
|
rust/Cargo.toml (new file): 12 lines

@@ -0,0 +1,12 @@
# workspace is required to vendor crates for all packages.
[workspace]
members = [
    "BLAKE3",
    "skim",
    "prql",
]
resolver = "2"

# FIXME: even though the profiles should be defined in the main cargo config we
# cannot do this yet, since we compile each package separately, so you should
# ignore the warning from cargo about this.

rust/prql/Cargo.lock (generated): 569 lines deleted

@@ -1,569 +0,0 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "_ch_rust_prql"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"prql-compiler",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "addr2line"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3"
|
||||
dependencies = [
|
||||
"gimli",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "adler"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
||||
|
||||
[[package]]
|
||||
name = "ahash"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"once_cell",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.71"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ariadne"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "367fd0ad87307588d087544707bc5fbf4805ded96c7db922b70d368fa1cb5702"
|
||||
dependencies = [
|
||||
"unicode-width",
|
||||
"yansi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backtrace"
|
||||
version = "0.3.68"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12"
|
||||
dependencies = [
|
||||
"addr2line",
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"miniz_oxide",
|
||||
"object",
|
||||
"rustc-demangle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.79"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "chumsky"
|
||||
version = "0.9.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d"
|
||||
dependencies = [
|
||||
"hashbrown 0.12.3",
|
||||
"stacker",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "csv"
|
||||
version = "1.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086"
|
||||
dependencies = [
|
||||
"csv-core",
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "csv-core"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
|
||||
|
||||
[[package]]
|
||||
name = "enum-as-inner"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "88bffebc5d80432c9b140ee17875ff173a8ab62faad5b257da912bd2f6c1c0a1"
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gimli"
|
||||
version = "0.27.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"hashbrown 0.14.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.10.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a"
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.147"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
|
||||
|
||||
[[package]]
|
||||
name = "minimal-lexical"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
|
||||
dependencies = [
|
||||
"adler",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "7.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"minimal-lexical",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.31.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.63"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prql-compiler"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c99b52154002ac7f286dd2293c2f8d4e30526c1d396b14deef5ada1deef3c9ff"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"ariadne",
|
||||
"chumsky",
|
||||
"csv",
|
||||
"enum-as-inner",
|
||||
"itertools",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"semver",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_yaml",
|
||||
"sqlformat",
|
||||
"sqlparser",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "psm"
|
||||
version = "0.1.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.29"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "89089e897c013b3deb627116ae56a6955a72b8bed395c9526af31c9fe528b484"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa250384981ea14565685dea16a9ccc4d1c541a13f82b9c168572264d1df8c56"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc31bd9b61a32c31f9650d18add92aa83a49ba979c143eefd27fe7177b05bd5f"
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9"
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "1.0.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.166"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d01b7404f9d441d3ad40e6a636a7782c377d2abdbe4fa2440e2edcc2f4f10db8"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.166"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5dd83d6dde2b6b2d466e14d9d1acce8816dedee94f735eac6395808b3483c6d6"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.23",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.100"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0f1e14e89be7aa4c4b78bdbdc9eb5bf8517829a600ae8eaa39a6e1d960b5185c"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_yaml"
|
||||
version = "0.9.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "452e67b9c20c37fa79df53201dc03839651086ed9bbe92b3ca585ca9fdaa7d85"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
"unsafe-libyaml",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sqlformat"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e"
|
||||
dependencies = [
|
||||
"itertools",
|
||||
"nom",
|
||||
"unicode_categories",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sqlparser"
|
||||
version = "0.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a"
|
||||
dependencies = [
|
||||
"log",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stacker"
|
||||
version = "0.1.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"psm",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
|
||||
dependencies = [
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.24.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.109"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
|
||||
|
||||
[[package]]
|
||||
name = "unicode_categories"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
|
||||
|
||||
[[package]]
|
||||
name = "unsafe-libyaml"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1865806a559042e51ab5414598446a5871b561d21b6764f2eabb0dd481d880a6"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.11.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "yansi"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
|
@ -187,6 +187,7 @@
|
||||
M(CacheFileSegments, "Number of existing cache file segments") \
|
||||
M(CacheDetachedFileSegments, "Number of existing detached cache file segments") \
|
||||
M(FilesystemCacheSize, "Filesystem cache size in bytes") \
|
||||
M(FilesystemCacheSizeLimit, "Filesystem cache size limit in bytes") \
|
||||
M(FilesystemCacheElements, "Filesystem cache elements (file segments)") \
|
||||
M(FilesystemCacheDownloadQueueElements, "Filesystem cache elements in download queue") \
|
||||
M(AsyncInsertCacheSize, "Number of async insert hash id in cache") \
|
||||
|
@ -3,7 +3,6 @@
|
||||
#include "CurrentThread.h"
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/ThreadStatus.h>
|
||||
#include <Common/TaskStatsInfoGetter.h>
|
||||
#include <Interpreters/ProcessList.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <base/getThreadId.h>
|
||||
|
@ -1,4 +1,4 @@
|
||||
#include "TaskStatsInfoGetter.h"
|
||||
#include "NetlinkMetricsProvider.h"
|
||||
#include <Common/Exception.h>
|
||||
#include <base/defines.h>
|
||||
#include <base/types.h>
|
||||
@ -200,7 +200,7 @@ bool checkPermissionsImpl()
|
||||
if (!res)
|
||||
return false;
|
||||
|
||||
/// Check that we can successfully initialize TaskStatsInfoGetter.
|
||||
/// Check that we can successfully initialize NetlinkMetricsProvider.
|
||||
/// It will ask about family id through Netlink.
|
||||
/// On some LXC containers we have capability but we still cannot use Netlink.
|
||||
/// There is an evidence that Linux fedora-riscv 6.1.22 gives something strange instead of the expected result.
|
||||
@ -208,7 +208,7 @@ bool checkPermissionsImpl()
|
||||
try
|
||||
{
|
||||
::taskstats stats{};
|
||||
TaskStatsInfoGetter().getStat(stats, static_cast<pid_t>(getThreadId()));
|
||||
NetlinkMetricsProvider().getStat(stats, static_cast<pid_t>(getThreadId()));
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
@ -244,14 +244,14 @@ UInt16 getFamilyId(int fd)
|
||||
}
|
||||
|
||||
|
||||
bool TaskStatsInfoGetter::checkPermissions()
|
||||
bool NetlinkMetricsProvider::checkPermissions()
|
||||
{
|
||||
static bool res = checkPermissionsImpl();
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
TaskStatsInfoGetter::TaskStatsInfoGetter()
|
||||
NetlinkMetricsProvider::NetlinkMetricsProvider()
|
||||
{
|
||||
netlink_socket_fd = ::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
|
||||
if (netlink_socket_fd < 0)
|
||||
@ -293,7 +293,7 @@ TaskStatsInfoGetter::TaskStatsInfoGetter()
|
||||
}
|
||||
|
||||
|
||||
void TaskStatsInfoGetter::getStat(::taskstats & out_stats, pid_t tid) const
|
||||
void NetlinkMetricsProvider::getStat(::taskstats & out_stats, pid_t tid) const
|
||||
{
|
||||
NetlinkMessage answer = query(netlink_socket_fd, taskstats_family_id, tid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &tid, sizeof(tid));
|
||||
|
||||
@ -318,7 +318,7 @@ void TaskStatsInfoGetter::getStat(::taskstats & out_stats, pid_t tid) const
|
||||
}
|
||||
|
||||
|
||||
TaskStatsInfoGetter::~TaskStatsInfoGetter()
|
||||
NetlinkMetricsProvider::~NetlinkMetricsProvider()
|
||||
{
|
||||
if (netlink_socket_fd >= 0)
|
||||
{
|
||||
@ -335,15 +335,15 @@ TaskStatsInfoGetter::~TaskStatsInfoGetter()
|
||||
namespace DB
|
||||
{
|
||||
|
||||
bool TaskStatsInfoGetter::checkPermissions()
|
||||
bool NetlinkMetricsProvider::checkPermissions()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
TaskStatsInfoGetter::TaskStatsInfoGetter() = default;
|
||||
TaskStatsInfoGetter::~TaskStatsInfoGetter() = default;
|
||||
NetlinkMetricsProvider::NetlinkMetricsProvider() = default;
|
||||
NetlinkMetricsProvider::~NetlinkMetricsProvider() = default;
|
||||
|
||||
void TaskStatsInfoGetter::getStat(::taskstats &, pid_t) const
|
||||
void NetlinkMetricsProvider::getStat(::taskstats &, pid_t) const
|
||||
{
|
||||
}
|
||||
|
@ -15,11 +15,11 @@ namespace DB
|
||||
///
|
||||
/// [1]: https://elixir.bootlin.com/linux/v5.18-rc4/source/kernel/tsacct.c#L101
|
||||
///
|
||||
class TaskStatsInfoGetter : private boost::noncopyable
|
||||
class NetlinkMetricsProvider : private boost::noncopyable
|
||||
{
|
||||
public:
|
||||
TaskStatsInfoGetter();
|
||||
~TaskStatsInfoGetter();
|
||||
NetlinkMetricsProvider();
|
||||
~NetlinkMetricsProvider();
|
||||
|
||||
void getStat(::taskstats & out_stats, pid_t tid) const;
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
|
||||
#include "TaskStatsInfoGetter.h"
|
||||
#include "NetlinkMetricsProvider.h"
|
||||
#include "ProcfsMetricsProvider.h"
|
||||
#include "hasLinuxCapability.h"
|
||||
|
||||
@ -99,7 +99,7 @@ TasksStatsCounters::MetricsProvider TasksStatsCounters::findBestAvailableProvide
|
||||
static std::optional<MetricsProvider> provider =
|
||||
[]() -> MetricsProvider
|
||||
{
|
||||
if (TaskStatsInfoGetter::checkPermissions())
|
||||
if (NetlinkMetricsProvider::checkPermissions())
|
||||
{
|
||||
return MetricsProvider::Netlink;
|
||||
}
|
||||
@ -119,7 +119,7 @@ TasksStatsCounters::TasksStatsCounters(const UInt64 tid, const MetricsProvider p
|
||||
switch (provider)
|
||||
{
|
||||
case MetricsProvider::Netlink:
|
||||
stats_getter = [metrics_provider = std::make_shared<TaskStatsInfoGetter>(), tid]()
|
||||
stats_getter = [metrics_provider = std::make_shared<NetlinkMetricsProvider>(), tid]()
|
||||
{
|
||||
::taskstats result{};
|
||||
metrics_provider->getStat(result, static_cast<pid_t>(tid));
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <base/sort.h>
|
||||
#include <base/getFQDNOrHostName.h>
|
||||
#include "Common/ZooKeeper/IKeeper.h"
|
||||
#include <Common/DNSResolver.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/logger_useful.h>
|
||||
@ -82,6 +83,9 @@ void ZooKeeper::init(ZooKeeperArgs args_)
|
||||
if (secure)
|
||||
host_string.erase(0, strlen("secure://"));
|
||||
|
||||
/// We want to resolve all hosts without DNS cache for keeper connection.
|
||||
Coordination::DNSResolver::instance().removeHostFromCache(host_string);
|
||||
|
||||
const Poco::Net::SocketAddress host_socket_addr{host_string};
|
||||
LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, host_socket_addr.toString());
|
||||
nodes.emplace_back(Coordination::ZooKeeper::Node{host_socket_addr, secure});
|
||||
|
@ -43,11 +43,12 @@ void LimitedReadPacket::readPayloadWithUnpacked(ReadBuffer & in)
|
||||
IMySQLReadPacket::readPayloadWithUnpacked(limited);
|
||||
}
|
||||
|
||||
uint64_t readLengthEncodedNumber(ReadBuffer & buffer)
|
||||
uint64_t readLengthEncodedNumber(ReadBuffer & buffer, UInt16 & bytes_read)
|
||||
{
|
||||
char c{};
|
||||
uint64_t buf = 0;
|
||||
buffer.readStrict(c);
|
||||
bytes_read = 1;
|
||||
auto cc = static_cast<uint8_t>(c);
|
||||
switch (cc)
|
||||
{
|
||||
@ -56,12 +57,15 @@ uint64_t readLengthEncodedNumber(ReadBuffer & buffer)
|
||||
break;
|
||||
case 0xfc:
|
||||
buffer.readStrict(reinterpret_cast<char *>(&buf), 2);
|
||||
bytes_read += 2;
|
||||
break;
|
||||
case 0xfd:
|
||||
buffer.readStrict(reinterpret_cast<char *>(&buf), 3);
|
||||
bytes_read += 3;
|
||||
break;
|
||||
case 0xfe:
|
||||
buffer.readStrict(reinterpret_cast<char *>(&buf), 8);
|
||||
bytes_read += 8;
|
||||
break;
|
||||
default:
|
||||
return cc;
|
||||
@ -69,6 +73,12 @@ uint64_t readLengthEncodedNumber(ReadBuffer & buffer)
|
||||
return buf;
|
||||
}
|
||||
|
||||
uint64_t readLengthEncodedNumber(ReadBuffer & buffer)
|
||||
{
|
||||
UInt16 bytes_read = 0;
|
||||
return readLengthEncodedNumber(buffer, bytes_read);
|
||||
}
|
||||
|
||||
void readLengthEncodedString(String & s, ReadBuffer & buffer)
|
||||
{
|
||||
uint64_t len = readLengthEncodedNumber(buffer);
|
||||
|
@ -34,6 +34,7 @@ public:
|
||||
};
|
||||
|
||||
uint64_t readLengthEncodedNumber(ReadBuffer & buffer);
|
||||
uint64_t readLengthEncodedNumber(ReadBuffer & buffer, UInt16 & bytes_read);
|
||||
void readLengthEncodedString(String & s, ReadBuffer & buffer);
|
||||
|
||||
}
|
||||
|
301
src/Core/MySQL/MySQLCharset.cpp
Normal file
301
src/Core/MySQL/MySQLCharset.cpp
Normal file
@ -0,0 +1,301 @@
|
||||
#include "MySQLCharset.h"
|
||||
#include "config.h"
|
||||
#include <iostream>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
#if USE_ICU
|
||||
#include <unicode/ucnv.h>
|
||||
#define CHUNK_SIZE 1024
|
||||
static const char * TARGET_CHARSET = "utf8";
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int UNKNOWN_EXCEPTION;
|
||||
}
|
||||
|
||||
const std::unordered_map<Int32, String> MySQLCharset::charsets
|
||||
= {
|
||||
{1, "big5"},
|
||||
{2, "latin2"},
|
||||
{3, "dec8"},
|
||||
{4, "cp850"},
|
||||
{5, "latin1"},
|
||||
{6, "hp8"},
|
||||
{7, "koi8r"},
|
||||
{8, "latin1"},
|
||||
{9, "latin2"},
|
||||
{10, "swe7"},
|
||||
{11, "ascii"},
|
||||
{12, "ujis"},
|
||||
{13, "sjis"},
|
||||
{14, "cp1251"},
|
||||
{15, "latin1"},
|
||||
{16, "hebrew"},
|
||||
{18, "tis620"},
|
||||
{19, "euckr"},
|
||||
{20, "latin7"},
|
||||
{21, "latin2"},
|
||||
{22, "koi8u"},
|
||||
{23, "cp1251"},
|
||||
{24, "gb2312"},
|
||||
{25, "greek"},
|
||||
{26, "cp1250"},
|
||||
{27, "latin2"},
|
||||
{28, "gbk"},
|
||||
{29, "cp1257"},
|
||||
{30, "latin5"},
|
||||
{31, "latin1"},
|
||||
{32, "armscii8"},
|
||||
{34, "cp1250"},
|
||||
{35, "ucs2"},
|
||||
{36, "cp866"},
|
||||
{37, "keybcs2"},
|
||||
{38, "macce"},
|
||||
{39, "macroman"},
|
||||
{40, "cp852"},
|
||||
{41, "latin7"},
|
||||
{42, "latin7"},
|
||||
{43, "macce"},
|
||||
{44, "cp1250"},
|
||||
{47, "latin1"},
|
||||
{48, "latin1"},
|
||||
{49, "latin1"},
|
||||
{50, "cp1251"},
|
||||
{51, "cp1251"},
|
||||
{52, "cp1251"},
|
||||
{53, "macroman"},
|
||||
{54, "utf16"},
|
||||
{55, "utf16"},
|
||||
{56, "utf16le"},
|
||||
{57, "cp1256"},
|
||||
{58, "cp1257"},
|
||||
{59, "cp1257"},
|
||||
{60, "utf32"},
|
||||
{61, "utf32"},
|
||||
{62, "utf16le"},
|
||||
{64, "armscii8"},
|
||||
{65, "ascii"},
|
||||
{66, "cp1250"},
|
||||
{67, "cp1256"},
|
||||
{68, "cp866"},
|
||||
{69, "dec8"},
|
||||
{70, "greek"},
|
||||
{71, "hebrew"},
|
||||
{72, "hp8"},
|
||||
{73, "keybcs2"},
|
||||
{74, "koi8r"},
|
||||
{75, "koi8u"},
|
||||
{77, "latin2"},
|
||||
{78, "latin5"},
|
||||
{79, "latin7"},
|
||||
{80, "cp850"},
|
||||
{81, "cp852"},
|
||||
{82, "swe7"},
|
||||
{84, "big5"},
|
||||
{85, "euckr"},
|
||||
{86, "gb2312"},
|
||||
{87, "gbk"},
|
||||
{88, "sjis"},
|
||||
{89, "tis620"},
|
||||
{90, "ucs2"},
|
||||
{91, "ujis"},
|
||||
{92, "geostd8"},
|
||||
{93, "geostd8"},
|
||||
{94, "latin1"},
|
||||
{95, "cp932"},
|
||||
{96, "cp932"},
|
||||
{97, "eucjpms"},
|
||||
{98, "eucjpms"},
|
||||
{99, "cp1250"},
|
||||
{101, "utf16"},
|
||||
{102, "utf16"},
|
||||
{103, "utf16"},
|
||||
{104, "utf16"},
|
||||
{105, "utf16"},
|
||||
{106, "utf16"},
|
||||
{107, "utf16"},
|
||||
{108, "utf16"},
|
||||
{109, "utf16"},
|
||||
{110, "utf16"},
|
||||
{111, "utf16"},
|
||||
{112, "utf16"},
|
||||
{113, "utf16"},
|
||||
{114, "utf16"},
|
||||
{115, "utf16"},
|
||||
{116, "utf16"},
|
||||
{117, "utf16"},
|
||||
{118, "utf16"},
|
||||
{119, "utf16"},
|
||||
{120, "utf16"},
|
||||
{121, "utf16"},
|
||||
{122, "utf16"},
|
||||
{123, "utf16"},
|
||||
{124, "utf16"},
|
||||
{128, "ucs2"},
|
||||
{129, "ucs2"},
|
||||
{130, "ucs2"},
|
||||
{131, "ucs2"},
|
||||
{132, "ucs2"},
|
||||
{133, "ucs2"},
|
||||
{134, "ucs2"},
|
||||
{135, "ucs2"},
|
||||
{136, "ucs2"},
|
||||
{137, "ucs2"},
|
||||
{138, "ucs2"},
|
||||
{139, "ucs2"},
|
||||
{140, "ucs2"},
|
||||
{141, "ucs2"},
|
||||
{142, "ucs2"},
|
||||
{143, "ucs2"},
|
||||
{144, "ucs2"},
|
||||
{145, "ucs2"},
|
||||
{146, "ucs2"},
|
||||
{147, "ucs2"},
|
||||
{148, "ucs2"},
|
||||
{149, "ucs2"},
|
||||
{150, "ucs2"},
|
||||
{151, "ucs2"},
|
||||
{159, "ucs2"},
|
||||
{160, "utf32"},
|
||||
{161, "utf32"},
|
||||
{162, "utf32"},
|
||||
{163, "utf32"},
|
||||
{164, "utf32"},
|
||||
{165, "utf32"},
|
||||
{166, "utf32"},
|
||||
{167, "utf32"},
|
||||
{168, "utf32"},
|
||||
{169, "utf32"},
|
||||
{170, "utf32"},
|
||||
{171, "utf32"},
|
||||
{172, "utf32"},
|
||||
{173, "utf32"},
|
||||
{174, "utf32"},
|
||||
{175, "utf32"},
|
||||
{176, "utf32"},
|
||||
{177, "utf32"},
|
||||
{178, "utf32"},
|
||||
{179, "utf32"},
|
||||
{180, "utf32"},
|
||||
{181, "utf32"},
|
||||
{182, "utf32"},
|
||||
{183, "utf32"},
|
||||
{248, "gb18030"},
|
||||
{249, "gb18030"},
|
||||
{250, "gb18030"}
|
||||
};
|
||||
|
||||
MySQLCharset::~MySQLCharset()
|
||||
{
|
||||
#if USE_ICU
|
||||
std::lock_guard lock(mutex);
|
||||
for (auto & conv : conv_cache)
|
||||
{
|
||||
ucnv_close(conv.second);
|
||||
}
|
||||
conv_cache.clear();
|
||||
#endif
|
||||
}
|
||||
|
||||
bool MySQLCharset::needConvert(UInt32 id)
|
||||
{
|
||||
return charsets.contains(id);
|
||||
}
|
||||
|
||||
String MySQLCharset::getCharsetFromId(UInt32 id)
|
||||
{
|
||||
return charsets.at(id);
|
||||
}
|
||||
|
||||
UConverter * MySQLCharset::getCachedConverter(const String & charset [[maybe_unused]])
|
||||
{
|
||||
UConverter * conv = nullptr;
|
||||
#if USE_ICU
|
||||
UErrorCode error = U_ZERO_ERROR;
|
||||
/// Get conv from cache
|
||||
auto result = conv_cache.find(charset);
|
||||
if (result != conv_cache.end())
|
||||
{
|
||||
conv = result->second;
|
||||
//reset to init state
|
||||
ucnv_reset(conv);
|
||||
}
|
||||
else
|
||||
{
|
||||
conv = ucnv_open(charset.c_str(), &error);
|
||||
if (error != U_ZERO_ERROR)
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::UNKNOWN_EXCEPTION, "MySQLCharset::getCachedConveter: ucnv_open failed, error={}", std::to_string(error));
|
||||
}
|
||||
conv_cache[charset.c_str()] = conv;
|
||||
}
|
||||
#endif
|
||||
return conv;
|
||||
}
|
||||
|
||||
Int32 MySQLCharset::convertFromId(UInt32 id [[maybe_unused]], String & to, const String & from)
|
||||
{
|
||||
#if USE_ICU
|
||||
std::lock_guard lock(mutex);
|
||||
UErrorCode error = U_ZERO_ERROR;
|
||||
String source_charset = getCharsetFromId(id);
|
||||
to.clear();
|
||||
if (source_charset.empty())
|
||||
{
|
||||
return U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
|
||||
UChar pivot_buf[CHUNK_SIZE]; // stream mode must use this buf
|
||||
char target_buf[CHUNK_SIZE];
|
||||
UChar * pivot;
|
||||
UChar * pivot2;
|
||||
UConverter * in_conv;
|
||||
UConverter * out_conv;
|
||||
char * cur_target;
|
||||
const char * source_end;
|
||||
const char * target_end;
|
||||
|
||||
size_t source_len = from.size();
|
||||
const char * source = from.data();
|
||||
source_end = source + source_len;
|
||||
|
||||
out_conv = getCachedConverter(TARGET_CHARSET);
|
||||
in_conv = getCachedConverter(source_charset);
|
||||
pivot = pivot_buf;
|
||||
pivot2 = pivot_buf;
|
||||
|
||||
target_end = target_buf + CHUNK_SIZE;
|
||||
do
|
||||
{
|
||||
error = U_ZERO_ERROR;
|
||||
cur_target = target_buf;
|
||||
ucnv_convertEx(
|
||||
out_conv,
|
||||
in_conv,
|
||||
&cur_target,
|
||||
target_end,
|
||||
&source,
|
||||
source_end,
|
||||
pivot_buf,
|
||||
&pivot,
|
||||
&pivot2,
|
||||
pivot_buf + CHUNK_SIZE,
|
||||
false,
|
||||
true,
|
||||
&error);
|
||||
to.append(target_buf, cur_target - target_buf);
|
||||
} while (error == U_BUFFER_OVERFLOW_ERROR);
|
||||
|
||||
return error;
|
||||
#else
|
||||
to = from;
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
26
src/Core/MySQL/MySQLCharset.h
Normal file
26
src/Core/MySQL/MySQLCharset.h
Normal file
@ -0,0 +1,26 @@
|
||||
#pragma once
|
||||
#include <unordered_map>
|
||||
#include <base/types.h>
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <mutex>
|
||||
|
||||
struct UConverter;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class MySQLCharset final : boost::noncopyable
|
||||
{
|
||||
public:
|
||||
~MySQLCharset();
|
||||
String getCharsetFromId(UInt32 id);
|
||||
Int32 convertFromId(UInt32 id, String & to, const String & from);
|
||||
bool needConvert(UInt32 id);
|
||||
private:
|
||||
std::mutex mutex;
|
||||
std::unordered_map<String, UConverter *> conv_cache;
|
||||
UConverter * getCachedConverter(const String & charset);
|
||||
static const std::unordered_map<Int32, String> charsets;
|
||||
};
|
||||
|
||||
using MySQLCharsetPtr = std::shared_ptr<MySQLCharset>;
|
||||
}
|
@ -187,9 +187,9 @@ namespace MySQLReplication
|
||||
size_t null_bitmap_size = (column_count + 7) / 8;
|
||||
readBitmap(payload, null_bitmap, null_bitmap_size);
|
||||
|
||||
/// Ignore MySQL 8.0 optional metadata fields.
|
||||
/// Parse MySQL 8.0 optional metadata fields.
|
||||
/// https://mysqlhighavailability.com/more-metadata-is-written-into-binary-log/
|
||||
payload.ignoreAll();
|
||||
parseOptionalMetaField(payload);
|
||||
}
|
||||
|
||||
/// Types that do not used in the binlog event:
|
||||
@ -263,6 +263,118 @@ namespace MySQLReplication
|
||||
}
|
||||
}
|
||||
|
||||
void TableMapEvent::parseOptionalMetaField(ReadBuffer & payload)
|
||||
{
|
||||
char type = 0;
|
||||
while (payload.read(type))
|
||||
{
|
||||
UInt64 len = readLengthEncodedNumber(payload);
|
||||
if (len == 0)
|
||||
{
|
||||
payload.ignoreAll();
|
||||
return;
|
||||
}
|
||||
switch (type)
|
||||
{
|
||||
/// It may be useful, parse later
|
||||
case SIGNEDNESS:
|
||||
payload.ignore(len);
|
||||
break;
|
||||
case DEFAULT_CHARSET:
|
||||
{
|
||||
UInt32 total_read = 0;
|
||||
UInt16 once_read = 0;
|
||||
default_charset = static_cast<UInt32>(readLengthEncodedNumber(payload, once_read));
|
||||
total_read += once_read;
|
||||
while (total_read < len)
|
||||
{
|
||||
UInt32 col_index = static_cast<UInt32>(readLengthEncodedNumber(payload, once_read));
|
||||
total_read += once_read;
|
||||
UInt32 col_charset = static_cast<UInt32>(readLengthEncodedNumber(payload, once_read));
|
||||
total_read += once_read;
|
||||
default_charset_pairs.emplace(col_index, col_charset);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case COLUMN_CHARSET:
|
||||
{
|
||||
UInt32 total_read = 0;
|
||||
UInt16 once_read = 0;
|
||||
while (total_read < len)
|
||||
{
|
||||
UInt32 collation_id = static_cast<UInt32>(readLengthEncodedNumber(payload, once_read));
|
||||
column_charset.emplace_back(collation_id);
|
||||
total_read += once_read;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case COLUMN_NAME:
|
||||
payload.ignore(len);
|
||||
break;
|
||||
case SET_STR_VALUE:
|
||||
case GEOMETRY_TYPE:
|
||||
case SIMPLE_PRIMARY_KEY:
|
||||
case PRIMARY_KEY_WITH_PREFIX:
|
||||
case ENUM_AND_SET_DEFAULT_CHARSET:
|
||||
case COLUMN_VISIBILITY:
|
||||
default:
|
||||
payload.ignore(len);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
UInt32 TableMapEvent::getColumnCharsetId(UInt32 column_index)
|
||||
{
|
||||
if (!column_charset.empty())
|
||||
{
|
||||
UInt32 str_index = 0xFFFFFFFF;
|
||||
/// Calc the index in the column_charset
|
||||
for (UInt32 i = 0; i <= column_index; ++i)
|
||||
{
|
||||
switch (column_type[i])
|
||||
{
|
||||
case MYSQL_TYPE_STRING:
|
||||
case MYSQL_TYPE_VAR_STRING:
|
||||
case MYSQL_TYPE_VARCHAR:
|
||||
case MYSQL_TYPE_BLOB:
|
||||
++str_index;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (str_index != 0xFFFFFFFF && str_index < column_charset.size())
|
||||
{
|
||||
return column_charset[str_index];
|
||||
}
|
||||
}
|
||||
else if (!default_charset_pairs.empty())
|
||||
{
|
||||
UInt32 str_index = 0xFFFFFFFF;
|
||||
for (UInt32 i = 0; i <= column_index; ++i)
|
||||
{
|
||||
switch (column_type[i])
|
||||
{
|
||||
case MYSQL_TYPE_STRING:
|
||||
case MYSQL_TYPE_VAR_STRING:
|
||||
case MYSQL_TYPE_VARCHAR:
|
||||
case MYSQL_TYPE_BLOB:
|
||||
++str_index;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (default_charset_pairs.contains(str_index))
|
||||
{
|
||||
return default_charset_pairs[str_index];
|
||||
}
|
||||
}
|
||||
return default_charset;
|
||||
}
|
||||
|
||||
void TableMapEvent::dump(WriteBuffer & out) const
|
||||
{
|
||||
header.dump(out);
|
||||
@ -319,6 +431,22 @@ namespace MySQLReplication
|
||||
}
|
||||
}
|
||||
|
||||
static inline String convertCharsetIfNeeded(
|
||||
const std::shared_ptr<TableMapEvent> & table_map,
|
||||
UInt32 i,
|
||||
const String & val)
|
||||
{
|
||||
const auto collation_id = table_map->getColumnCharsetId(i);
|
||||
if (table_map->charset_ptr->needConvert(collation_id))
|
||||
{
|
||||
String target;
|
||||
auto err = table_map->charset_ptr->convertFromId(collation_id, target, val);
|
||||
if (err == 0)
|
||||
return target;
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
/// Types that do not used in the binlog event:
|
||||
/// MYSQL_TYPE_SET
|
||||
/// MYSQL_TYPE_TINY_BLOB
|
||||
@ -727,7 +855,7 @@ namespace MySQLReplication
|
||||
String val;
|
||||
val.resize(size);
|
||||
payload.readStrict(reinterpret_cast<char *>(val.data()), size);
|
||||
row.push_back(Field{String{val}});
|
||||
row.emplace_back(Field{convertCharsetIfNeeded(table_map, i, val)});
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_STRING:
|
||||
@ -745,7 +873,7 @@ namespace MySQLReplication
|
||||
String val;
|
||||
val.resize(size);
|
||||
payload.readStrict(reinterpret_cast<char *>(val.data()), size);
|
||||
row.push_back(Field{String{val}});
|
||||
row.emplace_back(Field{convertCharsetIfNeeded(table_map, i, val)});
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_GEOMETRY:
|
||||
@ -777,7 +905,10 @@ namespace MySQLReplication
|
||||
String val;
|
||||
val.resize(size);
|
||||
payload.readStrict(reinterpret_cast<char *>(val.data()), size);
|
||||
row.push_back(Field{String{val}});
|
||||
row.emplace_back(Field{
|
||||
field_type == MYSQL_TYPE_BLOB
|
||||
? convertCharsetIfNeeded(table_map, i, val)
|
||||
: val});
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -977,7 +1108,7 @@ namespace MySQLReplication
|
||||
map_event_header.parse(event_payload);
|
||||
if (doReplicate(map_event_header.schema, map_event_header.table))
|
||||
{
|
||||
event = std::make_shared<TableMapEvent>(std::move(event_header), map_event_header);
|
||||
event = std::make_shared<TableMapEvent>(std::move(event_header), map_event_header, flavor_charset);
|
||||
event->parseEvent(event_payload);
|
||||
auto table_map = std::static_pointer_cast<TableMapEvent>(event);
|
||||
table_maps[table_map->table_id] = table_map;
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <Core/Field.h>
|
||||
#include <Core/MySQL/PacketsReplication.h>
|
||||
#include <Core/MySQL/MySQLGtid.h>
|
||||
#include <Core/MySQL/MySQLCharset.h>
|
||||
#include <base/types.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
@ -324,9 +325,24 @@ namespace MySQLReplication
|
||||
UInt32 column_count;
|
||||
std::vector<UInt8> column_type;
|
||||
std::vector<UInt16> column_meta;
|
||||
/// Character set of string columns
|
||||
std::vector<UInt32> column_charset;
|
||||
/// Character set of string columns,
|
||||
/// optimized to minimize space when many
|
||||
/// columns have the same charset
|
||||
UInt32 default_charset = 255; /// utf8mb4_0900_ai_ci
|
||||
std::unordered_map<UInt32, UInt32> default_charset_pairs;
|
||||
/// Points to flavor_charset object
|
||||
MySQLCharsetPtr charset_ptr;
|
||||
Bitmap null_bitmap;
|
||||
|
||||
TableMapEvent(EventHeader && header_, const TableMapEventHeader & map_event_header) : EventBase(std::move(header_)), column_count(0)
|
||||
TableMapEvent(
|
||||
EventHeader && header_,
|
||||
const TableMapEventHeader & map_event_header,
|
||||
const MySQLCharsetPtr & charset_ptr_)
|
||||
: EventBase(std::move(header_))
|
||||
, column_count(0)
|
||||
, charset_ptr(charset_ptr_)
|
||||
{
|
||||
table_id = map_event_header.table_id;
|
||||
flags = map_event_header.flags;
|
||||
@ -336,10 +352,52 @@ namespace MySQLReplication
|
||||
table = map_event_header.table;
|
||||
}
|
||||
void dump(WriteBuffer & out) const override;
|
||||
UInt32 getColumnCharsetId(UInt32 column_index);
|
||||
/// https://mysqlhighavailability.com/more-metadata-is-written-into-binary-log/
|
||||
/// https://github.com/mysql/mysql-server/blob/8.0/libbinlogevents/include/rows_event.h#L50
|
||||
/// DEFAULT_CHARSET and COLUMN_CHARSET don't appear together, and
|
||||
/// ENUM_AND_SET_DEFAULT_CHARSET and ENUM_AND_SET_COLUMN_CHARSET don't appear together.
|
||||
enum OptionalMetaType : char
|
||||
{
|
||||
/// UNSIGNED flag of numeric columns
|
||||
SIGNEDNESS = 1,
|
||||
/// Character set of string columns, optimized to
|
||||
/// minimize space when many columns have the
|
||||
/// same charset
|
||||
DEFAULT_CHARSET,
|
||||
/// Character set of string columns, optimized to
|
||||
/// minimize space when columns have many
|
||||
/// different charsets
|
||||
COLUMN_CHARSET,
|
||||
COLUMN_NAME,
|
||||
/// String value of SET columns
|
||||
SET_STR_VALUE,
|
||||
/// String value of ENUM columns
|
||||
ENUM_STR_VALUE,
|
||||
/// Real type of geometry columns
|
||||
GEOMETRY_TYPE,
|
||||
/// Primary key without prefix
|
||||
SIMPLE_PRIMARY_KEY,
|
||||
/// Primary key with prefix
|
||||
PRIMARY_KEY_WITH_PREFIX,
|
||||
/// Character set of enum and set
|
||||
/// columns, optimized to minimize
|
||||
/// space when many columns have the
|
||||
/// same charset
|
||||
ENUM_AND_SET_DEFAULT_CHARSET,
|
||||
/// Character set of enum and set
|
||||
/// columns, optimized to minimize
|
||||
/// space when many columns have the
|
||||
/// same charset
|
||||
ENUM_AND_SET_COLUMN_CHARSET,
|
||||
/// Flag to indicate column visibility attribute
|
||||
COLUMN_VISIBILITY
|
||||
};
|
||||
|
||||
protected:
|
||||
void parseImpl(ReadBuffer & payload) override;
|
||||
void parseMeta(String meta);
|
||||
void parseOptionalMetaField(ReadBuffer & payload);
|
||||
};
|
||||
|
||||
enum RowsEventFlags
|
||||
@ -486,6 +544,7 @@ namespace MySQLReplication
|
||||
std::unordered_set<String> replicate_tables;
|
||||
std::map<UInt64, std::shared_ptr<TableMapEvent> > table_maps;
|
||||
size_t checksum_signature_length = 4;
|
||||
MySQLCharsetPtr flavor_charset = std::make_shared<MySQLCharset>();
|
||||
|
||||
bool doReplicate(UInt64 table_id);
|
||||
bool doReplicate(const String & db, const String & table_name);
|
||||
|
@ -761,7 +761,7 @@ class IColumn;
|
||||
/** Experimental functions */ \
|
||||
M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
|
||||
M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
|
||||
M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions (hashid, etc)", 0) \
|
||||
M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \
|
||||
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
|
||||
M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \
|
||||
M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \
|
||||
@ -775,7 +775,7 @@ class IColumn;
|
||||
M(Bool, allow_experimental_undrop_table_query, false, "Allow to use undrop query to restore dropped table in a limited time", 0) \
|
||||
M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
|
||||
M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \
|
||||
M(Timezone, session_timezone, "", "The default timezone for current session or query. The server default timezone if empty.", 0) \
|
||||
M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \
|
||||
M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0)\
|
||||
// End of COMMON_SETTINGS
|
||||
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
|
||||
|
351
src/Core/tests/gtest_charset_conv.cpp
Normal file
351
src/Core/tests/gtest_charset_conv.cpp
Normal file
@ -0,0 +1,351 @@
|
||||
#include <Core/MySQL/MySQLCharset.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include <cstdio>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct CheckResult
|
||||
{
|
||||
Int32 id;
|
||||
String name;
|
||||
bool need_convert;
|
||||
};
|
||||
|
||||
TEST(CharsetTest, CharsetTest)
|
||||
{
|
||||
MySQLCharset charset;
|
||||
UInt32 big5_id = 1;
|
||||
UInt32 gbk_id = 28;
|
||||
UInt32 gb2312_id = 24;
|
||||
UInt32 utf8mb4_ai_ci_id = 255;
|
||||
EXPECT_TRUE(charset.needConvert(big5_id));
|
||||
EXPECT_TRUE(charset.needConvert(gbk_id));
|
||||
EXPECT_TRUE(charset.needConvert(gb2312_id));
|
||||
EXPECT_FALSE(charset.needConvert(utf8mb4_ai_ci_id));
|
||||
EXPECT_FALSE(charset.needConvert(0));
|
||||
EXPECT_FALSE(charset.needConvert(1000));
|
||||
|
||||
EXPECT_EQ(charset.getCharsetFromId(big5_id), String("big5"));
|
||||
EXPECT_EQ(charset.getCharsetFromId(gbk_id), String("gbk"));
|
||||
EXPECT_EQ(charset.getCharsetFromId(gb2312_id), String("gb2312"));
|
||||
}
|
||||
|
||||
TEST(CharsetTest, ConvTest)
|
||||
{
|
||||
MySQLCharset charset;
|
||||
UInt32 big5_id = 1;
|
||||
UInt32 gbk_id = 28;
|
||||
UInt32 gb2312_id = 24;
|
||||
Int32 error = 0;
|
||||
String source("\xc4\xe3\xba\xc3"); // gbk "你好"
|
||||
String target;
|
||||
String expect("\xe4\xbd\xa0\xe5\xa5\xbd");
|
||||
|
||||
error = charset.convertFromId(gbk_id, target, source);
|
||||
EXPECT_EQ(error, 0);
|
||||
EXPECT_TRUE(target == expect);
|
||||
|
||||
error = charset.convertFromId(gb2312_id, target, source);
|
||||
EXPECT_EQ(error, 0);
|
||||
EXPECT_TRUE(target == expect);
|
||||
|
||||
source.assign("\xa7\x41\xa6\x6e"); // big5 "你好"
|
||||
error = charset.convertFromId(big5_id, target, source);
|
||||
EXPECT_EQ(error, 0);
|
||||
EXPECT_TRUE(target == expect);
|
||||
}
|
||||
|
||||
TEST(CharsetTest, FullCharsetCheck)
|
||||
{
|
||||
CheckResult result[] =
|
||||
{
|
||||
{1, "big5", true}, // "big5_chinese_ci",
|
||||
{2, "latin2", true}, // "latin2_czech_cs",
|
||||
{3, "dec8", true}, // "dec8_swedish_ci",
|
||||
{4, "cp850", true}, // "cp850_general_ci",
|
||||
{5, "latin1", true}, // "latin1_german1_ci",
|
||||
{6, "hp8", true}, // "hp8_english_ci",
|
||||
{7, "koi8r", true}, // "koi8r_general_ci",
|
||||
{8, "latin1", true}, // "latin1_swedish_ci",
|
||||
{9, "latin2", true}, // "latin2_general_ci",
|
||||
{10, "swe7", true}, // "swe7_swedish_ci",
|
||||
{11, "ascii", true}, // "ascii_general_ci",
|
||||
{12, "ujis", true}, // "ujis_japanese_ci",
|
||||
{13, "sjis", true}, // "sjis_japanese_ci",
|
||||
{14, "cp1251", true}, // "cp1251_bulgarian_ci",
|
||||
{15, "latin1", true}, // "latin1_danish_ci",
|
||||
{16, "hebrew", true}, // "hebrew_general_ci",
|
||||
{18, "tis620", true}, // "tis620_thai_ci",
|
||||
{19, "euckr", true}, // "euckr_korean_ci",
|
||||
{20, "latin7", true}, // "latin7_estonian_cs",
|
||||
{21, "latin2", true}, // "latin2_hungarian_ci",
|
||||
{22, "koi8u", true}, // "koi8u_general_ci",
|
||||
{23, "cp1251", true}, // "cp1251_ukrainian_ci",
|
||||
{24, "gb2312", true}, // "gb2312_chinese_ci",
|
||||
{25, "greek", true}, // "greek_general_ci",
|
||||
{26, "cp1250", true}, // "cp1250_general_ci",
|
||||
{27, "latin2", true}, // "latin2_croatian_ci",
|
||||
{28, "gbk", true}, // "gbk_chinese_ci",
|
||||
{29, "cp1257", true}, // "cp1257_lithuanian_ci",
|
||||
{30, "latin5", true}, // "latin5_turkish_ci",
|
||||
{31, "latin1", true}, // "latin1_german2_ci",
|
||||
{32, "armscii8", true}, // "armscii8_general_ci",
|
||||
{33, "utf8", false}, // "utf8_general_ci",
|
||||
{34, "cp1250", true}, // "cp1250_czech_cs",
|
||||
{35, "ucs2", true}, // "ucs2_general_ci",
|
||||
{36, "cp866", true}, // "cp866_general_ci",
|
||||
{37, "keybcs2", true}, // "keybcs2_general_ci",
|
||||
{38, "macce", true}, // "macce_general_ci",
|
||||
{39, "macroman", true}, // "macroman_general_ci",
|
||||
{40, "cp852", true}, // "cp852_general_ci",
|
||||
{41, "latin7", true}, // "latin7_general_ci",
|
||||
{42, "latin7", true}, // "latin7_general_cs",
|
||||
{43, "macce", true}, // "macce_bin",
|
||||
{44, "cp1250", true}, // "cp1250_croatian_ci",
|
||||
{45, "utf8mb4", false}, // "utf8mb4_general_ci",
|
||||
{46, "utf8mb4", false}, // "utf8mb4_bin",
|
||||
{47, "latin1", true}, // "latin1_bin",
|
||||
{48, "latin1", true}, // "latin1_general_ci",
|
||||
{49, "latin1", true}, // "latin1_general_cs",
|
||||
{50, "cp1251", true}, // "cp1251_bin",
|
||||
{51, "cp1251", true}, // "cp1251_general_ci",
|
||||
{52, "cp1251", true}, // "cp1251_general_cs",
|
||||
{53, "macroman", true}, // "macroman_bin",
|
||||
{54, "utf16", true}, // "utf16_general_ci",
|
||||
{55, "utf16", true}, // "utf16_bin",
|
||||
{56, "utf16le", true}, // "utf16le_general_ci",
|
||||
{57, "cp1256", true}, // "cp1256_general_ci",
|
||||
{58, "cp1257", true}, // "cp1257_bin",
|
||||
{59, "cp1257", true}, // "cp1257_general_ci",
|
||||
{60, "utf32", true}, // "utf32_general_ci",
|
||||
{61, "utf32", true}, // "utf32_bin",
|
||||
{62, "utf16le", true}, // "utf16le_bin",
|
||||
{64, "armscii8", true}, // "armscii8_bin",
|
||||
{65, "ascii", true}, // "ascii_bin",
|
||||
{66, "cp1250", true}, // "cp1250_bin",
|
||||
{67, "cp1256", true}, // "cp1256_bin",
|
||||
{68, "cp866", true}, // "cp866_bin",
|
||||
{69, "dec8", true}, // "dec8_bin",
|
||||
{70, "greek", true}, // "greek_bin",
|
||||
{71, "hebrew", true}, // "hebrew_bin",
|
||||
{72, "hp8", true}, // "hp8_bin",
|
||||
{73, "keybcs2", true}, // "keybcs2_bin",
|
||||
{74, "koi8r", true}, // "koi8r_bin",
|
||||
{75, "koi8u", true}, // "koi8u_bin",
|
||||
{77, "latin2", true}, // "latin2_bin",
|
||||
{78, "latin5", true}, // "latin5_bin",
|
||||
{79, "latin7", true}, // "latin7_bin",
|
||||
{80, "cp850", true}, // "cp850_bin",
|
||||
{81, "cp852", true}, // "cp852_bin",
|
||||
{82, "swe7", true}, // "swe7_bin",
|
||||
{83, "utf8", false}, // "utf8_bin",
|
||||
{84, "big5", true}, // "big5_bin",
|
||||
{85, "euckr", true}, // "euckr_bin",
|
||||
{86, "gb2312", true}, // "gb2312_bin",
|
||||
{87, "gbk", true}, // "gbk_bin",
|
||||
{88, "sjis", true}, // "sjis_bin",
|
||||
{89, "tis620", true}, // "tis620_bin",
|
||||
{90, "ucs2", true}, // "ucs2_bin",
|
||||
{91, "ujis", true}, // "ujis_bin",
|
||||
{92, "geostd8", true}, // "geostd8_general_ci",
|
||||
{93, "geostd8", true}, // "geostd8_bin",
|
||||
{94, "latin1", true}, // "latin1_spanish_ci",
|
||||
{95, "cp932", true}, // "cp932_japanese_ci",
|
||||
{96, "cp932", true}, // "cp932_bin",
|
||||
{97, "eucjpms", true}, // "eucjpms_japanese_ci",
|
||||
{98, "eucjpms", true}, // "eucjpms_bin",
|
||||
{99, "cp1250", true}, // "cp1250_polish_ci",
|
||||
{101, "utf16", true}, // "utf16_unicode_ci",
|
||||
{102, "utf16", true}, // "utf16_icelandic_ci",
|
||||
{103, "utf16", true}, // "utf16_latvian_ci",
|
||||
{104, "utf16", true}, // "utf16_romanian_ci",
|
||||
{105, "utf16", true}, // "utf16_slovenian_ci",
|
||||
{106, "utf16", true}, // "utf16_polish_ci",
|
||||
{107, "utf16", true}, // "utf16_estonian_ci",
|
||||
{108, "utf16", true}, // "utf16_spanish_ci",
|
||||
{109, "utf16", true}, // "utf16_swedish_ci",
|
||||
{110, "utf16", true}, // "utf16_turkish_ci",
|
||||
{111, "utf16", true}, // "utf16_czech_ci",
|
||||
{112, "utf16", true}, // "utf16_danish_ci",
|
||||
{113, "utf16", true}, // "utf16_lithuanian_ci",
|
||||
{114, "utf16", true}, // "utf16_slovak_ci",
|
||||
{115, "utf16", true}, // "utf16_spanish2_ci",
|
||||
{116, "utf16", true}, // "utf16_roman_ci",
|
||||
{117, "utf16", true}, // "utf16_persian_ci",
|
||||
{118, "utf16", true}, // "utf16_esperanto_ci",
|
||||
{119, "utf16", true}, // "utf16_hungarian_ci",
|
||||
{120, "utf16", true}, // "utf16_sinhala_ci",
|
||||
{121, "utf16", true}, // "utf16_german2_ci",
|
||||
{122, "utf16", true}, // "utf16_croatian_ci",
|
||||
{123, "utf16", true}, // "utf16_unicode_520_ci",
|
||||
{124, "utf16", true}, // "utf16_vietnamese_ci",
|
||||
{128, "ucs2", true}, // "ucs2_unicode_ci",
|
||||
{129, "ucs2", true}, // "ucs2_icelandic_ci",
|
||||
{130, "ucs2", true}, // "ucs2_latvian_ci",
|
||||
{131, "ucs2", true}, // "ucs2_romanian_ci",
|
||||
{132, "ucs2", true}, // "ucs2_slovenian_ci",
|
||||
{133, "ucs2", true}, // "ucs2_polish_ci",
|
||||
{134, "ucs2", true}, // "ucs2_estonian_ci",
|
||||
{135, "ucs2", true}, // "ucs2_spanish_ci",
|
||||
{136, "ucs2", true}, // "ucs2_swedish_ci",
|
||||
{137, "ucs2", true}, // "ucs2_turkish_ci",
|
||||
{138, "ucs2", true}, // "ucs2_czech_ci",
|
||||
{139, "ucs2", true}, // "ucs2_danish_ci",
|
||||
{140, "ucs2", true}, // "ucs2_lithuanian_ci",
|
||||
{141, "ucs2", true}, // "ucs2_slovak_ci",
|
||||
{142, "ucs2", true}, // "ucs2_spanish2_ci",
|
||||
{143, "ucs2", true}, // "ucs2_roman_ci",
|
||||
{144, "ucs2", true}, // "ucs2_persian_ci",
|
||||
{145, "ucs2", true}, // "ucs2_esperanto_ci",
|
||||
{146, "ucs2", true}, // "ucs2_hungarian_ci",
|
||||
{147, "ucs2", true}, // "ucs2_sinhala_ci",
|
||||
{148, "ucs2", true}, // "ucs2_german2_ci",
|
||||
{149, "ucs2", true}, // "ucs2_croatian_ci",
|
||||
{150, "ucs2", true}, // "ucs2_unicode_520_ci",
|
||||
{151, "ucs2", true}, // "ucs2_vietnamese_ci",
|
||||
{159, "ucs2", true}, // "ucs2_general_mysql500_ci",
|
||||
{160, "utf32", true}, // "utf32_unicode_ci",
|
||||
{161, "utf32", true}, // "utf32_icelandic_ci",
|
||||
{162, "utf32", true}, // "utf32_latvian_ci",
|
||||
{163, "utf32", true}, // "utf32_romanian_ci",
|
||||
{164, "utf32", true}, // "utf32_slovenian_ci",
|
||||
{165, "utf32", true}, // "utf32_polish_ci",
|
||||
{166, "utf32", true}, // "utf32_estonian_ci",
|
||||
{167, "utf32", true}, // "utf32_spanish_ci",
|
||||
{168, "utf32", true}, // "utf32_swedish_ci",
|
||||
{169, "utf32", true}, // "utf32_turkish_ci",
|
||||
{170, "utf32", true}, // "utf32_czech_ci",
|
||||
{171, "utf32", true}, // "utf32_danish_ci",
|
||||
{172, "utf32", true}, // "utf32_lithuanian_ci",
|
||||
{173, "utf32", true}, // "utf32_slovak_ci",
|
||||
{174, "utf32", true}, // "utf32_spanish2_ci",
|
||||
{175, "utf32", true}, // "utf32_roman_ci",
|
||||
{176, "utf32", true}, // "utf32_persian_ci",
|
||||
{177, "utf32", true}, // "utf32_esperanto_ci",
|
||||
{178, "utf32", true}, // "utf32_hungarian_ci",
|
||||
{179, "utf32", true}, // "utf32_sinhala_ci",
|
||||
{180, "utf32", true}, // "utf32_german2_ci",
|
||||
{181, "utf32", true}, // "utf32_croatian_ci",
|
||||
{182, "utf32", true}, // "utf32_unicode_520_ci",
|
||||
{183, "utf32", true}, // "utf32_vietnamese_ci",
|
||||
{192, "utf8", false}, // "utf8_unicode_ci",
|
||||
{193, "utf8", false}, // "utf8_icelandic_ci",
|
||||
{194, "utf8", false}, // "utf8_latvian_ci",
|
||||
{195, "utf8", false}, // "utf8_romanian_ci",
|
||||
{196, "utf8", false}, // "utf8_slovenian_ci",
|
||||
{197, "utf8", false}, // "utf8_polish_ci",
|
||||
{198, "utf8", false}, // "utf8_estonian_ci",
|
||||
{199, "utf8", false}, // "utf8_spanish_ci",
|
||||
{200, "utf8", false}, // "utf8_swedish_ci",
|
||||
{201, "utf8", false}, // "utf8_turkish_ci",
|
||||
{202, "utf8", false}, // "utf8_czech_ci",
|
||||
{203, "utf8", false}, // "utf8_danish_ci",
|
||||
{204, "utf8", false}, // "utf8_lithuanian_ci",
|
||||
{205, "utf8", false}, // "utf8_slovak_ci",
|
||||
{206, "utf8", false}, // "utf8_spanish2_ci",
|
||||
{207, "utf8", false}, // "utf8_roman_ci",
|
||||
{208, "utf8", false}, // "utf8_persian_ci",
|
||||
{209, "utf8", false}, // "utf8_esperanto_ci",
|
||||
{210, "utf8", false}, // "utf8_hungarian_ci",
|
||||
{211, "utf8", false}, // "utf8_sinhala_ci",
|
||||
{212, "utf8", false}, // "utf8_german2_ci",
|
||||
{213, "utf8", false}, // "utf8_croatian_ci",
|
||||
{214, "utf8", false}, // "utf8_unicode_520_ci",
|
||||
{215, "utf8", false}, // "utf8_vietnamese_ci",
|
||||
{223, "utf8", false}, // "utf8_general_mysql500_ci",
|
||||
{224, "utf8mb4", false}, // "utf8mb4_unicode_ci",
|
||||
{225, "utf8mb4", false}, // "utf8mb4_icelandic_ci",
|
||||
{226, "utf8mb4", false}, // "utf8mb4_latvian_ci",
|
||||
{227, "utf8mb4", false}, // "utf8mb4_romanian_ci",
|
||||
{228, "utf8mb4", false}, // "utf8mb4_slovenian_ci",
|
||||
{229, "utf8mb4", false}, // "utf8mb4_polish_ci",
|
||||
{230, "utf8mb4", false}, // "utf8mb4_estonian_ci",
|
||||
{231, "utf8mb4", false}, // "utf8mb4_spanish_ci",
|
||||
{232, "utf8mb4", false}, // "utf8mb4_swedish_ci",
|
||||
{233, "utf8mb4", false}, // "utf8mb4_turkish_ci",
|
||||
{234, "utf8mb4", false}, // "utf8mb4_czech_ci",
|
||||
{235, "utf8mb4", false}, // "utf8mb4_danish_ci",
|
||||
{236, "utf8mb4", false}, // "utf8mb4_lithuanian_ci",
|
||||
{237, "utf8mb4", false}, // "utf8mb4_slovak_ci",
|
||||
{238, "utf8mb4", false}, // "utf8mb4_spanish2_ci",
|
||||
{239, "utf8mb4", false}, // "utf8mb4_roman_ci",
|
||||
{240, "utf8mb4", false}, // "utf8mb4_persian_ci",
|
||||
{241, "utf8mb4", false}, // "utf8mb4_esperanto_ci",
|
||||
{242, "utf8mb4", false}, // "utf8mb4_hungarian_ci",
|
||||
{243, "utf8mb4", false}, // "utf8mb4_sinhala_ci",
|
||||
{244, "utf8mb4", false}, // "utf8mb4_german2_ci",
|
||||
{245, "utf8mb4", false}, // "utf8mb4_croatian_ci",
|
||||
{246, "utf8mb4", false}, // "utf8mb4_unicode_520_ci",
|
||||
{247, "utf8mb4", false}, // "utf8mb4_vietnamese_ci",
|
||||
{248, "gb18030", true}, // "gb18030_chinese_ci",
|
        {249, "gb18030", true}, // "gb18030_bin",
        {250, "gb18030", true}, // "gb18030_unicode_520_ci",
        {255, "utf8mb4", false}, // "utf8mb4_0900_ai_ci",
        {256, "utf8mb4", false}, // "utf8mb4_de_pb_0900_ai_ci",
        {257, "utf8mb4", false}, // "utf8mb4_is_0900_ai_ci",
        {258, "utf8mb4", false}, // "utf8mb4_lv_0900_ai_ci",
        {259, "utf8mb4", false}, // "utf8mb4_ro_0900_ai_ci",
        {260, "utf8mb4", false}, // "utf8mb4_sl_0900_ai_ci",
        {261, "utf8mb4", false}, // "utf8mb4_pl_0900_ai_ci",
        {262, "utf8mb4", false}, // "utf8mb4_et_0900_ai_ci",
        {263, "utf8mb4", false}, // "utf8mb4_es_0900_ai_ci",
        {264, "utf8mb4", false}, // "utf8mb4_is_0900_ai_ci",
        {265, "utf8mb4", false}, // "utf8mb4_tr_0900_ai_ci",
        {266, "utf8mb4", false}, // "utf8mb4_cs_0900_ai_ci",
        {267, "utf8mb4", false}, // "utf8mb4_da_0900_ai_ci",
        {268, "utf8mb4", false}, // "utf8mb4_lt_0900_ai_ci",
        {269, "utf8mb4", false}, // "utf8mb4_sk_0900_ai_ci",
        {270, "utf8mb4", false}, // "utf8mb4_es_trad_0900_ai_ci",
        {271, "utf8mb4", false}, // "utf8mb4_la_0900_ai_ci",
        {272, "utf8mb4", false}, // "utf8mb4_fa_0900_ai_ci",
        {273, "utf8mb4", false}, // "utf8mb4_eo_0900_ai_ci",
        {274, "utf8mb4", false}, // "utf8mb4_hu_0900_ai_ci",
        {275, "utf8mb4", false}, // "utf8mb4_hr_0900_ai_ci",
        {276, "utf8mb4", false}, // "utf8mb4_si_0900_ai_ci",
        {277, "utf8mb4", false}, // "utf8mb4_vi_0900_ai_ci",
        {278, "utf8mb4", false}, // "utf8mb4_0900_as_cs",
        {279, "utf8mb4", false}, // "utf8mb4_de_pb_0900_as_cs",
        {280, "utf8mb4", false}, // "utf8mb4_is_0900_as_cs",
        {281, "utf8mb4", false}, // "utf8mb4_lv_0900_as_cs",
        {282, "utf8mb4", false}, // "utf8mb4_ro_0900_as_cs",
        {283, "utf8mb4", false}, // "utf8mb4_sl_0900_as_cs",
        {284, "utf8mb4", false}, // "utf8mb4_pl_0900_as_cs",
        {285, "utf8mb4", false}, // "utf8mb4_et_0900_as_cs",
        {286, "utf8mb4", false}, // "utf8mb4_es_0900_as_cs",
        {287, "utf8mb4", false}, // "utf8mb4_sv_0900_as_cs",
        {288, "utf8mb4", false}, // "utf8mb4_tr_0900_as_cs",
        {289, "utf8mb4", false}, // "utf8mb4_cs_0900_as_cs",
        {290, "utf8mb4", false}, // "utf8mb4_da_0900_as_cs"
        {291, "utf8mb4", false}, // "utf8mb4_lt_0900_as_cs"
        {292, "utf8mb4", false}, // "utf8mb4_sk_0900_as_cs"
        {293, "utf8mb4", false}, // "utf8mb4_es_trad_0900_as_cs"
        {294, "utf8mb4", false}, // "utf8mb4_la_0900_as_cs"
        {295, "utf8mb4", false}, // "utf8mb4_fa_0900_as_cs"
        {296, "utf8mb4", false}, // "utf8mb4_eo_0900_as_cs"
        {297, "utf8mb4", false}, // "utf8mb4_hu_0900_as_cs"
        {298, "utf8mb4", false}, // "utf8mb4_hr_0900_as_cs"
        {299, "utf8mb4", false}, // "utf8mb4_si_0900_as_cs"
        {300, "utf8mb4", false}, // "utf8mb4_vi_0900_as_cs"
        {303, "utf8mb4", false}, // "utf8mb4_ja_0900_as_cs_ks"
        {304, "utf8mb4", false}, // "utf8mb4_la_0900_as_cs"
        {305, "utf8mb4", false}, // "utf8mb4_0900_as_ci"
        {306, "utf8mb4", false}, // "utf8mb4_ru_0900_ai_ci"
        {307, "utf8mb4", false}, // "utf8mb4_ru_0900_as_cs"
        {308, "utf8mb4", false}, // "utf8mb4_zh_0900_as_cs"
        {309, "utf8mb4", false} // "utf8mb4_0900_bin"
    };

    MySQLCharset charset;

    for (auto & item : result)
    {
        EXPECT_TRUE(charset.needConvert(item.id) == item.need_convert);
        if (charset.needConvert(item.id))
        {
            EXPECT_TRUE(charset.getCharsetFromId(item.id) == item.name);
        }
    }
}

}
@ -441,11 +441,10 @@ void DatabaseAtomic::beforeLoadingMetadata(ContextMutablePtr /*context*/, Loadin
    }
}

void DatabaseAtomic::loadStoredObjects(
    ContextMutablePtr local_context, LoadingStrictnessLevel mode, bool skip_startup_tables)
void DatabaseAtomic::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel mode)
{
    beforeLoadingMetadata(local_context, mode);
    DatabaseOrdinary::loadStoredObjects(local_context, mode, skip_startup_tables);
    DatabaseOrdinary::loadStoredObjects(local_context, mode);
}

void DatabaseAtomic::startupTables(ThreadPool & thread_pool, LoadingStrictnessLevel mode)

@ -48,7 +48,7 @@ public:

    DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;

    void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode, bool skip_startup_tables) override;
    void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode) override;

    void beforeLoadingMetadata(ContextMutablePtr context, LoadingStrictnessLevel mode) override;

@ -37,8 +37,7 @@ DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_,
}


void DatabaseLazy::loadStoredObjects(
    ContextMutablePtr local_context, LoadingStrictnessLevel /*mode*/, bool /* skip_startup_tables */)
void DatabaseLazy::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel /*mode*/)
{
    iterateMetadataFiles(local_context, [this, &local_context](const String & file_name)
    {

@ -26,7 +26,7 @@ public:

    bool canContainDistributedTables() const override { return false; }

    void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel /*mode*/, bool skip_startup_tables) override;
    void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel /*mode*/) override;

    void createTable(
        ContextPtr context,

@ -89,8 +89,7 @@ DatabaseOrdinary::DatabaseOrdinary(
{
}

void DatabaseOrdinary::loadStoredObjects(
    ContextMutablePtr local_context, LoadingStrictnessLevel mode, bool skip_startup_tables)
void DatabaseOrdinary::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel mode)
{
    /** Tables load faster if they are loaded in sorted (by name) order.
      * Otherwise (for the ext4 filesystem), `DirectoryIterator` iterates through them in some order,
@ -159,12 +158,6 @@ void DatabaseOrdinary::loadStoredObjects(
    }

    pool.wait();

    if (!skip_startup_tables)
    {
        /// After all tables was basically initialized, startup them.
        startupTables(pool, mode);
    }
}

void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTablesMetadata & metadata, bool is_startup)

@ -21,7 +21,7 @@ public:
|
||||
|
||||
String getEngineName() const override { return "Ordinary"; }
|
||||
|
||||
void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode, bool skip_startup_tables) override;
|
||||
void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode) override;
|
||||
|
||||
bool supportsLoadingInTopologicalOrder() const override { return true; }
|
||||
|
||||
|
@ -495,11 +495,10 @@ void DatabaseReplicated::beforeLoadingMetadata(ContextMutablePtr /*context*/, Lo
|
||||
tryConnectToZooKeeperAndInitDatabase(mode);
|
||||
}
|
||||
|
||||
void DatabaseReplicated::loadStoredObjects(
|
||||
ContextMutablePtr local_context, LoadingStrictnessLevel mode, bool skip_startup_tables)
|
||||
void DatabaseReplicated::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel mode)
|
||||
{
|
||||
beforeLoadingMetadata(local_context, mode);
|
||||
DatabaseAtomic::loadStoredObjects(local_context, mode, skip_startup_tables);
|
||||
DatabaseAtomic::loadStoredObjects(local_context, mode);
|
||||
}
|
||||
|
||||
UInt64 DatabaseReplicated::getMetadataHash(const String & table_name) const
|
||||
|
@ -67,7 +67,7 @@ public:
|
||||
|
||||
void drop(ContextPtr /*context*/) override;
|
||||
|
||||
void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode, bool skip_startup_tables) override;
|
||||
void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode) override;
|
||||
|
||||
void beforeLoadingMetadata(ContextMutablePtr context, LoadingStrictnessLevel mode) override;
|
||||
|
||||
|
@ -134,8 +134,7 @@ public:
|
||||
/// You can call only once, right after the object is created.
|
||||
virtual void loadStoredObjects( /// NOLINT
|
||||
ContextMutablePtr /*context*/,
|
||||
LoadingStrictnessLevel /*mode*/,
|
||||
bool /* skip_startup_tables */)
|
||||
LoadingStrictnessLevel /*mode*/)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -402,7 +402,7 @@ String DatabaseMySQL::getMetadataPath() const
|
||||
return metadata_path;
|
||||
}
|
||||
|
||||
void DatabaseMySQL::loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/, bool /* skip_startup_tables */)
|
||||
void DatabaseMySQL::loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/)
|
||||
{
|
||||
|
||||
std::lock_guard lock{mutex};
|
||||
|
@ -76,7 +76,7 @@ public:
|
||||
|
||||
void createTable(ContextPtr, const String & table_name, const StoragePtr & storage, const ASTPtr & create_query) override;
|
||||
|
||||
void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/, bool skip_startup_tables) override;
|
||||
void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/) override;
|
||||
|
||||
StoragePtr detachTable(ContextPtr context, const String & table_name) override;
|
||||
|
||||
|
@ -296,7 +296,7 @@ void DatabasePostgreSQL::drop(ContextPtr /*context*/)
|
||||
}
|
||||
|
||||
|
||||
void DatabasePostgreSQL::loadStoredObjects(ContextMutablePtr /* context */, LoadingStrictnessLevel /*mode*/, bool /* skip_startup_tables */)
|
||||
void DatabasePostgreSQL::loadStoredObjects(ContextMutablePtr /* context */, LoadingStrictnessLevel /*mode*/)
|
||||
{
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
|
@ -44,7 +44,7 @@ public:
|
||||
|
||||
bool empty() const override;
|
||||
|
||||
void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/, bool skip_startup_tables) override;
|
||||
void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/) override;
|
||||
|
||||
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;
|
||||
|
||||
|
@ -49,7 +49,7 @@ void TablesLoader::loadTables()
        if (need_resolve_dependencies && database.second->supportsLoadingInTopologicalOrder())
            databases_to_load.push_back(database.first);
        else
            database.second->loadStoredObjects(global_context, strictness_mode, /* skip_startup_tables */ true);
            database.second->loadStoredObjects(global_context, strictness_mode);
    }

    if (databases_to_load.empty())

@ -114,7 +114,7 @@ std::future<IAsynchronousReader::Result> ThreadPoolReader::submit(Request reques
    /// It reports real time spent including the time spent while thread was preempted doing nothing.
    /// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables).
    /// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it
    /// (TaskStatsInfoGetter has about 500K RPS).
    /// (NetlinkMetricsProvider has about 500K RPS).
    Stopwatch watch(CLOCK_MONOTONIC);

    SCOPE_EXIT({

@ -33,46 +33,18 @@ const std::string & MetadataStorageFromStaticFilesWebServer::getPath() const
|
||||
|
||||
bool MetadataStorageFromStaticFilesWebServer::exists(const std::string & path) const
|
||||
{
|
||||
fs::path fs_path(path);
|
||||
if (fs_path.has_extension())
|
||||
fs_path = fs_path.parent_path();
|
||||
|
||||
initializeIfNeeded(fs_path);
|
||||
|
||||
if (object_storage.files.empty())
|
||||
return false;
|
||||
|
||||
if (object_storage.files.contains(path))
|
||||
return true;
|
||||
|
||||
/// `object_storage.files` contains files + directories only inside `metadata_path / uuid_3_digit / uuid /`
|
||||
/// (specific table files only), but we need to be able to also tell if `exists(<metadata_path>)`, for example.
|
||||
auto it = std::lower_bound(
|
||||
object_storage.files.begin(),
|
||||
object_storage.files.end(),
|
||||
path,
|
||||
[](const auto & file, const std::string & path_) { return file.first < path_; }
|
||||
);
|
||||
if (it == object_storage.files.end())
|
||||
return false;
|
||||
|
||||
if (startsWith(it->first, path)
|
||||
|| (it != object_storage.files.begin() && startsWith(std::prev(it)->first, path)))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
return object_storage.exists(path);
|
||||
}
|
||||
|
||||
void MetadataStorageFromStaticFilesWebServer::assertExists(const std::string & path) const
|
||||
{
|
||||
initializeIfNeeded(path);
|
||||
|
||||
if (!exists(path))
|
||||
#ifdef NDEBUG
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no path {}", path);
|
||||
#else
|
||||
{
|
||||
std::string all_files;
|
||||
std::shared_lock shared_lock(object_storage.metadata_mutex);
|
||||
for (const auto & [file, _] : object_storage.files)
|
||||
{
|
||||
if (!all_files.empty())
|
||||
@ -87,33 +59,40 @@ void MetadataStorageFromStaticFilesWebServer::assertExists(const std::string & p
|
||||
bool MetadataStorageFromStaticFilesWebServer::isFile(const std::string & path) const
|
||||
{
|
||||
assertExists(path);
|
||||
std::shared_lock shared_lock(object_storage.metadata_mutex);
|
||||
return object_storage.files.at(path).type == WebObjectStorage::FileType::File;
|
||||
}
|
||||
|
||||
bool MetadataStorageFromStaticFilesWebServer::isDirectory(const std::string & path) const
|
||||
{
|
||||
assertExists(path);
|
||||
std::shared_lock shared_lock(object_storage.metadata_mutex);
|
||||
return object_storage.files.at(path).type == WebObjectStorage::FileType::Directory;
|
||||
}
|
||||
|
||||
uint64_t MetadataStorageFromStaticFilesWebServer::getFileSize(const String & path) const
|
||||
{
|
||||
assertExists(path);
|
||||
std::shared_lock shared_lock(object_storage.metadata_mutex);
|
||||
return object_storage.files.at(path).size;
|
||||
}
|
||||
|
||||
StoredObjects MetadataStorageFromStaticFilesWebServer::getStorageObjects(const std::string & path) const
|
||||
{
|
||||
assertExists(path);
|
||||
|
||||
auto fs_path = fs::path(object_storage.url) / path;
|
||||
std::string remote_path = fs_path.parent_path() / (escapeForFileName(fs_path.stem()) + fs_path.extension().string());
|
||||
remote_path = remote_path.substr(object_storage.url.size());
|
||||
|
||||
std::shared_lock shared_lock(object_storage.metadata_mutex);
|
||||
return {StoredObject(remote_path, object_storage.files.at(path).size, path)};
|
||||
}
|
||||
|
||||
std::vector<std::string> MetadataStorageFromStaticFilesWebServer::listDirectory(const std::string & path) const
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
std::shared_lock shared_lock(object_storage.metadata_mutex);
|
||||
for (const auto & [file_path, _] : object_storage.files)
|
||||
{
|
||||
if (file_path.starts_with(path))
|
||||
@ -122,22 +101,14 @@ std::vector<std::string> MetadataStorageFromStaticFilesWebServer::listDirectory(
|
||||
return result;
|
||||
}
|
||||
|
||||
void MetadataStorageFromStaticFilesWebServer::initializeIfNeeded(const std::string & path) const
|
||||
{
|
||||
if (object_storage.files.find(path) == object_storage.files.end())
|
||||
{
|
||||
object_storage.initialize(fs::path(object_storage.url) / path);
|
||||
}
|
||||
}
|
||||
|
||||
DirectoryIteratorPtr MetadataStorageFromStaticFilesWebServer::iterateDirectory(const std::string & path) const
|
||||
{
|
||||
std::vector<fs::path> dir_file_paths;
|
||||
|
||||
initializeIfNeeded(path);
|
||||
if (!exists(path))
|
||||
return std::make_unique<StaticDirectoryIterator>(std::move(dir_file_paths));
|
||||
|
||||
std::shared_lock shared_lock(object_storage.metadata_mutex);
|
||||
for (const auto & [file_path, _] : object_storage.files)
|
||||
{
|
||||
if (fs::path(parentPath(file_path)) / "" == fs::path(path) / "")
|
||||
|
@ -13,13 +13,14 @@ class MetadataStorageFromStaticFilesWebServer final : public IMetadataStorage
|
||||
{
|
||||
private:
|
||||
friend class MetadataStorageFromStaticFilesWebServerTransaction;
|
||||
using FileType = WebObjectStorage::FileType;
|
||||
|
||||
const WebObjectStorage & object_storage;
|
||||
std::string root_path;
|
||||
|
||||
void assertExists(const std::string & path) const;
|
||||
|
||||
void initializeIfNeeded(const std::string & path) const;
|
||||
void initializeImpl(const String & uri_path, const std::unique_lock<std::shared_mutex> &) const;
|
||||
|
||||
public:
|
||||
explicit MetadataStorageFromStaticFilesWebServer(const WebObjectStorage & object_storage_);
|
||||
|
@ -28,10 +28,9 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int NETWORK_ERROR;
|
||||
}
|
||||
|
||||
void WebObjectStorage::initialize(const String & uri_path) const
|
||||
void WebObjectStorage::initialize(const String & uri_path, const std::unique_lock<std::shared_mutex> & lock) const
|
||||
{
|
||||
std::vector<String> directories_to_load;
|
||||
LOG_TRACE(log, "Loading metadata for directory: {}", uri_path);
|
||||
@ -81,8 +80,9 @@ void WebObjectStorage::initialize(const String & uri_path) const
|
||||
}
|
||||
|
||||
file_path = file_path.substr(url.size());
|
||||
files.emplace(std::make_pair(file_path, file_data));
|
||||
LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Adding file: {}, size: {}", file_path, file_data.size);
|
||||
|
||||
files.emplace(std::make_pair(file_path, file_data));
|
||||
}
|
||||
|
||||
files.emplace(std::make_pair(dir_name, FileData({ .type = FileType::Directory })));
|
||||
@ -103,7 +103,7 @@ void WebObjectStorage::initialize(const String & uri_path) const
|
||||
}
|
||||
|
||||
for (const auto & directory_path : directories_to_load)
|
||||
initialize(directory_path);
|
||||
initialize(directory_path, lock);
|
||||
}
|
||||
|
||||
|
||||
@ -118,31 +118,51 @@ WebObjectStorage::WebObjectStorage(
|
||||
|
||||
bool WebObjectStorage::exists(const StoredObject & object) const
|
||||
{
|
||||
const auto & path = object.remote_path;
|
||||
return exists(object.remote_path);
|
||||
}
|
||||
|
||||
bool WebObjectStorage::exists(const std::string & path) const
|
||||
{
|
||||
LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Checking existence of path: {}", path);
|
||||
|
||||
if (files.find(path) != files.end())
|
||||
std::shared_lock shared_lock(metadata_mutex);
|
||||
|
||||
if (files.find(path) == files.end())
|
||||
{
|
||||
shared_lock.unlock();
|
||||
std::unique_lock unique_lock(metadata_mutex);
|
||||
if (files.find(path) == files.end())
|
||||
{
|
||||
fs::path index_file_dir = fs::path(url) / path;
|
||||
if (index_file_dir.has_extension())
|
||||
index_file_dir = index_file_dir.parent_path();
|
||||
|
||||
initialize(index_file_dir, unique_lock);
|
||||
}
|
||||
/// Files are never deleted from `files` as disk is read only, so no worry that we unlock now.
|
||||
unique_lock.unlock();
|
||||
shared_lock.lock();
|
||||
}
|
||||
|
||||
if (files.empty())
|
||||
return false;
|
||||
|
||||
if (files.contains(path))
|
||||
return true;
|
||||
|
||||
if (path.ends_with(MergeTreeData::FORMAT_VERSION_FILE_NAME) && files.find(fs::path(path).parent_path() / "") == files.end())
|
||||
{
|
||||
try
|
||||
{
|
||||
initialize(fs::path(url) / fs::path(path).parent_path());
|
||||
return files.find(path) != files.end();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
const auto message = getCurrentExceptionMessage(false);
|
||||
bool can_throw = CurrentThread::isInitialized() && CurrentThread::get().getQueryContext();
|
||||
if (can_throw)
|
||||
throw Exception(ErrorCodes::NETWORK_ERROR, "Cannot load disk metadata. Error: {}", message);
|
||||
/// `object_storage.files` contains files + directories only inside `metadata_path / uuid_3_digit / uuid /`
|
||||
/// (specific table files only), but we need to be able to also tell if `exists(<metadata_path>)`, for example.
|
||||
auto it = std::lower_bound(
|
||||
files.begin(), files.end(), path,
|
||||
[](const auto & file, const std::string & path_) { return file.first < path_; }
|
||||
);
|
||||
|
||||
LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Cannot load disk metadata. Error: {}", message);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (it == files.end())
|
||||
return false;
|
||||
|
||||
if (startsWith(it->first, path)
|
||||
|| (it != files.begin() && startsWith(std::prev(it)->first, path)))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include "config.h"
|
||||
|
||||
#include <Disks/ObjectStorages/IObjectStorage.h>
|
||||
#include <shared_mutex>
|
||||
|
||||
namespace Poco
|
||||
{
|
||||
@ -93,9 +94,8 @@ public:
|
||||
bool isReadOnly() const override { return true; }
|
||||
|
||||
protected:
|
||||
void initialize(const String & uri_path) const;
|
||||
|
||||
[[noreturn]] static void throwNotAllowed();
|
||||
bool exists(const std::string & path) const;
|
||||
|
||||
enum class FileType
|
||||
{
|
||||
@ -111,12 +111,13 @@ protected:
|
||||
|
||||
using Files = std::map<String, FileData>; /// file path -> file data
|
||||
mutable Files files;
|
||||
|
||||
String url;
|
||||
mutable std::shared_mutex metadata_mutex;
|
||||
|
||||
private:
|
||||
Poco::Logger * log;
|
||||
void initialize(const String & path, const std::unique_lock<std::shared_mutex> &) const;
|
||||
|
||||
const String url;
|
||||
Poco::Logger * log;
|
||||
size_t min_bytes_for_seek;
|
||||
};
|
||||
|
||||
|
@ -21,7 +21,6 @@ list (APPEND PUBLIC_LIBS
|
||||
dbms
|
||||
ch_contrib::metrohash
|
||||
ch_contrib::murmurhash
|
||||
ch_contrib::hashidsxx
|
||||
ch_contrib::morton_nd
|
||||
)
|
||||
|
||||
|
@ -1,12 +0,0 @@
|
||||
#include "FunctionHashID.h"
|
||||
#include <Functions/FunctionFactory.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
REGISTER_FUNCTION(HashID)
|
||||
{
|
||||
factory.registerFunction<FunctionHashID>();
|
||||
}
|
||||
|
||||
}
|
@ -1,170 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <hashids.h>
|
||||
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
#include <functional>
|
||||
#include <initializer_list>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int SUPPORT_IS_DISABLED;
|
||||
extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
|
||||
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
|
||||
}
|
||||
|
||||
// hashid(string, salt)
|
||||
class FunctionHashID : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "hashid";
|
||||
|
||||
static FunctionPtr create(ContextPtr context)
|
||||
{
|
||||
if (!context->getSettingsRef().allow_experimental_hash_functions)
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
|
||||
"Hashing function '{}' is experimental. Set `allow_experimental_hash_functions` setting to enable it", name);
|
||||
|
||||
return std::make_shared<FunctionHashID>();
|
||||
}
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
|
||||
bool isVariadic() const override { return true; }
|
||||
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2, 3}; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
if (arguments.empty())
|
||||
throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least one argument", getName());
|
||||
|
||||
const auto & id_col = arguments[0];
|
||||
if (!isUnsignedInteger(id_col.type))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"First argument of function {} must be unsigned integer, got {}",
|
||||
getName(),
|
||||
arguments[0].type->getName());
|
||||
|
||||
if (arguments.size() > 1)
|
||||
{
|
||||
const auto & hash_col = arguments[1];
|
||||
if (!isString(hash_col.type))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Second argument of function {} must be String, got {}",
|
||||
getName(),
|
||||
arguments[1].type->getName());
|
||||
}
|
||||
|
||||
if (arguments.size() > 2)
|
||||
{
|
||||
const auto & min_length_col = arguments[2];
|
||||
if (!isUInt8(min_length_col.type))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Third argument of function {} must be UInt8, got {}",
|
||||
getName(),
|
||||
arguments[2].type->getName());
|
||||
}
|
||||
|
||||
if (arguments.size() > 3)
|
||||
{
|
||||
const auto & alphabet_col = arguments[3];
|
||||
if (!isString(alphabet_col.type))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Fourth argument of function {} must be String, got {}",
|
||||
getName(),
|
||||
arguments[3].type->getName());
|
||||
}
|
||||
|
||||
if (arguments.size() > 4)
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION,
|
||||
"Function {} expect no more than four arguments (integer, salt, min_length, optional_alphabet), got {}",
|
||||
getName(),
|
||||
arguments.size());
|
||||
}
|
||||
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
const auto & numcolumn = arguments[0].column;
|
||||
|
||||
if (checkAndGetColumn<ColumnUInt8>(numcolumn.get()) || checkAndGetColumn<ColumnUInt16>(numcolumn.get())
|
||||
|| checkAndGetColumn<ColumnUInt32>(numcolumn.get()) || checkAndGetColumn<ColumnUInt64>(numcolumn.get()))
|
||||
{
|
||||
std::string salt;
|
||||
UInt8 min_length = 0;
|
||||
std::string alphabet;
|
||||
|
||||
if (arguments.size() >= 4)
|
||||
{
|
||||
const auto & alphabetcolumn = arguments[3].column;
|
||||
if (const auto * alpha_col = checkAndGetColumnConst<ColumnString>(alphabetcolumn.get()))
|
||||
{
|
||||
alphabet = alpha_col->getValue<String>();
|
||||
if (alphabet.find('\0') != std::string::npos)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Custom alphabet must not contain null character");
|
||||
}
|
||||
}
|
||||
else
|
||||
alphabet.assign(DEFAULT_ALPHABET);
|
||||
|
||||
if (arguments.size() >= 3)
|
||||
{
|
||||
const auto & minlengthcolumn = arguments[2].column;
|
||||
if (const auto * min_length_col = checkAndGetColumnConst<ColumnUInt8>(minlengthcolumn.get()))
|
||||
min_length = min_length_col->getValue<UInt8>();
|
||||
}
|
||||
|
||||
if (arguments.size() >= 2)
|
||||
{
|
||||
const auto & saltcolumn = arguments[1].column;
|
||||
if (const auto * salt_col = checkAndGetColumnConst<ColumnString>(saltcolumn.get()))
|
||||
salt = salt_col->getValue<String>();
|
||||
}
|
||||
|
||||
hashidsxx::Hashids hash(salt, min_length, alphabet);
|
||||
|
||||
auto col_res = ColumnString::create();
|
||||
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
{
|
||||
col_res->insert(hash.encode({numcolumn->getUInt(i)}));
|
||||
}
|
||||
|
||||
return col_res;
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function hashid",
|
||||
arguments[0].column->getName());
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -79,51 +79,28 @@ namespace impl
|
||||
UInt64 key1 = 0;
|
||||
};
|
||||
|
||||
struct SipHashKeyColumns
|
||||
static SipHashKey parseSipHashKey(const ColumnWithTypeAndName & key)
|
||||
{
|
||||
ColumnPtr key0;
|
||||
ColumnPtr key1;
|
||||
bool is_const;
|
||||
SipHashKey ret{};
|
||||
|
||||
size_t size() const
|
||||
{
|
||||
assert(key0 && key1);
|
||||
assert(key0->size() == key1->size());
|
||||
return key0->size();
|
||||
}
|
||||
SipHashKey getKey(size_t i) const
|
||||
{
|
||||
if (is_const)
|
||||
i = 0;
|
||||
const auto & key0data = assert_cast<const ColumnUInt64 &>(*key0).getData();
|
||||
const auto & key1data = assert_cast<const ColumnUInt64 &>(*key1).getData();
|
||||
return {key0data[i], key1data[i]};
|
||||
}
|
||||
};
|
||||
|
||||
static SipHashKeyColumns parseSipHashKeyColumns(const ColumnWithTypeAndName & key)
|
||||
{
|
||||
const ColumnTuple * tuple = nullptr;
|
||||
const auto * column = key.column.get();
|
||||
bool is_const = false;
|
||||
if (isColumnConst(*column))
|
||||
{
|
||||
is_const = true;
|
||||
tuple = checkAndGetColumnConstData<ColumnTuple>(column);
|
||||
}
|
||||
else
|
||||
tuple = checkAndGetColumn<ColumnTuple>(column);
|
||||
const auto * tuple = checkAndGetColumn<ColumnTuple>(key.column.get());
|
||||
if (!tuple)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "key must be a tuple");
|
||||
|
||||
if (tuple->tupleSize() != 2)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64");
|
||||
|
||||
SipHashKeyColumns ret{tuple->getColumnPtr(0), tuple->getColumnPtr(1), is_const};
|
||||
assert(ret.key0);
|
||||
if (!checkColumn<ColumnUInt64>(*ret.key0))
|
||||
if (tuple->empty())
|
||||
return ret;
|
||||
|
||||
if (const auto * key0col = checkAndGetColumn<ColumnUInt64>(&(tuple->getColumn(0))))
|
||||
ret.key0 = key0col->get64(0);
|
||||
else
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "first element of the key tuple is not UInt64");
|
||||
assert(ret.key1);
|
||||
if (!checkColumn<ColumnUInt64>(*ret.key1))
|
||||
|
||||
if (const auto * key1col = checkAndGetColumn<ColumnUInt64>(&(tuple->getColumn(1))))
|
||||
ret.key1 = key1col->get64(0);
|
||||
else
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "second element of the key tuple is not UInt64");
|
||||
|
||||
return ret;
|
||||
@ -352,10 +329,8 @@ struct SipHash64KeyedImpl
|
||||
static constexpr auto name = "sipHash64Keyed";
|
||||
using ReturnType = UInt64;
|
||||
using Key = impl::SipHashKey;
|
||||
using KeyColumns = impl::SipHashKeyColumns;
|
||||
|
||||
static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
|
||||
static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
|
||||
static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); }
|
||||
|
||||
static UInt64 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash64Keyed(key.key0, key.key1, begin, size); }
|
||||
|
||||
@ -396,10 +371,8 @@ struct SipHash128KeyedImpl
|
||||
static constexpr auto name = "sipHash128Keyed";
|
||||
using ReturnType = UInt128;
|
||||
using Key = impl::SipHashKey;
|
||||
using KeyColumns = impl::SipHashKeyColumns;
|
||||
|
||||
static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
|
||||
static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
|
||||
static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); }
|
||||
|
||||
static UInt128 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash128Keyed(key.key0, key.key1, begin, size); }
|
||||
|
||||
@ -425,43 +398,13 @@ struct SipHash128ReferenceImpl
|
||||
|
||||
using ReturnType = UInt128;
|
||||
|
||||
static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc<UInt128, SipHash128ReferenceImpl>(h1, h2); }
|
||||
static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc<UInt128, SipHash128Impl>(h1, h2); }
|
||||
|
||||
static UInt128 apply(const char * data, const size_t size) { return sipHash128Reference(data, size); }
|
||||
|
||||
static constexpr bool use_int_hash_for_pods = false;
|
||||
};
|
||||
|
||||
struct SipHash128ReferenceKeyedImpl
|
||||
{
|
||||
static constexpr auto name = "sipHash128ReferenceKeyed";
|
||||
using ReturnType = UInt128;
|
||||
using Key = impl::SipHashKey;
|
||||
using KeyColumns = impl::SipHashKeyColumns;
|
||||
|
||||
static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
|
||||
static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
|
||||
|
||||
static UInt128 applyKeyed(const Key & key, const char * begin, size_t size)
|
||||
{
|
||||
return sipHash128ReferenceKeyed(key.key0, key.key1, begin, size);
|
||||
}
|
||||
|
||||
static UInt128 combineHashesKeyed(const Key & key, UInt128 h1, UInt128 h2)
|
||||
{
|
||||
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
UInt128 tmp;
|
||||
reverseMemcpy(&tmp, &h1, sizeof(UInt128));
|
||||
h1 = tmp;
|
||||
reverseMemcpy(&tmp, &h2, sizeof(UInt128));
|
||||
h2 = tmp;
|
||||
#endif
|
||||
UInt128 hashes[] = {h1, h2};
|
||||
return applyKeyed(key, reinterpret_cast<const char *>(hashes), 2 * sizeof(UInt128));
|
||||
}
|
||||
|
||||
static constexpr bool use_int_hash_for_pods = false;
|
||||
};
|
||||
|
||||
/** Why we need MurmurHash2?
|
||||
* MurmurHash2 is an outdated hash function, superseded by MurmurHash3 and subsequently by CityHash, xxHash, HighwayHash.
|
||||
@ -1080,7 +1023,7 @@ private:
|
||||
|
||||
DECLARE_MULTITARGET_CODE(
|
||||
|
||||
template <typename Impl, bool Keyed, typename KeyType, typename KeyColumnsType>
|
||||
template <typename Impl, bool Keyed, typename KeyType>
|
||||
class FunctionAnyHash : public IFunction
|
||||
{
|
||||
public:
|
||||
@ -1090,12 +1033,9 @@ private:
|
||||
using ToType = typename Impl::ReturnType;
|
||||
|
||||
template <typename FromType, bool first>
|
||||
void executeIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
void executeIntType(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
{
|
||||
using ColVecType = ColumnVectorOrDecimal<FromType>;
|
||||
KeyType key{};
|
||||
if constexpr (Keyed)
|
||||
key = Impl::getKey(key_cols, 0);
|
||||
|
||||
if (const ColVecType * col_from = checkAndGetColumn<ColVecType>(column))
|
||||
{
|
||||
@ -1104,9 +1044,6 @@ private:
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
ToType hash;
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
|
||||
if constexpr (Impl::use_int_hash_for_pods)
|
||||
{
|
||||
@ -1140,14 +1077,6 @@ private:
|
||||
}
|
||||
else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
{
|
||||
if (!key_cols.is_const)
|
||||
{
|
||||
ColumnPtr full_column = col_from_const->convertToFullColumn();
|
||||
return executeIntType<FromType, first>(key_cols, full_column.get(), vec_to);
|
||||
}
|
||||
}
|
||||
auto value = col_from_const->template getValue<FromType>();
|
||||
ToType hash;
|
||||
|
||||
@ -1178,15 +1107,8 @@ private:
|
||||
if constexpr (first)
|
||||
vec_to.assign(size, hash);
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
vec_to[i] = combineHashes(key, vec_to[i], hash);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
|
||||
@ -1194,12 +1116,9 @@ private:
|
||||
}
|
||||
|
||||
template <typename FromType, bool first>
|
||||
void executeBigIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
void executeBigIntType(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
{
|
||||
using ColVecType = ColumnVectorOrDecimal<FromType>;
|
||||
KeyType key{};
|
||||
if constexpr (Keyed)
|
||||
key = Impl::getKey(key_cols, 0);
|
||||
|
||||
if (const ColVecType * col_from = checkAndGetColumn<ColVecType>(column))
|
||||
{
|
||||
@ -1208,9 +1127,6 @@ private:
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
ToType hash;
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
if constexpr (std::endian::native == std::endian::little)
|
||||
hash = apply(key, reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
|
||||
else
|
||||
@ -1227,14 +1143,6 @@ private:
|
||||
}
|
||||
else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
{
|
||||
if (!key_cols.is_const)
|
||||
{
|
||||
ColumnPtr full_column = col_from_const->convertToFullColumn();
|
||||
return executeBigIntType<FromType, first>(key_cols, full_column.get(), vec_to);
|
||||
}
|
||||
}
|
||||
auto value = col_from_const->template getValue<FromType>();
|
||||
|
||||
ToType hash;
|
||||
@ -1250,15 +1158,8 @@ private:
|
||||
if constexpr (first)
|
||||
vec_to.assign(size, hash);
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
vec_to[i] = combineHashes(key, vec_to[i], hash);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
|
||||
@ -1266,16 +1167,10 @@ private:
|
||||
}
|
||||
|
||||
template <bool first>
|
||||
void executeGeneric(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
void executeGeneric(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
{
|
||||
KeyType key{};
|
||||
if constexpr (Keyed)
|
||||
key = Impl::getKey(key_cols, 0);
|
||||
for (size_t i = 0, size = column->size(); i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
StringRef bytes = column->getDataAt(i);
|
||||
const ToType hash = apply(key, bytes.data, bytes.size);
|
||||
if constexpr (first)
|
||||
@ -1286,11 +1181,8 @@ private:
|
||||
}
|
||||
|
||||
template <bool first>
|
||||
void executeString(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
void executeString(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
{
|
||||
KeyType key{};
|
||||
if constexpr (Keyed)
|
||||
key = Impl::getKey(key_cols, 0);
|
||||
if (const ColumnString * col_from = checkAndGetColumn<ColumnString>(column))
|
||||
{
|
||||
const typename ColumnString::Chars & data = col_from->getChars();
|
||||
@ -1300,9 +1192,6 @@ private:
|
||||
ColumnString::Offset current_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
const ToType hash = apply(key,
|
||||
reinterpret_cast<const char *>(&data[current_offset]),
|
||||
offsets[i] - current_offset - 1);
|
||||
@ -1323,9 +1212,6 @@ private:
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
const ToType hash = apply(key, reinterpret_cast<const char *>(&data[i * n]), n);
|
||||
if constexpr (first)
|
||||
vec_to[i] = hash;
|
||||
@ -1335,14 +1221,6 @@ private:
|
||||
}
|
||||
else if (const ColumnConst * col_from_const = checkAndGetColumnConstStringOrFixedString(column))
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
{
|
||||
if (!key_cols.is_const)
|
||||
{
|
||||
ColumnPtr full_column = col_from_const->convertToFullColumn();
|
||||
return executeString<first>(key_cols, full_column.get(), vec_to);
|
||||
}
|
||||
}
|
||||
String value = col_from_const->getValue<String>();
|
||||
const ToType hash = apply(key, value.data(), value.size());
|
||||
const size_t size = vec_to.size();
|
||||
@ -1350,15 +1228,8 @@ private:
|
||||
if constexpr (first)
|
||||
vec_to.assign(size, hash);
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
vec_to[i] = combineHashes(key, vec_to[i], hash);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
|
||||
@ -1366,7 +1237,7 @@ private:
|
||||
}
|
||||
|
||||
template <bool first>
|
||||
void executeArray(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
void executeArray(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
{
|
||||
const IDataType * nested_type = typeid_cast<const DataTypeArray &>(*type).getNestedType().get();
|
||||
|
||||
@ -1378,19 +1249,13 @@ private:
|
||||
|
||||
typename ColumnVector<ToType>::Container vec_temp(nested_size);
|
||||
bool nested_is_first = true;
|
||||
executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first);
|
||||
executeForArgument(key, nested_type, nested_column, vec_temp, nested_is_first);
|
||||
|
||||
const size_t size = offsets.size();
|
||||
|
||||
ColumnArray::Offset current_offset = 0;
|
||||
KeyType key{};
|
||||
if constexpr (Keyed)
|
||||
key = Impl::getKey(key_cols, 0);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
ColumnArray::Offset next_offset = offsets[i];
|
||||
|
||||
ToType hash;
|
||||
@ -1414,7 +1279,7 @@ private:
|
||||
{
|
||||
/// NOTE: here, of course, you can do without the materialization of the column.
|
||||
ColumnPtr full_column = col_from_const->convertToFullColumn();
|
||||
executeArray<first>(key_cols, type, full_column.get(), vec_to);
|
||||
executeArray<first>(key, type, full_column.get(), vec_to);
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
|
||||
@ -1422,7 +1287,7 @@ private:
|
||||
}
|
||||
|
||||
template <bool first>
|
||||
void executeAny(const KeyColumnsType & key_cols, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
void executeAny(const KeyType & key, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
{
|
||||
WhichDataType which(from_type);
|
||||
|
||||
@ -1430,45 +1295,40 @@ private:
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}",
|
||||
icolumn->getName(), icolumn->size(), vec_to.size(), getName());
|
||||
|
||||
if constexpr (Keyed)
|
||||
if ((!key_cols.is_const && key_cols.size() != vec_to.size())
|
||||
|| (key_cols.is_const && key_cols.size() != 1))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Key column size {} doesn't match result column size {} of function {}", key_cols.size(), vec_to.size(), getName());
|
||||
|
||||
if (which.isUInt8()) executeIntType<UInt8, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isUInt16()) executeIntType<UInt16, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isUInt32()) executeIntType<UInt32, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isUInt64()) executeIntType<UInt64, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isUInt128()) executeBigIntType<UInt128, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isUInt256()) executeBigIntType<UInt256, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isInt8()) executeIntType<Int8, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isInt16()) executeIntType<Int16, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isInt32()) executeIntType<Int32, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isInt64()) executeIntType<Int64, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isInt128()) executeBigIntType<Int128, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isInt256()) executeBigIntType<Int256, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isUUID()) executeBigIntType<UUID, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isIPv4()) executeIntType<IPv4, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isIPv6()) executeBigIntType<IPv6, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isEnum8()) executeIntType<Int8, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isEnum16()) executeIntType<Int16, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isDate()) executeIntType<UInt16, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isDate32()) executeIntType<Int32, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isDateTime()) executeIntType<UInt32, first>(key_cols, icolumn, vec_to);
|
||||
if (which.isUInt8()) executeIntType<UInt8, first>(key, icolumn, vec_to);
|
||||
else if (which.isUInt16()) executeIntType<UInt16, first>(key, icolumn, vec_to);
|
||||
else if (which.isUInt32()) executeIntType<UInt32, first>(key, icolumn, vec_to);
|
||||
else if (which.isUInt64()) executeIntType<UInt64, first>(key, icolumn, vec_to);
|
||||
else if (which.isUInt128()) executeBigIntType<UInt128, first>(key, icolumn, vec_to);
|
||||
else if (which.isUInt256()) executeBigIntType<UInt256, first>(key, icolumn, vec_to);
|
||||
else if (which.isInt8()) executeIntType<Int8, first>(key, icolumn, vec_to);
|
||||
else if (which.isInt16()) executeIntType<Int16, first>(key, icolumn, vec_to);
|
||||
else if (which.isInt32()) executeIntType<Int32, first>(key, icolumn, vec_to);
|
||||
else if (which.isInt64()) executeIntType<Int64, first>(key, icolumn, vec_to);
|
||||
else if (which.isInt128()) executeBigIntType<Int128, first>(key, icolumn, vec_to);
|
||||
else if (which.isInt256()) executeBigIntType<Int256, first>(key, icolumn, vec_to);
|
||||
else if (which.isUUID()) executeBigIntType<UUID, first>(key, icolumn, vec_to);
|
||||
else if (which.isIPv4()) executeIntType<IPv4, first>(key, icolumn, vec_to);
|
||||
else if (which.isIPv6()) executeBigIntType<IPv6, first>(key, icolumn, vec_to);
|
||||
else if (which.isEnum8()) executeIntType<Int8, first>(key, icolumn, vec_to);
|
||||
else if (which.isEnum16()) executeIntType<Int16, first>(key, icolumn, vec_to);
|
||||
else if (which.isDate()) executeIntType<UInt16, first>(key, icolumn, vec_to);
|
||||
else if (which.isDate32()) executeIntType<Int32, first>(key, icolumn, vec_to);
|
||||
else if (which.isDateTime()) executeIntType<UInt32, first>(key, icolumn, vec_to);
|
||||
/// TODO: executeIntType() for Decimal32/64 leads to incompatible result
|
||||
else if (which.isDecimal32()) executeBigIntType<Decimal32, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isDecimal64()) executeBigIntType<Decimal64, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isDecimal128()) executeBigIntType<Decimal128, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isDecimal256()) executeBigIntType<Decimal256, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isFloat32()) executeIntType<Float32, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isFloat64()) executeIntType<Float64, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isString()) executeString<first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isFixedString()) executeString<first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isArray()) executeArray<first>(key_cols, from_type, icolumn, vec_to);
|
||||
else executeGeneric<first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isDecimal32()) executeBigIntType<Decimal32, first>(key, icolumn, vec_to);
|
||||
else if (which.isDecimal64()) executeBigIntType<Decimal64, first>(key, icolumn, vec_to);
|
||||
else if (which.isDecimal128()) executeBigIntType<Decimal128, first>(key, icolumn, vec_to);
|
||||
else if (which.isDecimal256()) executeBigIntType<Decimal256, first>(key, icolumn, vec_to);
|
||||
else if (which.isFloat32()) executeIntType<Float32, first>(key, icolumn, vec_to);
|
||||
else if (which.isFloat64()) executeIntType<Float64, first>(key, icolumn, vec_to);
|
||||
else if (which.isString()) executeString<first>(key, icolumn, vec_to);
|
||||
else if (which.isFixedString()) executeString<first>(key, icolumn, vec_to);
|
||||
else if (which.isArray()) executeArray<first>(key, from_type, icolumn, vec_to);
|
||||
else executeGeneric<first>(key, icolumn, vec_to);
|
||||
}
|
||||
|
||||
void executeForArgument(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to, bool & is_first) const
|
||||
void executeForArgument(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to, bool & is_first) const
|
||||
{
|
||||
/// Flattening of tuples.
|
||||
if (const ColumnTuple * tuple = typeid_cast<const ColumnTuple *>(column))
|
||||
@ -1477,7 +1337,7 @@ private:
|
||||
const DataTypes & tuple_types = typeid_cast<const DataTypeTuple &>(*type).getElements();
|
||||
size_t tuple_size = tuple_columns.size();
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
executeForArgument(key_cols, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first);
|
||||
executeForArgument(key, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first);
|
||||
}
|
||||
else if (const ColumnTuple * tuple_const = checkAndGetColumnConstData<ColumnTuple>(column))
|
||||
{
|
||||
@ -1487,24 +1347,24 @@ private:
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
{
|
||||
auto tmp = ColumnConst::create(tuple_columns[i], column->size());
|
||||
executeForArgument(key_cols, tuple_types[i].get(), tmp.get(), vec_to, is_first);
|
||||
executeForArgument(key, tuple_types[i].get(), tmp.get(), vec_to, is_first);
|
||||
}
|
||||
}
|
||||
else if (const auto * map = checkAndGetColumn<ColumnMap>(column))
|
||||
{
|
||||
const auto & type_map = assert_cast<const DataTypeMap &>(*type);
|
||||
executeForArgument(key_cols, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first);
|
||||
executeForArgument(key, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first);
|
||||
}
|
||||
else if (const auto * const_map = checkAndGetColumnConst<ColumnMap>(column))
|
||||
{
|
||||
executeForArgument(key_cols, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first);
|
||||
executeForArgument(key, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (is_first)
|
||||
executeAny<true>(key_cols, type, column, vec_to);
|
||||
executeAny<true>(key, type, column, vec_to);
|
||||
else
|
||||
executeAny<false>(key_cols, type, column, vec_to);
|
||||
executeAny<false>(key, type, column, vec_to);
|
||||
}
|
||||
|
||||
is_first = false;
|
||||
@ -1535,39 +1395,39 @@ public:
|
||||
{
|
||||
auto col_to = ColumnVector<ToType>::create(input_rows_count);
|
||||
|
||||
if (input_rows_count != 0)
|
||||
typename ColumnVector<ToType>::Container & vec_to = col_to->getData();
|
||||
|
||||
/// If using a "keyed" algorithm, the first argument is the key and
|
||||
/// the data starts from the second argument.
|
||||
/// Otherwise there is no key and all arguments are interpreted as data.
|
||||
constexpr size_t first_data_argument = Keyed;
|
||||
|
||||
if (arguments.size() <= first_data_argument)
|
||||
{
|
||||
typename ColumnVector<ToType>::Container & vec_to = col_to->getData();
|
||||
/// Return a fixed random-looking magic number when input is empty
|
||||
vec_to.assign(input_rows_count, static_cast<ToType>(0xe28dbde7fe22e41c));
|
||||
}
|
||||
|
||||
/// If using a "keyed" algorithm, the first argument is the key and
|
||||
/// the data starts from the second argument.
|
||||
/// Otherwise there is no key and all arguments are interpreted as data.
|
||||
constexpr size_t first_data_argument = Keyed;
|
||||
KeyType key{};
|
||||
if constexpr (Keyed)
|
||||
if (!arguments.empty())
|
||||
key = Impl::parseKey(arguments[0]);
|
||||
|
||||
if (arguments.size() <= first_data_argument)
|
||||
{
|
||||
/// Return a fixed random-looking magic number when input is empty
|
||||
vec_to.assign(input_rows_count, static_cast<ToType>(0xe28dbde7fe22e41c));
|
||||
}
|
||||
|
||||
KeyColumnsType key_cols{};
|
||||
if constexpr (Keyed)
|
||||
if (!arguments.empty())
|
||||
key_cols = Impl::parseKeyColumns(arguments[0]);
|
||||
|
||||
/// The function supports arbitrary number of arguments of arbitrary types.
|
||||
bool is_first_argument = true;
|
||||
for (size_t i = first_data_argument; i < arguments.size(); ++i)
|
||||
{
|
||||
const auto & col = arguments[i];
|
||||
executeForArgument(key_cols, col.type.get(), col.column.get(), vec_to, is_first_argument);
|
||||
}
|
||||
/// The function supports arbitrary number of arguments of arbitrary types.
|
||||
bool is_first_argument = true;
|
||||
for (size_t i = first_data_argument; i < arguments.size(); ++i)
|
||||
{
|
||||
const auto & col = arguments[i];
|
||||
executeForArgument(key, col.type.get(), col.column.get(), vec_to, is_first_argument);
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<ToType, UInt128>) /// backward-compatible
|
||||
{
|
||||
auto col_to_fixed_string = ColumnFixedString::create(sizeof(UInt128));
|
||||
col_to_fixed_string->getChars() = std::move(*reinterpret_cast<ColumnFixedString::Chars *>(&col_to->getData()));
|
||||
const auto & data = col_to->getData();
|
||||
auto & chars = col_to_fixed_string->getChars();
|
||||
chars.resize(data.size() * sizeof(UInt128));
|
||||
memcpy(chars.data(), data.data(), data.size() * sizeof(UInt128));
|
||||
return col_to_fixed_string;
|
||||
}
|
||||
|
||||
@ -1593,19 +1453,17 @@ public:
|
||||
|
||||
) // DECLARE_MULTITARGET_CODE
|
||||
|
||||
template <typename Impl, bool Keyed = false, typename KeyType = char, typename KeyColumnsType = char>
|
||||
class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>
|
||||
template <typename Impl, bool Keyed = false, typename KeyType = char>
|
||||
class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType>
|
||||
{
|
||||
public:
|
||||
explicit FunctionAnyHash(ContextPtr context) : selector(context)
|
||||
{
|
||||
selector
|
||||
.registerImplementation<TargetArch::Default, TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
|
||||
selector.registerImplementation<TargetArch::Default, TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType>>();
|
||||
|
||||
#if USE_MULTITARGET_CODE
|
||||
selector.registerImplementation<TargetArch::AVX2, TargetSpecific::AVX2::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
|
||||
selector
|
||||
.registerImplementation<TargetArch::AVX512F, TargetSpecific::AVX512F::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
|
||||
selector.registerImplementation<TargetArch::AVX2, TargetSpecific::AVX2::FunctionAnyHash<Impl, Keyed, KeyType>>();
|
||||
selector.registerImplementation<TargetArch::AVX512F, TargetSpecific::AVX512F::FunctionAnyHash<Impl, Keyed, KeyType>>();
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1841,7 +1699,7 @@ struct NameIntHash32 { static constexpr auto name = "intHash32"; };
|
||||
struct NameIntHash64 { static constexpr auto name = "intHash64"; };
|
||||
|
||||
using FunctionSipHash64 = FunctionAnyHash<SipHash64Impl>;
|
||||
using FunctionSipHash64Keyed = FunctionAnyHash<SipHash64KeyedImpl, true, SipHash64KeyedImpl::Key, SipHash64KeyedImpl::KeyColumns>;
|
||||
using FunctionSipHash64Keyed = FunctionAnyHash<SipHash64KeyedImpl, true, SipHash64KeyedImpl::Key>;
|
||||
using FunctionIntHash32 = FunctionIntHash<IntHash32Impl, NameIntHash32>;
|
||||
using FunctionIntHash64 = FunctionIntHash<IntHash64Impl, NameIntHash64>;
|
||||
#if USE_SSL
|
||||
@ -1855,10 +1713,8 @@ using FunctionSHA384 = FunctionStringHashFixedString<SHA384Impl>;
|
||||
using FunctionSHA512 = FunctionStringHashFixedString<SHA512Impl>;
|
||||
#endif
|
||||
using FunctionSipHash128 = FunctionAnyHash<SipHash128Impl>;
|
||||
using FunctionSipHash128Keyed = FunctionAnyHash<SipHash128KeyedImpl, true, SipHash128KeyedImpl::Key, SipHash128KeyedImpl::KeyColumns>;
|
||||
using FunctionSipHash128Keyed = FunctionAnyHash<SipHash128KeyedImpl, true, SipHash128KeyedImpl::Key>;
|
||||
using FunctionSipHash128Reference = FunctionAnyHash<SipHash128ReferenceImpl>;
|
||||
using FunctionSipHash128ReferenceKeyed
|
||||
= FunctionAnyHash<SipHash128ReferenceKeyedImpl, true, SipHash128ReferenceKeyedImpl::Key, SipHash128ReferenceKeyedImpl::KeyColumns>;
|
||||
using FunctionCityHash64 = FunctionAnyHash<ImplCityHash64>;
|
||||
using FunctionFarmFingerprint64 = FunctionAnyHash<ImplFarmFingerprint64>;
|
||||
using FunctionFarmHash64 = FunctionAnyHash<ImplFarmHash64>;
|
||||
|
@ -20,11 +20,6 @@ REGISTER_FUNCTION(Hashing)
|
||||
.examples{{"hash", "SELECT hex(sipHash128Reference('foo', '\\x01', 3))", ""}},
|
||||
.categories{"Hash"}
|
||||
});
|
||||
factory.registerFunction<FunctionSipHash128ReferenceKeyed>(FunctionDocumentation{
|
||||
.description = "Same as [sipHash128Reference](#hash_functions-siphash128reference) but additionally takes an explicit key argument "
|
||||
"instead of using a fixed key.",
|
||||
.examples{{"hash", "SELECT hex(sipHash128ReferenceKeyed((506097522914230528, 1084818905618843912),'foo', '\\x01', 3));", ""}},
|
||||
.categories{"Hash"}});
|
||||
factory.registerFunction<FunctionCityHash64>();
|
||||
factory.registerFunction<FunctionFarmFingerprint64>();
|
||||
factory.registerFunction<FunctionFarmHash64>();
|
||||
|
158
src/Functions/HasSubsequenceImpl.h
Normal file
@ -0,0 +1,158 @@
|
||||
#pragma once
|
||||
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/GatherUtils/Sources.h>
|
||||
#include <Functions/GatherUtils/Sinks.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
namespace
|
||||
{
|
||||
|
||||
using namespace GatherUtils;
|
||||
|
||||
template <typename Name, typename Impl>
|
||||
class HasSubsequenceImpl : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = Name::name;
|
||||
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<HasSubsequenceImpl>(); }
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool isVariadic() const override { return false; }
|
||||
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 2; }
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return false; }
|
||||
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {};}
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of argument of function {}",
|
||||
arguments[0]->getName(), getName());
|
||||
|
||||
if (!isString(arguments[1]))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of argument of function {}",
|
||||
arguments[1]->getName(), getName());
|
||||
|
||||
return std::make_shared<DataTypeNumber<UInt8>>();
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override
|
||||
{
|
||||
const ColumnPtr & column_haystack = arguments[0].column;
|
||||
const ColumnPtr & column_needle = arguments[1].column;
|
||||
|
||||
const ColumnConst * haystack_const_string = checkAndGetColumnConst<ColumnString>(column_haystack.get());
|
||||
const ColumnConst * needle_const_string = checkAndGetColumnConst<ColumnString>(column_needle.get());
|
||||
const ColumnString * haystack_string = checkAndGetColumn<ColumnString>(&*column_haystack);
|
||||
const ColumnString * needle_string = checkAndGetColumn<ColumnString>(&*column_needle);
|
||||
|
||||
auto col_res = ColumnVector<UInt8>::create();
|
||||
typename ColumnVector<UInt8>::Container & vec_res = col_res->getData();
|
||||
vec_res.resize(input_rows_count);
|
||||
|
||||
if (haystack_string && needle_string)
|
||||
execute(StringSource{*haystack_string}, StringSource{*needle_string}, vec_res);
|
||||
else if (haystack_string && needle_const_string)
|
||||
execute(StringSource{*haystack_string}, ConstSource<StringSource>{*needle_const_string}, vec_res);
|
||||
else if (haystack_const_string && needle_string)
|
||||
execute(ConstSource<StringSource>{*haystack_const_string}, StringSource{*needle_string}, vec_res);
|
||||
else if (haystack_const_string && needle_const_string)
|
||||
execute(ConstSource<StringSource>{*haystack_const_string}, ConstSource<StringSource>{*needle_const_string}, vec_res);
|
||||
else
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Illegal columns {} and {} of arguments of function {}",
|
||||
arguments[0].column->getName(),
|
||||
arguments[1].column->getName(),
|
||||
getName());
|
||||
|
||||
return col_res;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
template <typename SourceHaystack, typename SourceNeedle>
|
||||
void execute(
|
||||
SourceHaystack && haystacks,
|
||||
SourceNeedle && needles,
|
||||
PaddedPODArray<UInt8> & res_data) const
|
||||
{
|
||||
while (!haystacks.isEnd())
|
||||
{
|
||||
auto haystack_slice = haystacks.getWhole();
|
||||
auto needle_slice = needles.getWhole();
|
||||
size_t row_num = haystacks.rowNum();
|
||||
|
||||
if constexpr (!Impl::is_utf8)
|
||||
res_data[row_num] = hasSubsequence(haystack_slice.data, haystack_slice.size, needle_slice.data, needle_slice.size);
|
||||
else
|
||||
res_data[row_num] = hasSubsequenceUTF8(haystack_slice.data, haystack_slice.size, needle_slice.data, needle_slice.size);
|
||||
|
||||
haystacks.next();
|
||||
needles.next();
|
||||
}
|
||||
}
|
||||
|
||||
static UInt8 hasSubsequence(const UInt8 * haystack, size_t haystack_size, const UInt8 * needle, size_t needle_size)
|
||||
{
|
||||
size_t j = 0;
|
||||
for (size_t i = 0; (i < haystack_size) && (j < needle_size); i++)
|
||||
if (Impl::toLowerIfNeed(needle[j]) == Impl::toLowerIfNeed(haystack[i]))
|
||||
++j;
|
||||
return j == needle_size;
|
||||
}
|
||||
|
||||
static UInt8 hasSubsequenceUTF8(const UInt8 * haystack, size_t haystack_size, const UInt8 * needle, size_t needle_size)
|
||||
{
|
||||
const auto * haystack_pos = haystack;
|
||||
const auto * needle_pos = needle;
|
||||
const auto * haystack_end = haystack + haystack_size;
|
||||
const auto * needle_end = needle + needle_size;
|
||||
|
||||
if (!needle_size)
|
||||
return 1;
|
||||
|
||||
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
|
||||
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
|
||||
if (!haystack_code_point || !needle_code_point)
|
||||
return 0;
|
||||
|
||||
while (haystack_code_point && needle_code_point)
|
||||
{
|
||||
if (Impl::toLowerIfNeed(*needle_code_point) == Impl::toLowerIfNeed(*haystack_code_point))
|
||||
{
|
||||
needle_pos += UTF8::seqLength(*needle_pos);
|
||||
if (needle_pos >= needle_end)
|
||||
break;
|
||||
needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
|
||||
}
|
||||
haystack_pos += UTF8::seqLength(*haystack_pos);
|
||||
if (haystack_pos >= haystack_end)
|
||||
break;
|
||||
haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
|
||||
}
|
||||
return needle_pos == needle_end;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
}
|
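Aside (not part of the diff): the ASCII path above is a greedy two-pointer subsequence check. Every byte of the needle must occur in the haystack in the same relative order, not necessarily contiguously. A minimal standalone sketch of that idea, with hypothetical names, for readers who want to try it outside ClickHouse:

    #include <cassert>
    #include <cstddef>
    #include <string_view>

    /// Greedy check: scan the haystack once, consuming a needle character
    /// whenever it matches; the needle is a subsequence iff it is fully consumed.
    static bool hasSubsequencePlain(std::string_view haystack, std::string_view needle)
    {
        size_t j = 0;
        for (size_t i = 0; i < haystack.size() && j < needle.size(); ++i)
            if (haystack[i] == needle[j])
                ++j;
        return j == needle.size();
    }

    int main()
    {
        assert(hasSubsequencePlain("garbage", "arg"));   /// 'a', 'r', 'g' appear in this order
        assert(!hasSubsequencePlain("garbage", "gap"));  /// 'p' never appears
        assert(hasSubsequencePlain("anything", ""));     /// an empty needle always matches
    }

The UTF-8 variant in the header follows the same greedy walk, but advances by decoded code points instead of bytes.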
30 src/Functions/hasSubsequence.cpp Normal file
@@ -0,0 +1,30 @@
#include <Functions/FunctionFactory.h>
#include <Functions/HasSubsequenceImpl.h>


namespace DB
{
namespace
{

struct HasSubsequenceCaseSensitiveASCII
{
    static constexpr bool is_utf8 = false;

    static int toLowerIfNeed(int c) { return c; }
};

struct NameHasSubsequence
{
    static constexpr auto name = "hasSubsequence";
};

using FunctionHasSubsequence = HasSubsequenceImpl<NameHasSubsequence, HasSubsequenceCaseSensitiveASCII>;
}

REGISTER_FUNCTION(hasSubsequence)
{
    factory.registerFunction<FunctionHasSubsequence>({}, FunctionFactory::CaseInsensitive);
}

}
29 src/Functions/hasSubsequenceCaseInsensitive.cpp Normal file
@@ -0,0 +1,29 @@
#include <Functions/FunctionFactory.h>
#include <Functions/HasSubsequenceImpl.h>

namespace DB
{
namespace
{

struct HasSubsequenceCaseInsensitiveASCII
{
    static constexpr bool is_utf8 = false;

    static int toLowerIfNeed(int c) { return std::tolower(c); }
};

struct NameHasSubsequenceCaseInsensitive
{
    static constexpr auto name = "hasSubsequenceCaseInsensitive";
};

using FunctionHasSubsequenceCaseInsensitive = HasSubsequenceImpl<NameHasSubsequenceCaseInsensitive, HasSubsequenceCaseInsensitiveASCII>;
}

REGISTER_FUNCTION(hasSubsequenceCaseInsensitive)
{
    factory.registerFunction<FunctionHasSubsequenceCaseInsensitive>({}, FunctionFactory::CaseInsensitive);
}

}
31 src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp Normal file
@@ -0,0 +1,31 @@
#include <Functions/FunctionFactory.h>
#include <Functions/HasSubsequenceImpl.h>

#include "Poco/Unicode.h"

namespace DB
{
namespace
{

struct HasSubsequenceCaseInsensitiveUTF8
{
    static constexpr bool is_utf8 = true;

    static int toLowerIfNeed(int code_point) { return Poco::Unicode::toLower(code_point); }
};

struct NameHasSubsequenceCaseInsensitiveUTF8
{
    static constexpr auto name = "hasSubsequenceCaseInsensitiveUTF8";
};

using FunctionHasSubsequenceCaseInsensitiveUTF8 = HasSubsequenceImpl<NameHasSubsequenceCaseInsensitiveUTF8, HasSubsequenceCaseInsensitiveUTF8>;
}

REGISTER_FUNCTION(hasSubsequenceCaseInsensitiveUTF8)
{
    factory.registerFunction<FunctionHasSubsequenceCaseInsensitiveUTF8>({}, FunctionFactory::CaseInsensitive);
}

}
30 src/Functions/hasSubsequenceUTF8.cpp Normal file
@@ -0,0 +1,30 @@
#include <Functions/FunctionFactory.h>
#include <Functions/HasSubsequenceImpl.h>


namespace DB
{
namespace
{

struct HasSubsequenceCaseSensitiveUTF8
{
    static constexpr bool is_utf8 = true;

    static int toLowerIfNeed(int code_point) { return code_point; }
};

struct NameHasSubsequenceUTF8
{
    static constexpr auto name = "hasSubsequenceUTF8";
};

using FunctionHasSubsequenceUTF8 = HasSubsequenceImpl<NameHasSubsequenceUTF8, HasSubsequenceCaseSensitiveUTF8>;
}

REGISTER_FUNCTION(hasSubsequenceUTF8)
{
    factory.registerFunction<FunctionHasSubsequenceUTF8>({}, FunctionFactory::CaseInsensitive);
}

}
@@ -1,4 +1,3 @@
#include "FunctionsStringSearch.h"
#include "FunctionFactory.h"
#include "like.h"


@@ -95,7 +95,7 @@ size_t ReadBufferFromFileDescriptor::readImpl(char * to, size_t min_bytes, size_
        /// It reports real time spent including the time spent while thread was preempted doing nothing.
        /// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables).
        /// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it
        /// (TaskStatsInfoGetter has about 500K RPS).
        /// (NetlinkMetricsProvider has about 500K RPS).
        watch.stop();
        ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds());


@@ -78,7 +78,7 @@ std::future<IAsynchronousReader::Result> SynchronousReader::submit(Request reque
        /// It reports real time spent including the time spent while thread was preempted doing nothing.
        /// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables).
        /// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it
        /// (TaskStatsInfoGetter has about 500K RPS).
        /// (NetlinkMetricsProvider has about 500K RPS).
        watch.stop();
        ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds());
@@ -2515,11 +2515,21 @@ FindOriginalNodeForOutputName::FindOriginalNodeForOutputName(const ActionsDAGPtr
        /// find input node which refers to the output node
        /// consider only aliases on the path
        const auto * node = output_node;
        while (node && node->type == ActionsDAG::ActionType::ALIAS)
        while (node)
        {
            /// alias has only one child
            chassert(node->children.size() == 1);
            node = node->children.front();
            if (node->type == ActionsDAG::ActionType::ALIAS)
            {
                node = node->children.front();
            }
            /// materiailze() function can occur when dealing with views
            /// TODO: not sure if it should be done here, looks too generic place
            else if (node->type == ActionsDAG::ActionType::FUNCTION && node->function_base->getName() == "materialize")
            {
                chassert(node->children.size() == 1);
                node = node->children.front();
            }
            else
                break;
        }
        if (node && node->type == ActionsDAG::ActionType::INPUT)
            index.emplace(output_node->result_name, node);
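Aside (not part of the diff): the hunk above generalizes the old alias-only walk so that materialize() wrappers, which can appear when reading through views, are also unwrapped on the way from an output node to its originating input. A simplified, self-contained sketch of that traversal; the node model here is hypothetical, not the real ActionsDAG API:

    #include <cassert>
    #include <string>
    #include <vector>

    /// Simplified stand-in for a DAG node.
    struct Node
    {
        enum class Type { INPUT, ALIAS, FUNCTION } type;
        std::string function_name;          /// only meaningful for FUNCTION nodes
        std::vector<const Node *> children; /// ALIAS and materialize() have exactly one child
    };

    const Node * findOriginalInput(const Node * node)
    {
        while (node)
        {
            if (node->type == Node::Type::ALIAS)
            {
                assert(node->children.size() == 1);
                node = node->children.front();
            }
            /// materialize() is transparent for this purpose: unwrap it too.
            else if (node->type == Node::Type::FUNCTION && node->function_name == "materialize")
            {
                assert(node->children.size() == 1);
                node = node->children.front();
            }
            else
                break;
        }
        return (node && node->type == Node::Type::INPUT) ? node : nullptr;
    }

    int main()
    {
        Node input{Node::Type::INPUT, "", {}};
        Node mat{Node::Type::FUNCTION, "materialize", {&input}};
        Node alias{Node::Type::ALIAS, "", {&mat}};
        assert(findOriginalInput(&alias) == &input);
    }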
@@ -5,6 +5,11 @@
#include <Interpreters/Cache/FileCacheKey.h>
#include <Common/logger_useful.h>

namespace CurrentMetrics
{
    extern const Metric FilesystemCacheSizeLimit;
}

namespace DB
{

@@ -18,7 +23,10 @@ private:
    using LRUQueueIterator = typename LRUQueue::iterator;

public:
    LRUFileCachePriority(size_t max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_) {}
    LRUFileCachePriority(size_t max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_)
    {
        CurrentMetrics::set(CurrentMetrics::FilesystemCacheSizeLimit, max_size_);
    }

    size_t getSize(const CacheGuard::Lock &) const override { return current_size; }
@@ -784,15 +784,32 @@ Strings Context::getWarnings() const
        auto lock = getLock();
        common_warnings = shared->warnings;
    }
    /// Make setting's name ordered
    std::set<String> obsolete_settings;
    for (const auto & setting : settings)
    {
        if (setting.isValueChanged() && setting.isObsolete())
        {
            common_warnings.emplace_back("Some obsolete setting is changed. "
                "Check 'select * from system.settings where changed' and read the changelog.");
            break;
        }
            obsolete_settings.emplace(setting.getName());
    }

    if (!obsolete_settings.empty())
    {
        bool single_element = obsolete_settings.size() == 1;
        String res = single_element ? "Obsolete setting [" : "Obsolete settings [";

        bool first = true;
        for (const auto & setting : obsolete_settings)
        {
            res += first ? "" : ", ";
            res += "'" + setting + "'";
            first = false;
        }
        res = res + "]" + (single_element ? " is" : " are")
            + " changed. "
              "Please check 'select * from system.settings where changed and is_obsolete' and read the changelog.";
        common_warnings.emplace_back(res);
    }

    return common_warnings;
}
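Aside (not part of the diff): the new warning path collects the names of changed obsolete settings into an ordered std::set and joins them into one message with singular/plural handling. A hedged, standalone sketch of just the message construction, mirroring the logic shown above:

    #include <iostream>
    #include <set>
    #include <string>

    std::string formatObsoleteSettingsWarning(const std::set<std::string> & obsolete_settings)
    {
        if (obsolete_settings.empty())
            return {};

        bool single_element = obsolete_settings.size() == 1;
        std::string res = single_element ? "Obsolete setting [" : "Obsolete settings [";

        bool first = true;
        for (const auto & setting : obsolete_settings)
        {
            res += first ? "" : ", ";     /// comma-separate everything after the first name
            res += "'" + setting + "'";
            first = false;
        }
        res += "]";
        res += single_element ? " is" : " are";
        res += " changed. Please check 'select * from system.settings where changed and is_obsolete' "
               "and read the changelog.";
        return res;
    }

    int main()
    {
        /// Setting names here are only examples.
        std::cout << formatObsoleteSettingsWarning({"setting_a", "setting_b"}) << '\n';
    }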
@@ -1461,15 +1478,24 @@ void Context::addQueryAccessInfo(
void Context::addQueryAccessInfo(const Names & partition_names)
{
    if (isGlobalContext())
    {
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info");
    }

    std::lock_guard<std::mutex> lock(query_access_info.mutex);
    for (const auto & partition_name : partition_names)
    {
        query_access_info.partitions.emplace(partition_name);
    }
}

void Context::addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name)
{
    if (!qualified_projection_name)
        return;

    if (isGlobalContext())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info");

    std::lock_guard<std::mutex> lock(query_access_info.mutex);
    query_access_info.projections.emplace(fmt::format(
        "{}.{}", qualified_projection_name.storage_id.getFullTableName(), backQuoteIfNeed(qualified_projection_name.projection_name)));
}

void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String & created_object) const
@@ -2205,9 +2231,9 @@ BackupsWorker & Context::getBackupsWorker() const
    const bool allow_concurrent_restores = this->getConfigRef().getBool("backups.allow_concurrent_restores", true);

    const auto & config = getConfigRef();
    const auto & settings_ = getSettingsRef();
    UInt64 backup_threads = config.getUInt64("backup_threads", settings_.backup_threads);
    UInt64 restore_threads = config.getUInt64("restore_threads", settings_.restore_threads);
    const auto & settings_ref = getSettingsRef();
    UInt64 backup_threads = config.getUInt64("backup_threads", settings_ref.backup_threads);
    UInt64 restore_threads = config.getUInt64("restore_threads", settings_ref.restore_threads);

    if (!shared->backups_worker)
        shared->backups_worker.emplace(backup_threads, restore_threads, allow_concurrent_backups, allow_concurrent_restores);
@@ -4478,10 +4504,10 @@ ReadSettings Context::getReadSettings() const

ReadSettings Context::getBackupReadSettings() const
{
    ReadSettings settings_ = getReadSettings();
    settings_.remote_throttler = getBackupsThrottler();
    settings_.local_throttler = getBackupsThrottler();
    return settings_;
    ReadSettings read_settings = getReadSettings();
    read_settings.remote_throttler = getBackupsThrottler();
    read_settings.local_throttler = getBackupsThrottler();
    return read_settings;
}

WriteSettings Context::getWriteSettings() const
@@ -4510,14 +4536,13 @@ std::shared_ptr<AsyncReadCounters> Context::getAsyncReadCounters() const

Context::ParallelReplicasMode Context::getParallelReplicasMode() const
{
    const auto & settings_ = getSettingsRef();
    const auto & settings_ref = getSettingsRef();

    using enum Context::ParallelReplicasMode;
    if (!settings_.parallel_replicas_custom_key.value.empty())
    if (!settings_ref.parallel_replicas_custom_key.value.empty())
        return CUSTOM_KEY;

    if (settings_.allow_experimental_parallel_reading_from_replicas > 0
        && !settings_.use_hedged_requests)
    if (settings_ref.allow_experimental_parallel_reading_from_replicas > 0 && !settings_ref.use_hedged_requests)
        return READ_TASKS;

    return SAMPLE_KEY;
@@ -4525,17 +4550,15 @@ Context::ParallelReplicasMode Context::getParallelReplicasMode() const

bool Context::canUseParallelReplicasOnInitiator() const
{
    const auto & settings_ = getSettingsRef();
    return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS
        && settings_.max_parallel_replicas > 1
    const auto & settings_ref = getSettingsRef();
    return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS && settings_ref.max_parallel_replicas > 1
        && !getClientInfo().collaborate_with_initiator;
}

bool Context::canUseParallelReplicasOnFollower() const
{
    const auto & settings_ = getSettingsRef();
    return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS
        && settings_.max_parallel_replicas > 1
    const auto & settings_ref = getSettingsRef();
    return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS && settings_ref.max_parallel_replicas > 1
        && getClientInfo().collaborate_with_initiator;
}
||||
|
@@ -658,6 +658,14 @@ public:
        const String & view_name = {});
    void addQueryAccessInfo(const Names & partition_names);

    struct QualifiedProjectionName
    {
        StorageID storage_id = StorageID::createEmpty();
        String projection_name;
        explicit operator bool() const { return !projection_name.empty(); }
    };
    void addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name);


    /// Supported factories for records in query_log
    enum class QueryLogFactories
|
@@ -470,6 +470,7 @@ TEST_F(FileCacheTest, get)

        auto & file_segment2 = get(holder2, 2);
        ASSERT_TRUE(file_segment2.getOrSetDownloader() != FileSegment::getCallerId());
        ASSERT_EQ(file_segment2.state(), State::DOWNLOADING);

        {
            std::lock_guard lock(mutex);
@@ -478,8 +479,7 @@ TEST_F(FileCacheTest, get)
        cv.notify_one();

        file_segment2.wait(file_segment2.range().right);
        file_segment2.complete();
        ASSERT_TRUE(file_segment2.state() == State::DOWNLOADED);
        ASSERT_EQ(file_segment2.getDownloadedSize(false), file_segment2.range().size());
    });

    {
@@ -488,7 +488,7 @@ TEST_F(FileCacheTest, get)
    }

    download(file_segment);
    ASSERT_TRUE(file_segment.state() == State::DOWNLOADED);
    ASSERT_EQ(file_segment.state(), State::DOWNLOADED);

    other_1.join();
|
@@ -92,18 +92,6 @@ static AggregateProjectionInfo getAggregatingProjectionInfo(
    return info;
}

static bool hasNullableOrMissingColumn(const DAGIndex & index, const Names & names)
{
    for (const auto & query_name : names)
    {
        auto jt = index.find(query_name);
        if (jt == index.end() || jt->second->result_type->isNullable())
            return true;
    }

    return false;
}

struct AggregateFunctionMatch
{
    const AggregateDescription * description = nullptr;
@@ -170,20 +158,14 @@ std::optional<AggregateFunctionMatches> matchAggregateFunctions(
        }

        /// This is a special case for the function count().
        /// We can assume that 'count(expr) == count()' if expr is not nullable.
        if (typeid_cast<const AggregateFunctionCount *>(candidate.function.get()))
        /// We can assume that 'count(expr) == count()' if expr is not nullable,
        /// which can be verified by simply casting to `AggregateFunctionCount *`.
        if (typeid_cast<const AggregateFunctionCount *>(aggregate.function.get()))
        {
            bool has_nullable_or_missing_arg = false;
            has_nullable_or_missing_arg |= hasNullableOrMissingColumn(query_index, aggregate.argument_names);
            has_nullable_or_missing_arg |= hasNullableOrMissingColumn(proj_index, candidate.argument_names);

            if (!has_nullable_or_missing_arg)
            {
                /// we can ignore arguments for count()
                found_match = true;
                res.push_back({&candidate, DataTypes()});
                break;
            }
            /// we can ignore arguments for count()
            found_match = true;
            res.push_back({&candidate, DataTypes()});
            break;
        }
|
||||
/// Now, function names and types matched.
|
||||
@@ -628,8 +610,16 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes &
        // candidates.minmax_projection->block.dumpStructure());

        Pipe pipe(std::make_shared<SourceFromSingleChunk>(std::move(candidates.minmax_projection->block)));
        projection_reading = std::make_unique<ReadFromPreparedSource>(std::move(pipe));

        projection_reading = std::make_unique<ReadFromPreparedSource>(
            std::move(pipe),
            context,
            query_info.is_internal
                ? Context::QualifiedProjectionName{}
                : Context::QualifiedProjectionName
                  {
                      .storage_id = reading->getMergeTreeData().getStorageID(),
                      .projection_name = candidates.minmax_projection->candidate.projection->name,
                  });
        has_ordinary_parts = !candidates.minmax_projection->normal_parts.empty();
        if (has_ordinary_parts)
            reading->resetParts(std::move(candidates.minmax_projection->normal_parts));
@@ -661,7 +651,16 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes &
    {
        auto header = proj_snapshot->getSampleBlockForColumns(best_candidate->dag->getRequiredColumnsNames());
        Pipe pipe(std::make_shared<NullSource>(std::move(header)));
        projection_reading = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
        projection_reading = std::make_unique<ReadFromPreparedSource>(
            std::move(pipe),
            context,
            query_info.is_internal
                ? Context::QualifiedProjectionName{}
                : Context::QualifiedProjectionName
                  {
                      .storage_id = reading->getMergeTreeData().getStorageID(),
                      .projection_name = best_candidate->projection->name,
                  });
    }

    has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr;
|
@@ -92,6 +92,10 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes)
        break;
    }

    /// Dangling query plan node. This might be generated by StorageMerge.
    if (iter->node->step.get() == reading)
        return false;

    const auto metadata = reading->getStorageMetadata();
    const auto & projections = metadata->projections;

@@ -105,8 +109,8 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes)

    QueryDAG query;
    {
        auto & clild = iter->node->children[iter->next_child - 1];
        if (!query.build(*clild))
        auto & child = iter->node->children[iter->next_child - 1];
        if (!query.build(*child))
            return false;

        if (query.dag)
@@ -183,7 +187,16 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes)
    if (!projection_reading)
    {
        Pipe pipe(std::make_shared<NullSource>(proj_snapshot->getSampleBlockForColumns(required_columns)));
        projection_reading = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
        projection_reading = std::make_unique<ReadFromPreparedSource>(
            std::move(pipe),
            context,
            query_info.is_internal
                ? Context::QualifiedProjectionName{}
                : Context::QualifiedProjectionName
                  {
                      .storage_id = reading->getMergeTreeData().getStorageID(),
                      .projection_name = best_candidate->projection->name,
                  });
    }

    bool has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr;
||||
|
@@ -1761,6 +1761,10 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons
                fmt::format("{}.{}", data.getStorageID().getFullNameNotQuoted(), part.data_part->info.partition_id));
        }
        context->getQueryContext()->addQueryAccessInfo(partition_names);

        if (storage_snapshot->projection)
            context->getQueryContext()->addQueryAccessInfo(
                Context::QualifiedProjectionName{.storage_id = data.getStorageID(), .projection_name = storage_snapshot->projection->name});
    }

    ProfileEvents::increment(ProfileEvents::SelectedParts, result.selected_parts);
|
@@ -4,14 +4,19 @@
namespace DB
{

ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_)
ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_, ContextPtr context_, Context::QualifiedProjectionName qualified_projection_name_)
    : ISourceStep(DataStream{.header = pipe_.getHeader()})
    , pipe(std::move(pipe_))
    , context(std::move(context_))
    , qualified_projection_name(std::move(qualified_projection_name_))
{
}

void ReadFromPreparedSource::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
{
    if (context && context->hasQueryContext())
        context->getQueryContext()->addQueryAccessInfo(qualified_projection_name);

    for (const auto & processor : pipe.getProcessors())
        processors.emplace_back(processor);
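Aside (not part of the diff): the pattern introduced here threads an optional query context plus a "qualified projection name" into the prepared-source step, and records the projection into the query access info when the pipeline is initialized. A simplified sketch with stand-in types; the names below are illustrative, not the real ClickHouse API:

    #include <iostream>
    #include <memory>
    #include <string>
    #include <utility>

    /// Hypothetical stand-ins for the real types.
    struct QualifiedProjectionName
    {
        std::string storage;
        std::string projection;
        explicit operator bool() const { return !projection.empty(); }
    };

    struct QueryContext
    {
        void addQueryAccessInfo(const QualifiedProjectionName & name)
        {
            if (name)
                std::cout << "projection used: " << name.storage << "." << name.projection << '\n';
        }
    };

    class PreparedSourceStepSketch
    {
    public:
        /// Context and projection name are optional: callers that do not read
        /// from a projection simply rely on the defaults.
        explicit PreparedSourceStepSketch(
            std::shared_ptr<QueryContext> context_ = nullptr, QualifiedProjectionName name_ = {})
            : context(std::move(context_)), qualified_projection_name(std::move(name_)) {}

        void initializePipeline()
        {
            if (context)
                context->addQueryAccessInfo(qualified_projection_name);
            /// ... build the pipeline from the prepared pipe ...
        }

    private:
        std::shared_ptr<QueryContext> context;
        QualifiedProjectionName qualified_projection_name;
    };

    int main()
    {
        auto ctx = std::make_shared<QueryContext>();
        PreparedSourceStepSketch step(ctx, {"db.table", "proj_by_user"});
        step.initializePipeline();
    }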
||||
|
@@ -1,4 +1,6 @@
#pragma once

#include <Interpreters/Context.h>
#include <Processors/QueryPlan/ISourceStep.h>
#include <QueryPipeline/Pipe.h>

@@ -9,7 +11,8 @@ namespace DB
class ReadFromPreparedSource : public ISourceStep
{
public:
    explicit ReadFromPreparedSource(Pipe pipe_);
    explicit ReadFromPreparedSource(
        Pipe pipe_, ContextPtr context_ = nullptr, Context::QualifiedProjectionName qualified_projection_name_ = {});

    String getName() const override { return "ReadFromPreparedSource"; }

@@ -18,6 +21,7 @@ public:
protected:
    Pipe pipe;
    ContextPtr context;
    Context::QualifiedProjectionName qualified_projection_name;
};

class ReadFromStorageStep : public ReadFromPreparedSource
||||
|
@@ -33,7 +33,7 @@ struct Settings;
    /** Data storing format settings. */ \
    M(UInt64, min_bytes_for_wide_part, 10485760, "Minimal uncompressed size in bytes to create part in wide format instead of compact", 0) \
    M(UInt64, min_rows_for_wide_part, 0, "Minimal number of rows to create part in wide format instead of compact", 0) \
    M(Float, ratio_of_defaults_for_sparse_serialization, 1.0, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \
    M(Float, ratio_of_defaults_for_sparse_serialization, 0.9375f, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \
    \
    /** Merge settings. */ \
    M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \
|
@@ -21,6 +21,7 @@ NamesAndTypesList SystemMergeTreeSettings<replicated>::getNamesAndTypes()
        {"max", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())},
        {"readonly", std::make_shared<DataTypeUInt8>()},
        {"type", std::make_shared<DataTypeString>()},
        {"is_obsolete", std::make_shared<DataTypeUInt8>()},
    };
}

@@ -52,6 +53,7 @@ void SystemMergeTreeSettings<replicated>::fillData(MutableColumns & res_columns,
        res_columns[5]->insert(max);
        res_columns[6]->insert(writability == SettingConstraintWritability::CONST);
        res_columns[7]->insert(setting.getTypeName());
        res_columns[8]->insert(setting.isObsolete());
    }
}
|
||||
|
@ -15,6 +15,7 @@ NamesAndTypesList StorageSystemServerSettings::getNamesAndTypes()
|
||||
{"changed", std::make_shared<DataTypeUInt8>()},
|
||||
{"description", std::make_shared<DataTypeString>()},
|
||||
{"type", std::make_shared<DataTypeString>()},
|
||||
{"is_obsolete", std::make_shared<DataTypeUInt8>()},
|
||||
};
|
||||
}
|
||||
|
||||
@ -33,6 +34,7 @@ void StorageSystemServerSettings::fillData(MutableColumns & res_columns, Context
|
||||
res_columns[3]->insert(setting.isValueChanged());
|
||||
res_columns[4]->insert(setting.getDescription());
|
||||
res_columns[5]->insert(setting.getTypeName());
|
||||
res_columns[6]->insert(setting.isObsolete());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -21,6 +21,7 @@ NamesAndTypesList StorageSystemSettings::getNamesAndTypes()
|
||||
{"type", std::make_shared<DataTypeString>()},
|
||||
{"default", std::make_shared<DataTypeString>()},
|
||||
{"alias_for", std::make_shared<DataTypeString>()},
|
||||
{"is_obsolete", std::make_shared<DataTypeUInt8>()},
|
||||
};
|
||||
}
|
||||
|
||||
@ -51,6 +52,7 @@ void StorageSystemSettings::fillData(MutableColumns & res_columns, ContextPtr co
|
||||
res_columns[6]->insert(writability == SettingConstraintWritability::CONST);
|
||||
res_columns[7]->insert(setting.getTypeName());
|
||||
res_columns[8]->insert(setting.getDefaultValueString());
|
||||
res_columns[10]->insert(setting.isObsolete());
|
||||
};
|
||||
|
||||
const auto & settings_to_aliases = Settings::Traits::settingsToAliases();
|
||||
|
@ -8,6 +8,7 @@ import shutil
|
||||
import subprocess
|
||||
import time
|
||||
import sys
|
||||
from glob import glob
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, Union
|
||||
|
||||
@ -31,6 +32,17 @@ TEMP_PATH = os.path.join(RUNNER_TEMP, "docker_images_check")
|
||||
ImagesDict = Dict[str, dict]
|
||||
|
||||
|
||||
# workaround for mypy issue [1]:
|
||||
#
|
||||
# "Argument 1 to "map" has incompatible type overloaded function" [1]
|
||||
#
|
||||
# [1]: https://github.com/python/mypy/issues/9864
|
||||
#
|
||||
# NOTE: simply lambda will do the trick as well, but pylint will not like it
|
||||
def realpath(*args, **kwargs):
|
||||
return os.path.realpath(*args, **kwargs)
|
||||
|
||||
|
||||
class DockerImage:
|
||||
def __init__(
|
||||
self,
|
||||
@ -111,8 +123,23 @@ def get_changed_docker_images(
|
||||
changed_images = []
|
||||
|
||||
for dockerfile_dir, image_description in images_dict.items():
|
||||
source_dir = GITHUB_WORKSPACE.rstrip("/") + "/"
|
||||
dockerfile_files = glob(f"{source_dir}/{dockerfile_dir}/**", recursive=True)
|
||||
# resolve symlinks
|
||||
dockerfile_files = list(map(realpath, dockerfile_files))
|
||||
# trim prefix to get relative path again, to match with files_changed
|
||||
dockerfile_files = list(map(lambda x: x[len(source_dir) :], dockerfile_files))
|
||||
logging.info(
|
||||
"Docker %s (source_dir=%s) build context for PR %s @ %s: %s",
|
||||
dockerfile_dir,
|
||||
source_dir,
|
||||
pr_info.number,
|
||||
pr_info.sha,
|
||||
str(dockerfile_files),
|
||||
)
|
||||
|
||||
for f in files_changed:
|
||||
if f.startswith(dockerfile_dir):
|
||||
if f in dockerfile_files:
|
||||
name = image_description["name"]
|
||||
only_amd64 = image_description.get("only_amd64", False)
|
||||
logging.info(
|
||||
@ -245,6 +272,8 @@ def build_and_push_one_image(
|
||||
cache_from = f"{cache_from} --cache-from type=registry,ref={image.repo}:{tag}"
|
||||
|
||||
cmd = (
|
||||
# tar is requried to follow symlinks, since docker-build cannot do this
|
||||
f"tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#{image.full_path.lstrip('/')}#./#' --dereference --create {image.full_path} | "
|
||||
"docker buildx build --builder default "
|
||||
f"--label build-url={GITHUB_RUN_URL} "
|
||||
f"{from_tag_arg}"
|
||||
@ -254,7 +283,7 @@ def build_and_push_one_image(
|
||||
f"{cache_from} "
|
||||
f"--cache-to type=inline,mode=max "
|
||||
f"{push_arg}"
|
||||
f"--progress plain {image.full_path}"
|
||||
f"--progress plain -"
|
||||
)
|
||||
logging.info("Docker command to run: %s", cmd)
|
||||
with TeePopen(cmd, build_log) as proc:
|
||||
|
@ -126,12 +126,13 @@ class TestDockerImageCheck(unittest.TestCase):
|
||||
mock_popen.assert_called_once()
|
||||
mock_machine.assert_not_called()
|
||||
self.assertIn(
|
||||
"tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | "
|
||||
f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} "
|
||||
"--build-arg FROM_TAG=version "
|
||||
f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} "
|
||||
"--tag name:version --cache-from type=registry,ref=name:version "
|
||||
"--cache-from type=registry,ref=name:latest "
|
||||
"--cache-to type=inline,mode=max --push --progress plain path",
|
||||
"--cache-to type=inline,mode=max --push --progress plain -",
|
||||
mock_popen.call_args.args,
|
||||
)
|
||||
self.assertTrue(result)
|
||||
@ -143,12 +144,13 @@ class TestDockerImageCheck(unittest.TestCase):
|
||||
mock_popen.assert_called_once()
|
||||
mock_machine.assert_not_called()
|
||||
self.assertIn(
|
||||
"tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | "
|
||||
f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} "
|
||||
"--build-arg FROM_TAG=version2 "
|
||||
f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} "
|
||||
"--tag name:version2 --cache-from type=registry,ref=name:version2 "
|
||||
"--cache-from type=registry,ref=name:latest "
|
||||
"--cache-to type=inline,mode=max --progress plain path",
|
||||
"--cache-to type=inline,mode=max --progress plain -",
|
||||
mock_popen.call_args.args,
|
||||
)
|
||||
self.assertTrue(result)
|
||||
@ -160,11 +162,12 @@ class TestDockerImageCheck(unittest.TestCase):
|
||||
mock_popen.assert_called_once()
|
||||
mock_machine.assert_not_called()
|
||||
self.assertIn(
|
||||
"tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | "
|
||||
f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} "
|
||||
f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} "
|
||||
"--tag name:version2 --cache-from type=registry,ref=name:version2 "
|
||||
"--cache-from type=registry,ref=name:latest "
|
||||
"--cache-to type=inline,mode=max --progress plain path",
|
||||
"--cache-to type=inline,mode=max --progress plain -",
|
||||
mock_popen.call_args.args,
|
||||
)
|
||||
self.assertFalse(result)
|
||||
@ -178,13 +181,14 @@ class TestDockerImageCheck(unittest.TestCase):
|
||||
mock_popen.assert_called_once()
|
||||
mock_machine.assert_not_called()
|
||||
self.assertIn(
|
||||
"tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | "
|
||||
f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} "
|
||||
f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} "
|
||||
"--tag name:version2 --cache-from type=registry,ref=name:version2 "
|
||||
"--cache-from type=registry,ref=name:latest "
|
||||
"--cache-from type=registry,ref=name:cached-version "
|
||||
"--cache-from type=registry,ref=name:another-cached "
|
||||
"--cache-to type=inline,mode=max --progress plain path",
|
||||
"--cache-to type=inline,mode=max --progress plain -",
|
||||
mock_popen.call_args.args,
|
||||
)
|
||||
self.assertFalse(result)
|
||||
|
@ -12,6 +12,22 @@ from helpers.network import _NetworkManager
|
||||
logging.raiseExceptions = False
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True, scope="session")
|
||||
def tune_local_port_range():
|
||||
# Lots of services uses non privileged ports:
|
||||
# - hdfs -- 50020/50070/...
|
||||
# - minio
|
||||
# - mysql
|
||||
# - psql
|
||||
#
|
||||
# So instead of tuning all these thirdparty services, let's simply
|
||||
# prohibit using such ports for outgoing connections, this should fix
|
||||
# possible "Address already in use" errors.
|
||||
#
|
||||
# NOTE: 5K is not enough, and sometimes leads to EADDRNOTAVAIL error.
|
||||
run_and_check(["sysctl net.ipv4.ip_local_port_range='55000 65535'"], shell=True)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True, scope="session")
|
||||
def cleanup_environment():
|
||||
try:
|
||||
|
@ -45,5 +45,6 @@
|
||||
|
||||
<merge_tree>
|
||||
<allow_remote_fs_zero_copy_replication>true</allow_remote_fs_zero_copy_replication>
|
||||
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
|
||||
</merge_tree>
|
||||
</clickhouse>
|
||||
|
@ -980,6 +980,89 @@ def query_event_with_empty_transaction(clickhouse_node, mysql_node, service_name
|
||||
mysql_node.query("DROP DATABASE test_database_event")
|
||||
|
||||
|
||||
def text_blob_with_charset_test(clickhouse_node, mysql_node, service_name):
|
||||
db = "text_blob_with_charset_test"
|
||||
mysql_node.query(f"DROP DATABASE IF EXISTS {db}")
|
||||
clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}")
|
||||
mysql_node.query(f"CREATE DATABASE {db} DEFAULT CHARACTER SET 'utf8'")
|
||||
|
||||
mysql_node.query(
|
||||
f"CREATE TABLE {db}.test_table_1 (a INT NOT NULL PRIMARY KEY, b text CHARACTER SET gbk, c tinytext CHARSET big5, d longtext, e varchar(256), f char(4)) ENGINE = InnoDB DEFAULT CHARSET=gbk"
|
||||
)
|
||||
mysql_node.query(
|
||||
f"CREATE TABLE {db}.test_table_2 (a INT NOT NULL PRIMARY KEY, b blob, c longblob) ENGINE = InnoDB DEFAULT CHARSET=gbk"
|
||||
)
|
||||
mysql_node.query(
|
||||
f"CREATE TABLE {db}.test_table_3 (a INT NOT NULL PRIMARY KEY, b text CHARACTER SET gbk, c tinytext CHARSET gbk, d tinytext CHARSET big5, e varchar(256), f char(4)) ENGINE = InnoDB"
|
||||
)
|
||||
|
||||
mysql_node.query(
|
||||
f"INSERT INTO {db}.test_table_1 VALUES (1, '你好', '世界', '哈罗', '您Hi您', '您Hi您')"
|
||||
)
|
||||
mysql_node.query(
|
||||
f"INSERT INTO {db}.test_table_2 VALUES (1, '你好', 0xFAAA00000000000DDCC)"
|
||||
)
|
||||
mysql_node.query(
|
||||
f"INSERT INTO {db}.test_table_3 VALUES (1, '你好', '世界', 'hello', '您Hi您', '您Hi您')"
|
||||
)
|
||||
|
||||
clickhouse_node.query(
|
||||
f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')"
|
||||
)
|
||||
assert db in clickhouse_node.query("SHOW DATABASES")
|
||||
|
||||
# from full replication
|
||||
check_query(
|
||||
clickhouse_node,
|
||||
f"SHOW TABLES FROM {db} FORMAT TSV",
|
||||
"test_table_1\ntest_table_2\ntest_table_3\n",
|
||||
)
|
||||
check_query(
|
||||
clickhouse_node,
|
||||
f"SELECT b, c, d, e, f FROM {db}.test_table_1 WHERE a = 1 FORMAT TSV",
|
||||
"你好\t世界\t哈罗\t您Hi您\t您Hi您\n",
|
||||
)
|
||||
check_query(
|
||||
clickhouse_node,
|
||||
f"SELECT hex(b), hex(c) FROM {db}.test_table_2 WHERE a = 1 FORMAT TSV",
|
||||
"E4BDA0E5A5BD\t0FAAA00000000000DDCC\n",
|
||||
)
|
||||
check_query(
|
||||
clickhouse_node,
|
||||
f"SELECT b, c, d, e, f FROM {db}.test_table_3 WHERE a = 1 FORMAT TSV",
|
||||
"你好\t世界\thello\t您Hi您\t您Hi您\n",
|
||||
)
|
||||
|
||||
# from increment replication
|
||||
mysql_node.query(
|
||||
f"INSERT INTO {db}.test_table_1 VALUES (2, '你好', '世界', '哈罗', '您Hi您', '您Hi您')"
|
||||
)
|
||||
mysql_node.query(
|
||||
f"INSERT INTO {db}.test_table_2 VALUES (2, '你好', 0xFAAA00000000000DDCC)"
|
||||
)
|
||||
mysql_node.query(
|
||||
f"INSERT INTO {db}.test_table_3 VALUES (2, '你好', '世界', 'hello', '您Hi您', '您Hi您')"
|
||||
)
|
||||
|
||||
check_query(
|
||||
clickhouse_node,
|
||||
f"SELECT b, c, d, e, f FROM {db}.test_table_1 WHERE a = 2 FORMAT TSV",
|
||||
"你好\t世界\t哈罗\t您Hi您\t您Hi您\n",
|
||||
)
|
||||
check_query(
|
||||
clickhouse_node,
|
||||
f"SELECT hex(b), hex(c) FROM {db}.test_table_2 WHERE a = 2 FORMAT TSV",
|
||||
"E4BDA0E5A5BD\t0FAAA00000000000DDCC\n",
|
||||
)
|
||||
check_query(
|
||||
clickhouse_node,
|
||||
f"SELECT b, c, d, e, f FROM {db}.test_table_3 WHERE a = 2 FORMAT TSV",
|
||||
"你好\t世界\thello\t您Hi您\t您Hi您\n",
|
||||
)
|
||||
clickhouse_node.query(f"DROP DATABASE {db}")
|
||||
mysql_node.query(f"DROP DATABASE {db}")
|
||||
|
||||
|
||||
def select_without_columns(clickhouse_node, mysql_node, service_name):
|
||||
mysql_node.query("DROP DATABASE IF EXISTS db")
|
||||
clickhouse_node.query("DROP DATABASE IF EXISTS db")
|
||||
|
@ -262,6 +262,12 @@ def test_materialized_database_ddl_with_empty_transaction_8_0(
|
||||
)
|
||||
|
||||
|
||||
def test_text_blob_charset(started_cluster, started_mysql_8_0, clickhouse_node):
|
||||
materialized_with_ddl.text_blob_with_charset_test(
|
||||
clickhouse_node, started_mysql_8_0, "mysql80"
|
||||
)
|
||||
|
||||
|
||||
def test_select_without_columns_5_7(
|
||||
started_cluster, started_mysql_5_7, clickhouse_node
|
||||
):
|
||||
|
@ -28,6 +28,7 @@
|
||||
|
||||
<merge_tree>
|
||||
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
|
||||
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
|
||||
</merge_tree>
|
||||
<allow_remove_stale_moving_parts>true</allow_remove_stale_moving_parts>
|
||||
</clickhouse>
|
||||
|
@ -152,6 +152,7 @@
|
||||
|
||||
<merge_tree>
|
||||
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
|
||||
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
|
||||
</merge_tree>
|
||||
|
||||
<database_catalog_unused_dir_hide_timeout_sec>0</database_catalog_unused_dir_hide_timeout_sec>
|
||||
|
@ -0,0 +1,5 @@
|
||||
<clickhouse>
|
||||
<merge_tree>
|
||||
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
|
||||
</merge_tree>
|
||||
</clickhouse>
|
@ -1,5 +0,0 @@
|
||||
<clickhouse>
|
||||
<profiles>
|
||||
<default/>
|
||||
</profiles>
|
||||
</clickhouse>
|
@ -1,18 +0,0 @@
|
||||
<clickhouse>
|
||||
<tcp_port>9000</tcp_port>
|
||||
<listen_host>127.0.0.1</listen_host>
|
||||
|
||||
<openSSL>
|
||||
<client>
|
||||
<cacheSessions>true</cacheSessions>
|
||||
<verificationMode>none</verificationMode>
|
||||
<invalidCertificateHandler>
|
||||
<name>AcceptCertificateHandler</name>
|
||||
</invalidCertificateHandler>
|
||||
</client>
|
||||
</openSSL>
|
||||
|
||||
<max_concurrent_queries>500</max_concurrent_queries>
|
||||
<path>./clickhouse/</path>
|
||||
<users_config>users.xml</users_config>
|
||||
</clickhouse>
|
@ -67,6 +67,7 @@ def cluster():
|
||||
"configs/config.d/storage_conf.xml",
|
||||
"configs/config.d/instant_moves.xml",
|
||||
"configs/config.d/part_log.xml",
|
||||
"configs/config.d/merge_tree.xml",
|
||||
],
|
||||
with_minio=True,
|
||||
)
|
||||
|
@ -1718,7 +1718,7 @@ def test_freeze(start_cluster):
|
||||
) ENGINE = MergeTree
|
||||
ORDER BY tuple()
|
||||
PARTITION BY toYYYYMM(d)
|
||||
SETTINGS storage_policy='small_jbod_with_external', compress_marks=false, compress_primary_key=false
|
||||
SETTINGS storage_policy='small_jbod_with_external', compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1
|
||||
"""
|
||||
)
|
||||
|
||||
|
@ -38,7 +38,7 @@ def partition_table_simple(started_cluster):
|
||||
q(
|
||||
"CREATE TABLE test.partition_simple (date MATERIALIZED toDate(0), x UInt64, sample_key MATERIALIZED intHash64(x)) "
|
||||
"ENGINE=MergeTree PARTITION BY date SAMPLE BY sample_key ORDER BY (date,x,sample_key) "
|
||||
"SETTINGS index_granularity=8192, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false"
|
||||
"SETTINGS index_granularity=8192, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
|
||||
)
|
||||
q("INSERT INTO test.partition_simple ( x ) VALUES ( now() )")
|
||||
q("INSERT INTO test.partition_simple ( x ) VALUES ( now()+1 )")
|
||||
@ -150,7 +150,7 @@ def partition_table_complex(started_cluster):
|
||||
q("DROP TABLE IF EXISTS test.partition_complex")
|
||||
q(
|
||||
"CREATE TABLE test.partition_complex (p Date, k Int8, v1 Int8 MATERIALIZED k + 1) "
|
||||
"ENGINE = MergeTree PARTITION BY p ORDER BY k SETTINGS index_granularity=1, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false"
|
||||
"ENGINE = MergeTree PARTITION BY p ORDER BY k SETTINGS index_granularity=1, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
|
||||
)
|
||||
q("INSERT INTO test.partition_complex (p, k) VALUES(toDate(31), 1)")
|
||||
q("INSERT INTO test.partition_complex (p, k) VALUES(toDate(1), 2)")
|
||||
@ -188,7 +188,7 @@ def test_partition_complex(partition_table_complex):
|
||||
def cannot_attach_active_part_table(started_cluster):
|
||||
q("DROP TABLE IF EXISTS test.attach_active")
|
||||
q(
|
||||
"CREATE TABLE test.attach_active (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 4) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false"
|
||||
"CREATE TABLE test.attach_active (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 4) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
|
||||
)
|
||||
q("INSERT INTO test.attach_active SELECT number FROM system.numbers LIMIT 16")
|
||||
|
||||
@ -217,7 +217,7 @@ def attach_check_all_parts_table(started_cluster):
|
||||
q("DROP TABLE IF EXISTS test.attach_partition")
|
||||
q(
|
||||
"CREATE TABLE test.attach_partition (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n "
|
||||
"SETTINGS compress_marks=false, compress_primary_key=false, old_parts_lifetime=0"
|
||||
"SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1, old_parts_lifetime=0"
|
||||
)
|
||||
q(
|
||||
"INSERT INTO test.attach_partition SELECT number FROM system.numbers WHERE number % 2 = 0 LIMIT 8"
|
||||
@ -299,7 +299,7 @@ def drop_detached_parts_table(started_cluster):
|
||||
q("SYSTEM STOP MERGES")
|
||||
q("DROP TABLE IF EXISTS test.drop_detached")
|
||||
q(
|
||||
"CREATE TABLE test.drop_detached (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false"
|
||||
"CREATE TABLE test.drop_detached (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
|
||||
)
|
||||
q(
|
||||
"INSERT INTO test.drop_detached SELECT number FROM system.numbers WHERE number % 2 = 0 LIMIT 8"
|
||||
@ -370,13 +370,13 @@ def test_drop_detached_parts(drop_detached_parts_table):
|
||||
|
||||
def test_system_detached_parts(drop_detached_parts_table):
|
||||
q(
|
||||
"create table sdp_0 (n int, x int) engine=MergeTree order by n SETTINGS compress_marks=false, compress_primary_key=false"
|
||||
"create table sdp_0 (n int, x int) engine=MergeTree order by n SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
|
||||
)
|
||||
q(
|
||||
"create table sdp_1 (n int, x int) engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false"
|
||||
"create table sdp_1 (n int, x int) engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
|
||||
)
|
||||
q(
|
||||
"create table sdp_2 (n int, x String) engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false"
|
||||
"create table sdp_2 (n int, x String) engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
|
||||
)
|
||||
q(
|
||||
"create table sdp_3 (n int, x Enum('broken' = 0, 'all' = 1)) engine=MergeTree order by n partition by x"
|
||||
@ -497,7 +497,7 @@ def test_system_detached_parts(drop_detached_parts_table):
|
||||
def test_detached_part_dir_exists(started_cluster):
|
||||
q(
|
||||
"create table detached_part_dir_exists (n int) engine=MergeTree order by n "
|
||||
"SETTINGS compress_marks=false, compress_primary_key=false, old_parts_lifetime=0"
|
||||
"SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1, old_parts_lifetime=0"
|
||||
)
|
||||
q("insert into detached_part_dir_exists select 1") # will create all_1_1_0
|
||||
q(
|
||||
@ -549,7 +549,7 @@ def test_detached_part_dir_exists(started_cluster):
|
||||
|
||||
def test_make_clone_in_detached(started_cluster):
|
||||
q(
|
||||
"create table clone_in_detached (n int, m String) engine=ReplicatedMergeTree('/clone_in_detached', '1') order by n SETTINGS compress_marks=false, compress_primary_key=false"
|
||||
"create table clone_in_detached (n int, m String) engine=ReplicatedMergeTree('/clone_in_detached', '1') order by n SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"
|
||||
)
|
||||
|
||||
path = path_to_data + "data/default/clone_in_detached/"
|
||||
|
@ -498,7 +498,7 @@ def test_polymorphic_parts_index(start_cluster):
|
||||
"""
|
||||
CREATE TABLE test_index.index_compact(a UInt32, s String)
|
||||
ENGINE = MergeTree ORDER BY a
|
||||
SETTINGS min_rows_for_wide_part = 1000, index_granularity = 128, merge_max_block_size = 100, compress_marks=false, compress_primary_key=false"""
|
||||
SETTINGS min_rows_for_wide_part = 1000, index_granularity = 128, merge_max_block_size = 100, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1"""
|
||||
)
|
||||
|
||||
node1.query(
|
||||
|
@ -35,6 +35,7 @@
|
||||
<merge_tree>
|
||||
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
|
||||
<allow_remote_fs_zero_copy_replication>0</allow_remote_fs_zero_copy_replication>
|
||||
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
|
||||
</merge_tree>
|
||||
|
||||
<remote_servers>
|
||||
|
@ -29,6 +29,7 @@
|
||||
<merge_tree>
|
||||
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
|
||||
<allow_remote_fs_zero_copy_replication>true</allow_remote_fs_zero_copy_replication>
|
||||
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
|
||||
</merge_tree>
|
||||
|
||||
<remote_servers>
|
||||
|
@ -70,6 +70,7 @@
|
||||
<min_bytes_for_wide_part>1024</min_bytes_for_wide_part>
|
||||
<old_parts_lifetime>1</old_parts_lifetime>
|
||||
<allow_remote_fs_zero_copy_replication>true</allow_remote_fs_zero_copy_replication>
|
||||
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
|
||||
</merge_tree>
|
||||
|
||||
<remote_servers>
|
||||
|
@ -32,6 +32,7 @@
|
||||
|
||||
<merge_tree>
|
||||
<allow_remote_fs_zero_copy_replication>true</allow_remote_fs_zero_copy_replication>
|
||||
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
|
||||
</merge_tree>
|
||||
|
||||
<allow_remove_stale_moving_parts>true</allow_remove_stale_moving_parts>
|
||||
|
Some files were not shown because too many files have changed in this diff.