Merge branch 'master' into lower-contention-on-stacktrace-cache

This commit is contained in:
serxa 2024-04-10 12:08:21 +00:00
commit 5d576bb268
121 changed files with 2040 additions and 454 deletions

View File

@ -119,7 +119,6 @@ Checks: [
'-readability-named-parameter',
'-readability-redundant-declaration',
'-readability-simplify-boolean-expr',
'-readability-static-accessed-through-instance',
'-readability-suspicious-call-argument',
'-readability-uppercase-literal-suffix',
'-readability-use-anyofallof',

View File

@ -4835,7 +4835,7 @@ for (;; ptr++)
If the class contains characters outside the 0-255 range, a different
opcode is compiled. It may optionally have a bit map for characters < 256,
but those above are are explicitly listed afterwards. A flag byte tells
but those above are explicitly listed afterwards. A flag byte tells
whether the bitmap is present, and whether this is a negated class or not.
In JavaScript compatibility mode, an isolated ']' causes an error. In

View File

@ -59,12 +59,3 @@ target_link_libraries (_avrocpp PRIVATE boost::headers_only boost::iostreams)
target_compile_definitions (_avrocpp PUBLIC SNAPPY_CODEC_AVAILABLE)
target_include_directories (_avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR})
target_link_libraries (_avrocpp PRIVATE ch_contrib::snappy)
# create a symlink to include headers with <avro/...>
set(AVRO_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include")
ADD_CUSTOM_TARGET(avro_symlink_headers ALL
COMMAND ${CMAKE_COMMAND} -E make_directory "${AVRO_INCLUDE_DIR}"
COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVRO_INCLUDE_DIR}/avro"
)
add_dependencies(_avrocpp avro_symlink_headers)
target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVRO_INCLUDE_DIR}")

View File

@ -1,26 +1,18 @@
option (ENABLE_SSH "Enable support for SSH keys and protocol" ${ENABLE_LIBRARIES})
option (ENABLE_SSH "Enable support for libssh" ${ENABLE_LIBRARIES})
if (NOT ENABLE_SSH)
message(STATUS "Not using SSH")
message(STATUS "Not using libssh")
return()
endif()
# CMake variables needed by libssh_version.h.cmake, update them when you update libssh
set(libssh_VERSION_MAJOR 0)
set(libssh_VERSION_MINOR 9)
set(libssh_VERSION_PATCH 8)
set(LIB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libssh")
set(LIB_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/libssh")
# Set CMake variables which are used in libssh_version.h.cmake
project(libssh VERSION 0.9.8 LANGUAGES C)
set(LIBRARY_VERSION "4.8.8")
set(LIBRARY_SOVERSION "4")
set(CMAKE_THREAD_PREFER_PTHREADS ON)
set(THREADS_PREFER_PTHREAD_FLAG ON)
set(WITH_ZLIB OFF)
set(WITH_SYMBOL_VERSIONING OFF)
set(WITH_SERVER ON)
set(libssh_SRCS
${LIB_SOURCE_DIR}/src/agent.c
${LIB_SOURCE_DIR}/src/auth.c
@ -28,15 +20,21 @@ set(libssh_SRCS
${LIB_SOURCE_DIR}/src/bignum.c
${LIB_SOURCE_DIR}/src/buffer.c
${LIB_SOURCE_DIR}/src/callbacks.c
${LIB_SOURCE_DIR}/src/chachapoly.c
${LIB_SOURCE_DIR}/src/channels.c
${LIB_SOURCE_DIR}/src/client.c
${LIB_SOURCE_DIR}/src/config.c
${LIB_SOURCE_DIR}/src/config_parser.c
${LIB_SOURCE_DIR}/src/connect.c
${LIB_SOURCE_DIR}/src/connector.c
${LIB_SOURCE_DIR}/src/curve25519.c
${LIB_SOURCE_DIR}/src/dh.c
${LIB_SOURCE_DIR}/src/ecdh.c
${LIB_SOURCE_DIR}/src/error.c
${LIB_SOURCE_DIR}/src/external/bcrypt_pbkdf.c
${LIB_SOURCE_DIR}/src/external/blowfish.c
${LIB_SOURCE_DIR}/src/external/chacha.c
${LIB_SOURCE_DIR}/src/external/poly1305.c
${LIB_SOURCE_DIR}/src/getpass.c
${LIB_SOURCE_DIR}/src/init.c
${LIB_SOURCE_DIR}/src/kdf.c
@ -55,37 +53,32 @@ set(libssh_SRCS
${LIB_SOURCE_DIR}/src/pcap.c
${LIB_SOURCE_DIR}/src/pki.c
${LIB_SOURCE_DIR}/src/pki_container_openssh.c
${LIB_SOURCE_DIR}/src/pki_ed25519_common.c
${LIB_SOURCE_DIR}/src/poll.c
${LIB_SOURCE_DIR}/src/session.c
${LIB_SOURCE_DIR}/src/scp.c
${LIB_SOURCE_DIR}/src/session.c
${LIB_SOURCE_DIR}/src/socket.c
${LIB_SOURCE_DIR}/src/string.c
${LIB_SOURCE_DIR}/src/threads.c
${LIB_SOURCE_DIR}/src/wrapper.c
${LIB_SOURCE_DIR}/src/external/bcrypt_pbkdf.c
${LIB_SOURCE_DIR}/src/external/blowfish.c
${LIB_SOURCE_DIR}/src/external/chacha.c
${LIB_SOURCE_DIR}/src/external/poly1305.c
${LIB_SOURCE_DIR}/src/chachapoly.c
${LIB_SOURCE_DIR}/src/config_parser.c
${LIB_SOURCE_DIR}/src/token.c
${LIB_SOURCE_DIR}/src/pki_ed25519_common.c
${LIB_SOURCE_DIR}/src/wrapper.c
# some files of libssh/src/ are missing - why?
${LIB_SOURCE_DIR}/src/threads/noop.c
${LIB_SOURCE_DIR}/src/threads/pthread.c
# files missing - why?
# LIBCRYPT specific
${libssh_SRCS}
${LIB_SOURCE_DIR}/src/threads/libcrypto.c
${LIB_SOURCE_DIR}/src/pki_crypto.c
${LIB_SOURCE_DIR}/src/dh_crypto.c
${LIB_SOURCE_DIR}/src/ecdh_crypto.c
${LIB_SOURCE_DIR}/src/libcrypto.c
${LIB_SOURCE_DIR}/src/dh_crypto.c
${LIB_SOURCE_DIR}/src/pki_crypto.c
${LIB_SOURCE_DIR}/src/threads/libcrypto.c
${LIB_SOURCE_DIR}/src/options.c
${LIB_SOURCE_DIR}/src/server.c
${LIB_SOURCE_DIR}/src/bind.c
${LIB_SOURCE_DIR}/src/bind_config.c
${LIB_SOURCE_DIR}/src/options.c
${LIB_SOURCE_DIR}/src/server.c
)
if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC))
@ -94,7 +87,7 @@ endif()
configure_file(${LIB_SOURCE_DIR}/include/libssh/libssh_version.h.cmake ${LIB_BINARY_DIR}/include/libssh/libssh_version.h @ONLY)
add_library(_ssh STATIC ${libssh_SRCS})
add_library(_ssh ${libssh_SRCS})
add_library(ch_contrib::ssh ALIAS _ssh)
target_link_libraries(_ssh PRIVATE OpenSSL::Crypto)

View File

@ -45,6 +45,11 @@ Upper and lower bounds can be specified to limit Memory engine table size, effec
CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 100, max_rows_to_keep = 1000;
```
**Modify settings**
```sql
ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000;
```
**Note:** Both `bytes` and `rows` capping parameters can be set at the same time; however, the lower bounds of `max` and `min` will be adhered to.
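For instance, a minimal sketch combining both caps (assuming the `min_bytes_to_keep`/`max_bytes_to_keep` settings of the Memory engine; whichever `max` bound is reached first takes effect):
```sql
CREATE TABLE memory_capped (i UInt32) ENGINE = Memory
SETTINGS min_rows_to_keep = 100, max_rows_to_keep = 1000,
         min_bytes_to_keep = 4096, max_bytes_to_keep = 16384;
```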
## Examples {#examples}
@ -97,3 +102,4 @@ SELECT total_bytes, total_rows FROM system.tables WHERE name = 'memory' and data
│ 65536 │ 10000 │
└─────────────┴────────────┘
```

View File

@ -7,7 +7,7 @@ title: "Crowdsourced air traffic data from The OpenSky Network 2020"
The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. It spans all flights seen by the network's more than 2500 members since 1 January 2019. More data will be periodically included in the dataset until the end of the COVID-19 pandemic.
Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd
Source: https://zenodo.org/records/5092942
Martin Strohmeier, Xavier Olive, Jannis Luebbe, Matthias Schaefer, and Vincent Lenders
"Crowdsourced air traffic data from the OpenSky Network 20192020"
@ -19,7 +19,7 @@ https://doi.org/10.5194/essd-13-357-2021
Run the command:
```bash
wget -O- https://zenodo.org/record/5092942 | grep -oP 'https://zenodo.org/record/5092942/files/flightlist_\d+_\d+\.csv\.gz' | xargs wget
wget -O- https://zenodo.org/records/5092942 | grep -oE 'https://zenodo.org/records/5092942/files/flightlist_[0-9]+_[0-9]+\.csv\.gz' | xargs wget
```
Download will take about 2 minutes with good internet connection. There are 30 files with total size of 4.3 GB.
@ -127,15 +127,15 @@ Average flight distance is around 1000 km.
Query:
```sql
SELECT avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) FROM opensky;
SELECT round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 2) FROM opensky;
```
Result:
```text
┌─avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))─┐
│                                                  1041090.6465708319 │
└────────────────────────────────────────────────────────────────────┘
┌─round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 2)─┐
1. │                                                                    1041090.67 │ -- 1.04 million
   └──────────────────────────────────────────────────────────────────────────────┘
```
### Most busy origin airports and the average distance seen {#busy-airports-average-distance}

View File

@ -67,8 +67,7 @@ SETTINGS use_query_cache = true, enable_writes_to_query_cache = false;
For maximum control, it is generally recommended to provide settings `use_query_cache`, `enable_writes_to_query_cache` and
`enable_reads_from_query_cache` only with specific queries. It is also possible to enable caching at user or profile level (e.g. via `SET
use_query_cache = true`) but one should keep in mind that all `SELECT` queries including monitoring or debugging queries to system tables
may return cached results then.
use_query_cache = true`) but one should keep in mind that all `SELECT` queries may return cached results then.
The query cache can be cleared using statement `SYSTEM DROP QUERY CACHE`. The content of the query cache is displayed in system table
[system.query_cache](system-tables/query_cache.md). The number of query cache hits and misses since database start are shown as events
@ -175,6 +174,10 @@ Also, results of queries with non-deterministic functions are not cached by defa
To force caching of results of queries with non-deterministic functions regardless, use setting
[query_cache_nondeterministic_function_handling](settings/settings.md#query-cache-nondeterministic-function-handling).
Results of queries that involve system tables, e.g. `system.processes` or `information_schema.tables`, are not cached by default. To force
caching of results of queries with system tables regardless, use setting
[query_cache_system_table_handling](settings/settings.md#query-cache-system-table-handling).
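For example, a sketch of opting such a query into the cache with this setting:
```sql
-- Without the setting, caching a system-table query would throw by default.
SELECT count() FROM system.tables
SETTINGS use_query_cache = true, query_cache_system_table_handling = 'save';
```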
:::note
Prior to ClickHouse v23.11, setting 'query_cache_store_results_of_queries_with_nondeterministic_functions = 0 / 1' controlled whether
results of queries with non-deterministic results were cached. In newer ClickHouse versions, this setting is obsolete and has no effect.

View File

@ -287,7 +287,7 @@ Default value: 0 (seconds)
## remote_fs_execute_merges_on_single_replica_time_threshold
When this setting has a value greater than than zero only a single replica starts the merge immediately if merged part on shared storage and `allow_remote_fs_zero_copy_replication` is enabled.
When this setting has a value greater than zero only a single replica starts the merge immediately if merged part on shared storage and `allow_remote_fs_zero_copy_replication` is enabled.
:::note Zero-copy replication is not ready for production
Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.

View File

@ -1689,6 +1689,18 @@ Possible values:
Default value: `throw`.
## query_cache_system_table_handling {#query-cache-system-table-handling}
Controls how the [query cache](../query-cache.md) handles `SELECT` queries against system tables, i.e. tables in databases `system.*` and `information_schema.*`.
Possible values:
- `'throw'` - Throw an exception and don't cache the query result.
- `'save'` - Cache the query result.
- `'ignore'` - Don't cache the query result and don't throw an exception.
Default value: `throw`.
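A minimal usage sketch with one of the values listed above:
```sql
-- Run with the query cache on, but silently skip caching because a system table is involved.
SELECT name FROM system.databases
SETTINGS use_query_cache = true, query_cache_system_table_handling = 'ignore';
```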
## query_cache_min_query_runs {#query-cache-min-query-runs}
Minimum number of times a `SELECT` query must run before its result is stored in the [query cache](../query-cache.md).
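For illustration, a sketch that stores the result only after the query has run several times:
```sql
-- The result enters the query cache only once this exact query has run 5 times.
SELECT count() FROM numbers(1000000)
SETTINGS use_query_cache = true, query_cache_min_query_runs = 5;
```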
@ -5302,7 +5314,7 @@ SETTINGS(dictionary_use_async_executor=1, max_threads=8);
## storage_metadata_write_full_object_key {#storage_metadata_write_full_object_key}
When set to `true` the metadata files are written with `VERSION_FULL_OBJECT_KEY` format version. With that format full object storage key names are written to the metadata files.
When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format only suffixes of object storage key names are are written to the metadata files. The prefix for all of object storage key names is set in configurations files at `storage_configuration.disks` section.
When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format only suffixes of object storage key names are written to the metadata files. The prefix for all of object storage key names is set in configurations files at `storage_configuration.disks` section.
Default value: `false`.

View File

@ -15,9 +15,9 @@ The `uniqCombined` function is a good choice for calculating the number of diffe
**Arguments**
The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
- `HLL_precision`: The base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optional, you can use the function as `uniqCombined(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each).
- `X`: A variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
`HLL_precision` is the base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optional, you can use the function as `uniqCombined(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each).
**Returned value**
@ -25,26 +25,43 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, `
**Implementation details**
Function:
The `uniqCombined` function:
- Calculates a hash (64-bit hash for `String` and 32-bit otherwise) for all parameters in the aggregate, then uses it in calculations.
- Uses a combination of three algorithms: array, hash table, and HyperLogLog with an error correction table.
For a small number of distinct elements, an array is used. When the set size is larger, a hash table is used. For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory.
- For a small number of distinct elements, an array is used.
- When the set size is larger, a hash table is used.
- For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory.
- Provides the result deterministically (it does not depend on the query processing order).
:::note
Since it uses 32-bit hash for non-`String` type, the result will have very high error for cardinalities significantly larger than `UINT_MAX` (error will raise quickly after a few tens of billions of distinct values), hence in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
Since it uses a 32-bit hash for non-`String` types, the result will have very high error for cardinalities significantly larger than `UINT_MAX` (error will raise quickly after a few tens of billions of distinct values), hence in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64).
:::
Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined`:
Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined` function:
- Consumes several times less memory.
- Calculates with several times higher accuracy.
- Usually has slightly lower performance. In some scenarios, `uniqCombined` can perform better than `uniq`, for example, with distributed queries that transmit a large number of aggregation states over the network.
**Example**
Query:
```sql
SELECT uniqCombined(number) FROM numbers(1e6);
```
Result:
```response
┌─uniqCombined(number)─┐
│ 1001148 │ -- 1.00 million
└──────────────────────┘
```
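The `HLL_precision` parameter can also be passed explicitly. A minimal sketch, assuming a precision of 12 is accepted (a lower precision should use less memory than the default 17 at the cost of accuracy):
```sql
SELECT uniqCombined(12)(number) FROM numbers(1e6);
```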
See the example section of [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) for an example of the difference between `uniqCombined` and `uniqCombined64` for much larger inputs.
**See Also**
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq)

View File

@ -5,4 +5,78 @@ sidebar_position: 193
# uniqCombined64
Same as [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined), but uses 64-bit hash for all data types.
Calculates the approximate number of different argument values. It is the same as [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined), but uses a 64-bit hash for all data types rather than just for the String data type.
``` sql
uniqCombined64(HLL_precision)(x[, ...])
```
**Parameters**
- `HLL_precision`: The base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optionally, you can use the function as `uniqCombined64(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each).
- `X`: A variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
**Returned value**
- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number.
**Implementation details**
The `uniqCombined64` function:
- Calculates a hash (64-bit hash for all data types) for all parameters in the aggregate, then uses it in calculations.
- Uses a combination of three algorithms: array, hash table, and HyperLogLog with an error correction table.
- For a small number of distinct elements, an array is used.
- When the set size is larger, a hash table is used.
- For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory.
- Provides the result deterministically (it does not depend on the query processing order).
:::note
Since it uses a 64-bit hash for all types, the result does not suffer from very high error for cardinalities significantly larger than `UINT_MAX` like [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md) does, which uses a 32-bit hash for non-`String` types.
:::
Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined64` function:
- Consumes several times less memory.
- Calculates with several times higher accuracy.
**Example**
In the example below `uniqCombined64` is run on `1e10` different numbers returning a very close approximation of the number of different argument values.
Query:
```sql
SELECT uniqCombined64(number) FROM numbers(1e10);
```
Result:
```response
┌─uniqCombined64(number)─┐
│ 9998568925 │ -- 10.00 billion
└────────────────────────┘
```
By comparison, the `uniqCombined` function returns a rather poor approximation for an input this size.
Query:
```sql
SELECT uniqCombined(number) FROM numbers(1e10);
```
Result:
```response
┌─uniqCombined(number)─┐
│ 5545308725 │ -- 5.55 billion
└──────────────────────┘
```
**See Also**
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq)
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md)
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12)
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact)
- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch)

View File

@ -81,6 +81,43 @@ Result:
│ 2.23606797749979 │
└──────────────────┘
```
## L2SquaredNorm
Calculates the square of the [L2Norm](#l2norm), i.e. the sum of the squares of the vector values.
**Syntax**
```sql
L2SquaredNorm(vector)
```
Alias: `normL2Squared`.
**Arguments**
- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
**Returned value**
- L2-norm squared.
Type: [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT L2SquaredNorm((1, 2));
```
Result:
```text
┌─L2SquaredNorm((1, 2))─┐
│ 5 │
└───────────────────────┘
```
## LinfNorm

View File

Calculates JumpConsistentHash from a UInt64.
Accepts two arguments: a UInt64-type key and the number of buckets. Returns Int32.
For more information, see the link: [JumpConsistentHash](https://arxiv.org/pdf/1406.2294.pdf)
## kostikConsistentHash
An O(1) time and space consistent hash algorithm by Konstantin 'kostik' Oblakov. Previously `yandexConsistentHash`.
**Syntax**
```sql
kostikConsistentHash(input, n)
```
Alias: `yandexConsistentHash` (kept for backwards compatibility).
**Parameters**
- `input`: A UInt64-type key [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
- `n`: Number of buckets. [UInt16](/docs/en/sql-reference/data-types/int-uint.md).
**Returned value**
- A [UInt16](/docs/en/sql-reference/data-types/int-uint.md) data type hash value.
**Implementation details**
It is efficient only if `n <= 32768`.
**Example**
Query:
```sql
SELECT kostikConsistentHash(16045690984833335023, 2);
```
Result:
```response
┌─kostikConsistentHash(16045690984833335023, 2)─┐
│ 1 │
└───────────────────────────────────────────────┘
```
## murmurHash2_32, murmurHash2_64
Produces a [MurmurHash2](https://github.com/aappleby/smhasher) hash value.
@ -1153,6 +1192,42 @@ Result:
└────────────┘
```
## wyHash64
Produces a 64-bit [wyHash64](https://github.com/wangyi-fudan/wyhash) hash value.
**Syntax**
```sql
wyHash64(string)
```
**Arguments**
- `string` — String. [String](/docs/en/sql-reference/data-types/string.md).
**Returned value**
- Hash value.
Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
**Example**
Query:
```sql
SELECT wyHash64('ClickHouse') AS Hash;
```
Result:
```response
┌─────────────────Hash─┐
│ 12336419557878201794 │
└──────────────────────┘
```
## ngramMinHash
Splits an ASCII string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive.

View File

@ -260,8 +260,36 @@ Alias: `lcase`
Converts the ASCII Latin symbols in a string to uppercase.
**Syntax**
``` sql
upper(input)
```
Alias: `ucase`
**Parameters**
- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md).
**Returned value**
- A [String](/docs/en/sql-reference/data-types/string.md) data type value.
**Examples**
Query:
``` sql
SELECT upper('value') as Upper;
```
``` response
┌─Upper─┐
│ VALUE │
└───────┘
```
## lowerUTF8
Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
@ -278,6 +306,34 @@ Does not detect the language, e.g. for Turkish the result might not be exactly c
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
**Syntax**
``` sql
upperUTF8(input)
```
**Parameters**
- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md).
**Returned value**
- A [String](/docs/en/sql-reference/data-types/string.md) data type value.
**Example**
Query:
``` sql
SELECT upperUTF8('München') as Upperutf8;
```
``` response
┌─Upperutf8─┐
│ MÜNCHEN │
└───────────┘
```
## isValidUTF8
Returns 1, if the set of bytes constitutes valid UTF-8-encoded text, otherwise 0.

View File

@ -193,3 +193,33 @@ Result:
## translateUTF8
Like [translate](#translate) but assumes `s`, `from` and `to` are UTF-8 encoded strings.
**Syntax**
``` sql
translateUTF8(s, from, to)
```
**Parameters**
- `s`: A string type [String](/docs/en/sql-reference/data-types/string.md).
- `from`: A string type [String](/docs/en/sql-reference/data-types/string.md).
- `to`: A string type [String](/docs/en/sql-reference/data-types/string.md).
**Returned value**
- A [String](/docs/en/sql-reference/data-types/string.md) data type value.
**Examples**
Query:
``` sql
SELECT translateUTF8('Münchener Straße', 'üß', 'us') AS res;
```
``` response
┌─res──────────────┐
│ Munchener Strase │
└──────────────────┘
```

View File

@ -6,14 +6,17 @@ sidebar_label: Searching in Strings
# Functions for Searching in Strings
All functions in this section search by default case-sensitively. Case-insensitive search is usually provided by separate function variants.
Note that case-insensitive search follows the lowercase-uppercase rules of the English language. E.g. Uppercased `i` in English language is
`I` whereas in Turkish language it is `İ` - results for languages other than English may be unexpected.
All functions in this section search case-sensitively by default. Case-insensitive search is usually provided by separate function variants.
Functions in this section also assume that the searched string and the search string are single-byte encoded text. If this assumption is
:::note
Case-insensitive search follows the lowercase-uppercase rules of the English language. E.g. Uppercased `i` in the English language is
`I` whereas in the Turkish language it is `İ` - results for languages other than English may be unexpected.
:::
Functions in this section also assume that the searched string (referred to in this section as `haystack`) and the search string (referred to in this section as `needle`) are single-byte encoded text. If this assumption is
violated, no exception is thrown and results are undefined. Search with UTF-8 encoded strings is usually provided by separate function
variants. Likewise, if a UTF-8 function variant is used and the input strings are not UTF-8 encoded text, no exception is thrown and the
results are undefined. Note that no automatic Unicode normalization is performed, you can use the
results are undefined. Note that no automatic Unicode normalization is performed, however you can use the
[normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
[General strings functions](string-functions.md) and [functions for replacing in strings](string-replace-functions.md) are described separately.
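As a quick illustration of the default case sensitivity, using functions documented below:
```sql
SELECT
    position('Hello', 'hello'),                 -- returns 0: the case-sensitive search misses
    positionCaseInsensitive('Hello', 'hello');  -- returns 1: the case-insensitive variant matches
```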
@ -54,6 +57,8 @@ Type: `Integer`.
**Examples**
Query:
``` sql
SELECT position('Hello, world!', '!');
```
@ -68,6 +73,8 @@ Result:
Example with `start_pos` argument:
Query:
``` sql
SELECT
position('Hello, world!', 'o', 1),
@ -84,6 +91,8 @@ Result:
Example for `needle IN haystack` syntax:
Query:
```sql
SELECT 6 = position('/' IN s) FROM (SELECT 'Hello/World' AS s);
```
@ -98,6 +107,8 @@ Result:
Examples with empty `needle` substring:
Query:
``` sql
SELECT
position('abc', ''),
@ -109,6 +120,8 @@ SELECT
position('abc', '', 5)
```
Result:
``` text
┌─position('abc', '')─┬─position('abc', '', 0)─┬─position('abc', '', 1)─┬─position('abc', '', 2)─┬─position('abc', '', 3)─┬─position('abc', '', 4)─┬─position('abc', '', 5)─┐
│ 1 │ 1 │ 1 │ 2 │ 3 │ 4 │ 0 │
@ -132,7 +145,23 @@ locate(needle, haystack[, start_pos])
## positionCaseInsensitive
Like [position](#position) but searches case-insensitively.
A case-insensitive variant of [position](#position).
**Example**
Query:
``` sql
SELECT positionCaseInsensitive('Hello, world!', 'hello');
```
Result:
``` text
┌─positionCaseInsensitive('Hello, world!', 'hello')─┐
│                                                  1 │
└────────────────────────────────────────────────────┘
```
## positionUTF8
@ -142,6 +171,8 @@ Like [position](#position) but assumes `haystack` and `needle` are UTF-8 encoded
Function `positionUTF8` correctly counts character `ö` (represented by two points) as a single Unicode codepoint:
Query:
``` sql
SELECT positionUTF8('Motörhead', 'r');
```
@ -175,14 +206,17 @@ multiSearchAllPositions(haystack, [needle1, needle2, ..., needleN])
**Arguments**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. Array
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned values**
- Array of the starting position in bytes and counting from 1 (if the substring was found) or 0 (if the substring was not found)
- Array of the starting position in bytes and counting from 1, if the substring was found.
- 0, if the substring was not found.
**Example**
Query:
``` sql
SELECT multiSearchAllPositions('Hello, World!', ['hello', '!', 'world']);
```
@ -194,45 +228,535 @@ Result:
│ [0,13,0] │
└───────────────────────────────────────────────────────────────────┘
```
## multiSearchAllPositionsCaseInsensitive
## multiSearchAllPositionsUTF8
Like [multiSearchAllPositions](#multiSearchAllPositions) but assumes `haystack` and the `needle`-s are UTF-8 encoded strings.
## multiSearchFirstPosition
Like `position` but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings.
Functions `multiSearchFirstPositionCaseInsensitive`, `multiSearchFirstPositionUTF8` and `multiSearchFirstPositionCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.
Like [multiSearchAllPositions](#multisearchallpositions) but ignores case.
**Syntax**
```sql
multiSearchFirstPosition(haystack, \[needle<sub>1</sub>, needle<sub>2</sub>, …, needle<sub>n</sub>\])
multiSearchAllPositionsCaseInsensitive(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- Array of the starting position in bytes and counting from 1 (if the substring was found).
- 0 if the substring was not found.
**Example**
Query:
```sql
SELECT multiSearchAllPositionsCaseInsensitive('ClickHouse',['c','h']);
```
Result:
```response
["1","6"]
```
## multiSearchAllPositionsUTF8
Like [multiSearchAllPositions](#multiSearchAllPositions) but assumes `haystack` and the `needle` substrings are UTF-8 encoded strings.
**Syntax**
```sql
multiSearchAllPositionsUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- Array of the starting position in bytes and counting from 1 (if the substring was found).
- 0 if the substring was not found.
**Example**
Given `ClickHouse` as a UTF-8 string, find the positions of `C` (`\x43`) and `H` (`\x48`).
Query:
```sql
SELECT multiSearchAllPositionsUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x43','\x48']);
```
Result:
```response
["1","6"]
```
## multiSearchAllPositionsCaseInsensitiveUTF8
Like [multiSearchAllPositionsUTF8](#multisearchallpositionsutf8) but ignores case.
**Syntax**
```sql
multiSearchAllPositionsCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- Array of the starting position in bytes and counting from 1 (if the substring was found).
- 0 if the substring was not found.
**Example**
Given `ClickHouse` as a UTF-8 string, find the positions of `c` (`\x63`) and `h` (`\x68`).
Query:
```sql
SELECT multiSearchAllPositionsCaseInsensitiveUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x63','\x68']);
```
Result:
```response
["1","6"]
```
## multiSearchFirstPosition
Like [`position`](#position) but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings.
Functions [`multiSearchFirstPositionCaseInsensitive`](#multiSearchFirstPositionCaseInsensitive), [`multiSearchFirstPositionUTF8`](#multiSearchFirstPositionUTF8) and [`multiSearchFirstPositionCaseInsensitiveUTF8`](#multiSearchFirstPositionCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function.
**Syntax**
```sql
multiSearchFirstPosition(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings.
- 0, if there was no match.
**Example**
Query:
```sql
SELECT multiSearchFirstPosition('Hello World',['llo', 'Wor', 'ld']);
```
Result:
```response
3
```
## multiSearchFirstPositionCaseInsensitive
Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but ignores case.
**Syntax**
```sql
multiSearchFirstPositionCaseInsensitive(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Array of substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings.
- 0, if there was no match.
**Example**
Query:
```sql
SELECT multiSearchFirstPositionCaseInsensitive('HELLO WORLD',['wor', 'ld', 'ello']);
```
Result:
```response
2
```
## multiSearchFirstPositionUTF8
Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings.
**Syntax**
```sql
multiSearchFirstPositionUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings.
- 0, if there was no match.
**Example**
Find the leftmost offset in UTF-8 string `hello world` which matches any of the given needles.
Query:
```sql
SELECT multiSearchFirstPositionUTF8('\x68\x65\x6c\x6c\x6f\x20\x77\x6f\x72\x6c\x64',['wor', 'ld', 'ello']);
```
Result:
```response
2
```
## multiSearchFirstPositionCaseInsensitiveUTF8
Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings and ignores case.
**Syntax**
```sql
multiSearchFirstPositionCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md)
**Returned value**
- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings, ignoring case.
- 0, if there was no match.
**Example**
Find the leftmost offset in UTF-8 string `HELLO WORLD` which matches any of the given needles.
Query:
```sql
SELECT multiSearchFirstPositionCaseInsensitiveUTF8('\x48\x45\x4c\x4c\x4f\x20\x57\x4f\x52\x4c\x44',['wor', 'ld', 'ello']);
```
Result:
```response
2
```
## multiSearchFirstIndex
Returns the index `i` (starting from 1) of the leftmost found needle<sub>i</sub> in the string `haystack` and 0 otherwise.
Functions `multiSearchFirstIndexCaseInsensitive`, `multiSearchFirstIndexUTF8` and `multiSearchFirstIndexCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.
Functions [`multiSearchFirstIndexCaseInsensitive`](#multiSearchFirstIndexCaseInsensitive), [`multiSearchFirstIndexUTF8`](#multiSearchFirstIndexUTF8) and [`multiSearchFirstIndexCaseInsensitiveUTF8`](#multiSearchFirstIndexCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function.
**Syntax**
```sql
multiSearchFirstIndex(haystack, \[needle<sub>1</sub>, needle<sub>2</sub>, …, needle<sub>n</sub>\])
multiSearchFirstIndex(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- index (starting from 1) of the leftmost found needle.
- 0, if there was no match.
**Example**
Query:
```sql
SELECT multiSearchFirstIndex('Hello World',['World','Hello']);
```
## multiSearchAny {#multisearchany}
Result:
```response
1
```
## multiSearchFirstIndexCaseInsensitive
Returns the index `i` (starting from 1) of the leftmost found needle<sub>i</sub> in the string `haystack` and 0 otherwise. Ignores case.
**Syntax**
```sql
multiSearchFirstIndexCaseInsensitive(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- index (starting from 1) of the leftmost found needle.
- 0, if there was no match.
**Example**
Query:
```sql
SELECT multiSearchFirstIndexCaseInsensitive('hElLo WoRlD',['World','Hello']);
```
Result:
```response
1
```
## multiSearchFirstIndexUTF8
Returns the index `i` (starting from 1) of the leftmost found needle<sub>i</sub> in the string `haystack` and 0 otherwise. Assumes `haystack` and `needle` are UTF-8 encoded strings.
**Syntax**
```sql
multiSearchFirstIndexUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md)
**Returned value**
- index (starting from 1) of the leftmost found needle.
- 0, if there was no match.
**Example**
Given `Hello World` as a UTF-8 string, find the first index of UTF-8 strings `Hello` and `World`.
Query:
```sql
SELECT multiSearchFirstIndexUTF8('\x48\x65\x6c\x6c\x6f\x20\x57\x6f\x72\x6c\x64',['\x57\x6f\x72\x6c\x64','\x48\x65\x6c\x6c\x6f']);
```
Result:
```response
1
```
## multiSearchFirstIndexCaseInsensitiveUTF8
Returns the index `i` (starting from 1) of the leftmost found needle<sub>i</sub> in the string `haystack` and 0 otherwise. Assumes `haystack` and `needle` are UTF-8 encoded strings. Ignores case.
**Syntax**
```sql
multiSearchFirstIndexCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- index (starting from 1) of the leftmost found needle.
- 0, if there was no match.
**Example**
Given `HELLO WORLD` as a UTF-8 string, find the first index of UTF-8 strings `hello` and `world`.
Query:
```sql
SELECT multiSearchFirstIndexCaseInsensitiveUTF8('\x48\x45\x4c\x4c\x4f\x20\x57\x4f\x52\x4c\x44',['\x68\x65\x6c\x6c\x6f','\x77\x6f\x72\x6c\x64']);
```
Result:
```response
1
```
## multiSearchAny
Returns 1, if at least one string needle<sub>i</sub> matches the string `haystack` and 0 otherwise.
Functions `multiSearchAnyCaseInsensitive`, `multiSearchAnyUTF8` and `multiSearchAnyCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.
Functions [`multiSearchAnyCaseInsensitive`](#multiSearchAnyCaseInsensitive), [`multiSearchAnyUTF8`](#multiSearchAnyUTF8) and [`multiSearchAnyCaseInsensitiveUTF8`](#multiSearchAnyCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function.
**Syntax**
```sql
multiSearchAny(haystack, \[needle<sub>1</sub>, needle<sub>2</sub>, …, needle<sub>n</sub>\])
multiSearchAny(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- 1, if there was at least one match.
- 0, if there was no match.
**Example**
Query:
```sql
SELECT multiSearchAny('ClickHouse',['C','H']);
```
Result:
```response
1
```
## multiSearchAnyCaseInsensitive
Like [multiSearchAny](#multisearchany) but ignores case.
**Syntax**
```sql
multiSearchAnyCaseInsensitive(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md)
**Returned value**
- 1, if there was at least one case-insensitive match.
- 0, if there was no case-insensitive match.
**Example**
Query:
```sql
SELECT multiSearchAnyCaseInsensitive('ClickHouse',['c','h']);
```
Result:
```response
1
```
## multiSearchAnyUTF8
Like [multiSearchAny](#multisearchany) but assumes `haystack` and the `needle` substrings are UTF-8 encoded strings.
**Syntax**
```sql
multiSearchAnyUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- 1, if there was at least one match.
- 0, if there was no match.
**Example**
Given `ClickHouse` as a UTF-8 string, check if there are any `C` (`\x43`) or `H` (`\x48`) letters in the word.
Query:
```sql
SELECT multiSearchAnyUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x43','\x48']);
```
Result:
```response
1
```
## multiSearchAnyCaseInsensitiveUTF8
Like [multiSearchAnyUTF8](#multiSearchAnyUTF8) but ignores case.
**Syntax**
```sql
multiSearchAnyCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md)
**Returned value**
- 1, if there was at least one case-insensitive match.
- 0, if there was no case-insensitive match.
**Example**
Given `ClickHouse` as a UTF-8 string, check if there is any letter `h` (`\x68`) in the word, ignoring case.
Query:
```sql
SELECT multiSearchAnyCaseInsensitiveUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x68']);
```
Result:
```response
1
```
## match {#match}

View File

@ -584,6 +584,278 @@ SELECT tupleConcat((1, 2), (3, 4), (true, false)) AS res
└──────────────────────┘
```
## tupleIntDiv
Does integer division of a tuple of numerators and a tuple of denominators, and returns a tuple of the quotients.
**Syntax**
```sql
tupleIntDiv(tuple_num, tuple_div)
```
**Parameters**
- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type.
- `tuple_div`: Tuple of divisor values. [Tuple](../data-types/tuple) of numeric type.
**Returned value**
- Tuple of the quotients of `tuple_num` and `tuple_div`. [Tuple](../data-types/tuple) of integer values.
**Implementation details**
- If either `tuple_num` or `tuple_div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor.
- An error will be thrown for division by 0.
**Examples**
Query:
``` sql
SELECT tupleIntDiv((15, 10, 5), (5, 5, 5));
```
Result:
``` text
┌─tupleIntDiv((15, 10, 5), (5, 5, 5))─┐
│ (3,2,1) │
└─────────────────────────────────────┘
```
Query:
``` sql
SELECT tupleIntDiv((15, 10, 5), (5.5, 5.5, 5.5));
```
Result:
``` text
┌─tupleIntDiv((15, 10, 5), (5.5, 5.5, 5.5))─┐
│ (2,1,0) │
└───────────────────────────────────────────┘
```
## tupleIntDivOrZero
Like [tupleIntDiv](#tupleintdiv), it does integer division of a tuple of numerators and a tuple of denominators, and returns a tuple of the quotients. It does not throw an error for 0 divisors, but rather returns the quotient as 0.
**Syntax**
```sql
tupleIntDivOrZero(tuple_num, tuple_div)
```
**Parameters**
- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type.
- `tuple_div`: Tuple of divisor values. [Tuple](../data-types/tuple) of numeric type.
**Returned value**
- Tuple of the quotients of `tuple_num` and `tuple_div`. [Tuple](../data-types/tuple) of integer values.
- Returns 0 for quotients where the divisor is 0.
**Implementation details**
- If either `tuple_num` or `tuple_div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor as in [tupleIntDiv](#tupleintdiv).
**Examples**
Query:
``` sql
SELECT tupleIntDivOrZero((5, 10, 15), (0, 0, 0));
```
Result:
``` text
┌─tupleIntDivOrZero((5, 10, 15), (0, 0, 0))─┐
│ (0,0,0) │
└───────────────────────────────────────────┘
```
## tupleIntDivByNumber
Does integer division of a tuple of numerators by a given denominator, and returns a tuple of the quotients.
**Syntax**
```sql
tupleIntDivByNumber(tuple_num, div)
```
**Parameters**
- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type.
- `div`: The divisor value. [Numeric](../data-types/int-uint.md) type.
**Returned value**
- Tuple of the quotients of `tuple_num` and `div`. [Tuple](../data-types/tuple) of integer values.
**Implementation details**
- If either `tuple_num` or `div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor.
- An error will be thrown for division by 0.
**Examples**
Query:
``` sql
SELECT tupleIntDivByNumber((15, 10, 5), 5);
```
Result:
``` text
┌─tupleIntDivByNumber((15, 10, 5), 5)─┐
│ (3,2,1) │
└─────────────────────────────────────┘
```
Query:
``` sql
SELECT tupleIntDivByNumber((15.2, 10.7, 5.5), 5.8);
```
Result:
``` text
┌─tupleIntDivByNumber((15.2, 10.7, 5.5), 5.8)─┐
│ (2,1,0) │
└─────────────────────────────────────────────┘
```
## tupleIntDivOrZeroByNumber
Like [tupleIntDivByNumber](#tupleintdivbynumber), it does integer division of a tuple of numerators by a given denominator, and returns a tuple of the quotients. It does not throw an error for 0 divisors, but rather returns the quotient as 0.
**Syntax**
```sql
tupleIntDivOrZeroByNumber(tuple_num, div)
```
**Parameters**
- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type.
- `div`: The divisor value. [Numeric](../data-types/int-uint.md) type.
**Returned value**
- Tuple of the quotients of `tuple_num` and `div`. [Tuple](../data-types/tuple) of integer values.
- Returns 0 for quotients where the divisor is 0.
**Implementation details**
- If either `tuple_num` or `div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor as in [tupleIntDivByNumber](#tupleintdivbynumber).
**Examples**
Query:
``` sql
SELECT tupleIntDivOrZeroByNumber((15, 10, 5), 5);
```
Result:
``` text
┌─tupleIntDivOrZeroByNumber((15, 10, 5), 5)─┐
│ (3,2,1) │
└───────────────────────────────────────────┘
```
Query:
``` sql
SELECT tupleIntDivOrZeroByNumber((15, 10, 5), 0)
```
Result:
``` text
┌─tupleIntDivOrZeroByNumber((15, 10, 5), 0)─┐
│ (0,0,0) │
└───────────────────────────────────────────┘
```
## tupleModulo
Returns a tuple of the moduli (remainders) of division operations of two tuples.
**Syntax**
```sql
tupleModulo(tuple_num, tuple_mod)
```
**Parameters**
- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type.
- `tuple_mod`: Tuple of modulus values. [Tuple](../data-types/tuple) of numeric type.
**Returned value**
- Tuple of the remainders of division of `tuple_num` by `tuple_mod`. [Tuple](../data-types/tuple) of integer values.
- An error is thrown for division by zero.
**Examples**
Query:
``` sql
SELECT tupleModulo((15, 10, 5), (5, 3, 2));
```
Result:
``` text
┌─tupleModulo((15, 10, 5), (5, 3, 2))─┐
│ (0,1,1) │
└─────────────────────────────────────┘
```
## tupleModuloByNumber
Returns a tuple of the moduli (remainders) of division operations of a tuple and a given divisor.
**Syntax**
```sql
tupleModuloByNumber(tuple_num, div)
```
**Parameters**
- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type.
- `div`: The divisor value. [Numeric](../data-types/int-uint.md) type.
**Returned value**
- Tuple of the remainders of division of `tuple_num` by `div`. [Tuple](../data-types/tuple) of integer values.
- An error is thrown for division by zero.
**Examples**
Query:
``` sql
SELECT tupleModuloByNumber((15, 10, 5), 2);
```
Result:
``` text
┌─tupleModuloByNumber((15, 10, 5), 2)─┐
│ (1,0,1) │
└─────────────────────────────────────┘
```
## Distance functions
All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md).

View File

@ -3,7 +3,7 @@
function _clickhouse_get_utils()
{
local cmd=$1 && shift
"$cmd" --help |& awk '/^clickhouse.*args/ { print $2 }'
"$cmd" help |& awk '/^clickhouse.*args/ { print $2 }'
}
function _complete_for_clickhouse_entrypoint_bin()

View File

@ -934,8 +934,8 @@ void Client::addOptions(OptionsDescription & options_description)
("user,u", po::value<std::string>()->default_value("default"), "user")
("password", po::value<std::string>(), "password")
("ask-password", "ask-password")
("ssh-key-file", po::value<std::string>(), "File containing ssh private key needed for authentication. If not set does password authentication.")
("ssh-key-passphrase", po::value<std::string>(), "Passphrase for imported ssh key.")
("ssh-key-file", po::value<std::string>(), "File containing the SSH private key for authenticate with the server.")
("ssh-key-passphrase", po::value<std::string>(), "Passphrase for the SSH private key specified by --ssh-key-file.")
("quota_key", po::value<std::string>(), "A string to differentiate quotas when the user have keyed quotas configured on server")
("max_client_network_bandwidth", po::value<int>(), "the maximum speed of data exchange over the network for the client in bytes per second.")

View File

@ -166,7 +166,7 @@ int DisksApp::main(const std::vector<String> & /*args*/)
{
String config_path = config().getString("config-file", getDefaultConfigFileName());
ConfigProcessor config_processor(config_path, false, false);
config_processor.setConfigPath(fs::path(config_path).parent_path());
ConfigProcessor::setConfigPath(fs::path(config_path).parent_path());
auto loaded_config = config_processor.loadConfig();
config().add(loaded_config.configuration.duplicate(), false, false);
}

View File

@ -368,7 +368,7 @@ int KeeperClient::main(const std::vector<String> & /* args */)
DB::ConfigProcessor config_processor(config().getString("config-file", "config.xml"));
/// This will handle a situation when clickhouse is running on the embedded config, but config.d folder is also present.
config_processor.registerEmbeddedConfig("config.xml", "<clickhouse/>");
ConfigProcessor::registerEmbeddedConfig("config.xml", "<clickhouse/>");
auto clickhouse_config = config_processor.loadConfig();
Poco::Util::AbstractConfiguration::Keys keys;

View File

@ -122,7 +122,7 @@ void LocalServer::initialize(Poco::Util::Application & self)
{
const auto config_path = config().getString("config-file", "config.xml");
ConfigProcessor config_processor(config_path, false, true);
config_processor.setConfigPath(fs::path(config_path).parent_path());
ConfigProcessor::setConfigPath(fs::path(config_path).parent_path());
auto loaded_config = config_processor.loadConfig();
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
}

View File

@ -487,7 +487,7 @@ int main(int argc_, char ** argv_)
/// Interpret binary without argument or with arguments starts with dash
/// ('-') as clickhouse-local for better usability:
///
/// clickhouse # dumps help
/// clickhouse help # dumps help
/// clickhouse -q 'select 1' # use local
/// clickhouse # spawn local
/// clickhouse local # spawn local

View File

@ -4,11 +4,12 @@
#include <Access/ExternalAuthenticators.h>
#include <Access/LDAPClient.h>
#include <Access/GSSAcceptor.h>
#include <Common/Exception.h>
#include <Poco/SHA1Engine.h>
#include <Common/Exception.h>
#include <Common/SSHWrapper.h>
#include <Common/typeid_cast.h>
#include <Common/SSH/Wrappers.h>
#include "config.h"
namespace DB
{
@ -74,7 +75,7 @@ namespace
}
#if USE_SSH
bool checkSshSignature(const std::vector<ssh::SSHKey> & keys, std::string_view signature, std::string_view original)
bool checkSshSignature(const std::vector<SSHKey> & keys, std::string_view signature, std::string_view original)
{
for (const auto & key: keys)
if (key.isPublic() && key.verifySignature(signature, original))
@ -114,7 +115,11 @@ bool Authentication::areCredentialsValid(
throw Authentication::Require<BasicCredentials>("ClickHouse X.509 Authentication");
case AuthenticationType::SSH_KEY:
throw Authentication::Require<SshCredentials>("Ssh Keys Authentication");
#if USE_SSH
throw Authentication::Require<SshCredentials>("SSH Keys Authentication");
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
case AuthenticationType::MAX:
break;
@ -145,7 +150,11 @@ bool Authentication::areCredentialsValid(
throw Authentication::Require<BasicCredentials>("ClickHouse X.509 Authentication");
case AuthenticationType::SSH_KEY:
throw Authentication::Require<SshCredentials>("Ssh Keys Authentication");
#if USE_SSH
throw Authentication::Require<SshCredentials>("SSH Keys Authentication");
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
case AuthenticationType::MAX:
break;
@ -178,7 +187,11 @@ bool Authentication::areCredentialsValid(
throw Authentication::Require<BasicCredentials>("ClickHouse X.509 Authentication");
case AuthenticationType::SSH_KEY:
throw Authentication::Require<SshCredentials>("Ssh Keys Authentication");
#if USE_SSH
throw Authentication::Require<SshCredentials>("SSH Keys Authentication");
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
case AuthenticationType::BCRYPT_PASSWORD:
return checkPasswordBcrypt(basic_credentials->getPassword(), auth_data.getPasswordHashBinary());
@ -216,13 +229,18 @@ bool Authentication::areCredentialsValid(
return auth_data.getSSLCertificateCommonNames().contains(ssl_certificate_credentials->getCommonName());
case AuthenticationType::SSH_KEY:
throw Authentication::Require<SshCredentials>("Ssh Keys Authentication");
#if USE_SSH
throw Authentication::Require<SshCredentials>("SSH Keys Authentication");
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
case AuthenticationType::MAX:
break;
}
}
#if USE_SSH
if (const auto * ssh_credentials = typeid_cast<const SshCredentials *>(&credentials))
{
switch (auth_data.getType())
@ -243,15 +261,12 @@ bool Authentication::areCredentialsValid(
throw Authentication::Require<SSLCertificateCredentials>("ClickHouse X.509 Authentication");
case AuthenticationType::SSH_KEY:
#if USE_SSH
return checkSshSignature(auth_data.getSSHKeys(), ssh_credentials->getSignature(), ssh_credentials->getOriginal());
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL");
#endif
case AuthenticationType::MAX:
break;
}
}
#endif
if ([[maybe_unused]] const auto * always_allow_credentials = typeid_cast<const AlwaysAllowCredentials *>(&credentials))
return true;

View File

@ -105,7 +105,10 @@ bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs)
return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash)
&& (lhs.ldap_server_name == rhs.ldap_server_name) && (lhs.kerberos_realm == rhs.kerberos_realm)
&& (lhs.ssl_certificate_common_names == rhs.ssl_certificate_common_names)
&& (lhs.ssh_keys == rhs.ssh_keys) && (lhs.http_auth_scheme == rhs.http_auth_scheme)
#if USE_SSH
&& (lhs.ssh_keys == rhs.ssh_keys)
#endif
&& (lhs.http_auth_scheme == rhs.http_auth_scheme)
&& (lhs.http_auth_server_name == rhs.http_auth_server_name);
}
@ -326,7 +329,7 @@ std::shared_ptr<ASTAuthenticationData> AuthenticationData::toAST() const
break;
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL");
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
}
case AuthenticationType::HTTP:
@ -355,7 +358,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
{
#if USE_SSH
AuthenticationData auth_data(*query.type);
std::vector<ssh::SSHKey> keys;
std::vector<SSHKey> keys;
size_t args_size = query.children.size();
for (size_t i = 0; i < args_size; ++i)
@ -366,7 +369,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
try
{
keys.emplace_back(ssh::SSHKeyFactory::makePublicFromBase64(key_base64, type));
keys.emplace_back(SSHKeyFactory::makePublicKeyFromBase64(key_base64, type));
}
catch (const std::invalid_argument &)
{
@ -377,7 +380,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
auth_data.setSSHKeys(std::move(keys));
return auth_data;
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL");
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
}

View File

@ -2,14 +2,16 @@
#include <Access/Common/AuthenticationType.h>
#include <Access/Common/HTTPAuthenticationScheme.h>
#include <Common/SSHWrapper.h>
#include <Interpreters/Context_fwd.h>
#include <Parsers/Access/ASTAuthenticationData.h>
#include <Common/SSH/Wrappers.h>
#include <vector>
#include <base/types.h>
#include <boost/container/flat_set.hpp>
#include "config.h"
namespace DB
{
@ -59,8 +61,10 @@ public:
const boost::container::flat_set<String> & getSSLCertificateCommonNames() const { return ssl_certificate_common_names; }
void setSSLCertificateCommonNames(boost::container::flat_set<String> common_names_);
const std::vector<ssh::SSHKey> & getSSHKeys() const { return ssh_keys; }
void setSSHKeys(std::vector<ssh::SSHKey> && ssh_keys_) { ssh_keys = std::forward<std::vector<ssh::SSHKey>>(ssh_keys_); }
#if USE_SSH
const std::vector<SSHKey> & getSSHKeys() const { return ssh_keys; }
void setSSHKeys(std::vector<SSHKey> && ssh_keys_) { ssh_keys = std::forward<std::vector<SSHKey>>(ssh_keys_); }
#endif
HTTPAuthenticationScheme getHTTPAuthenticationScheme() const { return http_auth_scheme; }
void setHTTPAuthenticationScheme(HTTPAuthenticationScheme scheme) { http_auth_scheme = scheme; }
@ -94,7 +98,9 @@ private:
String kerberos_realm;
boost::container::flat_set<String> ssl_certificate_common_names;
String salt;
std::vector<ssh::SSHKey> ssh_keys;
#if USE_SSH
std::vector<SSHKey> ssh_keys;
#endif
/// HTTP authentication properties
String http_auth_server_name;
HTTPAuthenticationScheme http_auth_scheme = HTTPAuthenticationScheme::BASIC;

View File

@ -34,8 +34,8 @@ enum class AuthenticationType
/// Password is encrypted in bcrypt hash.
BCRYPT_PASSWORD,
/// Server sends a random string named `challenge` which client needs to encrypt with private key.
/// The check is performed on server side by decrypting the data and comparing with the original string.
/// Server sends a random string named `challenge` to the client. The client signs it with its SSH private key.
/// The server verifies the signature against the SSH public key registered for the user (see the sketch below).
SSH_KEY,
/// Authentication through HTTP protocol

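As an aside, the challenge/response round trip described in the comment above can be pictured with the SSHKey API from Common/SSHWrapper.h. This is a minimal sketch, not the actual handshake: the real protocol, shown later in this commit, signs a message that also packs the protocol version, default database and user name, and the challenge generation on the server is left out here.

#include <Common/SSHWrapper.h>
#include <base/types.h>

/// Hedged sketch: `challenge` stands in for the random string generated on
/// the server; signString()/verifySignature() are the wrapper methods this
/// commit renames and cleans up.
bool authenticateBySSHKey(
    const DB::SSHKey & client_private_key,
    const DB::SSHKey & user_public_key,
    const DB::String & challenge)
{
    DB::String signature = client_private_key.signString(challenge); /// client side
    return user_public_key.verifySignature(signature, challenge);    /// server side
}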
View File

@ -3,6 +3,7 @@
#include <base/types.h>
#include <memory>
#include "config.h"
namespace DB
{
@ -86,10 +87,11 @@ class MySQLNative41Credentials : public CredentialsWithScramble
using CredentialsWithScramble::CredentialsWithScramble;
};
#if USE_SSH
class SshCredentials : public Credentials
{
public:
explicit SshCredentials(const String& user_name_, const String& signature_, const String& original_)
SshCredentials(const String & user_name_, const String & signature_, const String & original_)
: Credentials(user_name_), signature(signature_), original(original_)
{
is_ready = true;
@ -117,5 +119,6 @@ private:
String signature;
String original;
};
#endif
}

View File

@ -31,7 +31,7 @@ void User::setName(const String & name_)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name is empty");
if (name_ == EncodedUserInfo::USER_INTERSERVER_MARKER)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_);
if (startsWith(name_, EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER))
if (name_.starts_with(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_);
name = name_;
}

View File

@ -1,6 +1,5 @@
#include <Access/UsersConfigAccessStorage.h>
#include <Access/Quota.h>
#include <Common/SSH/Wrappers.h>
#include <Access/RowPolicy.h>
#include <Access/User.h>
#include <Access/Role.h>
@ -10,6 +9,7 @@
#include <Access/AccessChangesNotifier.h>
#include <Dictionaries/IDictionary.h>
#include <Common/Config/ConfigReloader.h>
#include <Common/SSHWrapper.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/quoteString.h>
#include <Common/transformEndianness.h>
@ -214,7 +214,7 @@ namespace
Poco::Util::AbstractConfiguration::Keys entries;
config.keys(ssh_keys_config, entries);
std::vector<ssh::SSHKey> keys;
std::vector<SSHKey> keys;
for (const String& entry : entries)
{
const auto conf_pref = ssh_keys_config + "." + entry + ".";
@ -237,7 +237,7 @@ namespace
try
{
keys.emplace_back(ssh::SSHKeyFactory::makePublicFromBase64(base64_key, type));
keys.emplace_back(SSHKeyFactory::makePublicKeyFromBase64(base64_key, type));
}
catch (const std::invalid_argument &)
{
@ -249,7 +249,7 @@ namespace
}
user->auth_data.setSSHKeys(std::move(keys));
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL");
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
}
else if (has_http_auth)

View File

@ -54,30 +54,30 @@ public:
{
const auto & value = columns[0]->getFloat64(row_num);
const auto & time = columns[1]->getFloat64(row_num);
this->data(place).add(value, time, half_decay);
data(place).add(value, time, half_decay);
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).merge(this->data(rhs), half_decay);
data(place).merge(data(rhs), half_decay);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
writeBinary(this->data(place).value, buf);
writeBinary(this->data(place).time, buf);
writeBinary(data(place).value, buf);
writeBinary(data(place).time, buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
readBinary(this->data(place).value, buf);
readBinary(this->data(place).time, buf);
readBinary(data(place).value, buf);
readBinary(data(place).time, buf);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
auto & column = assert_cast<ColumnVector<Float64> &>(to);
column.getData().push_back(this->data(place).get(half_decay));
column.getData().push_back(data(place).get(half_decay));
}
};

View File

@ -293,32 +293,32 @@ public:
Float64 value = columns[0]->getFloat64(row_num);
UInt8 is_second = columns[1]->getUInt(row_num);
if (is_second)
this->data(place).addY(value, arena);
data(place).addY(value, arena);
else
this->data(place).addX(value, arena);
data(place).addX(value, arena);
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
this->data(place).merge(this->data(rhs), arena);
data(place).merge(data(rhs), arena);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
this->data(place).write(buf);
data(place).write(buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
this->data(place).read(buf, arena);
data(place).read(buf, arena);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
if (!this->data(place).size_x || !this->data(place).size_y)
if (!data(place).size_x || !data(place).size_y)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} require both samples to be non empty", getName());
auto [d_statistic, p_value] = this->data(place).getResult(alternative, method);
auto [d_statistic, p_value] = data(place).getResult(alternative, method);
/// Because p-value is a probability.
p_value = std::min(1.0, std::max(0.0, p_value));

View File

@ -147,6 +147,8 @@ public:
negative_store->merge(other.negative_store.get());
}
/// NOLINTBEGIN(readability-static-accessed-through-instance)
void serialize(WriteBuffer& buf) const
{
// Write the mapping
@ -201,6 +203,8 @@ public:
count = static_cast<Float64>(negative_store->count + zero_count + store->count);
}
/// NOLINTEND(readability-static-accessed-through-instance)
private:
std::unique_ptr<DDSketchLogarithmicMapping> mapping;
std::unique_ptr<DDSketchDenseStore> store;

View File

@ -87,6 +87,8 @@ public:
count += other->count;
}
/// NOLINTBEGIN(readability-static-accessed-through-instance)
void serialize(WriteBuffer& buf) const
{
@ -179,6 +181,8 @@ public:
}
}
/// NOLINTEND(readability-static-accessed-through-instance)
private:
UInt32 chunk_size;
DDSketchEncoding enc;

View File

@ -12,24 +12,6 @@ namespace DB
namespace
{
const std::unordered_set<String> possibly_injective_function_names
{
"dictGet",
"dictGetString",
"dictGetUInt8",
"dictGetUInt16",
"dictGetUInt32",
"dictGetUInt64",
"dictGetInt8",
"dictGetInt16",
"dictGetInt32",
"dictGetInt64",
"dictGetFloat32",
"dictGetFloat64",
"dictGetDate",
"dictGetDateTime"
};
class OptimizeGroupByInjectiveFunctionsVisitor : public InDepthQueryTreeVisitorWithContext<OptimizeGroupByInjectiveFunctionsVisitor>
{
using Base = InDepthQueryTreeVisitorWithContext<OptimizeGroupByInjectiveFunctionsVisitor>;

View File

@ -2275,6 +2275,10 @@ void QueryAnalyzer::mergeWindowWithParentWindow(const QueryTreeNodePtr & window_
*/
void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_list, const QueryTreeNodes & projection_nodes, IdentifierResolveScope & scope)
{
const auto & settings = scope.context->getSettingsRef();
if (!settings.enable_positional_arguments || scope.context->getClientInfo().query_kind != ClientInfo::QueryKind::INITIAL_QUERY)
return;
auto & node_list_typed = node_list->as<ListNode &>();
for (auto & node : node_list_typed.getNodes())
@ -2287,7 +2291,8 @@ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_
auto * constant_node = (*node_to_replace)->as<ConstantNode>();
if (!constant_node
|| (constant_node->getValue().getType() != Field::Types::UInt64 && constant_node->getValue().getType() != Field::Types::Int64))
|| (constant_node->getValue().getType() != Field::Types::UInt64
&& constant_node->getValue().getType() != Field::Types::Int64))
continue;
UInt64 pos;
@ -5799,7 +5804,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
return result_projection_names;
}
FunctionOverloadResolverPtr function = UserDefinedExecutableFunctionFactory::instance().tryGet(function_name, scope.context, parameters);
FunctionOverloadResolverPtr function = UserDefinedExecutableFunctionFactory::instance().tryGet(function_name, scope.context, parameters); /// NOLINT(readability-static-accessed-through-instance)
bool is_executable_udf = true;
IdentifierResolveScope::ResolvedFunctionsCache * function_cache = nullptr;
@ -5829,7 +5834,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
{
std::vector<std::string> possible_function_names;
auto function_names = UserDefinedExecutableFunctionFactory::instance().getRegisteredNames(scope.context);
auto function_names = UserDefinedExecutableFunctionFactory::instance().getRegisteredNames(scope.context); /// NOLINT(readability-static-accessed-through-instance)
possible_function_names.insert(possible_function_names.end(), function_names.begin(), function_names.end());
function_names = UserDefinedSQLFunctionFactory::instance().getAllRegisteredNames();
@ -5847,8 +5852,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
possible_function_names.push_back(name);
}
NamePrompter<2> name_prompter;
auto hints = name_prompter.getHints(function_name, possible_function_names);
auto hints = NamePrompter<2>::getHints(function_name, possible_function_names);
throw Exception(ErrorCodes::UNKNOWN_FUNCTION,
"Function with name '{}' does not exists. In scope {}{}",
@ -6682,15 +6686,12 @@ void expandTuplesInList(QueryTreeNodes & key_list)
*/
void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierResolveScope & scope)
{
const auto & settings = scope.context->getSettingsRef();
if (query_node_typed.isGroupByWithGroupingSets())
{
QueryTreeNodes nullable_group_by_keys;
for (auto & grouping_sets_keys_list_node : query_node_typed.getGroupBy().getNodes())
{
if (settings.enable_positional_arguments)
replaceNodesWithPositionalArguments(grouping_sets_keys_list_node, query_node_typed.getProjection().getNodes(), scope);
replaceNodesWithPositionalArguments(grouping_sets_keys_list_node, query_node_typed.getProjection().getNodes(), scope);
// Remove redundant calls to `tuple` function. It simplifies checking if expression is an aggregation key.
// It's required to support queries like: SELECT number FROM numbers(3) GROUP BY (number, number % 2)
@ -6709,8 +6710,7 @@ void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierR
}
else
{
if (settings.enable_positional_arguments)
replaceNodesWithPositionalArguments(query_node_typed.getGroupByNode(), query_node_typed.getProjection().getNodes(), scope);
replaceNodesWithPositionalArguments(query_node_typed.getGroupByNode(), query_node_typed.getProjection().getNodes(), scope);
// Remove redundant calls to `tuple` function. It simplifies checking if expression is an aggregation key.
// It's required to support queries like: SELECT number FROM numbers(3) GROUP BY (number, number % 2)
@ -7861,8 +7861,6 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
if (query_node_typed.isCTE())
cte_in_resolve_process.insert(query_node_typed.getCTEName());
const auto & settings = scope.context->getSettingsRef();
bool is_rollup_or_cube = query_node_typed.isGroupByWithRollup() || query_node_typed.isGroupByWithCube();
if (query_node_typed.isGroupByWithGroupingSets()
@ -8046,8 +8044,9 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
if (query_node_typed.hasOrderBy())
{
if (settings.enable_positional_arguments)
replaceNodesWithPositionalArguments(query_node_typed.getOrderByNode(), query_node_typed.getProjection().getNodes(), scope);
replaceNodesWithPositionalArguments(query_node_typed.getOrderByNode(), query_node_typed.getProjection().getNodes(), scope);
const auto & settings = scope.context->getSettingsRef();
expandOrderByAll(query_node_typed, settings);
resolveSortNodeList(query_node_typed.getOrderByNode(), scope);
@ -8070,8 +8069,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
if (query_node_typed.hasLimitBy())
{
if (settings.enable_positional_arguments)
replaceNodesWithPositionalArguments(query_node_typed.getLimitByNode(), query_node_typed.getProjection().getNodes(), scope);
replaceNodesWithPositionalArguments(query_node_typed.getLimitByNode(), query_node_typed.getProjection().getNodes(), scope);
resolveExpressionNodeList(query_node_typed.getLimitByNode(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
}

View File

@ -760,4 +760,26 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_ty
return function_node;
}
QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(QueryTreeNodePtr table_node, const ContextPtr & context)
{
const auto & storage_snapshot = table_node->as<TableNode>()->getStorageSnapshot();
auto columns_to_select = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::Ordinary));
size_t columns_to_select_size = columns_to_select.size();
auto column_nodes_to_select = std::make_shared<ListNode>();
column_nodes_to_select->getNodes().reserve(columns_to_select_size);
NamesAndTypes projection_columns;
projection_columns.reserve(columns_to_select_size);
for (auto & column : columns_to_select)
{
column_nodes_to_select->getNodes().emplace_back(std::make_shared<ColumnNode>(column, table_node));
projection_columns.emplace_back(column.name, column.type);
}
auto subquery_for_table = std::make_shared<QueryNode>(Context::createCopy(context));
subquery_for_table->setIsSubquery(true);
subquery_for_table->getProjectionNode() = std::move(column_nodes_to_select);
subquery_for_table->getJoinTree() = std::move(table_node);
subquery_for_table->resolveProjectionColumns(std::move(projection_columns));
return subquery_for_table;
}
}

View File

@ -105,4 +105,7 @@ NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node);
/// Wrap node into `_CAST` function
QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context);
/// Build subquery which we execute for `IN table` function.
QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(QueryTreeNodePtr table_node, const ContextPtr & context);
}

View File

@ -85,7 +85,6 @@ add_headers_and_sources(clickhouse_common_io Common)
add_headers_and_sources(clickhouse_common_io Common/HashTable)
add_headers_and_sources(clickhouse_common_io Common/Scheduler)
add_headers_and_sources(clickhouse_common_io Common/Scheduler/Nodes)
add_headers_and_sources(clickhouse_common_io Common/SSH)
add_headers_and_sources(clickhouse_common_io IO)
add_headers_and_sources(clickhouse_common_io IO/Archives)
add_headers_and_sources(clickhouse_common_io IO/S3)
@ -99,7 +98,6 @@ add_headers_and_sources(clickhouse_compression Core)
#Included these specific files to avoid linking grpc
add_glob(clickhouse_compression_headers Server/ServerType.h)
add_glob(clickhouse_compression_sources Server/ServerType.cpp)
add_headers_and_sources(clickhouse_compression Common/SSH)
add_library(clickhouse_compression ${clickhouse_compression_headers} ${clickhouse_compression_sources})
@ -370,8 +368,7 @@ if (TARGET ch_contrib::crc32-vpmsum)
endif()
if (TARGET ch_contrib::ssh)
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::ssh)
target_link_libraries(clickhouse_compression PUBLIC ch_contrib::ssh)
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::ssh)
endif()
dbms_target_link_libraries(PUBLIC ch_contrib::abseil_swiss_tables)

View File

@ -712,11 +712,20 @@ void ClientBase::adjustSettings()
settings.input_format_values_allow_data_after_semicolon.changed = false;
}
/// If pager is specified then output_format_pretty_max_rows is ignored, this should be handled by pager.
if (!pager.empty() && !global_context->getSettingsRef().output_format_pretty_max_rows.changed)
/// Do not limit pretty format output when --pager is specified.
if (!pager.empty())
{
settings.output_format_pretty_max_rows = std::numeric_limits<UInt64>::max();
settings.output_format_pretty_max_rows.changed = false;
if (!global_context->getSettingsRef().output_format_pretty_max_rows.changed)
{
settings.output_format_pretty_max_rows = std::numeric_limits<UInt64>::max();
settings.output_format_pretty_max_rows.changed = false;
}
if (!global_context->getSettingsRef().output_format_pretty_max_value_width.changed)
{
settings.output_format_pretty_max_value_width = std::numeric_limits<UInt64>::max();
settings.output_format_pretty_max_value_width.changed = false;
}
}
global_context->setSettings(settings);

View File

@ -67,7 +67,7 @@ Connection::~Connection() = default;
Connection::Connection(const String & host_, UInt16 port_,
const String & default_database_,
const String & user_, const String & password_,
const ssh::SSHKey & ssh_private_key_,
[[maybe_unused]] const SSHKey & ssh_private_key_,
const String & quota_key_,
const String & cluster_,
const String & cluster_secret_,
@ -76,7 +76,9 @@ Connection::Connection(const String & host_, UInt16 port_,
Protocol::Secure secure_)
: host(host_), port(port_), default_database(default_database_)
, user(user_), password(password_)
#if USE_SSH
, ssh_private_key(ssh_private_key_)
#endif
, quota_key(quota_key_)
, cluster(cluster_)
, cluster_secret(cluster_secret_)
@ -141,7 +143,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
async_callback(socket->impl()->sockfd(), connection_timeout, AsyncEventTimeoutType::CONNECT, description, AsyncTaskExecutor::READ | AsyncTaskExecutor::WRITE | AsyncTaskExecutor::ERROR);
if (auto err = socket->impl()->socketError())
socket->impl()->error(err); // Throws an exception
socket->impl()->error(err); // Throws an exception /// NOLINT(readability-static-accessed-through-instance)
socket->setBlocking(true);
}
@ -276,17 +278,6 @@ void Connection::disconnect()
}
String Connection::packStringForSshSign(String challenge)
{
String message;
message.append(std::to_string(DBMS_TCP_PROTOCOL_VERSION));
message.append(default_database);
message.append(user);
message.append(challenge);
return message;
}
void Connection::sendHello()
{
/** Disallow control characters in user controlled parameters
@ -334,10 +325,10 @@ void Connection::sendHello()
#endif
}
#if USE_SSH
/// Just inform server that we will authenticate using SSH keys.
else if (!ssh_private_key.isEmpty())
{
writeStringBinary(fmt::format("{}{}", EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER, user), *out);
/// Inform server that we will authenticate using SSH keys.
writeStringBinary(String(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER) + user, *out);
writeStringBinary(password, *out);
performHandshakeForSSHAuth();
@ -361,9 +352,9 @@ void Connection::sendAddendum()
}
#if USE_SSH
void Connection::performHandshakeForSSHAuth()
{
#if USE_SSH
String challenge;
{
writeVarUInt(Protocol::Client::SSHChallengeRequest, *out);
@ -388,12 +379,23 @@ void Connection::performHandshakeForSSHAuth()
}
writeVarUInt(Protocol::Client::SSHChallengeResponse, *out);
String to_sign = packStringForSshSign(challenge);
auto pack_string_for_ssh_sign = [&](String challenge_)
{
String message;
message.append(std::to_string(DBMS_TCP_PROTOCOL_VERSION));
message.append(default_database);
message.append(user);
message.append(challenge_);
return message;
};
String to_sign = pack_string_for_ssh_sign(challenge);
String signature = ssh_private_key.signString(to_sign);
writeStringBinary(signature, *out);
out->next();
#endif
}
#endif
void Connection::receiveHello(const Poco::Timespan & handshake_timeout)

View File

@ -1,10 +1,9 @@
#pragma once
#include <Poco/Net/StreamSocket.h>
#include <Common/SSH/Wrappers.h>
#include <Common/callOnce.h>
#include <Common/SSHWrapper.h>
#include <Client/IServerConnection.h>
#include <Core/Defines.h>
@ -53,7 +52,7 @@ public:
Connection(const String & host_, UInt16 port_,
const String & default_database_,
const String & user_, const String & password_,
const ssh::SSHKey & ssh_private_key_,
const SSHKey & ssh_private_key_,
const String & quota_key_,
const String & cluster_,
const String & cluster_secret_,
@ -170,7 +169,9 @@ private:
String default_database;
String user;
String password;
ssh::SSHKey ssh_private_key;
#if USE_SSH
SSHKey ssh_private_key;
#endif
String quota_key;
/// For inter-server authorization
@ -265,9 +266,10 @@ private:
void connect(const ConnectionTimeouts & timeouts);
void sendHello();
String packStringForSshSign(String challenge);
#if USE_SSH
void performHandshakeForSSHAuth();
#endif
void sendAddendum();
void receiveHello(const Poco::Timespan & handshake_timeout);

View File

@ -1,11 +1,10 @@
#include "ConnectionParameters.h"
#include <fstream>
#include <Core/Defines.h>
#include <Core/Protocol.h>
#include <Core/Types.h>
#include <IO/ConnectionTimeouts.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Common/SSH/Wrappers.h>
#include <Common/Exception.h>
#include <Common/isLocalAddress.h>
#include <Common/DNSResolver.h>
@ -88,19 +87,19 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
}
else
{
std::string prompt{"Enter your private key passphrase (leave empty for no passphrase): "};
std::string prompt{"Enter your SSH private key passphrase (leave empty for no passphrase): "};
char buf[1000] = {};
if (auto * result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0))
passphrase = result;
}
ssh::SSHKey key = ssh::SSHKeyFactory::makePrivateFromFile(filename, passphrase);
SSHKey key = SSHKeyFactory::makePrivateKeyFromFile(filename, passphrase);
if (!key.isPrivate())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Found public key in file: {} but expected private", filename);
throw Exception(ErrorCodes::BAD_ARGUMENTS, "File {} did not contain a private key (is it a public key?)", filename);
ssh_private_key = std::move(key);
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL");
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
}

View File

@ -1,9 +1,10 @@
#pragma once
#include <string>
#include <Common/SSHWrapper.h>
#include <Core/Protocol.h>
#include <IO/ConnectionTimeouts.h>
#include <Common/SSH/Wrappers.h>
#include <string>
namespace Poco::Util
{
@ -20,7 +21,7 @@ struct ConnectionParameters
std::string user;
std::string password;
std::string quota_key;
ssh::SSHKey ssh_private_key;
SSHKey ssh_private_key;
Protocol::Secure security = Protocol::Secure::Disable;
Protocol::Compression compression = Protocol::Compression::Enable;
ConnectionTimeouts timeouts;

View File

@ -123,7 +123,7 @@ protected:
{
return std::make_shared<Connection>(
host, port,
default_database, user, password, ssh::SSHKey(), quota_key,
default_database, user, password, SSHKey(), quota_key,
cluster, cluster_secret,
client_name, compression, secure);
}

View File

@ -940,7 +940,7 @@ void ColumnObject::addNestedSubcolumn(const PathInData & key, const FieldInfo &
if (nested_node)
{
/// Find any leaf of Nested subcolumn.
const auto * leaf = subcolumns.findLeaf(nested_node, [&](const auto &) { return true; });
const auto * leaf = Subcolumns::findLeaf(nested_node, [&](const auto &) { return true; });
assert(leaf);
/// Recreate subcolumn with default values and the same sizes of arrays.
@ -983,7 +983,7 @@ const ColumnObject::Subcolumns::Node * ColumnObject::getLeafOfTheSameNested(cons
while (current_node)
{
/// Try to find the first Nested up to the current node.
const auto * node_nested = subcolumns.findParent(current_node,
const auto * node_nested = Subcolumns::findParent(current_node,
[](const auto & candidate) { return candidate.isNested(); });
if (!node_nested)
@ -993,7 +993,7 @@ const ColumnObject::Subcolumns::Node * ColumnObject::getLeafOfTheSameNested(cons
/// for the last rows.
/// If there are no leaves, skip current node and find
/// the next node up to the current.
leaf = subcolumns.findLeaf(node_nested,
leaf = Subcolumns::findLeaf(node_nested,
[&](const auto & candidate)
{
return candidate.data.size() > old_size;

View File

@ -597,6 +597,7 @@
M(716, CANNOT_FORGET_PARTITION) \
M(717, EXPERIMENTAL_FEATURE_ERROR) \
M(718, TOO_SLOW_PARSING) \
M(719, QUERY_CACHE_USED_WITH_SYSTEM_TABLE) \
\
M(900, DISTRIBUTED_CACHE_ERROR) \
M(901, CANNOT_USE_DISTRIBUTED_CACHE) \

View File

@ -1,4 +1,5 @@
#include <Common/SSH/Wrappers.h>
#include <Common/SSHWrapper.h>
# if USE_SSH
# include <stdexcept>
@ -10,6 +11,14 @@
# pragma clang diagnostic pop
namespace DB
{
namespace ErrorCodes
{
extern const int LIBSSH_ERROR;
}
namespace
{
@ -18,17 +27,19 @@ class SSHString
public:
explicit SSHString(std::string_view input)
{
string = ssh_string_new(input.size());
ssh_string_fill(string, input.data(), input.size());
if (string = ssh_string_new(input.size()); string == nullptr)
throw Exception(ErrorCodes::LIBSSH_ERROR, "Can't create SSHString");
if (int rc = ssh_string_fill(string, input.data(), input.size()); rc != SSH_OK)
throw Exception(ErrorCodes::LIBSSH_ERROR, "Can't create SSHString");
}
explicit SSHString(ssh_string c_other) { string = c_other; }
explicit SSHString(ssh_string other) { string = other; }
ssh_string get() { return string; }
String toString()
{
return String(ssh_string_get_char(string), ssh_string_len(string));
return {ssh_string_get_char(string), ssh_string_len(string)};
}
~SSHString()
@ -42,46 +53,28 @@ private:
}
namespace DB
{
namespace ErrorCodes
{
extern const int LIBSSH_ERROR;
}
namespace ssh
{
SSHKey SSHKeyFactory::makePrivateFromFile(String filename, String passphrase)
SSHKey SSHKeyFactory::makePrivateKeyFromFile(String filename, String passphrase)
{
ssh_key key;
int rc = ssh_pki_import_privkey_file(filename.c_str(), passphrase.c_str(), nullptr, nullptr, &key);
if (rc != SSH_OK)
{
if (int rc = ssh_pki_import_privkey_file(filename.c_str(), passphrase.c_str(), nullptr, nullptr, &key); rc != SSH_OK)
throw Exception(ErrorCodes::LIBSSH_ERROR, "Can't import SSH private key from file");
}
return SSHKey(key);
}
SSHKey SSHKeyFactory::makePublicFromFile(String filename)
SSHKey SSHKeyFactory::makePublicKeyFromFile(String filename)
{
ssh_key key;
int rc = ssh_pki_import_pubkey_file(filename.c_str(), &key);
if (rc != SSH_OK)
if (int rc = ssh_pki_import_pubkey_file(filename.c_str(), &key); rc != SSH_OK)
throw Exception(ErrorCodes::LIBSSH_ERROR, "Can't import SSH public key from file");
return SSHKey(key);
}
SSHKey SSHKeyFactory::makePublicFromBase64(String base64_key, String type_name)
SSHKey SSHKeyFactory::makePublicKeyFromBase64(String base64_key, String type_name)
{
ssh_key key;
auto key_type = ssh_key_type_from_name(type_name.c_str());
int rc = ssh_pki_import_pubkey_base64(base64_key.c_str(), key_type, &key);
if (rc != SSH_OK)
if (int rc = ssh_pki_import_pubkey_base64(base64_key.c_str(), key_type, &key); rc != SSH_OK)
throw Exception(ErrorCodes::LIBSSH_ERROR, "Bad SSH public key provided");
return SSHKey(key);
}
@ -90,6 +83,12 @@ SSHKey::SSHKey(const SSHKey & other)
key = ssh_key_dup(other.key);
}
SSHKey::SSHKey(SSHKey && other) noexcept
{
key = other.key;
other.key = nullptr;
}
SSHKey & SSHKey::operator=(const SSHKey & other)
{
ssh_key_free(key);
@ -119,13 +118,11 @@ bool SSHKey::isEqual(const SSHKey & other) const
String SSHKey::signString(std::string_view input) const
{
SSHString input_str(input);
ssh_string c_output = nullptr;
int rc = pki_sign_string(key, input_str.get(), &c_output);
if (rc != SSH_OK)
ssh_string output = nullptr;
if (int rc = pki_sign_string(key, input_str.get(), &output); rc != SSH_OK)
throw Exception(ErrorCodes::LIBSSH_ERROR, "Error singing with ssh key");
SSHString output(c_output);
return output.toString();
SSHString output_str(output);
return output_str.toString();
}
bool SSHKey::verifySignature(std::string_view signature, std::string_view original) const
@ -149,18 +146,15 @@ namespace
{
struct CStringDeleter
{
[[maybe_unused]] void operator()(char * ptr) const { std::free(ptr); }
void operator()(char * ptr) const { std::free(ptr); }
};
}
String SSHKey::getBase64() const
{
char * buf = nullptr;
int rc = ssh_pki_export_pubkey_base64(key, &buf);
if (rc != SSH_OK)
if (int rc = ssh_pki_export_pubkey_base64(key, &buf); rc != SSH_OK)
throw DB::Exception(DB::ErrorCodes::LIBSSH_ERROR, "Failed to export public key to base64");
/// Create a String from the C string; this makes a copy, so the original buffer must be freed afterwards.
/// The unique_ptr below manages the buf memory safely.
std::unique_ptr<char, CStringDeleter> buf_ptr(buf);
@ -177,7 +171,6 @@ SSHKey::~SSHKey()
ssh_key_free(key); // ssh_key_free from libssh is safe to call here (it handles nullptr)
}
}
}
#endif

View File

@ -1,20 +1,18 @@
#pragma once
#include <Common/Exception.h>
#include "config.h"
#if USE_SSH
# include <string_view>
# include <base/types.h>
#include <Common/Exception.h>
#include <string_view>
#include <base/types.h>
#include "config.h"
#if USE_SSH
using ssh_key = struct ssh_key_struct *;
namespace DB
{
namespace ssh
{
class SSHKeyFactory;
class SSHKey
{
public:
@ -22,11 +20,7 @@ public:
~SSHKey();
SSHKey(const SSHKey & other);
SSHKey(SSHKey && other) noexcept
{
key = other.key;
other.key = nullptr;
}
SSHKey(SSHKey && other) noexcept;
SSHKey & operator=(const SSHKey & other);
SSHKey & operator=(SSHKey && other) noexcept;
@ -43,7 +37,7 @@ public:
String getBase64() const;
String getKeyType() const;
friend SSHKeyFactory;
friend class SSHKeyFactory;
private:
explicit SSHKey(ssh_key key_) : key(key_) { }
ssh_key key = nullptr;
@ -56,17 +50,14 @@ public:
/// Checking whether ClickHouse is allowed to read the path
/// (e.g. that the file is inside the `user_files` directory)
/// has to be done outside of these functions.
static SSHKey makePrivateFromFile(String filename, String passphrase);
static SSHKey makePublicFromFile(String filename);
static SSHKey makePublicFromBase64(String base64_key, String type_name);
static SSHKey makePrivateKeyFromFile(String filename, String passphrase);
static SSHKey makePublicKeyFromFile(String filename);
static SSHKey makePublicKeyFromBase64(String base64_key, String type_name);
};
}
}
#else
namespace ssh
{
class SSHKey
{
public:
@ -74,5 +65,4 @@ public:
[[ noreturn ]] bool isEmpty() { std::terminate(); }
[[ noreturn ]] String signString(std::string_view) const { std::terminate(); }
};
}
#endif

View File

@ -205,7 +205,7 @@ static void aggregate4(Map & local_map, MapTwoLevel & global_map, Mutex * mutexe
else
{
size_t hash_value = global_map.hash(*it);
size_t bucket = global_map.getBucketFromHash(hash_value);
size_t bucket = MapTwoLevel::getBucketFromHash(hash_value);
if (mutexes[bucket].try_lock())
{

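For context, the pattern exercised in this test — hash the key once, derive a bucket, and fall back to a thread-local map when the bucket's mutex is contended — can be sketched in a simplified, hypothetical form (plain std:: containers instead of MapTwoLevel):

#include <array>
#include <mutex>
#include <string>
#include <unordered_map>

constexpr size_t NUM_BUCKETS = 256;

/// Sketch: update the shared bucket only if its mutex is free; otherwise
/// accumulate into the caller's local map and merge later.
void aggregateOne(
    std::unordered_map<std::string, size_t> & local_map,
    std::array<std::unordered_map<std::string, size_t>, NUM_BUCKETS> & global_maps,
    std::array<std::mutex, NUM_BUCKETS> & mutexes,
    const std::string & key)
{
    const size_t hash_value = std::hash<std::string>{}(key);
    const size_t bucket = hash_value % NUM_BUCKETS; /// getBucketFromHash plays this role above
    if (mutexes[bucket].try_lock())
    {
        ++global_maps[bucket][key];
        mutexes[bucket].unlock();
    }
    else
        ++local_map[key]; /// contended: defer to a later merge step
}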
View File

@ -483,7 +483,7 @@ void testTranscoding(Timer & timer, ICompressionCodec & codec, const CodecTestSe
ASSERT_TRUE(EqualByteContainers(test_sequence.data_type->getSizeOfValueInMemory(), source_data, decoded));
const auto header_size = codec.getHeaderSize();
const auto header_size = ICompressionCodec::getHeaderSize();
const auto compression_ratio = (encoded_size - header_size) / (source_data.size() * 1.0);
if (expected_compression_ratio)

View File

@ -56,10 +56,11 @@ namespace DB
namespace EncodedUserInfo
{
/// Marker of the inter-server secret (passed in the user name)
/// Marker for the inter-server secret (passed as the user name)
/// (a user name cannot start with a whitespace anyway)
const char USER_INTERSERVER_MARKER[] = " INTERSERVER SECRET ";
/// Marker of the SSH keys based authentication (passed in the user name)
/// Marker for SSH-keys-based authentication (passed as the user name)
const char SSH_KEY_AUTHENTICAION_MARKER[] = " SSH KEY AUTHENTICATION ";
};
@ -160,8 +161,8 @@ namespace Protocol
ReadTaskResponse = 9, /// A filename to read from s3 (used in s3Cluster)
MergeTreeReadTaskResponse = 10, /// Coordinator's decision with a modified set of mark ranges allowed to read
SSHChallengeRequest = 11, /// Request for SSH signature challenge
SSHChallengeResponse = 12, /// Request for SSH signature challenge
SSHChallengeRequest = 11, /// Request SSH signature challenge
SSHChallengeResponse = 12, /// Reply to SSH signature challenge
MAX = SSHChallengeResponse,
};

View File

@ -669,6 +669,7 @@ class IColumn;
M(Bool, enable_writes_to_query_cache, true, "Enable storing results of SELECT queries in the query cache", 0) \
M(Bool, enable_reads_from_query_cache, true, "Enable reading results of SELECT queries from the query cache", 0) \
M(QueryCacheNondeterministicFunctionHandling, query_cache_nondeterministic_function_handling, QueryCacheNondeterministicFunctionHandling::Throw, "How the query cache handles queries with non-deterministic functions, e.g. now()", 0) \
M(QueryCacheSystemTableHandling, query_cache_system_table_handling, QueryCacheSystemTableHandling::Throw, "How the query cache handles queries against system tables, i.e. tables in databases 'system.*' and 'information_schema.*'", 0) \
M(UInt64, query_cache_max_size_in_bytes, 0, "The maximum amount of memory (in bytes) the current user may allocate in the query cache. 0 means unlimited. ", 0) \
M(UInt64, query_cache_max_entries, 0, "The maximum number of query results the current user may store in the query cache. 0 means unlimited.", 0) \
M(UInt64, query_cache_min_query_runs, 0, "Minimum number a SELECT query must run before its result is stored in the query cache", 0) \

View File

@ -87,6 +87,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{
{"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"},
{"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"},
{"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"},
}},
{"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"},
{"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"},

View File

@ -87,6 +87,10 @@ IMPLEMENT_SETTING_ENUM(QueryCacheNondeterministicFunctionHandling, ErrorCodes::B
{"save", QueryCacheNondeterministicFunctionHandling::Save},
{"ignore", QueryCacheNondeterministicFunctionHandling::Ignore}})
IMPLEMENT_SETTING_ENUM(QueryCacheSystemTableHandling, ErrorCodes::BAD_ARGUMENTS,
{{"throw", QueryCacheSystemTableHandling::Throw},
{"save", QueryCacheSystemTableHandling::Save},
{"ignore", QueryCacheSystemTableHandling::Ignore}})
IMPLEMENT_SETTING_ENUM(DateTimeInputFormat, ErrorCodes::BAD_ARGUMENTS,
{{"basic", FormatSettings::DateTimeInputFormat::Basic},

View File

@ -184,6 +184,15 @@ enum class QueryCacheNondeterministicFunctionHandling
DECLARE_SETTING_ENUM(QueryCacheNondeterministicFunctionHandling)
/// How the query cache handles queries against system tables, tables in databases 'system.*' and 'information_schema.*'
enum class QueryCacheSystemTableHandling
{
Throw,
Save,
Ignore
};
DECLARE_SETTING_ENUM(QueryCacheSystemTableHandling)
DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeInputFormat, FormatSettings::DateTimeInputFormat)

View File

@ -332,6 +332,7 @@ private:
const std::vector<StackTrace::FramePointers> & thread_frame_pointers,
UInt32 thread_num,
ThreadStatus * thread_ptr) const
try
{
ThreadStatus thread_status;
@ -519,7 +520,7 @@ private:
}
}
/// ClickHouse Keeper does not link to some part of Settings.
/// ClickHouse Keeper does not link to some parts of Settings.
#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD
/// List changed settings.
if (!query_id.empty())
@ -537,12 +538,18 @@ private:
}
#endif
/// When everything is done, we will try to send these error messages to client.
/// When everything is done, we will try to send these error messages to the client.
if (thread_ptr)
thread_ptr->onFatalError();
fatal_error_printed.test_and_set();
}
catch (...)
{
/// onFault is called from the std::thread, and it should catch all exceptions; otherwise, you can get unrelated fatal errors.
PreformattedMessage message = getCurrentExceptionMessageAndPattern(true);
LOG_FATAL(getLogger(__PRETTY_FUNCTION__), message);
}
};
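The `try` directly after the parameter list above is a function-try-block: the handler covers the entire body, so nothing thrown while reporting a fault can escape the thread. A minimal sketch of the idiom:

#include <exception>
#include <iostream>

/// Any exception thrown anywhere in the body lands in the catch below,
/// mirroring how onFault logs instead of letting the thread terminate.
void handleFault()
try
{
    /// ... fault reporting that may itself throw ...
}
catch (...)
{
    std::cerr << "unexpected exception while handling a fault\n";
}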
@ -665,7 +672,7 @@ void BaseDaemon::reloadConfiguration()
*/
config_path = config().getString("config-file", getDefaultConfigFileName());
ConfigProcessor config_processor(config_path, false, true);
config_processor.setConfigPath(fs::path(config_path).parent_path());
ConfigProcessor::setConfigPath(fs::path(config_path).parent_path());
loaded_config = config_processor.loadConfig(/* allow_zk_includes = */ true);
if (last_configuration != nullptr)

View File

@ -18,7 +18,7 @@ SerializationPtr DataTypeDate32::doGetDefaultSerialization() const
Field DataTypeDate32::getDefault() const
{
return -static_cast<Int64>(DateLUT::instance().getDayNumOffsetEpoch());
return -static_cast<Int64>(DateLUT::instance().getDayNumOffsetEpoch()); /// NOLINT(readability-static-accessed-through-instance)
}
void registerDataTypeDate32(DataTypeFactory & factory)

View File

@ -34,7 +34,7 @@ TEST(JSONDataParser, ReadJSON)
JSONDataParser<SimdJSONParser> parser;
ReadBufferFromString buf(json_bad);
String res;
parser.readJSON(res, buf);
JSONDataParser<SimdJSONParser>::readJSON(res, buf);
ASSERT_EQ(json1, res);
}
@ -44,7 +44,7 @@ TEST(JSONDataParser, ReadJSON)
JSONDataParser<SimdJSONParser> parser;
ReadBufferFromString buf(json_bad);
String res;
parser.readJSON(res, buf);
JSONDataParser<SimdJSONParser>::readJSON(res, buf);
ASSERT_EQ(json2, res);
}
}

View File

@ -346,7 +346,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegment & file_s
}
auto downloader_id = file_segment.getOrSetDownloader();
if (downloader_id == file_segment.getCallerId())
if (downloader_id == FileSegment::getCallerId())
{
if (canStartFromCache(file_offset_of_buffer_end, file_segment))
{

View File

@ -279,7 +279,7 @@ struct ToDate32Transform32Or64Signed
static NO_SANITIZE_UNDEFINED Int32 execute(const FromType & from, const DateLUTImpl & time_zone)
{
static const Int32 daynum_min_offset = -static_cast<Int32>(time_zone.getDayNumOffsetEpoch());
static const Int32 daynum_min_offset = -static_cast<Int32>(DateLUTImpl::getDayNumOffsetEpoch());
if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw)
{
@ -1092,7 +1092,7 @@ struct ConvertThroughParsing
{
if constexpr (std::is_same_v<ToDataType, DataTypeDate32>)
{
vec_to[i] = -static_cast<Int32>(DateLUT::instance().getDayNumOffsetEpoch());
vec_to[i] = -static_cast<Int32>(DateLUT::instance().getDayNumOffsetEpoch()); /// NOLINT(readability-static-accessed-through-instance)
}
else
{

View File

@ -280,7 +280,7 @@ public:
/**
At compile time, the result is unknown. We only know the Scale (number of fractional digits) at runtime.
Also, nothing is known about the size of the whole part.
As in simple division/multiplication for decimals, we scale the result up, but is is explicit here and no downscale is performed.
As in simple division/multiplication for decimals, we scale the result up, but it is explicit here and no downscale is performed.
It guarantees that result will have given scale and it can also be MANUALLY converted to other decimal types later.
**/
if (scale > DecimalUtils::max_precision<Decimal256>)

View File

@ -106,7 +106,7 @@ void UserDefinedSQLFunctionFactory::checkCanBeRegistered(const ContextPtr & cont
if (AggregateFunctionFactory::instance().hasNameOrAlias(function_name))
throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The aggregate function '{}' already exists", function_name);
if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context))
if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context)) /// NOLINT(readability-static-accessed-through-instance)
throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "User defined executable function '{}' already exists", function_name);
validateFunction(assert_cast<const ASTCreateFunctionQuery &>(create_function_query).function_core, function_name);
@ -118,7 +118,7 @@ void UserDefinedSQLFunctionFactory::checkCanBeUnregistered(const ContextPtr & co
AggregateFunctionFactory::instance().hasNameOrAlias(function_name))
throw Exception(ErrorCodes::CANNOT_DROP_FUNCTION, "Cannot drop system function '{}'", function_name);
if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context))
if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context)) /// NOLINT(readability-static-accessed-through-instance)
throw Exception(ErrorCodes::CANNOT_DROP_FUNCTION, "Cannot drop user defined executable function '{}'", function_name);
}

View File

@ -32,7 +32,7 @@ namespace
#endif
/// Get the host name. Is is constant on single server, but is not constant in distributed queries.
/// Get the host name. It is constant on single server, but is not constant in distributed queries.
class FunctionHostName : public FunctionConstantBase<FunctionHostName, String, DataTypeString>
{
public:

View File

@ -37,7 +37,7 @@ MMapReadBufferFromFileWithCache::MMapReadBufferFromFileWithCache(
MMapReadBufferFromFileWithCache::MMapReadBufferFromFileWithCache(
MMappedFileCache & cache, const std::string & file_name, size_t offset)
{
mapped = cache.getOrSet(cache.hash(file_name, offset, -1), [&]
mapped = cache.getOrSet(MMappedFileCache::hash(file_name, offset, -1), [&]
{
return std::make_shared<MMappedFile>(file_name, offset);
});

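The getOrSet call in this hunk follows a common cache idiom: look the key up and invoke the (possibly expensive) factory lambda only on a miss. A minimal generic sketch, assuming a single-threaded caller (the real MMappedFileCache is thread-safe and keys on a hash of file name, offset and length):

#include <functional>
#include <map>
#include <memory>

template <typename Key, typename Value>
std::shared_ptr<Value> getOrSet(
    std::map<Key, std::shared_ptr<Value>> & cache,
    const Key & key,
    const std::function<std::shared_ptr<Value>()> & load)
{
    if (auto it = cache.find(key); it != cache.end())
        return it->second; /// hit: the factory is never called
    auto value = load();   /// miss: e.g. mmap the file, as in the hunk above
    cache.emplace(key, value);
    return value;
}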
View File

@ -1071,7 +1071,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
auto current_context = data.getContext();
if (UserDefinedExecutableFunctionFactory::instance().has(node.name, current_context))
if (UserDefinedExecutableFunctionFactory::instance().has(node.name, current_context)) /// NOLINT(readability-static-accessed-through-instance)
{
Array parameters;
if (node.parameters)
@ -1087,7 +1087,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
}
}
function_builder = UserDefinedExecutableFunctionFactory::instance().tryGet(node.name, current_context, parameters);
function_builder = UserDefinedExecutableFunctionFactory::instance().tryGet(node.name, current_context, parameters); /// NOLINT(readability-static-accessed-through-instance)
}
if (!function_builder)

View File

@ -1056,7 +1056,7 @@ void NO_INLINE Aggregator::executeImplBatch(
/// During processing of row #i we will prefetch HashTable cell for row #(i + prefetch_look_ahead).
PrefetchingHelper prefetching;
size_t prefetch_look_ahead = prefetching.getInitialLookAheadValue();
size_t prefetch_look_ahead = PrefetchingHelper::getInitialLookAheadValue();
/// Optimization for special case when there are no aggregate functions.
if (params.aggregates_size == 0)
@ -1077,7 +1077,7 @@ void NO_INLINE Aggregator::executeImplBatch(
{
if constexpr (prefetch && HasPrefetchMemberFunc<decltype(method.data), KeyHolder>)
{
if (i == row_begin + prefetching.iterationsToMeasure())
if (i == row_begin + PrefetchingHelper::iterationsToMeasure())
prefetch_look_ahead = prefetching.calcPrefetchLookAhead();
if (i + prefetch_look_ahead < row_end)
@ -1163,7 +1163,7 @@ void NO_INLINE Aggregator::executeImplBatch(
if constexpr (prefetch && HasPrefetchMemberFunc<decltype(method.data), KeyHolder>)
{
if (i == key_start + prefetching.iterationsToMeasure())
if (i == key_start + PrefetchingHelper::iterationsToMeasure())
prefetch_look_ahead = prefetching.calcPrefetchLookAhead();
if (i + prefetch_look_ahead < row_end)

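The look-ahead prefetching touched by this hunk can be pictured with a simplified sketch: while handling row i, pull the hash-table cell that row (i + look_ahead) will need into cache. This is an illustration with a fixed distance; the real PrefetchingHelper measures a few iterations first (iterationsToMeasure) and then derives the distance at runtime (calcPrefetchLookAhead):

#include <cstddef>
#include <vector>

template <typename Cell>
void processRows(std::vector<Cell> & cells, const std::vector<size_t> & row_to_cell)
{
    constexpr size_t look_ahead = 16; /// assumed constant for illustration only
    for (size_t i = 0; i < row_to_cell.size(); ++i)
    {
        if (i + look_ahead < row_to_cell.size())
            __builtin_prefetch(&cells[row_to_cell[i + look_ahead]]); /// GCC/Clang builtin
        /// ... aggregate row i into cells[row_to_cell[i]] ...
    }
}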
View File

@ -2,11 +2,17 @@
#include <Functions/FunctionFactory.h>
#include <Interpreters/Context.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/IAST.h>
#include <Parsers/IParser.h>
#include <Parsers/TokenIterator.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseDatabaseAndTableName.h>
#include <Common/ProfileEvents.h>
#include <Common/SipHash.h>
#include <Common/TTLCachePolicy.h>
@ -52,7 +58,54 @@ struct HasNonDeterministicFunctionsMatcher
}
};
struct HasSystemTablesMatcher
{
struct Data
{
const ContextPtr context;
bool has_system_tables = false;
};
static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; }
static void visit(const ASTPtr & node, Data & data)
{
if (data.has_system_tables)
return;
String database_table; /// or whatever else we get, e.g. just a table
/// SELECT [...] FROM <table>
if (const auto * table_identifier = node->as<ASTTableIdentifier>())
{
database_table = table_identifier->name();
}
/// SELECT [...] FROM clusterAllReplicas(<cluster>, <table>)
else if (const auto * identifier = node->as<ASTIdentifier>())
{
database_table = identifier->name();
}
/// Handle SELECT [...] FROM clusterAllReplicas(<cluster>, '<table>')
else if (const auto * literal = node->as<ASTLiteral>())
{
const auto & value = literal->value;
database_table = toString(value);
}
Tokens tokens(database_table.c_str(), database_table.c_str() + database_table.size(), /*max_query_size*/ 2048, /*skip_insignificant*/ true);
IParser::Pos pos(tokens, /*max_depth*/ 42, /*max_backtracks*/ 42);
Expected expected;
String database;
String table;
bool successfully_parsed = parseDatabaseAndTableName(pos, expected, database, table);
if (successfully_parsed)
if (DatabaseCatalog::isPredefinedDatabase(database))
data.has_system_tables = true;
}
};
using HasNonDeterministicFunctionsVisitor = InDepthNodeVisitor<HasNonDeterministicFunctionsMatcher, true>;
using HasSystemTablesVisitor = InDepthNodeVisitor<HasSystemTablesMatcher, true>;
}
@ -63,6 +116,13 @@ bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context)
return finder_data.has_non_deterministic_functions;
}
bool astContainsSystemTables(ASTPtr ast, ContextPtr context)
{
HasSystemTablesMatcher::Data finder_data{context};
HasSystemTablesVisitor(finder_data).visit(ast);
return finder_data.has_system_tables;
}
namespace
{

View File

@ -17,6 +17,9 @@ namespace DB
/// Does AST contain non-deterministic functions like rand() and now()?
bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context);
/// Does AST contain system tables like "system.processes"?
bool astContainsSystemTables(ASTPtr ast, ContextPtr context);
/// Maps queries to query results. Useful to avoid repeated query calculation.
///
/// The cache does not aim to be transactionally consistent (which is difficult to get right). For example, the cache is not invalidated

View File

@ -56,7 +56,7 @@ InterpreterAlterQuery::InterpreterAlterQuery(const ASTPtr & query_ptr_, ContextP
BlockIO InterpreterAlterQuery::execute()
{
FunctionNameNormalizer().visit(query_ptr.get());
FunctionNameNormalizer::visit(query_ptr.get());
const auto & alter = query_ptr->as<ASTAlterQuery &>();
if (alter.alter_object == ASTAlterQuery::AlterObjectType::DATABASE)
{
@ -131,7 +131,7 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter)
if (modify_query)
{
// Expand CTE before filling default database
ApplyWithSubqueryVisitor().visit(*modify_query);
ApplyWithSubqueryVisitor::visit(*modify_query);
}
/// Add default database to table identifiers that we can encounter in e.g. default expressions, mutation expression, etc.

View File

@ -25,7 +25,7 @@ namespace ErrorCodes
BlockIO InterpreterCreateIndexQuery::execute()
{
FunctionNameNormalizer().visit(query_ptr.get());
FunctionNameNormalizer::visit(query_ptr.get());
auto current_context = getContext();
const auto & create_index = query_ptr->as<ASTCreateIndexQuery &>();

View File

@ -1114,7 +1114,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
// Table SQL definition is available even if the table is detached (even permanently)
auto query = database->getCreateTableQuery(create.getTable(), getContext());
FunctionNameNormalizer().visit(query.get());
FunctionNameNormalizer::visit(query.get());
auto create_query = query->as<ASTCreateQuery &>();
if (!create.is_dictionary && create_query.is_dictionary)
@ -1184,7 +1184,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
if (create.select && create.isView())
{
// Expand CTE before filling default database
ApplyWithSubqueryVisitor().visit(*create.select);
ApplyWithSubqueryVisitor::visit(*create.select);
AddDefaultDatabaseVisitor visitor(getContext(), current_database);
visitor.visit(*create.select);
}
@ -1763,7 +1763,7 @@ BlockIO InterpreterCreateQuery::executeQueryOnCluster(ASTCreateQuery & create)
BlockIO InterpreterCreateQuery::execute()
{
FunctionNameNormalizer().visit(query_ptr.get());
FunctionNameNormalizer::visit(query_ptr.get());
auto & create = query_ptr->as<ASTCreateQuery &>();
bool is_create_database = create.database && !create.table;

View File

@ -519,7 +519,8 @@ BlockIO InterpreterInsertQuery::execute()
auto views = DatabaseCatalog::instance().getDependentViews(table_id);
/// It breaks some views-related tests and we have dedicated `parallel_view_processing` for views, so let's just skip them.
const bool resize_to_max_insert_threads = !table->isView() && views.empty();
/// Also it doesn't make sense to reshuffle data if storage doesn't support parallel inserts.
const bool resize_to_max_insert_threads = !table->isView() && views.empty() && table->supportsParallelInsert();
pre_streams_size = resize_to_max_insert_threads ? settings.max_insert_threads
: std::min<size_t>(settings.max_insert_threads, pipeline.getNumStreams());

View File

@ -168,7 +168,7 @@ void Set::setHeader(const ColumnsWithTypeAndName & header)
}
/// Choose data structure to use for the set.
data.init(data.chooseMethod(key_columns, key_sizes));
data.init(SetVariants::chooseMethod(key_columns, key_sizes));
}
void Set::fillSetElements()

View File

@ -144,7 +144,7 @@ void optimizeGroupBy(ASTSelectQuery * select_query, ContextPtr context)
}
else
{
FunctionOverloadResolverPtr function_builder = UserDefinedExecutableFunctionFactory::instance().tryGet(function->name, context);
FunctionOverloadResolverPtr function_builder = UserDefinedExecutableFunctionFactory::instance().tryGet(function->name, context); /// NOLINT(readability-static-accessed-through-instance)
if (!function_builder)
function_builder = function_factory.get(function->name, context);

View File

@ -97,6 +97,7 @@ namespace DB
namespace ErrorCodes
{
extern const int QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS;
extern const int QUERY_CACHE_USED_WITH_SYSTEM_TABLE;
extern const int INTO_OUTFILE_NOT_ALLOWED;
extern const int INVALID_TRANSACTION;
extern const int LOGICAL_ERROR;
@ -912,7 +913,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
/// Propagate WITH statement to children ASTSelect.
if (settings.enable_global_with_statement)
{
ApplyWithGlobalVisitor().visit(ast);
ApplyWithGlobalVisitor::visit(ast);
}
{
@ -1187,15 +1188,26 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
/// top of the pipeline which stores the result in the query cache.
if (can_use_query_cache && settings.enable_writes_to_query_cache)
{
/// Only use the query cache if the query does not contain non-deterministic functions or system tables (which are typically non-deterministic)
const bool ast_contains_nondeterministic_functions = astContainsNonDeterministicFunctions(ast, context);
const bool ast_contains_system_tables = astContainsSystemTables(ast, context);
const QueryCacheNondeterministicFunctionHandling nondeterministic_function_handling = settings.query_cache_nondeterministic_function_handling;
const QueryCacheSystemTableHandling system_table_handling = settings.query_cache_system_table_handling;
if (ast_contains_nondeterministic_functions && nondeterministic_function_handling == QueryCacheNondeterministicFunctionHandling::Throw)
throw Exception(ErrorCodes::QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS,
"The query result was not cached because the query contains a non-deterministic function."
" Use setting `query_cache_nondeterministic_function_handling = 'save'` or `= 'ignore'` to cache the query result regardless or to omit caching");
if (!ast_contains_nondeterministic_functions || nondeterministic_function_handling == QueryCacheNondeterministicFunctionHandling::Save)
if (ast_contains_system_tables && system_table_handling == QueryCacheSystemTableHandling::Throw)
throw Exception(ErrorCodes::QUERY_CACHE_USED_WITH_SYSTEM_TABLE,
"The query result was not cached because the query contains a system table."
" Use setting `query_cache_system_table_handling = 'save'` or `= 'ignore'` to cache the query result regardless or to omit caching");
if ((!ast_contains_nondeterministic_functions || nondeterministic_function_handling == QueryCacheNondeterministicFunctionHandling::Save)
&& (!ast_contains_system_tables || system_table_handling == QueryCacheSystemTableHandling::Save))
{
QueryCache::Key key(
ast, res.pipeline.getHeader(),

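Condensed, the gating introduced above amounts to the following predicate (a hedged sketch with simplified names, not the actual ClickHouse code): each hazard must be absent or explicitly allowed via 'save'; 'throw' aborts the query, and 'ignore' silently skips caching.

#include <stdexcept>

enum class Handling { Throw, Save, Ignore };

bool mayWriteToQueryCache(
    bool has_nondeterministic, Handling nondeterministic_handling,
    bool has_system_tables, Handling system_table_handling)
{
    if (has_nondeterministic && nondeterministic_handling == Handling::Throw)
        throw std::runtime_error("query contains a non-deterministic function");
    if (has_system_tables && system_table_handling == Handling::Throw)
        throw std::runtime_error("query contains a system table");
    return (!has_nondeterministic || nondeterministic_handling == Handling::Save)
        && (!has_system_tables || system_table_handling == Handling::Save);
}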
View File

@ -364,7 +364,7 @@ TEST_F(FileCacheTest, LRUPolicy)
std::cerr << "Step 1\n";
auto cache = DB::FileCache("1", settings);
cache.initialize();
auto key = cache.createKeyForPath("key1");
auto key = DB::FileCache::createKeyForPath("key1");
auto get_or_set = [&](size_t offset, size_t size)
{
@ -728,7 +728,7 @@ TEST_F(FileCacheTest, LRUPolicy)
auto cache2 = DB::FileCache("2", settings);
cache2.initialize();
auto key = cache2.createKeyForPath("key1");
auto key = DB::FileCache::createKeyForPath("key1");
/// Get [2, 29]
assertEqual(
@ -747,7 +747,7 @@ TEST_F(FileCacheTest, LRUPolicy)
fs::create_directories(settings2.base_path);
auto cache2 = DB::FileCache("3", settings2);
cache2.initialize();
auto key = cache2.createKeyForPath("key1");
auto key = DB::FileCache::createKeyForPath("key1");
/// Get [0, 24]
assertEqual(
@ -762,7 +762,7 @@ TEST_F(FileCacheTest, LRUPolicy)
auto cache = FileCache("4", settings);
cache.initialize();
const auto key = cache.createKeyForPath("key10");
const auto key = FileCache::createKeyForPath("key10");
const auto key_path = cache.getKeyPath(key, user);
cache.removeAllReleasable(user.user_id);
@ -786,7 +786,7 @@ TEST_F(FileCacheTest, LRUPolicy)
auto cache = DB::FileCache("5", settings);
cache.initialize();
const auto key = cache.createKeyForPath("key10");
const auto key = FileCache::createKeyForPath("key10");
const auto key_path = cache.getKeyPath(key, user);
cache.removeAllReleasable(user.user_id);
@ -823,7 +823,7 @@ TEST_F(FileCacheTest, writeBuffer)
segment_settings.kind = FileSegmentKind::Temporary;
segment_settings.unbounded = true;
auto cache_key = cache.createKeyForPath(key);
auto cache_key = FileCache::createKeyForPath(key);
auto holder = cache.set(cache_key, 0, 3, segment_settings, user);
/// The same is done in TemporaryDataOnDisk::createStreamToCacheFile.
std::filesystem::create_directories(cache.getKeyPath(cache_key, user));
@ -949,7 +949,7 @@ TEST_F(FileCacheTest, temporaryData)
const auto user = FileCache::getCommonUser();
auto tmp_data_scope = std::make_shared<TemporaryDataOnDiskScope>(nullptr, &file_cache, TemporaryDataOnDiskSettings{});
auto some_data_holder = file_cache.getOrSet(file_cache.createKeyForPath("some_data"), 0, 5_KiB, 5_KiB, CreateFileSegmentSettings{}, 0, user);
auto some_data_holder = file_cache.getOrSet(FileCache::createKeyForPath("some_data"), 0, 5_KiB, 5_KiB, CreateFileSegmentSettings{}, 0, user);
{
ASSERT_EQ(some_data_holder->size(), 5);
@ -1199,7 +1199,7 @@ TEST_F(FileCacheTest, SLRUPolicy)
{
auto cache = DB::FileCache(std::to_string(++file_cache_name), settings);
cache.initialize();
auto key = cache.createKeyForPath("key1");
auto key = FileCache::createKeyForPath("key1");
auto add_range = [&](size_t offset, size_t size)
{
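
All of the hunks above are the same mechanical change: `createKeyForPath` is a static member, so it is now called through the class instead of through an instance, which is what the readability-static-accessed-through-instance check flags. A trivial illustration with a hypothetical class:

#include <string>

struct FileCacheSketch
{
    static std::string createKeyForPath(const std::string & path) { return "key:" + path; }
};

int main()
{
    FileCacheSketch cache;
    auto k1 = cache.createKeyForPath("p");            // legal C++, but reads like an instance call
    auto k2 = FileCacheSketch::createKeyForPath("p"); // preferred: the call is visibly static
    return k1 == k2 ? 0 : 1;
}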

View File

@ -53,6 +53,8 @@ static std::string renderFileNameTemplate(time_t now, const std::string & file_p
return path.replace_filename(ss.str());
}
/// NOLINTBEGIN(readability-static-accessed-through-instance)
void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger /*_root*/, const std::string & cmd_name)
{
auto current_logger = config.getString("logger", "");
@ -393,6 +395,8 @@ void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Log
}
}
/// NOLINTEND(readability-static-accessed-through-instance)
void Loggers::closeLogs(Poco::Logger & logger)
{
if (log_file)

View File

@ -1,6 +1,6 @@
#include <Parsers/Access/ParserPublicSSHKey.h>
#include <Parsers/Access/ASTPublicSSHKey.h>
#include <Parsers/Access/ASTPublicSSHKey.h>
#include <Parsers/CommonParsers.h>
#include <Parsers/parseIdentifierOrStringLiteral.h>

View File

@ -89,27 +89,8 @@ public:
return;
auto subquery_to_execute = in_second_argument;
if (auto * table_node = in_second_argument->as<TableNode>())
{
auto storage_snapshot = table_node->getStorageSnapshot();
auto columns_to_select = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::Ordinary));
size_t columns_to_select_size = columns_to_select.size();
auto column_nodes_to_select = std::make_shared<ListNode>();
column_nodes_to_select->getNodes().reserve(columns_to_select_size);
NamesAndTypes projection_columns;
projection_columns.reserve(columns_to_select_size);
for (auto & column : columns_to_select)
{
column_nodes_to_select->getNodes().emplace_back(std::make_shared<ColumnNode>(column, subquery_to_execute));
projection_columns.emplace_back(column.name, column.type);
}
auto subquery_for_table = std::make_shared<QueryNode>(Context::createCopy(planner_context.getQueryContext()));
subquery_for_table->setIsSubquery(true);
subquery_for_table->getProjectionNode() = std::move(column_nodes_to_select);
subquery_for_table->getJoinTree() = std::move(subquery_to_execute);
subquery_for_table->resolveProjectionColumns(std::move(projection_columns));
subquery_to_execute = std::move(subquery_for_table);
}
if (in_second_argument->as<TableNode>())
subquery_to_execute = buildSubqueryToReadColumnsFromTableExpression(std::move(subquery_to_execute), planner_context.getQueryContext());
sets.addFromSubquery(set_key, std::move(subquery_to_execute), settings);
}
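
The deleted block is exactly what the new helper factors out. Roughly (with hypothetical simplified node types, not the real QueryTree classes), buildSubqueryToReadColumnsFromTableExpression wraps a bare table into a `SELECT <all ordinary columns> FROM <table>` query node:

#include <memory>
#include <string>
#include <utility>
#include <vector>

// Toy stand-ins for the query-tree nodes (hypothetical, for illustration only).
struct Node { virtual ~Node() = default; };
using NodePtr = std::shared_ptr<Node>;

struct TableNode : Node { std::vector<std::string> ordinary_columns; };

struct QueryNode : Node
{
    bool is_subquery = false;
    std::vector<std::string> projection; // column names to select
    NodePtr join_tree;                   // what the subquery reads from
};

// Sketch: wrap a table into "SELECT col1, col2, ... FROM table" so that
// set building always executes a proper query over the table expression.
NodePtr buildSubqueryToReadColumns(NodePtr table_expression)
{
    auto * table = dynamic_cast<TableNode *>(table_expression.get());
    auto subquery = std::make_shared<QueryNode>();
    subquery->is_subquery = true;
    if (table)
        subquery->projection = table->ordinary_columns; // project every ordinary column
    subquery->join_tree = std::move(table_expression);
    return subquery;
}

int main()
{
    auto table = std::make_shared<TableNode>();
    table->ordinary_columns = {"id", "value"};
    auto subquery = buildSubqueryToReadColumns(table);
    (void)subquery; // conceptually: SELECT id, value FROM table
}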

View File

@ -86,7 +86,7 @@ static const Graphite::Pattern undef_pattern =
.regexp_str = "",
.function = nullptr,
.retentions = Graphite::Retentions(),
.type = undef_pattern.TypeUndef,
.type = Graphite::Pattern::TypeUndef,
};
inline static const Patterns & selectPatternsForMetricType(const Graphite::Params & params, std::string_view path)
@ -118,18 +118,18 @@ Graphite::RollupRule selectPatternForPath(
if (!pattern.regexp)
{
/// Default pattern
if (first_match->type == first_match->TypeUndef && pattern.type == pattern.TypeAll)
if (first_match->type == Graphite::Pattern::TypeUndef && pattern.type == Graphite::Pattern::TypeAll)
{
/// There is only default pattern for both retention and aggregation
return {&pattern, &pattern};
}
if (pattern.type != first_match->type)
{
if (first_match->type == first_match->TypeRetention)
if (first_match->type == Graphite::Pattern::TypeRetention)
{
return {first_match, &pattern};
}
if (first_match->type == first_match->TypeAggregation)
if (first_match->type == Graphite::Pattern::TypeAggregation)
{
return {&pattern, first_match};
}
@ -140,23 +140,23 @@ Graphite::RollupRule selectPatternForPath(
if (pattern.regexp->match(path.data(), path.size()))
{
/// General pattern with matched path
if (pattern.type == pattern.TypeAll)
if (pattern.type == Graphite::Pattern::TypeAll)
{
/// Only for non-default patterns with both function and retention parameters
return {&pattern, &pattern};
}
if (first_match->type == first_match->TypeUndef)
if (first_match->type == Graphite::Pattern::TypeUndef)
{
first_match = &pattern;
continue;
}
if (pattern.type != first_match->type)
{
if (first_match->type == first_match->TypeRetention)
if (first_match->type == Graphite::Pattern::TypeRetention)
{
return {first_match, &pattern};
}
if (first_match->type == first_match->TypeAggregation)
if (first_match->type == Graphite::Pattern::TypeAggregation)
{
return {&pattern, first_match};
}
@ -415,24 +415,24 @@ static const Pattern & appendGraphitePattern(
if (!pattern.function)
{
pattern.type = pattern.TypeRetention;
pattern.type = Graphite::Pattern::TypeRetention;
}
else if (pattern.retentions.empty())
{
pattern.type = pattern.TypeAggregation;
pattern.type = Graphite::Pattern::TypeAggregation;
}
else
{
pattern.type = pattern.TypeAll;
pattern.type = Graphite::Pattern::TypeAll;
}
if (pattern.type & pattern.TypeAggregation) /// TypeAggregation or TypeAll
if (pattern.type & Graphite::Pattern::TypeAggregation) /// TypeAggregation or TypeAll
if (pattern.function->allocatesMemoryInArena())
throw Exception(DB::ErrorCodes::NOT_IMPLEMENTED,
"Aggregate function {} isn't supported in GraphiteMergeTree", pattern.function->getName());
/// retention should be in descending order of age.
if (pattern.type & pattern.TypeRetention) /// TypeRetention or TypeAll
if (pattern.type & Graphite::Pattern::TypeRetention) /// TypeRetention or TypeAll
::sort(pattern.retentions.begin(), pattern.retentions.end(), compareRetentions);
patterns.emplace_back(pattern);
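
The bitwise tests above only work if TypeAll is the union of the two flags; a compact sketch of the classification under that assumption (hypothetical mirror of Graphite::Pattern):

#include <cassert>

// Assumed flag layout, consistent with the "pattern.type & TypeAggregation" tests above.
enum PatternType : unsigned
{
    TypeUndef = 0,
    TypeRetention = 1,
    TypeAggregation = 2,
    TypeAll = TypeRetention | TypeAggregation,
};

struct Pattern
{
    bool has_function = false;
    bool has_retentions = false;
    unsigned type = TypeUndef;
};

// Classify a pattern the same way appendGraphitePattern does above.
void classify(Pattern & p)
{
    if (!p.has_function)
        p.type = TypeRetention;
    else if (!p.has_retentions)
        p.type = TypeAggregation;
    else
        p.type = TypeAll;
}

int main()
{
    Pattern p{.has_function = true, .has_retentions = true};
    classify(p);
    assert(p.type & TypeAggregation); // TypeAggregation or TypeAll
    assert(p.type & TypeRetention);   // TypeRetention or TypeAll
}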

View File

@ -68,6 +68,10 @@ std::unique_ptr<QueryPlan> createLocalPlan(
if (context->getSettingsRef().allow_experimental_analyzer)
{
/// For Analyzer, identifier in GROUP BY/ORDER BY/LIMIT BY lists has been resolved to
/// ConstantNode in QueryTree if it is an alias of a constant, so we should not replace
/// ConstantNode with ProjectionNode again (https://github.com/ClickHouse/ClickHouse/issues/62289).
new_context->setSetting("enable_positional_arguments", Field(false));
auto interpreter = InterpreterSelectQueryAnalyzer(query_ast, new_context, select_query_options);
query_plan = std::make_unique<QueryPlan>(std::move(interpreter).extractQueryPlan());
}

View File

@ -13,8 +13,6 @@
#include <QueryPipeline/ReadProgressCallback.h>
#include <Columns/ColumnConst.h>
#include <QueryPipeline/printPipeline.h>
namespace DB
{

View File

@ -1371,17 +1371,6 @@ std::string formatHTTPErrorResponseWhenUserIsConnectedToWrongPort(const Poco::Ut
return result;
}
[[ maybe_unused ]] String createChallenge()
{
#if USE_SSL
pcg64_fast rng(randomSeed());
UInt64 rand = rng();
return encodeSHA256(&rand, sizeof(rand));
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Can't generate challenge, because ClickHouse was built without OpenSSL");
#endif
}
}
std::unique_ptr<Session> TCPHandler::makeSession()
@ -1399,16 +1388,6 @@ std::unique_ptr<Session> TCPHandler::makeSession()
return res;
}
String TCPHandler::prepareStringForSshValidation(String username, String challenge)
{
String output;
output.append(std::to_string(client_tcp_protocol_version));
output.append(default_database);
output.append(username);
output.append(challenge);
return output;
}
void TCPHandler::receiveHello()
{
/// Receive `hello` packet.
@ -1466,11 +1445,9 @@ void TCPHandler::receiveHello()
return;
}
is_ssh_based_auth = startsWith(user, EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER) && password.empty();
is_ssh_based_auth = user.starts_with(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER) && password.empty();
if (is_ssh_based_auth)
{
user.erase(0, String(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER).size());
}
user.erase(0, std::string_view(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER).size());
session = makeSession();
const auto & client_info = session->getClientInfo();
@ -1498,7 +1475,9 @@ void TCPHandler::receiveHello()
}
}
}
#endif
#if USE_SSH
/// Perform handshake for SSH authentication
if (is_ssh_based_auth)
{
@ -1512,7 +1491,14 @@ void TCPHandler::receiveHello()
if (packet_type != Protocol::Client::SSHChallengeRequest)
throw Exception(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Server expected to receive a packet for requesting a challenge string");
auto challenge = createChallenge();
auto create_challenge = []()
{
pcg64_fast rng(randomSeed());
UInt64 rand = rng();
return encodeSHA256(&rand, sizeof(rand));
};
String challenge = create_challenge();
writeVarUInt(Protocol::Server::SSHChallenge, *out);
writeStringBinary(challenge, *out);
out->next();
@ -1523,7 +1509,17 @@ void TCPHandler::receiveHello()
throw Exception(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Server expected to receive a packet with a response for a challenge");
readStringBinary(signature, *in);
auto cred = SshCredentials(user, signature, prepareStringForSshValidation(user, challenge));
auto prepare_string_for_ssh_validation = [&](const String & username, const String & challenge_)
{
String output;
output.append(std::to_string(client_tcp_protocol_version));
output.append(default_database);
output.append(username);
output.append(challenge_);
return output;
};
auto cred = SshCredentials(user, signature, prepare_string_for_ssh_validation(user, challenge));
session->authenticate(cred, getClientAddress(client_info));
return;
}
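
A sketch of the payload the client must sign (hypothetical free function mirroring the lambda above): binding the protocol version, database and user name into the signed string ties the signature to this specific connection, so it cannot be replayed elsewhere.

#include <iostream>
#include <string>

// Hypothetical helper: the client signs exactly this concatenation with its
// private key; the server verifies the signature against the stored public key.
std::string prepareStringForSshValidation(unsigned protocol_version,
                                          const std::string & default_database,
                                          const std::string & username,
                                          const std::string & challenge)
{
    std::string output;
    output.append(std::to_string(protocol_version));
    output.append(default_database);
    output.append(username);
    output.append(challenge);
    return output;
}

int main()
{
    std::cout << prepareStringForSshValidation(54466, "default", "alice", "deadbeef") << '\n';
}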

View File

@ -216,7 +216,7 @@ private:
String default_database;
bool is_ssh_based_auth = false;
bool is_ssh_based_auth = false; /// authentication is via SSH pub-key challenge
/// For inter-server secret (remote_server.*.secret)
bool is_interserver_mode = false;
bool is_interserver_authenticated = false;
@ -248,7 +248,6 @@ private:
void extractConnectionSettingsFromContext(const ContextPtr & context);
std::unique_ptr<Session> makeSession();
String prepareStringForSshValidation(String user, String challenge);
bool receiveProxyHeader();
void receiveHello();

View File

@ -152,7 +152,7 @@ StorageFileLog::StorageFileLog(
if (!fileOrSymlinkPathStartsWith(path, getContext()->getUserFilesPath()))
{
if (LoadingStrictnessLevel::ATTACH <= mode)
if (LoadingStrictnessLevel::SECONDARY_CREATE <= mode)
{
LOG_ERROR(log, "The absolute data path should be inside `user_files_path`({})", getContext()->getUserFilesPath());
return;
@ -467,7 +467,7 @@ void StorageFileLog::openFilesAndSetPos()
auto & reader = file_ctx.reader.value();
assertStreamGood(reader);
reader.seekg(0, reader.end);
reader.seekg(0, reader.end); /// NOLINT(readability-static-accessed-through-instance)
assertStreamGood(reader);
auto file_end = reader.tellg();

View File

@ -1,6 +1,5 @@
#include <Storages/MemorySettings.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/ASTFunction.h>
#include <Common/Exception.h>
@ -11,6 +10,7 @@ namespace DB
namespace ErrorCodes
{
extern const int UNKNOWN_SETTING;
extern const int SETTING_CONSTRAINT_VIOLATION;
}
IMPLEMENT_SETTINGS_TRAITS(memorySettingsTraits, MEMORY_SETTINGS)
@ -32,5 +32,31 @@ void MemorySettings::loadFromQuery(ASTStorage & storage_def)
}
}
ASTPtr MemorySettings::getSettingsChangesQuery()
{
auto settings_ast = std::make_shared<ASTSetQuery>();
settings_ast->is_standalone = false;
for (const auto & change : changes())
settings_ast->changes.push_back(change);
return settings_ast;
}
void MemorySettings::sanityCheck() const
{
if (min_bytes_to_keep > max_bytes_to_keep)
throw Exception(ErrorCodes::SETTING_CONSTRAINT_VIOLATION,
"Setting `min_bytes_to_keep` cannot be higher than the `max_bytes_to_keep`. `min_bytes_to_keep`: {}, `max_bytes_to_keep`: {}",
min_bytes_to_keep,
max_bytes_to_keep);
if (min_rows_to_keep > max_rows_to_keep)
throw Exception(ErrorCodes::SETTING_CONSTRAINT_VIOLATION,
"Setting `min_rows_to_keep` cannot be higher than the `max_rows_to_keep`. `min_rows_to_keep`: {}, `max_rows_to_keep`: {}",
min_rows_to_keep,
max_rows_to_keep);
}
}
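
A condensed stand-in showing when the new sanityCheck() fires (hypothetical struct; note that a zero max acts as "no cap" in the trimming code, so a plain comparison means setting a min without a max is rejected as well):

#include <cstdint>
#include <iostream>
#include <stdexcept>

// Hypothetical stand-in for MemorySettings, reduced to one pair of limits.
struct MemorySettingsSketch
{
    uint64_t min_bytes_to_keep = 0;
    uint64_t max_bytes_to_keep = 0;

    void sanityCheck() const
    {
        if (min_bytes_to_keep > max_bytes_to_keep)
            throw std::invalid_argument("min_bytes_to_keep cannot be higher than max_bytes_to_keep");
    }
};

int main()
{
    MemorySettingsSketch s{.min_bytes_to_keep = 1024, .max_bytes_to_keep = 0};
    try { s.sanityCheck(); }
    catch (const std::invalid_argument & e) { std::cout << e.what() << '\n'; }
}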

View File

@ -1,6 +1,7 @@
#pragma once
#include <Core/BaseSettings.h>
#include <Parsers/ASTSetQuery.h>
namespace DB
@ -24,6 +25,8 @@ DECLARE_SETTINGS_TRAITS(memorySettingsTraits, MEMORY_SETTINGS)
struct MemorySettings : public BaseSettings<memorySettingsTraits>
{
void loadFromQuery(ASTStorage & storage_def);
ASTPtr getSettingsChangesQuery();
void sanityCheck() const;
};
}

View File

@ -318,7 +318,7 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDisk(
}
if (!from_remote_disk && isFullPartStorage(part->getDataPartStorage()))
part->checksums.checkEqual(data_checksums, false);
part->checksums.checkEqual(data_checksums, false, part->name);
return data_checksums;
}
@ -906,7 +906,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk(
else
{
if (isFullPartStorage(new_data_part->getDataPartStorage()))
new_data_part->checksums.checkEqual(data_checksums, false);
new_data_part->checksums.checkEqual(data_checksums, false, new_data_part->name);
LOG_DEBUG(log, "Download of part {} onto disk {} finished.", part_name, disk->getName());
}

View File

@ -3003,7 +3003,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context
commands.apply(new_metadata, local_context);
if (commands.hasInvertedIndex(new_metadata) && !settings.allow_experimental_inverted_index)
if (AlterCommands::hasInvertedIndex(new_metadata) && !settings.allow_experimental_inverted_index)
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
"Experimental Inverted Index feature is not enabled (turn on setting 'allow_experimental_inverted_index')");

View File

@ -28,33 +28,34 @@ namespace ErrorCodes
}
void MergeTreeDataPartChecksum::checkEqual(const MergeTreeDataPartChecksum & rhs, bool have_uncompressed, const String & name) const
void MergeTreeDataPartChecksum::checkEqual(const MergeTreeDataPartChecksum & rhs, bool have_uncompressed, const String & name, const String & part_name) const
{
if (is_compressed && have_uncompressed)
{
if (!rhs.is_compressed)
throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "No uncompressed checksum for file {}", name);
throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "No uncompressed checksum for file {}, data part {}", name, part_name);
if (rhs.uncompressed_size != uncompressed_size)
{
throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected uncompressed size of file {} in data part ({} vs {})",
name, uncompressed_size, rhs.uncompressed_size);
throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected uncompressed size of file {} in data part {} ({} vs {})",
name, part_name, uncompressed_size, rhs.uncompressed_size);
}
if (rhs.uncompressed_hash != uncompressed_hash)
{
throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for uncompressed file {} in data part ({} vs {})",
name, getHexUIntLowercase(uncompressed_hash), getHexUIntLowercase(rhs.uncompressed_hash));
throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for uncompressed file {} in data part {} ({} vs {})",
name, part_name, getHexUIntLowercase(uncompressed_hash), getHexUIntLowercase(rhs.uncompressed_hash));
}
return;
}
if (rhs.file_size != file_size)
{
throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected size of file {} in data part ({} vs {})",
name, file_size, rhs.file_size);
throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected size of file {} in data part {} ({} vs {})",
name, part_name, file_size, rhs.file_size);
}
if (rhs.file_hash != file_hash)
{
throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for file {} in data part ({} vs {})",
name, getHexUIntLowercase(file_hash), getHexUIntLowercase(rhs.file_hash));
throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for file {} in data part {} ({} vs {})",
name, part_name, getHexUIntLowercase(file_hash), getHexUIntLowercase(rhs.file_hash));
}
}
@ -79,7 +80,7 @@ void MergeTreeDataPartChecksum::checkSize(const IDataPartStorage & storage, cons
}
void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & rhs, bool have_uncompressed) const
void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & rhs, bool have_uncompressed, const String & part_name) const
{
for (const auto & [name, _] : rhs.files)
if (!files.contains(name))
@ -95,7 +96,7 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r
if (it == rhs.files.end())
throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No file {} in data part", name);
checksum.checkEqual(it->second, have_uncompressed, name);
checksum.checkEqual(it->second, have_uncompressed, name, part_name);
}
}
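
The point of threading part_name through: the diagnostics now name both the file and the data part it belongs to. A rough reproduction of the improved message (using std::format as a stand-in for the fmt-style Exception formatting):

#include <cstdint>
#include <format>
#include <iostream>
#include <string>

// Hypothetical helper reproducing the richer error text; before the change the
// message named only the file, which made it hard to tell which part was broken.
std::string sizeMismatchMessage(const std::string & file, const std::string & part,
                                uint64_t expected, uint64_t actual)
{
    return std::format("Unexpected size of file {} in data part {} ({} vs {})",
                       file, part, expected, actual);
}

int main()
{
    std::cout << sizeMismatchMessage("data.bin", "all_1_1_0", 1024, 512) << '\n';
}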
@ -435,19 +436,19 @@ String MinimalisticDataPartChecksums::getSerializedString(const MergeTreeDataPar
return checksums.getSerializedString();
}
void MinimalisticDataPartChecksums::checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const
void MinimalisticDataPartChecksums::checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files, const String & part_name) const
{
if (full_checksums && rhs.full_checksums)
full_checksums->checkEqual(*rhs.full_checksums, check_uncompressed_hash_in_compressed_files);
full_checksums->checkEqual(*rhs.full_checksums, check_uncompressed_hash_in_compressed_files, part_name);
// If full checksums were checked, check total checksums just in case
checkEqualImpl(rhs, check_uncompressed_hash_in_compressed_files);
}
void MinimalisticDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const
void MinimalisticDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files, const String & part_name) const
{
if (full_checksums)
full_checksums->checkEqual(rhs, check_uncompressed_hash_in_compressed_files);
full_checksums->checkEqual(rhs, check_uncompressed_hash_in_compressed_files, part_name);
// If full checksums were checked, check total checksums just in case
MinimalisticDataPartChecksums rhs_minimalistic;

View File

@ -32,7 +32,7 @@ struct MergeTreeDataPartChecksum
: file_size(file_size_), file_hash(file_hash_), is_compressed(true),
uncompressed_size(uncompressed_size_), uncompressed_hash(uncompressed_hash_) {}
void checkEqual(const MergeTreeDataPartChecksum & rhs, bool have_uncompressed, const String & name) const;
void checkEqual(const MergeTreeDataPartChecksum & rhs, bool have_uncompressed, const String & name, const String & part_name) const;
void checkSize(const IDataPartStorage & storage, const String & name) const;
};
@ -61,7 +61,7 @@ struct MergeTreeDataPartChecksums
/// Checks that the set of columns and their checksums are the same. If not, throws an exception.
/// If have_uncompressed, for compressed files it compares the checksums of the decompressed data.
/// Otherwise, it compares only the checksums of the files.
void checkEqual(const MergeTreeDataPartChecksums & rhs, bool have_uncompressed) const;
void checkEqual(const MergeTreeDataPartChecksums & rhs, bool have_uncompressed, const String & part_name) const;
static bool isBadChecksumsErrorCode(int code);
@ -132,8 +132,8 @@ struct MinimalisticDataPartChecksums
String getSerializedString() const;
static String getSerializedString(const MergeTreeDataPartChecksums & full_checksums, bool minimalistic);
void checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const;
void checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const;
void checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files, const String & part_name) const;
void checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files, const String & part_name) const;
void checkEqualImpl(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const;
};

View File

@ -359,7 +359,7 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St
if (local_part_header.getColumnsHash() != zk_part_header.getColumnsHash())
throw Exception(ErrorCodes::TABLE_DIFFERS_TOO_MUCH, "Columns of local part {} are different from ZooKeeper", part_name);
zk_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true);
zk_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true, part_name);
checkDataPart(
part,

View File

@ -350,7 +350,7 @@ static IMergeTreeDataPart::Checksums checkDataPart(
return {};
if (require_checksums || !checksums_txt.files.empty())
checksums_txt.checkEqual(checksums_data, check_uncompressed);
checksums_txt.checkEqual(checksums_data, check_uncompressed, data_part->name);
return checksums_data;
}

View File

@ -116,7 +116,7 @@ StorageDictionary::StorageDictionary(
: StorageDictionary(
table_id,
table_id.getFullNameNotQuoted(),
context_->getExternalDictionariesLoader().getDictionaryStructure(*dictionary_configuration),
context_->getExternalDictionariesLoader().getDictionaryStructure(*dictionary_configuration), /// NOLINT(readability-static-accessed-through-instance)
dictionary_configuration->getString("dictionary.comment", ""),
Location::SameDatabaseAndNameAsDictionary,
context_)

View File

@ -437,7 +437,7 @@ void fuzzJSONObject(
bool first = true;
for (const auto & ptr : node_list)
{
if (node_count >= config.value_number_limit)
if (node_count >= StorageFuzzJSON::Configuration::value_number_limit)
break;
WriteBufferFromOwnString child_out;

View File

@ -46,7 +46,6 @@ namespace ErrorCodes
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int CANNOT_RESTORE_TABLE;
extern const int NOT_IMPLEMENTED;
extern const int SETTING_CONSTRAINT_VIOLATION;
}
class MemorySink : public SinkToStorage
@ -76,7 +75,7 @@ public:
convertDynamicColumnsToTuples(block, storage_snapshot);
}
if (storage.compress)
if (storage.getMemorySettingsRef().compress)
{
Block compressed_block;
for (const auto & elem : block)
@ -106,15 +105,16 @@ public:
auto new_data = std::make_unique<Blocks>(*(storage.data.get()));
UInt64 new_total_rows = storage.total_size_rows.load(std::memory_order_relaxed) + inserted_rows;
UInt64 new_total_bytes = storage.total_size_bytes.load(std::memory_order_relaxed) + inserted_bytes;
const auto & memory_settings = storage.getMemorySettingsRef();
while (!new_data->empty()
&& ((storage.max_bytes_to_keep && new_total_bytes > storage.max_bytes_to_keep)
|| (storage.max_rows_to_keep && new_total_rows > storage.max_rows_to_keep)))
&& ((memory_settings.max_bytes_to_keep && new_total_bytes > memory_settings.max_bytes_to_keep)
|| (memory_settings.max_rows_to_keep && new_total_rows > memory_settings.max_rows_to_keep)))
{
Block oldest_block = new_data->front();
UInt64 rows_to_remove = oldest_block.rows();
UInt64 bytes_to_remove = oldest_block.allocatedBytes();
if (new_total_bytes - bytes_to_remove < storage.min_bytes_to_keep
|| new_total_rows - rows_to_remove < storage.min_rows_to_keep)
if (new_total_bytes - bytes_to_remove < memory_settings.min_bytes_to_keep
|| new_total_rows - rows_to_remove < memory_settings.min_rows_to_keep)
{
break; // stop - removing next block will put us under min_bytes / min_rows threshold
}
@ -145,15 +145,16 @@ StorageMemory::StorageMemory(
ColumnsDescription columns_description_,
ConstraintsDescription constraints_,
const String & comment,
const MemorySettings & settings)
: IStorage(table_id_), data(std::make_unique<const Blocks>()), compress(settings.compress),
min_rows_to_keep(settings.min_rows_to_keep), max_rows_to_keep(settings.max_rows_to_keep),
min_bytes_to_keep(settings.min_bytes_to_keep), max_bytes_to_keep(settings.max_bytes_to_keep)
const MemorySettings & memory_settings_)
: IStorage(table_id_)
, data(std::make_unique<const Blocks>())
, memory_settings(memory_settings_)
{
StorageInMemoryMetadata storage_metadata;
storage_metadata.setColumns(std::move(columns_description_));
storage_metadata.setConstraints(std::move(constraints_));
storage_metadata.setComment(comment);
storage_metadata.setSettingsChanges(memory_settings.getSettingsChangesQuery());
setInMemoryMetadata(storage_metadata);
}
@ -239,7 +240,7 @@ void StorageMemory::mutate(const MutationCommands & commands, ContextPtr context
Block block;
while (executor.pull(block))
{
if (compress)
if (memory_settings.compress)
for (auto & elem : block)
elem.column = elem.column->compress();
@ -294,6 +295,59 @@ void StorageMemory::truncate(
total_size_rows.store(0, std::memory_order_relaxed);
}
void StorageMemory::alter(const DB::AlterCommands & params, DB::ContextPtr context, DB::IStorage::AlterLockHolder & /*alter_lock_holder*/)
{
auto table_id = getStorageID();
StorageInMemoryMetadata new_metadata = getInMemoryMetadata();
params.apply(new_metadata, context);
if (params.isSettingsAlter())
{
auto & settings_changes = new_metadata.settings_changes->as<ASTSetQuery &>();
auto changed_settings = memory_settings;
changed_settings.applyChanges(settings_changes.changes);
changed_settings.sanityCheck();
/// When max_bytes_to_keep or max_rows_to_keep is lowered below its old value,
/// the oldest data needs to be trimmed to fit the new limits.
if (!memory_settings.max_bytes_to_keep || memory_settings.max_bytes_to_keep > changed_settings.max_bytes_to_keep
|| !memory_settings.max_rows_to_keep || memory_settings.max_rows_to_keep > changed_settings.max_rows_to_keep)
{
std::lock_guard lock(mutex);
auto new_data = std::make_unique<Blocks>(*(data.get()));
UInt64 new_total_rows = total_size_rows.load(std::memory_order_relaxed);
UInt64 new_total_bytes = total_size_bytes.load(std::memory_order_relaxed);
while (!new_data->empty()
&& ((changed_settings.max_bytes_to_keep && new_total_bytes > changed_settings.max_bytes_to_keep)
|| (changed_settings.max_rows_to_keep && new_total_rows > changed_settings.max_rows_to_keep)))
{
Block oldest_block = new_data->front();
UInt64 rows_to_remove = oldest_block.rows();
UInt64 bytes_to_remove = oldest_block.allocatedBytes();
if (new_total_bytes - bytes_to_remove < changed_settings.min_bytes_to_keep
|| new_total_rows - rows_to_remove < changed_settings.min_rows_to_keep)
{
break; // stop - removing next block will put us under min_bytes / min_rows threshold
}
// delete old block from current storage table
new_total_rows -= rows_to_remove;
new_total_bytes -= bytes_to_remove;
new_data->erase(new_data->begin());
}
data.set(std::move(new_data));
total_size_rows.store(new_total_rows, std::memory_order_relaxed);
total_size_bytes.store(new_total_bytes, std::memory_order_relaxed);
}
memory_settings = std::move(changed_settings);
}
DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata);
setInMemoryMetadata(new_metadata);
}
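
Both the sink and the new alter() path use the same FIFO trim; a self-contained sketch of it (hypothetical signature): evict the oldest blocks while over the new max limits, but stop before dropping below the min thresholds.

#include <cstdint>
#include <deque>

struct Block { uint64_t rows = 0, bytes = 0; };

// Evict oldest blocks while over the max limits (0 means "no cap"),
// never going under the min thresholds.
void trimToLimits(std::deque<Block> & blocks, uint64_t & total_rows, uint64_t & total_bytes,
                  uint64_t min_rows, uint64_t max_rows, uint64_t min_bytes, uint64_t max_bytes)
{
    while (!blocks.empty()
        && ((max_bytes && total_bytes > max_bytes) || (max_rows && total_rows > max_rows)))
    {
        const Block & oldest = blocks.front();
        if (total_bytes - oldest.bytes < min_bytes || total_rows - oldest.rows < min_rows)
            break; // removing the next block would violate the min thresholds
        total_rows -= oldest.rows;
        total_bytes -= oldest.bytes;
        blocks.pop_front();
    }
}

int main()
{
    std::deque<Block> blocks{{100, 4096}, {100, 4096}, {100, 4096}};
    uint64_t rows = 300, bytes = 12288;
    trimToLimits(blocks, rows, bytes, /*min_rows*/ 100, /*max_rows*/ 150,
                 /*min_bytes*/ 0, /*max_bytes*/ 0);
    // rows == 100: the two oldest blocks were evicted; the min threshold kept the last one.
}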
namespace
{
@ -499,7 +553,7 @@ void StorageMemory::restoreDataImpl(const BackupPtr & backup, const String & dat
while (auto block = block_in.read())
{
if (compress)
if (memory_settings.compress)
{
Block compressed_block;
for (const auto & elem : block)
@ -534,7 +588,8 @@ void StorageMemory::checkAlterIsPossible(const AlterCommands & commands, Context
{
if (command.type != AlterCommand::Type::ADD_COLUMN && command.type != AlterCommand::Type::MODIFY_COLUMN
&& command.type != AlterCommand::Type::DROP_COLUMN && command.type != AlterCommand::Type::COMMENT_COLUMN
&& command.type != AlterCommand::Type::COMMENT_TABLE && command.type != AlterCommand::Type::RENAME_COLUMN)
&& command.type != AlterCommand::Type::COMMENT_TABLE && command.type != AlterCommand::Type::RENAME_COLUMN
&& command.type != AlterCommand::Type::MODIFY_SETTING)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter of type '{}' is not supported by storage {}",
command.type, getName());
}
@ -566,9 +621,7 @@ void registerStorageMemory(StorageFactory & factory)
if (has_settings)
settings.loadFromQuery(*args.storage_def);
if (settings.min_bytes_to_keep > settings.max_bytes_to_keep
|| settings.min_rows_to_keep > settings.max_rows_to_keep)
throw Exception(ErrorCodes::SETTING_CONSTRAINT_VIOLATION, "Min. bytes / rows must be set with a max.");
settings.sanityCheck();
return std::make_shared<StorageMemory>(args.table_id, args.columns, args.constraints, args.comment, settings);
},

View File

@ -31,7 +31,7 @@ public:
ColumnsDescription columns_description_,
ConstraintsDescription constraints_,
const String & comment,
const MemorySettings & settings = MemorySettings());
const MemorySettings & memory_settings_ = MemorySettings());
String getName() const override { return "Memory"; }
@ -46,6 +46,8 @@ public:
StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const override;
const MemorySettings & getMemorySettingsRef() const { return memory_settings; }
void read(
QueryPlan & query_plan,
const Names & column_names,
@ -78,6 +80,7 @@ public:
void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional<ASTs> & partitions) override;
void checkAlterIsPossible(const AlterCommands & commands, ContextPtr local_context) const override;
void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & alter_lock_holder) override;
std::optional<UInt64> totalRows(const Settings &) const override;
std::optional<UInt64> totalBytes(const Settings &) const override;
@ -134,12 +137,7 @@ private:
std::atomic<size_t> total_size_bytes = 0;
std::atomic<size_t> total_size_rows = 0;
bool compress;
UInt64 min_rows_to_keep;
UInt64 max_rows_to_keep;
UInt64 min_bytes_to_keep;
UInt64 max_bytes_to_keep;
MemorySettings memory_settings;
friend class ReadFromMemoryStorageStep;
};

View File

@ -2317,7 +2317,7 @@ std::optional<CheckResult> StorageMergeTree::checkDataNext(DataValidationTasksPt
try
{
auto calculated_checksums = checkDataPart(part, false, noop, /* is_cancelled */[]{ return false; }, /* throw_on_broken_projection */true);
calculated_checksums.checkEqual(part->checksums, true);
calculated_checksums.checkEqual(part->checksums, true, part->name);
auto & part_mutable = const_cast<IMergeTreeDataPart &>(*part);
part_mutable.writeChecksums(part->checksums, local_context->getWriteSettings());

View File

@ -1837,7 +1837,7 @@ bool StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(
"(it may rarely happen on race condition with KILL MUTATION).", part_name, replica);
}
replica_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true);
replica_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true, part_name);
break;
}
@ -5675,7 +5675,7 @@ std::optional<QueryPipeline> StorageReplicatedMergeTree::distributedWriteFromClu
{
auto connection = std::make_shared<Connection>(
node.host_name, node.port, query_context->getGlobalContext()->getCurrentDatabase(),
node.user, node.password, ssh::SSHKey(), node.quota_key, node.cluster, node.cluster_secret,
node.user, node.password, SSHKey(), node.quota_key, node.cluster, node.cluster_secret,
"ParallelInsertSelectInititiator",
node.compression,
node.secure

Some files were not shown because too many files have changed in this diff.