Merge remote-tracking branch 'ClickHouse/master' into add-hostname-blob_storage_log-system-table

This commit is contained in:
Robert Schulze 2024-04-10 13:02:44 +00:00
commit 84525faee8
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
82 changed files with 1923 additions and 348 deletions

View File

@ -4835,7 +4835,7 @@ for (;; ptr++)
If the class contains characters outside the 0-255 range, a different
opcode is compiled. It may optionally have a bit map for characters < 256,
but those above are are explicitly listed afterwards. A flag byte tells
but those above are explicitly listed afterwards. A flag byte tells
whether the bitmap is present, and whether this is a negated class or not.
In JavaScript compatibility mode, an isolated ']' causes an error. In

View File

@ -59,12 +59,3 @@ target_link_libraries (_avrocpp PRIVATE boost::headers_only boost::iostreams)
target_compile_definitions (_avrocpp PUBLIC SNAPPY_CODEC_AVAILABLE)
target_include_directories (_avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR})
target_link_libraries (_avrocpp PRIVATE ch_contrib::snappy)
# create a symlink to include headers with <avro/...>
set(AVRO_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include")
ADD_CUSTOM_TARGET(avro_symlink_headers ALL
COMMAND ${CMAKE_COMMAND} -E make_directory "${AVRO_INCLUDE_DIR}"
COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVRO_INCLUDE_DIR}/avro"
)
add_dependencies(_avrocpp avro_symlink_headers)
target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVRO_INCLUDE_DIR}")

View File

@ -1,26 +1,18 @@
option (ENABLE_SSH "Enable support for SSH keys and protocol" ${ENABLE_LIBRARIES})
option (ENABLE_SSH "Enable support for libssh" ${ENABLE_LIBRARIES})
if (NOT ENABLE_SSH)
message(STATUS "Not using SSH")
message(STATUS "Not using libssh")
return()
endif()
# CMake variables needed by libssh_version.h.cmake, update them when you update libssh
set(libssh_VERSION_MAJOR 0)
set(libssh_VERSION_MINOR 9)
set(libssh_VERSION_PATCH 8)
set(LIB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libssh")
set(LIB_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/libssh")
# Set CMake variables which are used in libssh_version.h.cmake
project(libssh VERSION 0.9.8 LANGUAGES C)
set(LIBRARY_VERSION "4.8.8")
set(LIBRARY_SOVERSION "4")
set(CMAKE_THREAD_PREFER_PTHREADS ON)
set(THREADS_PREFER_PTHREAD_FLAG ON)
set(WITH_ZLIB OFF)
set(WITH_SYMBOL_VERSIONING OFF)
set(WITH_SERVER ON)
set(libssh_SRCS
${LIB_SOURCE_DIR}/src/agent.c
${LIB_SOURCE_DIR}/src/auth.c
@ -28,15 +20,21 @@ set(libssh_SRCS
${LIB_SOURCE_DIR}/src/bignum.c
${LIB_SOURCE_DIR}/src/buffer.c
${LIB_SOURCE_DIR}/src/callbacks.c
${LIB_SOURCE_DIR}/src/chachapoly.c
${LIB_SOURCE_DIR}/src/channels.c
${LIB_SOURCE_DIR}/src/client.c
${LIB_SOURCE_DIR}/src/config.c
${LIB_SOURCE_DIR}/src/config_parser.c
${LIB_SOURCE_DIR}/src/connect.c
${LIB_SOURCE_DIR}/src/connector.c
${LIB_SOURCE_DIR}/src/curve25519.c
${LIB_SOURCE_DIR}/src/dh.c
${LIB_SOURCE_DIR}/src/ecdh.c
${LIB_SOURCE_DIR}/src/error.c
${LIB_SOURCE_DIR}/src/external/bcrypt_pbkdf.c
${LIB_SOURCE_DIR}/src/external/blowfish.c
${LIB_SOURCE_DIR}/src/external/chacha.c
${LIB_SOURCE_DIR}/src/external/poly1305.c
${LIB_SOURCE_DIR}/src/getpass.c
${LIB_SOURCE_DIR}/src/init.c
${LIB_SOURCE_DIR}/src/kdf.c
@ -55,37 +53,32 @@ set(libssh_SRCS
${LIB_SOURCE_DIR}/src/pcap.c
${LIB_SOURCE_DIR}/src/pki.c
${LIB_SOURCE_DIR}/src/pki_container_openssh.c
${LIB_SOURCE_DIR}/src/pki_ed25519_common.c
${LIB_SOURCE_DIR}/src/poll.c
${LIB_SOURCE_DIR}/src/session.c
${LIB_SOURCE_DIR}/src/scp.c
${LIB_SOURCE_DIR}/src/session.c
${LIB_SOURCE_DIR}/src/socket.c
${LIB_SOURCE_DIR}/src/string.c
${LIB_SOURCE_DIR}/src/threads.c
${LIB_SOURCE_DIR}/src/wrapper.c
${LIB_SOURCE_DIR}/src/external/bcrypt_pbkdf.c
${LIB_SOURCE_DIR}/src/external/blowfish.c
${LIB_SOURCE_DIR}/src/external/chacha.c
${LIB_SOURCE_DIR}/src/external/poly1305.c
${LIB_SOURCE_DIR}/src/chachapoly.c
${LIB_SOURCE_DIR}/src/config_parser.c
${LIB_SOURCE_DIR}/src/token.c
${LIB_SOURCE_DIR}/src/pki_ed25519_common.c
${LIB_SOURCE_DIR}/src/wrapper.c
# some files of libssh/src/ are missing - why?
${LIB_SOURCE_DIR}/src/threads/noop.c
${LIB_SOURCE_DIR}/src/threads/pthread.c
# files missing - why?
# LIBCRYPT specific
${libssh_SRCS}
${LIB_SOURCE_DIR}/src/threads/libcrypto.c
${LIB_SOURCE_DIR}/src/pki_crypto.c
${LIB_SOURCE_DIR}/src/dh_crypto.c
${LIB_SOURCE_DIR}/src/ecdh_crypto.c
${LIB_SOURCE_DIR}/src/libcrypto.c
${LIB_SOURCE_DIR}/src/dh_crypto.c
${LIB_SOURCE_DIR}/src/pki_crypto.c
${LIB_SOURCE_DIR}/src/threads/libcrypto.c
${LIB_SOURCE_DIR}/src/options.c
${LIB_SOURCE_DIR}/src/server.c
${LIB_SOURCE_DIR}/src/bind.c
${LIB_SOURCE_DIR}/src/bind_config.c
${LIB_SOURCE_DIR}/src/options.c
${LIB_SOURCE_DIR}/src/server.c
)
if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC))
@ -94,7 +87,7 @@ endif()
configure_file(${LIB_SOURCE_DIR}/include/libssh/libssh_version.h.cmake ${LIB_BINARY_DIR}/include/libssh/libssh_version.h @ONLY)
add_library(_ssh STATIC ${libssh_SRCS})
add_library(_ssh ${libssh_SRCS})
add_library(ch_contrib::ssh ALIAS _ssh)
target_link_libraries(_ssh PRIVATE OpenSSL::Crypto)

View File

@ -45,6 +45,11 @@ Upper and lower bounds can be specified to limit Memory engine table size, effec
CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 100, max_rows_to_keep = 1000;
```
**Modify settings**
```sql
ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000;
```
**Note:** Both `bytes` and `rows` capping parameters can be set at the same time, however, the lower bounds of `max` and `min` will be adhered to.
## Examples {#examples}
@ -97,3 +102,4 @@ SELECT total_bytes, total_rows FROM system.tables WHERE name = 'memory' and data
│ 65536 │ 10000 │
└─────────────┴────────────┘
```

View File

@ -7,7 +7,7 @@ title: "Crowdsourced air traffic data from The OpenSky Network 2020"
The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. It spans all flights seen by the network's more than 2500 members since 1 January 2019. More data will be periodically included in the dataset until the end of the COVID-19 pandemic.
Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd
Source: https://zenodo.org/records/5092942
Martin Strohmeier, Xavier Olive, Jannis Luebbe, Matthias Schaefer, and Vincent Lenders
"Crowdsourced air traffic data from the OpenSky Network 2019–2020"
@ -19,7 +19,7 @@ https://doi.org/10.5194/essd-13-357-2021
Run the command:
```bash
wget -O- https://zenodo.org/record/5092942 | grep -oP 'https://zenodo.org/record/5092942/files/flightlist_\d+_\d+\.csv\.gz' | xargs wget
wget -O- https://zenodo.org/records/5092942 | grep -oE 'https://zenodo.org/records/5092942/files/flightlist_[0-9]+_[0-9]+\.csv\.gz' | xargs wget
```
Download will take about 2 minutes with good internet connection. There are 30 files with total size of 4.3 GB.
@ -127,15 +127,15 @@ Average flight distance is around 1000 km.
Query:
```sql
SELECT avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) FROM opensky;
SELECT round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 2) FROM opensky;
```
Result:
```text
┌─avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))─┐
│ 1041090.6465708319
└────────────────────────────────────────────────────────────────────┘
┌─round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 2)─┐
1. │ 1041090.67 │ -- 1.04 million
└──────────────────────────────────────────────────────────────────────────────┘
```
### Most busy origin airports and the average distance seen {#busy-airports-average-distance}

View File

@ -67,8 +67,7 @@ SETTINGS use_query_cache = true, enable_writes_to_query_cache = false;
For maximum control, it is generally recommended to provide settings `use_query_cache`, `enable_writes_to_query_cache` and
`enable_reads_from_query_cache` only with specific queries. It is also possible to enable caching at user or profile level (e.g. via `SET
use_query_cache = true`) but one should keep in mind that all `SELECT` queries including monitoring or debugging queries to system tables
may return cached results then.
use_query_cache = true`) but one should keep in mind that all `SELECT` queries may return cached results then.
The query cache can be cleared using statement `SYSTEM DROP QUERY CACHE`. The content of the query cache is displayed in system table
[system.query_cache](system-tables/query_cache.md). The number of query cache hits and misses since database start are shown as events
@ -175,6 +174,10 @@ Also, results of queries with non-deterministic functions are not cached by defa
To force caching of results of queries with non-deterministic functions regardless, use setting
[query_cache_nondeterministic_function_handling](settings/settings.md#query-cache-nondeterministic-function-handling).
Results of queries that involve system tables, e.g. `system.processes` or `information_schema.tables`, are not cached by default. To force
caching of results of queries with system tables regardless, use setting
[query_cache_system_table_handling](settings/settings.md#query-cache-system-table-handling).
:::note
Prior to ClickHouse v23.11, setting 'query_cache_store_results_of_queries_with_nondeterministic_functions = 0 / 1' controlled whether
results of queries with non-deterministic results were cached. In newer ClickHouse versions, this setting is obsolete and has no effect.

View File

@ -287,7 +287,7 @@ Default value: 0 (seconds)
## remote_fs_execute_merges_on_single_replica_time_threshold
When this setting has a value greater than than zero only a single replica starts the merge immediately if merged part on shared storage and `allow_remote_fs_zero_copy_replication` is enabled.
When this setting has a value greater than zero only a single replica starts the merge immediately if merged part on shared storage and `allow_remote_fs_zero_copy_replication` is enabled.
:::note Zero-copy replication is not ready for production
Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.

View File

@ -1689,6 +1689,18 @@ Possible values:
Default value: `throw`.
## query_cache_system_table_handling {#query-cache-system-table-handling}
Controls how the [query cache](../query-cache.md) handles `SELECT` queries against system tables, i.e. tables in databases `system.*` and `information_schema.*`.
Possible values:
- `'throw'` - Throw an exception and don't cache the query result.
- `'save'` - Cache the query result.
- `'ignore'` - Don't cache the query result and don't throw an exception.
Default value: `throw`.
## query_cache_min_query_runs {#query-cache-min-query-runs}
Minimum number of times a `SELECT` query must run before its result is stored in the [query cache](../query-cache.md).
@ -5302,7 +5314,7 @@ SETTINGS(dictionary_use_async_executor=1, max_threads=8);
## storage_metadata_write_full_object_key {#storage_metadata_write_full_object_key}
When set to `true` the metadata files are written with `VERSION_FULL_OBJECT_KEY` format version. With that format full object storage key names are written to the metadata files.
When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format only suffixes of object storage key names are are written to the metadata files. The prefix for all of object storage key names is set in configurations files at `storage_configuration.disks` section.
When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format only suffixes of object storage key names are written to the metadata files. The prefix for all object storage key names is set in the configuration files, in the `storage_configuration.disks` section.
Default value: `false`.

View File

@ -15,9 +15,9 @@ The `uniqCombined` function is a good choice for calculating the number of diffe
**Arguments**
The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
- `HLL_precision`: The base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optional, you can use the function as `uniqCombined(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each).
- `X`: A variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
`HLL_precision` is the base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optional, you can use the function as `uniqCombined(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each).
**Returned value**
@ -25,26 +25,43 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, `
**Implementation details**
Function:
The `uniqCombined` function:
- Calculates a hash (64-bit hash for `String` and 32-bit otherwise) for all parameters in the aggregate, then uses it in calculations.
- Uses a combination of three algorithms: array, hash table, and HyperLogLog with an error correction table.
For a small number of distinct elements, an array is used. When the set size is larger, a hash table is used. For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory.
- For a small number of distinct elements, an array is used.
- When the set size is larger, a hash table is used.
- For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory.
- Provides the result deterministically (it does not depend on the query processing order).
:::note
Since it uses 32-bit hash for non-`String` type, the result will have very high error for cardinalities significantly larger than `UINT_MAX` (error will raise quickly after a few tens of billions of distinct values), hence in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
Since it uses a 32-bit hash for non-`String` types, the result will have very high error for cardinalities significantly larger than `UINT_MAX` (error will rise quickly after a few tens of billions of distinct values), hence in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64).
:::
Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined`:
Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined` function:
- Consumes several times less memory.
- Calculates with several times higher accuracy.
- Usually has slightly lower performance. In some scenarios, `uniqCombined` can perform better than `uniq`, for example, with distributed queries that transmit a large number of aggregation states over the network.
**Example**
Query:
```sql
SELECT uniqCombined(number) FROM numbers(1e6);
```
Result:
```response
┌─uniqCombined(number)─┐
│ 1001148 │ -- 1.00 million
└──────────────────────┘
```
See the example section of [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) for an example of the difference between `uniqCombined` and `uniqCombined64` for much larger inputs.
**See Also**
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq)

View File

@ -5,4 +5,78 @@ sidebar_position: 193
# uniqCombined64
Same as [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined), but uses 64-bit hash for all data types.
Calculates the approximate number of different argument values. It is the same as [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined), but uses a 64-bit hash for all data types rather than just for the String data type.
``` sql
uniqCombined64(HLL_precision)(x[, ...])
```
**Parameters**
- `HLL_precision`: The base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optionally, you can use the function as `uniqCombined64(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each).
- `X`: A variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
**Returned value**
- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number.
**Implementation details**
The `uniqCombined64` function:
- Calculates a hash (64-bit hash for all data types) for all parameters in the aggregate, then uses it in calculations.
- Uses a combination of three algorithms: array, hash table, and HyperLogLog with an error correction table.
- For a small number of distinct elements, an array is used.
- When the set size is larger, a hash table is used.
- For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory.
- Provides the result deterministically (it does not depend on the query processing order).
:::note
Since it uses a 64-bit hash for all types, the result does not suffer from very high error for cardinalities significantly larger than `UINT_MAX` like [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md) does, which uses a 32-bit hash for non-`String` types.
:::
Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined64` function:
- Consumes several times less memory.
- Calculates with several times higher accuracy.
**Example**
In the example below `uniqCombined64` is run on `1e10` different numbers returning a very close approximation of the number of different argument values.
Query:
```sql
SELECT uniqCombined64(number) FROM numbers(1e10);
```
Result:
```response
┌─uniqCombined64(number)─┐
│ 9998568925 │ -- 10.00 billion
└────────────────────────┘
```
By comparison the `uniqCombined` function returns a rather poor approximation for an input this size.
Query:
```sql
SELECT uniqCombined(number) FROM numbers(1e10);
```
Result:
```response
┌─uniqCombined(number)─┐
│ 5545308725 │ -- 5.55 billion
└──────────────────────┘
```
**See Also**
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq)
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md)
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12)
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact)
- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch)

View File

@ -81,6 +81,43 @@ Result:
│ 2.23606797749979 │
└──────────────────┘
```
## L2SquaredNorm
Calculates the sum of the squares of the vector values, i.e. the square of the [L2Norm](#l2norm).
**Syntax**
```sql
L2SquaredNorm(vector)
```
Alias: `normL2Squared`.
**Arguments**
- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
**Returned value**
- L2-norm squared.
Type: [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT L2SquaredNorm((1, 2));
```
Result:
```text
┌─L2SquaredNorm((1, 2))─┐
│ 5 │
└───────────────────────┘
```
## LinfNorm

View File

@ -594,6 +594,45 @@ Calculates JumpConsistentHash form a UInt64.
Accepts two arguments: a UInt64-type key and the number of buckets. Returns Int32.
For more information, see the link: [JumpConsistentHash](https://arxiv.org/pdf/1406.2294.pdf)
## kostikConsistentHash
An O(1) time and space consistent hash algorithm by Konstantin 'kostik' Oblakov. Previously `yandexConsistentHash`.
**Syntax**
```sql
kostikConsistentHash(input, n)
```
Alias: `yandexConsistentHash` (kept for backward compatibility).
**Parameters**
- `input`: A UInt64-type key [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
- `n`: Number of buckets. [UInt16](/docs/en/sql-reference/data-types/int-uint.md).
**Returned value**
- A [UInt16](/docs/en/sql-reference/data-types/int-uint.md) data type hash value.
**Implementation details**
It is efficient only if n <= 32768.
**Example**
Query:
```sql
SELECT kostikConsistentHash(16045690984833335023, 2);
```
```response
┌─kostikConsistentHash(16045690984833335023, 2)─┐
│ 1 │
└───────────────────────────────────────────────┘
```
## murmurHash2_32, murmurHash2_64
Produces a [MurmurHash2](https://github.com/aappleby/smhasher) hash value.
@ -1153,6 +1192,42 @@ Result:
└────────────┘
```
## wyHash64
Produces a 64-bit [wyHash64](https://github.com/wangyi-fudan/wyhash) hash value.
**Syntax**
```sql
wyHash64(string)
```
**Arguments**
- `string` — String. [String](/docs/en/sql-reference/data-types/string.md).
**Returned value**
- Hash value.
Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
**Example**
Query:
```sql
SELECT wyHash64('ClickHouse') AS Hash;
```
Result:
```response
┌─────────────────Hash─┐
│ 12336419557878201794 │
└──────────────────────┘
```
## ngramMinHash
Splits an ASCII string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive.

View File

@ -260,8 +260,36 @@ Alias: `lcase`
Converts the ASCII Latin symbols in a string to uppercase.
**Syntax**
``` sql
upper(input)
```
Alias: `ucase`
**Parameters**
- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md).
**Returned value**
- A [String](/docs/en/sql-reference/data-types/string.md) data type value.
**Examples**
Query:
``` sql
SELECT upper('value') as Upper;
```
``` response
┌─Upper─┐
│ VALUE │
└───────┘
```
## lowerUTF8
Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
@ -278,6 +306,34 @@ Does not detect the language, e.g. for Turkish the result might not be exactly c
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
**Syntax**
``` sql
upperUTF8(input)
```
**Parameters**
- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md).
**Returned value**
- A [String](/docs/en/sql-reference/data-types/string.md) data type value.
**Example**
Query:
``` sql
SELECT upperUTF8('München') as Upperutf8;
```
``` response
┌─Upperutf8─┐
│ MÜNCHEN │
└───────────┘
```
## isValidUTF8
Returns 1, if the set of bytes constitutes valid UTF-8-encoded text, otherwise 0.

View File

@ -193,3 +193,33 @@ Result:
## translateUTF8
Like [translate](#translate) but assumes `s`, `from` and `to` are UTF-8 encoded strings.
**Syntax**
``` sql
translateUTF8(s, from, to)
```
**Parameters**
- `s`: A string type [String](/docs/en/sql-reference/data-types/string.md).
- `from`: A string type [String](/docs/en/sql-reference/data-types/string.md).
- `to`: A string type [String](/docs/en/sql-reference/data-types/string.md).
**Returned value**
- A [String](/docs/en/sql-reference/data-types/string.md) data type value.
**Examples**
Query:
``` sql
SELECT translateUTF8('Münchener Straße', 'üß', 'us') AS res;
```
``` response
┌─res──────────────┐
│ Munchener Strase │
└──────────────────┘
```

View File

@ -6,14 +6,17 @@ sidebar_label: Searching in Strings
# Functions for Searching in Strings
All functions in this section search by default case-sensitively. Case-insensitive search is usually provided by separate function variants.
Note that case-insensitive search follows the lowercase-uppercase rules of the English language. E.g. Uppercased `i` in English language is
`I` whereas in Turkish language it is `İ` - results for languages other than English may be unexpected.
All functions in this section search case-sensitively by default. Case-insensitive search is usually provided by separate function variants.
Functions in this section also assume that the searched string and the search string are single-byte encoded text. If this assumption is
:::note
Case-insensitive search follows the lowercase-uppercase rules of the English language. E.g. Uppercased `i` in the English language is
`I` whereas in the Turkish language it is `İ` - results for languages other than English may be unexpected.
:::
Functions in this section also assume that the searched string (referred to in this section as `haystack`) and the search string (referred to in this section as `needle`) are single-byte encoded text. If this assumption is
violated, no exception is thrown and results are undefined. Search with UTF-8 encoded strings is usually provided by separate function
variants. Likewise, if a UTF-8 function variant is used and the input strings are not UTF-8 encoded text, no exception is thrown and the
results are undefined. Note that no automatic Unicode normalization is performed, you can use the
results are undefined. Note that no automatic Unicode normalization is performed, however you can use the
[normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
[General strings functions](string-functions.md) and [functions for replacing in strings](string-replace-functions.md) are described separately.
@ -54,6 +57,8 @@ Type: `Integer`.
**Examples**
Query:
``` sql
SELECT position('Hello, world!', '!');
```
@ -68,6 +73,8 @@ Result:
Example with `start_pos` argument:
Query:
``` sql
SELECT
position('Hello, world!', 'o', 1),
@ -84,6 +91,8 @@ Result:
Example for `needle IN haystack` syntax:
Query:
```sql
SELECT 6 = position('/' IN s) FROM (SELECT 'Hello/World' AS s);
```
@ -98,6 +107,8 @@ Result:
Examples with empty `needle` substring:
Query:
``` sql
SELECT
position('abc', ''),
@ -109,6 +120,8 @@ SELECT
position('abc', '', 5)
```
Result:
``` text
┌─position('abc', '')─┬─position('abc', '', 0)─┬─position('abc', '', 1)─┬─position('abc', '', 2)─┬─position('abc', '', 3)─┬─position('abc', '', 4)─┬─position('abc', '', 5)─┐
│ 1 │ 1 │ 1 │ 2 │ 3 │ 4 │ 0 │
@ -132,7 +145,23 @@ locate(needle, haystack[, start_pos])
## positionCaseInsensitive
Like [position](#position) but searches case-insensitively.
A case-insensitive variant of [position](#position).
**Example**
Query:
``` sql
SELECT positionCaseInsensitive('Hello, world!', 'hello');
```
Result:
``` text
┌─positionCaseInsensitive('Hello, world!', 'hello')─┐
│ 1 │
└───────────────────────────────────────────────────┘
```
## positionUTF8
@ -142,6 +171,8 @@ Like [position](#position) but assumes `haystack` and `needle` are UTF-8 encoded
Function `positionUTF8` correctly counts character `ö` (represented by two bytes in UTF-8) as a single Unicode codepoint:
Query:
``` sql
SELECT positionUTF8('Motörhead', 'r');
```
@ -175,14 +206,17 @@ multiSearchAllPositions(haystack, [needle1, needle2, ..., needleN])
**Arguments**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. Array
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned values**
- Array of the starting position in bytes and counting from 1 (if the substring was found) or 0 (if the substring was not found)
- Array of the starting position in bytes and counting from 1, if the substring was found.
- 0, if the substring was not found.
**Example**
Query:
``` sql
SELECT multiSearchAllPositions('Hello, World!', ['hello', '!', 'world']);
```
@ -194,45 +228,535 @@ Result:
│ [0,13,0] │
└───────────────────────────────────────────────────────────────────┘
```
## multiSearchAllPositionsCaseInsensitive
## multiSearchAllPositionsUTF8
Like [multiSearchAllPositions](#multiSearchAllPositions) but assumes `haystack` and the `needle`-s are UTF-8 encoded strings.
## multiSearchFirstPosition
Like `position` but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings.
Functions `multiSearchFirstPositionCaseInsensitive`, `multiSearchFirstPositionUTF8` and `multiSearchFirstPositionCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.
Like [multiSearchAllPositions](#multisearchallpositions) but ignores case.
**Syntax**
```sql
multiSearchFirstPosition(haystack, \[needle<sub>1</sub>, needle<sub>2</sub>, …, needle<sub>n</sub>\])
multiSearchAllPositionsCaseInsensitive(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- Array of the starting position in bytes and counting from 1 (if the substring was found).
- 0 if the substring was not found.
**Example**
Query:
```sql
SELECT multiSearchAllPositionsCaseInsensitive('ClickHouse',['c','h']);
```
Result:
```response
["1","6"]
```
## multiSearchAllPositionsUTF8
Like [multiSearchAllPositions](#multisearchallpositions) but assumes `haystack` and the `needle` substrings are UTF-8 encoded strings.
**Syntax**
```sql
multiSearchAllPositionsUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- Array of the starting position in bytes and counting from 1 (if the substring was found).
- 0 if the substring was not found.
**Example**
Given `ClickHouse` as a UTF-8 string, find the positions of `C` (`\x43`) and `H` (`\x48`).
Query:
```sql
SELECT multiSearchAllPositionsUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x43','\x48']);
```
Result:
```response
["1","6"]
```
## multiSearchAllPositionsCaseInsensitiveUTF8
Like [multiSearchAllPositionsUTF8](#multisearchallpositionsutf8) but ignores case.
**Syntax**
```sql
multiSearchAllPositionsCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- Array of the starting position in bytes and counting from 1 (if the substring was found).
- 0 if the substring was not found.
**Example**
Given `ClickHouse` as a UTF-8 string, find the positions of `c` (`\x63`) and `h` (`\x68`).
Query:
```sql
SELECT multiSearchAllPositionsCaseInsensitiveUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x63','\x68']);
```
Result:
```response
["1","6"]
```
## multiSearchFirstPosition
Like [`position`](#position) but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings.
Functions [`multiSearchFirstPositionCaseInsensitive`](#multiSearchFirstPositionCaseInsensitive), [`multiSearchFirstPositionUTF8`](#multiSearchFirstPositionUTF8) and [`multiSearchFirstPositionCaseInsensitiveUTF8`](#multiSearchFirstPositionCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function.
**Syntax**
```sql
multiSearchFirstPosition(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings.
- 0, if there was no match.
**Example**
Query:
```sql
SELECT multiSearchFirstPosition('Hello World',['llo', 'Wor', 'ld']);
```
Result:
```response
3
```
## multiSearchFirstPositionCaseInsensitive
Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but ignores case.
**Syntax**
```sql
multiSearchFirstPositionCaseInsensitive(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Array of substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings.
- 0, if there was no match.
**Example**
Query:
```sql
SELECT multiSearchFirstPositionCaseInsensitive('HELLO WORLD',['wor', 'ld', 'ello']);
```
Result:
```response
2
```
## multiSearchFirstPositionUTF8
Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings.
**Syntax**
```sql
multiSearchFirstPositionUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings.
- 0, if there was no match.
**Example**
Find the leftmost offset in UTF-8 string `hello world` which matches any of the given needles.
Query:
```sql
SELECT multiSearchFirstPositionUTF8('\x68\x65\x6c\x6c\x6f\x20\x77\x6f\x72\x6c\x64',['wor', 'ld', 'ello']);
```
Result:
```response
2
```
## multiSearchFirstPositionCaseInsensitiveUTF8
Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings and ignores case.
**Syntax**
```sql
multiSearchFirstPositionCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings, ignoring case.
- 0, if there was no match.
**Example**
Find the leftmost offset in UTF-8 string `HELLO WORLD` which matches any of the given needles.
Query:
```sql
SELECT multiSearchFirstPositionCaseInsensitiveUTF8('\x48\x45\x4c\x4c\x4f\x20\x57\x4f\x52\x4c\x44',['wor', 'ld', 'ello']);
```
Result:
```response
2
```
## multiSearchFirstIndex
Returns the index `i` (starting from 1) of the leftmost found needle<sub>i</sub> in the string `haystack` and 0 otherwise.
Functions `multiSearchFirstIndexCaseInsensitive`, `multiSearchFirstIndexUTF8` and `multiSearchFirstIndexCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.
Functions [`multiSearchFirstIndexCaseInsensitive`](#multiSearchFirstIndexCaseInsensitive), [`multiSearchFirstIndexUTF8`](#multiSearchFirstIndexUTF8) and [`multiSearchFirstIndexCaseInsensitiveUTF8`](#multiSearchFirstIndexCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function.
**Syntax**
```sql
multiSearchFirstIndex(haystack, \[needle<sub>1</sub>, needle<sub>2</sub>, …, needle<sub>n</sub>\])
multiSearchFirstIndex(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- index (starting from 1) of the leftmost found needle.
- 0, if there was no match.
**Example**
Query:
```sql
SELECT multiSearchFirstIndex('Hello World',['World','Hello']);
```
## multiSearchAny {#multisearchany}
Result:
```response
1
```
## multiSearchFirstIndexCaseInsensitive
Returns the index `i` (starting from 1) of the leftmost found needle<sub>i</sub> in the string `haystack` and 0 otherwise. Ignores case.
**Syntax**
```sql
multiSearchFirstIndexCaseInsensitive(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- index (starting from 1) of the leftmost found needle.
- 0, if there was no match.
**Example**
Query:
```sql
SELECT multiSearchFirstIndexCaseInsensitive('hElLo WoRlD',['World','Hello']);
```
Result:
```response
1
```
## multiSearchFirstIndexUTF8
Returns the index `i` (starting from 1) of the leftmost found needle<sub>i</sub> in the string `haystack` and 0 otherwise. Assumes `haystack` and `needle` are UTF-8 encoded strings.
**Syntax**
```sql
multiSearchFirstIndexUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- index (starting from 1) of the leftmost found needle.
- 0, if there was no match.
**Example**
Given `Hello World` as a UTF-8 string, find the first index of UTF-8 strings `Hello` and `World`.
Query:
```sql
SELECT multiSearchFirstIndexUTF8('\x48\x65\x6c\x6c\x6f\x20\x57\x6f\x72\x6c\x64',['\x57\x6f\x72\x6c\x64','\x48\x65\x6c\x6c\x6f']);
```
Result:
```response
1
```
## multiSearchFirstIndexCaseInsensitiveUTF8
Returns the index `i` (starting from 1) of the leftmost found needle<sub>i</sub> in the string `haystack` and 0 otherwise. Assumes `haystack` and `needle` are UTF-8 encoded strings. Ignores case.
**Syntax**
```sql
multiSearchFirstIndexCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- index (starting from 1) of the leftmost found needle.
- 0, if there was no match.
**Example**
Given `HELLO WORLD` as a UTF-8 string, find the first index of UTF-8 strings `hello` and `world`.
Query:
```sql
SELECT multiSearchFirstIndexCaseInsensitiveUTF8('\x48\x45\x4c\x4c\x4f\x20\x57\x4f\x52\x4c\x44',['\x68\x65\x6c\x6c\x6f','\x77\x6f\x72\x6c\x64']);
```
Result:
```response
1
```
## multiSearchAny
Returns 1, if at least one string needle<sub>i</sub> matches the string `haystack` and 0 otherwise.
Functions `multiSearchAnyCaseInsensitive`, `multiSearchAnyUTF8` and `multiSearchAnyCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.
Functions [`multiSearchAnyCaseInsensitive`](#multiSearchAnyCaseInsensitive), [`multiSearchAnyUTF8`](#multiSearchAnyUTF8) and [`multiSearchAnyCaseInsensitiveUTF8`](#multiSearchAnyCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function.
**Syntax**
```sql
multiSearchAny(haystack, \[needle<sub>1</sub>, needle<sub>2</sub>, …, needle<sub>n</sub>\])
multiSearchAny(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- 1, if there was at least one match.
- 0, if there was not at least one match.
**Example**
Query:
```sql
SELECT multiSearchAny('ClickHouse',['C','H']);
```
Result:
```response
1
```
## multiSearchAnyCaseInsensitive
Like [multiSearchAny](#multisearchany) but ignores case.
**Syntax**
```sql
multiSearchAnyCaseInsensitive(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- 1, if there was at least one case-insensitive match.
- 0, if there was not at least one case-insensitive match.
**Example**
Query:
```sql
SELECT multiSearchAnyCaseInsensitive('ClickHouse',['c','h']);
```
Result:
```response
1
```
## multiSearchAnyUTF8
Like [multiSearchAny](#multisearchany) but assumes `haystack` and the `needle` substrings are UTF-8 encoded strings.
**Syntax**
```sql
multiSearchAnyUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- 1, if there was at least one match.
- 0, if there was not at least one match.
**Example**
Given `ClickHouse` as a UTF-8 string, check if there are any `C` ('\x43') or `H` ('\x48') letters in the word.
Query:
```sql
SELECT multiSearchAnyUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x43','\x48']);
```
Result:
```response
1
```
## multiSearchAnyCaseInsensitiveUTF8
Like [multiSearchAnyUTF8](#multisearchanyutf8) but ignores case.
**Syntax**
```sql
multiSearchAnyCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- 1, if there was at least one case-insensitive match.
- 0, if there was not at least one case-insensitive match.
**Example**
Given `ClickHouse` as a UTF-8 string, check if there is any letter `h` (`\x68`) in the word, ignoring case.
Query:
```sql
SELECT multiSearchAnyCaseInsensitiveUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x68']);
```
Result:
```response
1
```
## match {#match}

View File

@ -584,6 +584,278 @@ SELECT tupleConcat((1, 2), (3, 4), (true, false)) AS res
└──────────────────────┘
```
## tupleIntDiv
Does integer division of a tuple of numerators and a tuple of denominators, and returns a tuple of the quotients.
**Syntax**
```sql
tupleIntDiv(tuple_num, tuple_div)
```
**Parameters**
- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type.
- `tuple_div`: Tuple of divisor values. [Tuple](../data-types/tuple) of numeric type.
**Returned value**
- Tuple of the quotients of `tuple_num` and `tuple_div`. [Tuple](../data-types/tuple) of integer values.
**Implementation details**
- If either `tuple_num` or `tuple_div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor.
- An error will be thrown for division by 0.
**Examples**
Query:
``` sql
SELECT tupleIntDiv((15, 10, 5), (5, 5, 5));
```
Result:
``` text
┌─tupleIntDiv((15, 10, 5), (5, 5, 5))─┐
│ (3,2,1) │
└─────────────────────────────────────┘
```
Query:
``` sql
SELECT tupleIntDiv((15, 10, 5), (5.5, 5.5, 5.5));
```
Result:
``` text
┌─tupleIntDiv((15, 10, 5), (5.5, 5.5, 5.5))─┐
│ (2,1,0) │
└───────────────────────────────────────────┘
```
## tupleIntDivOrZero
Like [tupleIntDiv](#tupleintdiv) it does integer division of a tuple of numerators and a tuple of denominators, and returns a tuple of the quotients. It does not throw an error for 0 divisors, but rather returns the quotient as 0.
**Syntax**
```sql
tupleIntDivOrZero(tuple_num, tuple_div)
```
**Parameters**

- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type.
- `tuple_div`: Tuple of divisor values. [Tuple](../data-types/tuple) of numeric type.
**Returned value**
- Tuple of the quotients of `tuple_num` and `tuple_div`. [Tuple](../data-types/tuple) of integer values.
- Returns 0 for quotients where the divisor is 0.
**Implementation details**
- If either `tuple_num` or `tuple_div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor as in [tupleIntDiv](#tupleintdiv).
**Examples**
Query:
``` sql
SELECT tupleIntDivOrZero((5, 10, 15), (0, 0, 0));
```
Result:
``` text
┌─tupleIntDivOrZero((5, 10, 15), (0, 0, 0))─┐
│ (0,0,0) │
└───────────────────────────────────────────┘
```
## tupleIntDivByNumber
Does integer division of a tuple of numerators by a given denominator, and returns a tuple of the quotients.
**Syntax**
```sql
tupleIntDivByNumber(tuple_num, div)
```
**Parameters**
- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type.
- `div`: The divisor value. [Numeric](../data-types/int-uint.md) type.
**Returned value**
- Tuple of the quotients of `tuple_num` and `div`. [Tuple](../data-types/tuple) of integer values.
**Implementation details**
- If either `tuple_num` or `div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor.
- An error will be thrown for division by 0.
**Examples**
Query:
``` sql
SELECT tupleIntDivByNumber((15, 10, 5), 5);
```
Result:
``` text
┌─tupleIntDivByNumber((15, 10, 5), 5)─┐
│ (3,2,1) │
└─────────────────────────────────────┘
```
Query:
``` sql
SELECT tupleIntDivByNumber((15.2, 10.7, 5.5), 5.8);
```
Result:
``` text
┌─tupleIntDivByNumber((15.2, 10.7, 5.5), 5.8)─┐
│ (2,1,0) │
└─────────────────────────────────────────────┘
```
## tupleIntDivOrZeroByNumber
Like [tupleIntDivByNumber](#tupleintdivbynumber) it does integer division of a tuple of numerators by a given denominator, and returns a tuple of the quotients. It does not throw an error for 0 divisors, but rather returns the quotient as 0.
**Syntax**
```sql
tupleIntDivOrZeroByNumber(tuple_num, div)
```
**Parameters**
- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type.
- `div`: The divisor value. [Numeric](../data-types/int-uint.md) type.
**Returned value**
- Tuple of the quotients of `tuple_num` and `div`. [Tuple](../data-types/tuple) of integer values.
- Returns 0 for quotients where the divisor is 0.
**Implementation details**
- If either `tuple_num` or `div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor as in [tupleIntDivByNumber](#tupleintdivbynumber).
**Examples**
Query:
``` sql
SELECT tupleIntDivOrZeroByNumber((15, 10, 5), 5);
```
Result:
``` text
┌─tupleIntDivOrZeroByNumber((15, 10, 5), 5)─┐
│ (3,2,1) │
└───────────────────────────────────────────┘
```
Query:
``` sql
SELECT tupleIntDivOrZeroByNumber((15, 10, 5), 0);
```
Result:
``` text
┌─tupleIntDivOrZeroByNumber((15, 10, 5), 0)─┐
│ (0,0,0) │
└───────────────────────────────────────────┘
```
## tupleModulo
Returns a tuple of the moduli (remainders) of division operations of two tuples.
**Syntax**
```sql
tupleModulo(tuple_num, tuple_div)
```
**Parameters**
- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type.
- `tuple_div`: Tuple of modulus values. [Tuple](../data-types/tuple) of numeric type.
**Returned value**
- Tuple of the remainders of division of `tuple_num` and `tuple_div`. [Tuple](../data-types/tuple) of integer values.
- An error is thrown for division by zero.
**Examples**
Query:
``` sql
SELECT tupleModulo((15, 10, 5), (5, 3, 2));
```
Result:
``` text
┌─tupleModulo((15, 10, 5), (5, 3, 2))─┐
│ (0,1,1) │
└─────────────────────────────────────┘
```
## tupleModuloByNumber
Returns a tuple of the moduli (remainders) of division operations of a tuple and a given divisor.
**Syntax**
```sql
tupleModuloByNumber(tuple_num, div)
```
**Parameters**
- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type.
- `div`: The divisor value. [Numeric](../data-types/int-uint.md) type.
**Returned value**
- Tuple of the remainders of division of `tuple_num` and `div`. [Tuple](../data-types/tuple) of integer values.
- An error is thrown for division by zero.
**Examples**
Query:
``` sql
SELECT tupleModuloByNumber((15, 10, 5), 2);
```
Result:
``` text
┌─tupleModuloByNumber((15, 10, 5), 2)─┐
│ (1,0,1) │
└─────────────────────────────────────┘
```
## Distance functions
All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md).

View File

@ -934,8 +934,8 @@ void Client::addOptions(OptionsDescription & options_description)
("user,u", po::value<std::string>()->default_value("default"), "user")
("password", po::value<std::string>(), "password")
("ask-password", "ask-password")
("ssh-key-file", po::value<std::string>(), "File containing ssh private key needed for authentication. If not set does password authentication.")
("ssh-key-passphrase", po::value<std::string>(), "Passphrase for imported ssh key.")
("ssh-key-file", po::value<std::string>(), "File containing the SSH private key for authenticate with the server.")
("ssh-key-passphrase", po::value<std::string>(), "Passphrase for the SSH private key specified by --ssh-key-file.")
("quota_key", po::value<std::string>(), "A string to differentiate quotas when the user have keyed quotas configured on server")
("max_client_network_bandwidth", po::value<int>(), "the maximum speed of data exchange over the network for the client in bytes per second.")

View File

@ -4,11 +4,12 @@
#include <Access/ExternalAuthenticators.h>
#include <Access/LDAPClient.h>
#include <Access/GSSAcceptor.h>
#include <Common/Exception.h>
#include <Poco/SHA1Engine.h>
#include <Common/Exception.h>
#include <Common/SSHWrapper.h>
#include <Common/typeid_cast.h>
#include <Common/SSH/Wrappers.h>
#include "config.h"
namespace DB
{
@ -74,7 +75,7 @@ namespace
}
#if USE_SSH
bool checkSshSignature(const std::vector<ssh::SSHKey> & keys, std::string_view signature, std::string_view original)
bool checkSshSignature(const std::vector<SSHKey> & keys, std::string_view signature, std::string_view original)
{
for (const auto & key: keys)
if (key.isPublic() && key.verifySignature(signature, original))
@ -114,7 +115,11 @@ bool Authentication::areCredentialsValid(
throw Authentication::Require<BasicCredentials>("ClickHouse X.509 Authentication");
case AuthenticationType::SSH_KEY:
throw Authentication::Require<SshCredentials>("Ssh Keys Authentication");
#if USE_SSH
throw Authentication::Require<SshCredentials>("SSH Keys Authentication");
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
case AuthenticationType::MAX:
break;
@ -145,7 +150,11 @@ bool Authentication::areCredentialsValid(
throw Authentication::Require<BasicCredentials>("ClickHouse X.509 Authentication");
case AuthenticationType::SSH_KEY:
throw Authentication::Require<SshCredentials>("Ssh Keys Authentication");
#if USE_SSH
throw Authentication::Require<SshCredentials>("SSH Keys Authentication");
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
case AuthenticationType::MAX:
break;
@ -178,7 +187,11 @@ bool Authentication::areCredentialsValid(
throw Authentication::Require<BasicCredentials>("ClickHouse X.509 Authentication");
case AuthenticationType::SSH_KEY:
throw Authentication::Require<SshCredentials>("Ssh Keys Authentication");
#if USE_SSH
throw Authentication::Require<SshCredentials>("SSH Keys Authentication");
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
case AuthenticationType::BCRYPT_PASSWORD:
return checkPasswordBcrypt(basic_credentials->getPassword(), auth_data.getPasswordHashBinary());
@ -216,13 +229,18 @@ bool Authentication::areCredentialsValid(
return auth_data.getSSLCertificateCommonNames().contains(ssl_certificate_credentials->getCommonName());
case AuthenticationType::SSH_KEY:
throw Authentication::Require<SshCredentials>("Ssh Keys Authentication");
#if USE_SSH
throw Authentication::Require<SshCredentials>("SSH Keys Authentication");
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
case AuthenticationType::MAX:
break;
}
}
#if USE_SSH
if (const auto * ssh_credentials = typeid_cast<const SshCredentials *>(&credentials))
{
switch (auth_data.getType())
@ -243,15 +261,12 @@ bool Authentication::areCredentialsValid(
throw Authentication::Require<SSLCertificateCredentials>("ClickHouse X.509 Authentication");
case AuthenticationType::SSH_KEY:
#if USE_SSH
return checkSshSignature(auth_data.getSSHKeys(), ssh_credentials->getSignature(), ssh_credentials->getOriginal());
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL");
#endif
case AuthenticationType::MAX:
break;
}
}
#endif
if ([[maybe_unused]] const auto * always_allow_credentials = typeid_cast<const AlwaysAllowCredentials *>(&credentials))
return true;

View File

@ -105,7 +105,10 @@ bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs)
return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash)
&& (lhs.ldap_server_name == rhs.ldap_server_name) && (lhs.kerberos_realm == rhs.kerberos_realm)
&& (lhs.ssl_certificate_common_names == rhs.ssl_certificate_common_names)
&& (lhs.ssh_keys == rhs.ssh_keys) && (lhs.http_auth_scheme == rhs.http_auth_scheme)
#if USE_SSH
&& (lhs.ssh_keys == rhs.ssh_keys)
#endif
&& (lhs.http_auth_scheme == rhs.http_auth_scheme)
&& (lhs.http_auth_server_name == rhs.http_auth_server_name);
}
@ -326,7 +329,7 @@ std::shared_ptr<ASTAuthenticationData> AuthenticationData::toAST() const
break;
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL");
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
}
case AuthenticationType::HTTP:
@ -355,7 +358,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
{
#if USE_SSH
AuthenticationData auth_data(*query.type);
std::vector<ssh::SSHKey> keys;
std::vector<SSHKey> keys;
size_t args_size = query.children.size();
for (size_t i = 0; i < args_size; ++i)
@ -366,7 +369,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
try
{
keys.emplace_back(ssh::SSHKeyFactory::makePublicFromBase64(key_base64, type));
keys.emplace_back(SSHKeyFactory::makePublicKeyFromBase64(key_base64, type));
}
catch (const std::invalid_argument &)
{
@ -377,7 +380,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
auth_data.setSSHKeys(std::move(keys));
return auth_data;
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL");
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
}

View File

@ -2,14 +2,16 @@
#include <Access/Common/AuthenticationType.h>
#include <Access/Common/HTTPAuthenticationScheme.h>
#include <Common/SSHWrapper.h>
#include <Interpreters/Context_fwd.h>
#include <Parsers/Access/ASTAuthenticationData.h>
#include <Common/SSH/Wrappers.h>
#include <vector>
#include <base/types.h>
#include <boost/container/flat_set.hpp>
#include "config.h"
namespace DB
{
@ -59,8 +61,10 @@ public:
const boost::container::flat_set<String> & getSSLCertificateCommonNames() const { return ssl_certificate_common_names; }
void setSSLCertificateCommonNames(boost::container::flat_set<String> common_names_);
const std::vector<ssh::SSHKey> & getSSHKeys() const { return ssh_keys; }
void setSSHKeys(std::vector<ssh::SSHKey> && ssh_keys_) { ssh_keys = std::forward<std::vector<ssh::SSHKey>>(ssh_keys_); }
#if USE_SSH
const std::vector<SSHKey> & getSSHKeys() const { return ssh_keys; }
void setSSHKeys(std::vector<SSHKey> && ssh_keys_) { ssh_keys = std::forward<std::vector<SSHKey>>(ssh_keys_); }
#endif
HTTPAuthenticationScheme getHTTPAuthenticationScheme() const { return http_auth_scheme; }
void setHTTPAuthenticationScheme(HTTPAuthenticationScheme scheme) { http_auth_scheme = scheme; }
@ -94,7 +98,9 @@ private:
String kerberos_realm;
boost::container::flat_set<String> ssl_certificate_common_names;
String salt;
std::vector<ssh::SSHKey> ssh_keys;
#if USE_SSH
std::vector<SSHKey> ssh_keys;
#endif
/// HTTP authentication properties
String http_auth_server_name;
HTTPAuthenticationScheme http_auth_scheme = HTTPAuthenticationScheme::BASIC;

View File

@ -34,8 +34,8 @@ enum class AuthenticationType
/// Password is encrypted in bcrypt hash.
BCRYPT_PASSWORD,
/// Server sends a random string named `challenge` which client needs to encrypt with private key.
/// The check is performed on server side by decrypting the data and comparing with the original string.
/// Server sends a random string named `challenge` to the client. The client encrypts it with its SSH private key.
/// The server decrypts the result using the SSH public key registered for the user and compares with the original string.
SSH_KEY,
/// Authentication through HTTP protocol

View File

@ -3,6 +3,7 @@
#include <base/types.h>
#include <memory>
#include "config.h"
namespace DB
{
@ -86,10 +87,11 @@ class MySQLNative41Credentials : public CredentialsWithScramble
using CredentialsWithScramble::CredentialsWithScramble;
};
#if USE_SSH
class SshCredentials : public Credentials
{
public:
explicit SshCredentials(const String& user_name_, const String& signature_, const String& original_)
SshCredentials(const String & user_name_, const String & signature_, const String & original_)
: Credentials(user_name_), signature(signature_), original(original_)
{
is_ready = true;
@ -117,5 +119,6 @@ private:
String signature;
String original;
};
#endif
}

View File

@ -31,7 +31,7 @@ void User::setName(const String & name_)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name is empty");
if (name_ == EncodedUserInfo::USER_INTERSERVER_MARKER)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_);
if (startsWith(name_, EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER))
if (name_.starts_with(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_);
name = name_;
}

View File

@ -1,6 +1,5 @@
#include <Access/UsersConfigAccessStorage.h>
#include <Access/Quota.h>
#include <Common/SSH/Wrappers.h>
#include <Access/RowPolicy.h>
#include <Access/User.h>
#include <Access/Role.h>
@ -10,6 +9,7 @@
#include <Access/AccessChangesNotifier.h>
#include <Dictionaries/IDictionary.h>
#include <Common/Config/ConfigReloader.h>
#include <Common/SSHWrapper.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/quoteString.h>
#include <Common/transformEndianness.h>
@ -214,7 +214,7 @@ namespace
Poco::Util::AbstractConfiguration::Keys entries;
config.keys(ssh_keys_config, entries);
std::vector<ssh::SSHKey> keys;
std::vector<SSHKey> keys;
for (const String& entry : entries)
{
const auto conf_pref = ssh_keys_config + "." + entry + ".";
@ -237,7 +237,7 @@ namespace
try
{
keys.emplace_back(ssh::SSHKeyFactory::makePublicFromBase64(base64_key, type));
keys.emplace_back(SSHKeyFactory::makePublicKeyFromBase64(base64_key, type));
}
catch (const std::invalid_argument &)
{
@ -249,7 +249,7 @@ namespace
}
user->auth_data.setSSHKeys(std::move(keys));
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL");
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
}
else if (has_http_auth)

View File

@ -12,24 +12,6 @@ namespace DB
namespace
{
/// Names of the `dictGet*` family of functions (the generic `dictGet` plus the typed variants).
/// NOTE(review): judging by the identifier, these are the functions that may be injective;
/// the code consulting this set is not visible here, so confirm against the usage site.
const std::unordered_set<String> possibly_injective_function_names
{
"dictGet",
"dictGetString",
"dictGetUInt8",
"dictGetUInt16",
"dictGetUInt32",
"dictGetUInt64",
"dictGetInt8",
"dictGetInt16",
"dictGetInt32",
"dictGetInt64",
"dictGetFloat32",
"dictGetFloat64",
"dictGetDate",
"dictGetDateTime"
};
class OptimizeGroupByInjectiveFunctionsVisitor : public InDepthQueryTreeVisitorWithContext<OptimizeGroupByInjectiveFunctionsVisitor>
{
using Base = InDepthQueryTreeVisitorWithContext<OptimizeGroupByInjectiveFunctionsVisitor>;

View File

@ -2275,6 +2275,10 @@ void QueryAnalyzer::mergeWindowWithParentWindow(const QueryTreeNodePtr & window_
*/
void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_list, const QueryTreeNodes & projection_nodes, IdentifierResolveScope & scope)
{
const auto & settings = scope.context->getSettingsRef();
if (!settings.enable_positional_arguments || scope.context->getClientInfo().query_kind != ClientInfo::QueryKind::INITIAL_QUERY)
return;
auto & node_list_typed = node_list->as<ListNode &>();
for (auto & node : node_list_typed.getNodes())
@ -2287,7 +2291,8 @@ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_
auto * constant_node = (*node_to_replace)->as<ConstantNode>();
if (!constant_node
|| (constant_node->getValue().getType() != Field::Types::UInt64 && constant_node->getValue().getType() != Field::Types::Int64))
|| (constant_node->getValue().getType() != Field::Types::UInt64
&& constant_node->getValue().getType() != Field::Types::Int64))
continue;
UInt64 pos;
@ -6681,15 +6686,12 @@ void expandTuplesInList(QueryTreeNodes & key_list)
*/
void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierResolveScope & scope)
{
const auto & settings = scope.context->getSettingsRef();
if (query_node_typed.isGroupByWithGroupingSets())
{
QueryTreeNodes nullable_group_by_keys;
for (auto & grouping_sets_keys_list_node : query_node_typed.getGroupBy().getNodes())
{
if (settings.enable_positional_arguments)
replaceNodesWithPositionalArguments(grouping_sets_keys_list_node, query_node_typed.getProjection().getNodes(), scope);
replaceNodesWithPositionalArguments(grouping_sets_keys_list_node, query_node_typed.getProjection().getNodes(), scope);
// Remove redundant calls to `tuple` function. It simplifies checking if expression is an aggregation key.
// It's required to support queries like: SELECT number FROM numbers(3) GROUP BY (number, number % 2)
@ -6708,8 +6710,7 @@ void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierR
}
else
{
if (settings.enable_positional_arguments)
replaceNodesWithPositionalArguments(query_node_typed.getGroupByNode(), query_node_typed.getProjection().getNodes(), scope);
replaceNodesWithPositionalArguments(query_node_typed.getGroupByNode(), query_node_typed.getProjection().getNodes(), scope);
// Remove redundant calls to `tuple` function. It simplifies checking if expression is an aggregation key.
// It's required to support queries like: SELECT number FROM numbers(3) GROUP BY (number, number % 2)
@ -7860,8 +7861,6 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
if (query_node_typed.isCTE())
cte_in_resolve_process.insert(query_node_typed.getCTEName());
const auto & settings = scope.context->getSettingsRef();
bool is_rollup_or_cube = query_node_typed.isGroupByWithRollup() || query_node_typed.isGroupByWithCube();
if (query_node_typed.isGroupByWithGroupingSets()
@ -8045,8 +8044,9 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
if (query_node_typed.hasOrderBy())
{
if (settings.enable_positional_arguments)
replaceNodesWithPositionalArguments(query_node_typed.getOrderByNode(), query_node_typed.getProjection().getNodes(), scope);
replaceNodesWithPositionalArguments(query_node_typed.getOrderByNode(), query_node_typed.getProjection().getNodes(), scope);
const auto & settings = scope.context->getSettingsRef();
expandOrderByAll(query_node_typed, settings);
resolveSortNodeList(query_node_typed.getOrderByNode(), scope);
@ -8069,8 +8069,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
if (query_node_typed.hasLimitBy())
{
if (settings.enable_positional_arguments)
replaceNodesWithPositionalArguments(query_node_typed.getLimitByNode(), query_node_typed.getProjection().getNodes(), scope);
replaceNodesWithPositionalArguments(query_node_typed.getLimitByNode(), query_node_typed.getProjection().getNodes(), scope);
resolveExpressionNodeList(query_node_typed.getLimitByNode(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
}

View File

@ -760,4 +760,26 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_ty
return function_node;
}
/// Builds the query tree of a subquery that selects every ordinary column of the given table.
///
/// The returned QueryNode
///  - is marked as a subquery,
///  - has one ColumnNode per ordinary column of the table's storage snapshot as its projection,
///  - uses `table_node` itself as its join tree,
///  - owns a copy of `context`.
///
/// `table_node` must point to a TableNode. The reference form of `as<>` is used so that any other
/// node type is reported by the cast itself instead of dereferencing a null pointer.
QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(QueryTreeNodePtr table_node, const ContextPtr & context)
{
    const auto & storage_snapshot = table_node->as<TableNode &>().getStorageSnapshot();
    auto columns_to_select = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::Ordinary));
    size_t columns_to_select_size = columns_to_select.size();

    auto column_nodes_to_select = std::make_shared<ListNode>();
    column_nodes_to_select->getNodes().reserve(columns_to_select_size);

    NamesAndTypes projection_columns;
    projection_columns.reserve(columns_to_select_size);

    /// The projection nodes and the resolved projection columns are filled in the same order.
    for (const auto & column : columns_to_select)
    {
        column_nodes_to_select->getNodes().emplace_back(std::make_shared<ColumnNode>(column, table_node));
        projection_columns.emplace_back(column.name, column.type);
    }

    auto subquery_for_table = std::make_shared<QueryNode>(Context::createCopy(context));
    subquery_for_table->setIsSubquery(true);
    subquery_for_table->getProjectionNode() = std::move(column_nodes_to_select);
    /// `table_node` is moved last: the column nodes created above still needed it as their source.
    subquery_for_table->getJoinTree() = std::move(table_node);
    subquery_for_table->resolveProjectionColumns(std::move(projection_columns));

    return subquery_for_table;
}
}

View File

@ -105,4 +105,7 @@ NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node);
/// Wrap node into `_CAST` function
QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context);
/// Build subquery which we execute for `IN table` function.
QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(QueryTreeNodePtr table_node, const ContextPtr & context);
}

View File

@ -85,7 +85,6 @@ add_headers_and_sources(clickhouse_common_io Common)
add_headers_and_sources(clickhouse_common_io Common/HashTable)
add_headers_and_sources(clickhouse_common_io Common/Scheduler)
add_headers_and_sources(clickhouse_common_io Common/Scheduler/Nodes)
add_headers_and_sources(clickhouse_common_io Common/SSH)
add_headers_and_sources(clickhouse_common_io IO)
add_headers_and_sources(clickhouse_common_io IO/Archives)
add_headers_and_sources(clickhouse_common_io IO/S3)
@ -99,7 +98,6 @@ add_headers_and_sources(clickhouse_compression Core)
# Include only these specific files to avoid linking grpc
add_glob(clickhouse_compression_headers Server/ServerType.h)
add_glob(clickhouse_compression_sources Server/ServerType.cpp)
add_headers_and_sources(clickhouse_compression Common/SSH)
add_library(clickhouse_compression ${clickhouse_compression_headers} ${clickhouse_compression_sources})
@ -370,8 +368,7 @@ if (TARGET ch_contrib::crc32-vpmsum)
endif()
if (TARGET ch_contrib::ssh)
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::ssh)
target_link_libraries(clickhouse_compression PUBLIC ch_contrib::ssh)
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::ssh)
endif()
dbms_target_link_libraries(PUBLIC ch_contrib::abseil_swiss_tables)

View File

@ -712,11 +712,20 @@ void ClientBase::adjustSettings()
settings.input_format_values_allow_data_after_semicolon.changed = false;
}
/// If pager is specified then output_format_pretty_max_rows is ignored, this should be handled by pager.
if (!pager.empty() && !global_context->getSettingsRef().output_format_pretty_max_rows.changed)
/// Do not limit pretty format output in case of --pager specified.
if (!pager.empty())
{
settings.output_format_pretty_max_rows = std::numeric_limits<UInt64>::max();
settings.output_format_pretty_max_rows.changed = false;
if (!global_context->getSettingsRef().output_format_pretty_max_rows.changed)
{
settings.output_format_pretty_max_rows = std::numeric_limits<UInt64>::max();
settings.output_format_pretty_max_rows.changed = false;
}
if (!global_context->getSettingsRef().output_format_pretty_max_value_width.changed)
{
settings.output_format_pretty_max_value_width = std::numeric_limits<UInt64>::max();
settings.output_format_pretty_max_value_width.changed = false;
}
}
global_context->setSettings(settings);

View File

@ -67,7 +67,7 @@ Connection::~Connection() = default;
Connection::Connection(const String & host_, UInt16 port_,
const String & default_database_,
const String & user_, const String & password_,
const ssh::SSHKey & ssh_private_key_,
[[maybe_unused]] const SSHKey & ssh_private_key_,
const String & quota_key_,
const String & cluster_,
const String & cluster_secret_,
@ -76,7 +76,9 @@ Connection::Connection(const String & host_, UInt16 port_,
Protocol::Secure secure_)
: host(host_), port(port_), default_database(default_database_)
, user(user_), password(password_)
#if USE_SSH
, ssh_private_key(ssh_private_key_)
#endif
, quota_key(quota_key_)
, cluster(cluster_)
, cluster_secret(cluster_secret_)
@ -276,17 +278,6 @@ void Connection::disconnect()
}
/// Assembles the message that gets signed during SSH authentication: the TCP protocol version,
/// the default database, the user name and the challenge, concatenated in exactly this order.
String Connection::packStringForSshSign(String challenge)
{
    String packed = std::to_string(DBMS_TCP_PROTOCOL_VERSION);
    packed += default_database;
    packed += user;
    packed += challenge;
    return packed;
}
void Connection::sendHello()
{
/** Disallow control characters in user controlled parameters
@ -334,10 +325,10 @@ void Connection::sendHello()
#endif
}
#if USE_SSH
/// Just inform server that we will authenticate using SSH keys.
else if (!ssh_private_key.isEmpty())
{
writeStringBinary(fmt::format("{}{}", EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER, user), *out);
/// Inform server that we will authenticate using SSH keys.
writeStringBinary(String(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER) + user, *out);
writeStringBinary(password, *out);
performHandshakeForSSHAuth();
@ -361,9 +352,9 @@ void Connection::sendAddendum()
}
#if USE_SSH
void Connection::performHandshakeForSSHAuth()
{
#if USE_SSH
String challenge;
{
writeVarUInt(Protocol::Client::SSHChallengeRequest, *out);
@ -388,12 +379,23 @@ void Connection::performHandshakeForSSHAuth()
}
writeVarUInt(Protocol::Client::SSHChallengeResponse, *out);
String to_sign = packStringForSshSign(challenge);
auto pack_string_for_ssh_sign = [&](String challenge_)
{
String message;
message.append(std::to_string(DBMS_TCP_PROTOCOL_VERSION));
message.append(default_database);
message.append(user);
message.append(challenge_);
return message;
};
String to_sign = pack_string_for_ssh_sign(challenge);
String signature = ssh_private_key.signString(to_sign);
writeStringBinary(signature, *out);
out->next();
#endif
}
#endif
void Connection::receiveHello(const Poco::Timespan & handshake_timeout)

View File

@ -1,10 +1,9 @@
#pragma once
#include <Poco/Net/StreamSocket.h>
#include <Common/SSH/Wrappers.h>
#include <Common/callOnce.h>
#include <Common/SSHWrapper.h>
#include <Client/IServerConnection.h>
#include <Core/Defines.h>
@ -53,7 +52,7 @@ public:
Connection(const String & host_, UInt16 port_,
const String & default_database_,
const String & user_, const String & password_,
const ssh::SSHKey & ssh_private_key_,
const SSHKey & ssh_private_key_,
const String & quota_key_,
const String & cluster_,
const String & cluster_secret_,
@ -170,7 +169,9 @@ private:
String default_database;
String user;
String password;
ssh::SSHKey ssh_private_key;
#if USE_SSH
SSHKey ssh_private_key;
#endif
String quota_key;
/// For inter-server authorization
@ -265,9 +266,10 @@ private:
void connect(const ConnectionTimeouts & timeouts);
void sendHello();
String packStringForSshSign(String challenge);
#if USE_SSH
void performHandshakeForSSHAuth();
#endif
void sendAddendum();
void receiveHello(const Poco::Timespan & handshake_timeout);

View File

@ -1,11 +1,10 @@
#include "ConnectionParameters.h"
#include <fstream>
#include <Core/Defines.h>
#include <Core/Protocol.h>
#include <Core/Types.h>
#include <IO/ConnectionTimeouts.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Common/SSH/Wrappers.h>
#include <Common/Exception.h>
#include <Common/isLocalAddress.h>
#include <Common/DNSResolver.h>
@ -88,19 +87,19 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
}
else
{
std::string prompt{"Enter your private key passphrase (leave empty for no passphrase): "};
std::string prompt{"Enter your SSH private key passphrase (leave empty for no passphrase): "};
char buf[1000] = {};
if (auto * result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0))
passphrase = result;
}
ssh::SSHKey key = ssh::SSHKeyFactory::makePrivateFromFile(filename, passphrase);
SSHKey key = SSHKeyFactory::makePrivateKeyFromFile(filename, passphrase);
if (!key.isPrivate())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Found public key in file: {} but expected private", filename);
throw Exception(ErrorCodes::BAD_ARGUMENTS, "File {} did not contain a private key (is it a public key?)", filename);
ssh_private_key = std::move(key);
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL");
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
}

View File

@ -1,9 +1,10 @@
#pragma once
#include <string>
#include <Common/SSHWrapper.h>
#include <Core/Protocol.h>
#include <IO/ConnectionTimeouts.h>
#include <Common/SSH/Wrappers.h>
#include <string>
namespace Poco::Util
{
@ -20,7 +21,7 @@ struct ConnectionParameters
std::string user;
std::string password;
std::string quota_key;
ssh::SSHKey ssh_private_key;
SSHKey ssh_private_key;
Protocol::Secure security = Protocol::Secure::Disable;
Protocol::Compression compression = Protocol::Compression::Enable;
ConnectionTimeouts timeouts;

View File

@ -123,7 +123,7 @@ protected:
{
return std::make_shared<Connection>(
host, port,
default_database, user, password, ssh::SSHKey(), quota_key,
default_database, user, password, SSHKey(), quota_key,
cluster, cluster_secret,
client_name, compression, secure);
}

View File

@ -597,6 +597,7 @@
M(716, CANNOT_FORGET_PARTITION) \
M(717, EXPERIMENTAL_FEATURE_ERROR) \
M(718, TOO_SLOW_PARSING) \
M(719, QUERY_CACHE_USED_WITH_SYSTEM_TABLE) \
\
M(900, DISTRIBUTED_CACHE_ERROR) \
M(901, CANNOT_USE_DISTRIBUTED_CACHE) \

View File

@ -1,4 +1,5 @@
#include <Common/SSH/Wrappers.h>
#include <Common/SSHWrapper.h>
# if USE_SSH
# include <stdexcept>
@ -10,6 +11,14 @@
# pragma clang diagnostic pop
namespace DB
{
namespace ErrorCodes
{
extern const int LIBSSH_ERROR;
}
namespace
{
@ -18,17 +27,19 @@ class SSHString
public:
explicit SSHString(std::string_view input)
{
string = ssh_string_new(input.size());
ssh_string_fill(string, input.data(), input.size());
if (string = ssh_string_new(input.size()); string == nullptr)
throw Exception(ErrorCodes::LIBSSH_ERROR, "Can't create SSHString");
if (int rc = ssh_string_fill(string, input.data(), input.size()); rc != SSH_OK)
throw Exception(ErrorCodes::LIBSSH_ERROR, "Can't create SSHString");
}
explicit SSHString(ssh_string c_other) { string = c_other; }
explicit SSHString(ssh_string other) { string = other; }
ssh_string get() { return string; }
String toString()
{
return String(ssh_string_get_char(string), ssh_string_len(string));
return {ssh_string_get_char(string), ssh_string_len(string)};
}
~SSHString()
@ -42,46 +53,28 @@ private:
}
namespace DB
{
namespace ErrorCodes
{
extern const int LIBSSH_ERROR;
}
namespace ssh
{
SSHKey SSHKeyFactory::makePrivateFromFile(String filename, String passphrase)
SSHKey SSHKeyFactory::makePrivateKeyFromFile(String filename, String passphrase)
{
ssh_key key;
int rc = ssh_pki_import_privkey_file(filename.c_str(), passphrase.c_str(), nullptr, nullptr, &key);
if (rc != SSH_OK)
{
if (int rc = ssh_pki_import_privkey_file(filename.c_str(), passphrase.c_str(), nullptr, nullptr, &key); rc != SSH_OK)
throw Exception(ErrorCodes::LIBSSH_ERROR, "Can't import SSH private key from file");
}
return SSHKey(key);
}
SSHKey SSHKeyFactory::makePublicFromFile(String filename)
SSHKey SSHKeyFactory::makePublicKeyFromFile(String filename)
{
ssh_key key;
int rc = ssh_pki_import_pubkey_file(filename.c_str(), &key);
if (rc != SSH_OK)
if (int rc = ssh_pki_import_pubkey_file(filename.c_str(), &key); rc != SSH_OK)
throw Exception(ErrorCodes::LIBSSH_ERROR, "Can't import SSH public key from file");
return SSHKey(key);
}
SSHKey SSHKeyFactory::makePublicFromBase64(String base64_key, String type_name)
SSHKey SSHKeyFactory::makePublicKeyFromBase64(String base64_key, String type_name)
{
ssh_key key;
auto key_type = ssh_key_type_from_name(type_name.c_str());
int rc = ssh_pki_import_pubkey_base64(base64_key.c_str(), key_type, &key);
if (rc != SSH_OK)
if (int rc = ssh_pki_import_pubkey_base64(base64_key.c_str(), key_type, &key); rc != SSH_OK)
throw Exception(ErrorCodes::LIBSSH_ERROR, "Bad SSH public key provided");
return SSHKey(key);
}
@ -90,6 +83,12 @@ SSHKey::SSHKey(const SSHKey & other)
key = ssh_key_dup(other.key);
}
/// Move constructor: takes over the key handle of `other` and resets `other` to a null handle,
/// so that the handle is held by exactly one SSHKey afterwards.
SSHKey::SSHKey(SSHKey && other) noexcept
    : key(other.key)
{
    other.key = nullptr;
}
SSHKey & SSHKey::operator=(const SSHKey & other)
{
ssh_key_free(key);
@ -119,13 +118,11 @@ bool SSHKey::isEqual(const SSHKey & other) const
/// Signs `input` with this key and returns the signature as a String.
/// Throws an Exception with code LIBSSH_ERROR if `pki_sign_string` does not return SSH_OK.
String SSHKey::signString(std::string_view input) const
{
    SSHString input_str(input);

    ssh_string output = nullptr;
    if (int rc = pki_sign_string(key, input_str.get(), &output); rc != SSH_OK)
        throw Exception(ErrorCodes::LIBSSH_ERROR, "Error signing with ssh key");

    /// Wrap the returned libssh string so that it is managed by SSHString from here on.
    SSHString output_str(output);
    return output_str.toString();
}
bool SSHKey::verifySignature(std::string_view signature, std::string_view original) const
@ -149,18 +146,15 @@ namespace
{
struct CStringDeleter
{
[[maybe_unused]] void operator()(char * ptr) const { std::free(ptr); }
void operator()(char * ptr) const { std::free(ptr); }
};
}
String SSHKey::getBase64() const
{
char * buf = nullptr;
int rc = ssh_pki_export_pubkey_base64(key, &buf);
if (rc != SSH_OK)
if (int rc = ssh_pki_export_pubkey_base64(key, &buf); rc != SSH_OK)
throw DB::Exception(DB::ErrorCodes::LIBSSH_ERROR, "Failed to export public key to base64");
/// Create a String from cstring, which makes a copy of the first one and requires freeing memory after it
/// This is to safely manage buf memory
std::unique_ptr<char, CStringDeleter> buf_ptr(buf);
@ -177,7 +171,6 @@ SSHKey::~SSHKey()
ssh_key_free(key); // it's safe free from libssh
}
}
}
#endif

View File

@ -1,20 +1,18 @@
#pragma once
#include <Common/Exception.h>
#include "config.h"
#if USE_SSH
# include <string_view>
# include <base/types.h>
#include <Common/Exception.h>
#include <string_view>
#include <base/types.h>
#include "config.h"
#if USE_SSH
using ssh_key = struct ssh_key_struct *;
namespace DB
{
namespace ssh
{
class SSHKeyFactory;
class SSHKey
{
public:
@ -22,11 +20,7 @@ public:
~SSHKey();
SSHKey(const SSHKey & other);
SSHKey(SSHKey && other) noexcept
{
key = other.key;
other.key = nullptr;
}
SSHKey(SSHKey && other) noexcept;
SSHKey & operator=(const SSHKey & other);
SSHKey & operator=(SSHKey && other) noexcept;
@ -43,7 +37,7 @@ public:
String getBase64() const;
String getKeyType() const;
friend SSHKeyFactory;
friend class SSHKeyFactory;
private:
explicit SSHKey(ssh_key key_) : key(key_) { }
ssh_key key = nullptr;
@ -56,17 +50,14 @@ public:
/// The check whether ClickHouse is allowed to read the path
/// (e.g. whether the file is inside the `user_files` directory)
/// has to be done outside of these functions.
static SSHKey makePrivateFromFile(String filename, String passphrase);
static SSHKey makePublicFromFile(String filename);
static SSHKey makePublicFromBase64(String base64_key, String type_name);
static SSHKey makePrivateKeyFromFile(String filename, String passphrase);
static SSHKey makePublicKeyFromFile(String filename);
static SSHKey makePublicKeyFromBase64(String base64_key, String type_name);
};
}
}
#else
namespace ssh
{
class SSHKey
{
public:
@ -74,5 +65,4 @@ public:
[[ noreturn ]] bool isEmpty() { std::terminate(); }
[[ noreturn ]] String signString(std::string_view) const { std::terminate(); }
};
}
#endif

View File

@ -205,7 +205,7 @@ static void aggregate4(Map & local_map, MapTwoLevel & global_map, Mutex * mutexe
else
{
size_t hash_value = global_map.hash(*it);
size_t bucket = global_map.getBucketFromHash(hash_value);
size_t bucket = MapTwoLevel::getBucketFromHash(hash_value);
if (mutexes[bucket].try_lock())
{

View File

@ -56,10 +56,11 @@ namespace DB
namespace EncodedUserInfo
{
/// Marker of the inter-server secret (passed in the user name)
/// Marker for the inter-server secret (passed as the user name)
/// (a user name cannot start with whitespace anyway)
const char USER_INTERSERVER_MARKER[] = " INTERSERVER SECRET ";
/// Marker of the SSH keys based authentication (passed in the user name)
/// Marker for SSH-keys-based authentication (passed as the user name)
const char SSH_KEY_AUTHENTICAION_MARKER[] = " SSH KEY AUTHENTICATION ";
};
@ -160,8 +161,8 @@ namespace Protocol
ReadTaskResponse = 9, /// A filename to read from s3 (used in s3Cluster)
MergeTreeReadTaskResponse = 10, /// Coordinator's decision with a modified set of mark ranges allowed to read
SSHChallengeRequest = 11, /// Request for SSH signature challenge
SSHChallengeResponse = 12, /// Request for SSH signature challenge
SSHChallengeRequest = 11, /// Request SSH signature challenge
SSHChallengeResponse = 12, /// Reply to SSH signature challenge
MAX = SSHChallengeResponse,
};

View File

@ -669,6 +669,7 @@ class IColumn;
M(Bool, enable_writes_to_query_cache, true, "Enable storing results of SELECT queries in the query cache", 0) \
M(Bool, enable_reads_from_query_cache, true, "Enable reading results of SELECT queries from the query cache", 0) \
M(QueryCacheNondeterministicFunctionHandling, query_cache_nondeterministic_function_handling, QueryCacheNondeterministicFunctionHandling::Throw, "How the query cache handles queries with non-deterministic functions, e.g. now()", 0) \
M(QueryCacheSystemTableHandling, query_cache_system_table_handling, QueryCacheSystemTableHandling::Throw, "How the query cache handles queries against system tables, i.e. tables in databases 'system.*' and 'information_schema.*'", 0) \
M(UInt64, query_cache_max_size_in_bytes, 0, "The maximum amount of memory (in bytes) the current user may allocate in the query cache. 0 means unlimited. ", 0) \
M(UInt64, query_cache_max_entries, 0, "The maximum number of query results the current user may store in the query cache. 0 means unlimited.", 0) \
M(UInt64, query_cache_min_query_runs, 0, "Minimum number a SELECT query must run before its result is stored in the query cache", 0) \

View File

@ -87,6 +87,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{
{"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"},
{"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"},
{"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"},
}},
{"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"},
{"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"},

View File

@ -87,6 +87,10 @@ IMPLEMENT_SETTING_ENUM(QueryCacheNondeterministicFunctionHandling, ErrorCodes::B
{"save", QueryCacheNondeterministicFunctionHandling::Save},
{"ignore", QueryCacheNondeterministicFunctionHandling::Ignore}})
/// Textual values of QueryCacheSystemTableHandling: 'throw', 'save' and 'ignore'.
/// NOTE(review): ErrorCodes::BAD_ARGUMENTS is presumably the code raised for an unknown value - confirm against the macro definition.
IMPLEMENT_SETTING_ENUM(QueryCacheSystemTableHandling, ErrorCodes::BAD_ARGUMENTS,
{{"throw", QueryCacheSystemTableHandling::Throw},
{"save", QueryCacheSystemTableHandling::Save},
{"ignore", QueryCacheSystemTableHandling::Ignore}})
IMPLEMENT_SETTING_ENUM(DateTimeInputFormat, ErrorCodes::BAD_ARGUMENTS,
{{"basic", FormatSettings::DateTimeInputFormat::Basic},

View File

@ -184,6 +184,15 @@ enum class QueryCacheNondeterministicFunctionHandling
DECLARE_SETTING_ENUM(QueryCacheNondeterministicFunctionHandling)
/// How the query cache handles queries against system tables, i.e. tables in databases 'system.*' and 'information_schema.*'
enum class QueryCacheSystemTableHandling
{
/// Fail the query with an exception (QUERY_CACHE_USED_WITH_SYSTEM_TABLE)
Throw,
/// Store the query result in the query cache regardless
Save,
/// Run the query without storing its result in the query cache and without throwing
Ignore
};
DECLARE_SETTING_ENUM(QueryCacheSystemTableHandling)
DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeInputFormat, FormatSettings::DateTimeInputFormat)

View File

@ -332,6 +332,7 @@ private:
const std::vector<StackTrace::FramePointers> & thread_frame_pointers,
UInt32 thread_num,
ThreadStatus * thread_ptr) const
try
{
ThreadStatus thread_status;
@ -519,7 +520,7 @@ private:
}
}
/// ClickHouse Keeper does not link to some part of Settings.
/// ClickHouse Keeper does not link to some parts of Settings.
#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD
/// List changed settings.
if (!query_id.empty())
@ -537,12 +538,18 @@ private:
}
#endif
/// When everything is done, we will try to send these error messages to client.
/// When everything is done, we will try to send these error messages to the client.
if (thread_ptr)
thread_ptr->onFatalError();
fatal_error_printed.test_and_set();
}
catch (...)
{
/// onFault is called from the std::thread, and it should catch all exceptions; otherwise, you can get unrelated fatal errors.
PreformattedMessage message = getCurrentExceptionMessageAndPattern(true);
LOG_FATAL(getLogger(__PRETTY_FUNCTION__), message);
}
};

View File

@ -280,7 +280,7 @@ public:
/**
At compile time, result is unknown. We only know the Scale (number of fractional digits) at runtime.
Also nothing is known about size of whole part.
As in simple division/multiplication for decimals, we scale the result up, but is is explicit here and no downscale is performed.
As in simple division/multiplication for decimals, we scale the result up, but it is explicit here and no downscale is performed.
It guarantees that result will have given scale and it can also be MANUALLY converted to other decimal types later.
**/
if (scale > DecimalUtils::max_precision<Decimal256>)

View File

@ -32,7 +32,7 @@ namespace
#endif
/// Get the host name. Is is constant on single server, but is not constant in distributed queries.
/// Get the host name. It is constant on a single server, but is not constant in distributed queries.
class FunctionHostName : public FunctionConstantBase<FunctionHostName, String, DataTypeString>
{
public:

View File

@ -2,11 +2,17 @@
#include <Functions/FunctionFactory.h>
#include <Interpreters/Context.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/IAST.h>
#include <Parsers/IParser.h>
#include <Parsers/TokenIterator.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseDatabaseAndTableName.h>
#include <Common/ProfileEvents.h>
#include <Common/SipHash.h>
#include <Common/TTLCachePolicy.h>
@ -52,7 +58,54 @@ struct HasNonDeterministicFunctionsMatcher
}
};
/// AST matcher which detects whether a query references a table in a predefined database
/// (as decided by DatabaseCatalog::isPredefinedDatabase).
///
/// Candidate names are taken from table identifiers, plain identifiers and literals. Each candidate
/// is parsed as `[database.]table`; if the database part is a predefined database, the flag in Data is set.
struct HasSystemTablesMatcher
{
    struct Data
    {
        /// Not read by the matcher itself.
        const ContextPtr context;
        /// Result of the traversal. Once set, remaining nodes are skipped.
        bool has_system_tables = false;
    };

    /// Always descend: the name of interest can sit at any depth (e.g. inside table function arguments).
    static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; }

    static void visit(const ASTPtr & node, Data & data)
    {
        if (data.has_system_tables)
            return;

        String database_table; /// or whatever else we get, e.g. just a table

        /// SELECT [...] FROM <table>
        if (const auto * table_identifier = node->as<ASTTableIdentifier>())
        {
            database_table = table_identifier->name();
        }
        /// SELECT [...] FROM clusterAllReplicas(<cluster>, <table>)
        else if (const auto * identifier = node->as<ASTIdentifier>())
        {
            database_table = identifier->name();
        }
        /// Handle SELECT [...] FROM clusterAllReplicas(<cluster>, '<table>')
        else if (const auto * literal = node->as<ASTLiteral>())
        {
            const auto & value = literal->value;
            database_table = toString(value);
        }

        /// Nothing to parse: the node is of another kind or its name is empty.
        /// An empty candidate can never yield a database name, so skip the lexer/parser setup.
        if (database_table.empty())
            return;

        Tokens tokens(database_table.c_str(), database_table.c_str() + database_table.size(), /*max_query_size*/ 2048, /*skip_insignificant*/ true);
        IParser::Pos pos(tokens, /*max_depth*/ 42, /*max_backtracks*/ 42);
        Expected expected;
        String database;
        String table;

        /// Only the database part decides; `table` is filled by the parser but not inspected.
        if (parseDatabaseAndTableName(pos, expected, database, table) && DatabaseCatalog::isPredefinedDatabase(database))
            data.has_system_tables = true;
    }
};
using HasNonDeterministicFunctionsVisitor = InDepthNodeVisitor<HasNonDeterministicFunctionsMatcher, true>;
using HasSystemTablesVisitor = InDepthNodeVisitor<HasSystemTablesMatcher, true>;
}
@ -63,6 +116,13 @@ bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context)
return finder_data.has_non_deterministic_functions;
}
/// Traverses `ast` with HasSystemTablesVisitor and reports whether the matcher flagged a system table.
bool astContainsSystemTables(ASTPtr ast, ContextPtr context)
{
    HasSystemTablesMatcher::Data search_state{context};

    HasSystemTablesVisitor visitor(search_state);
    visitor.visit(ast);

    return search_state.has_system_tables;
}
namespace
{

View File

@ -17,6 +17,9 @@ namespace DB
/// Does AST contain non-deterministic functions like rand() and now()?
bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context);
/// Does AST contain system tables like "system.processes"?
bool astContainsSystemTables(ASTPtr ast, ContextPtr context);
/// Maps queries to query results. Useful to avoid repeated query calculation.
///
/// The cache does not aim to be transactionally consistent (which is difficult to get right). For example, the cache is not invalidated

View File

@ -519,7 +519,8 @@ BlockIO InterpreterInsertQuery::execute()
auto views = DatabaseCatalog::instance().getDependentViews(table_id);
/// It breaks some views-related tests and we have dedicated `parallel_view_processing` for views, so let's just skip them.
const bool resize_to_max_insert_threads = !table->isView() && views.empty();
/// Also it doesn't make sense to reshuffle data if storage doesn't support parallel inserts.
const bool resize_to_max_insert_threads = !table->isView() && views.empty() && table->supportsParallelInsert();
pre_streams_size = resize_to_max_insert_threads ? settings.max_insert_threads
: std::min<size_t>(settings.max_insert_threads, pipeline.getNumStreams());

View File

@ -97,6 +97,7 @@ namespace DB
namespace ErrorCodes
{
extern const int QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS;
extern const int QUERY_CACHE_USED_WITH_SYSTEM_TABLE;
extern const int INTO_OUTFILE_NOT_ALLOWED;
extern const int INVALID_TRANSACTION;
extern const int LOGICAL_ERROR;
@ -1187,15 +1188,26 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
/// top of the pipeline which stores the result in the query cache.
if (can_use_query_cache && settings.enable_writes_to_query_cache)
{
/// Only use the query cache if the query does not contain non-deterministic functions or system tables (which are typically non-deterministic)
const bool ast_contains_nondeterministic_functions = astContainsNonDeterministicFunctions(ast, context);
const bool ast_contains_system_tables = astContainsSystemTables(ast, context);
const QueryCacheNondeterministicFunctionHandling nondeterministic_function_handling = settings.query_cache_nondeterministic_function_handling;
const QueryCacheSystemTableHandling system_table_handling = settings.query_cache_system_table_handling;
if (ast_contains_nondeterministic_functions && nondeterministic_function_handling == QueryCacheNondeterministicFunctionHandling::Throw)
throw Exception(ErrorCodes::QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS,
"The query result was not cached because the query contains a non-deterministic function."
" Use setting `query_cache_nondeterministic_function_handling = 'save'` or `= 'ignore'` to cache the query result regardless or to omit caching");
if (!ast_contains_nondeterministic_functions || nondeterministic_function_handling == QueryCacheNondeterministicFunctionHandling::Save)
if (ast_contains_system_tables && system_table_handling == QueryCacheSystemTableHandling::Throw)
throw Exception(ErrorCodes::QUERY_CACHE_USED_WITH_SYSTEM_TABLE,
"The query result was not cached because the query contains a system table."
" Use setting `query_cache_system_table_handling = 'save'` or `= 'ignore'` to cache the query result regardless or to omit caching");
if ((!ast_contains_nondeterministic_functions || nondeterministic_function_handling == QueryCacheNondeterministicFunctionHandling::Save)
&& (!ast_contains_system_tables || system_table_handling == QueryCacheSystemTableHandling::Save))
{
QueryCache::Key key(
ast, res.pipeline.getHeader(),

View File

@ -1,6 +1,6 @@
#include <Parsers/Access/ParserPublicSSHKey.h>
#include <Parsers/Access/ASTPublicSSHKey.h>
#include <Parsers/Access/ASTPublicSSHKey.h>
#include <Parsers/CommonParsers.h>
#include <Parsers/parseIdentifierOrStringLiteral.h>

View File

@ -89,27 +89,8 @@ public:
return;
auto subquery_to_execute = in_second_argument;
if (auto * table_node = in_second_argument->as<TableNode>())
{
auto storage_snapshot = table_node->getStorageSnapshot();
auto columns_to_select = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::Ordinary));
size_t columns_to_select_size = columns_to_select.size();
auto column_nodes_to_select = std::make_shared<ListNode>();
column_nodes_to_select->getNodes().reserve(columns_to_select_size);
NamesAndTypes projection_columns;
projection_columns.reserve(columns_to_select_size);
for (auto & column : columns_to_select)
{
column_nodes_to_select->getNodes().emplace_back(std::make_shared<ColumnNode>(column, subquery_to_execute));
projection_columns.emplace_back(column.name, column.type);
}
auto subquery_for_table = std::make_shared<QueryNode>(Context::createCopy(planner_context.getQueryContext()));
subquery_for_table->setIsSubquery(true);
subquery_for_table->getProjectionNode() = std::move(column_nodes_to_select);
subquery_for_table->getJoinTree() = std::move(subquery_to_execute);
subquery_for_table->resolveProjectionColumns(std::move(projection_columns));
subquery_to_execute = std::move(subquery_for_table);
}
if (in_second_argument->as<TableNode>())
subquery_to_execute = buildSubqueryToReadColumnsFromTableExpression(std::move(subquery_to_execute), planner_context.getQueryContext());
sets.addFromSubquery(set_key, std::move(subquery_to_execute), settings);
}

View File

@ -68,6 +68,10 @@ std::unique_ptr<QueryPlan> createLocalPlan(
if (context->getSettingsRef().allow_experimental_analyzer)
{
/// For Analyzer, an identifier in GROUP BY/ORDER BY/LIMIT BY lists has already been resolved to
/// a ConstantNode in the QueryTree if it is an alias of a constant, so we should not replace
/// the ConstantNode with a ProjectionNode again (https://github.com/ClickHouse/ClickHouse/issues/62289).
new_context->setSetting("enable_positional_arguments", Field(false));
auto interpreter = InterpreterSelectQueryAnalyzer(query_ast, new_context, select_query_options);
query_plan = std::make_unique<QueryPlan>(std::move(interpreter).extractQueryPlan());
}

View File

@ -13,8 +13,6 @@
#include <QueryPipeline/ReadProgressCallback.h>
#include <Columns/ColumnConst.h>
#include <QueryPipeline/printPipeline.h>
namespace DB
{

View File

@ -1371,17 +1371,6 @@ std::string formatHTTPErrorResponseWhenUserIsConnectedToWrongPort(const Poco::Ut
return result;
}
[[ maybe_unused ]] String createChallenge()
{
#if USE_SSL
pcg64_fast rng(randomSeed());
UInt64 rand = rng();
return encodeSHA256(&rand, sizeof(rand));
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Can't generate challenge, because ClickHouse was built without OpenSSL");
#endif
}
}
std::unique_ptr<Session> TCPHandler::makeSession()
@ -1399,16 +1388,6 @@ std::unique_ptr<Session> TCPHandler::makeSession()
return res;
}
String TCPHandler::prepareStringForSshValidation(String username, String challenge)
{
String output;
output.append(std::to_string(client_tcp_protocol_version));
output.append(default_database);
output.append(username);
output.append(challenge);
return output;
}
void TCPHandler::receiveHello()
{
/// Receive `hello` packet.
@ -1466,11 +1445,9 @@ void TCPHandler::receiveHello()
return;
}
is_ssh_based_auth = startsWith(user, EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER) && password.empty();
is_ssh_based_auth = user.starts_with(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER) && password.empty();
if (is_ssh_based_auth)
{
user.erase(0, String(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER).size());
}
user.erase(0, std::string_view(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER).size());
session = makeSession();
const auto & client_info = session->getClientInfo();
@ -1498,7 +1475,9 @@ void TCPHandler::receiveHello()
}
}
}
#endif
#if USE_SSH
/// Perform handshake for SSH authentication
if (is_ssh_based_auth)
{
@ -1512,7 +1491,14 @@ void TCPHandler::receiveHello()
if (packet_type != Protocol::Client::SSHChallengeRequest)
throw Exception(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Server expected to receive a packet for requesting a challenge string");
auto challenge = createChallenge();
auto create_challenge = []()
{
pcg64_fast rng(randomSeed());
UInt64 rand = rng();
return encodeSHA256(&rand, sizeof(rand));
};
String challenge = create_challenge();
writeVarUInt(Protocol::Server::SSHChallenge, *out);
writeStringBinary(challenge, *out);
out->next();
@ -1523,7 +1509,17 @@ void TCPHandler::receiveHello()
throw Exception(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Server expected to receive a packet with a response for a challenge");
readStringBinary(signature, *in);
auto cred = SshCredentials(user, signature, prepareStringForSshValidation(user, challenge));
auto prepare_string_for_ssh_validation = [&](const String & username, const String & challenge_)
{
String output;
output.append(std::to_string(client_tcp_protocol_version));
output.append(default_database);
output.append(username);
output.append(challenge_);
return output;
};
auto cred = SshCredentials(user, signature, prepare_string_for_ssh_validation(user, challenge));
session->authenticate(cred, getClientAddress(client_info));
return;
}

View File

@ -216,7 +216,7 @@ private:
String default_database;
bool is_ssh_based_auth = false;
bool is_ssh_based_auth = false; /// authentication is via SSH pub-key challenge
/// For inter-server secret (remote_server.*.secret)
bool is_interserver_mode = false;
bool is_interserver_authenticated = false;
@ -248,7 +248,6 @@ private:
void extractConnectionSettingsFromContext(const ContextPtr & context);
std::unique_ptr<Session> makeSession();
String prepareStringForSshValidation(String user, String challenge);
bool receiveProxyHeader();
void receiveHello();

View File

@ -152,7 +152,7 @@ StorageFileLog::StorageFileLog(
if (!fileOrSymlinkPathStartsWith(path, getContext()->getUserFilesPath()))
{
if (LoadingStrictnessLevel::ATTACH <= mode)
if (LoadingStrictnessLevel::SECONDARY_CREATE <= mode)
{
LOG_ERROR(log, "The absolute data path should be inside `user_files_path`({})", getContext()->getUserFilesPath());
return;

View File

@ -1,6 +1,5 @@
#include <Storages/MemorySettings.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/ASTFunction.h>
#include <Common/Exception.h>
@ -11,6 +10,7 @@ namespace DB
namespace ErrorCodes
{
extern const int UNKNOWN_SETTING;
extern const int SETTING_CONSTRAINT_VIOLATION;
}
IMPLEMENT_SETTINGS_TRAITS(memorySettingsTraits, MEMORY_SETTINGS)
@ -32,5 +32,31 @@ void MemorySettings::loadFromQuery(ASTStorage & storage_def)
}
}
/// Packs every entry returned by `changes()` into a freshly created, non-standalone
/// `ASTSetQuery` and returns it as a generic AST pointer.
ASTPtr MemorySettings::getSettingsChangesQuery()
{
    auto set_query = std::make_shared<ASTSetQuery>();
    set_query->is_standalone = false;

    /// Copy the entries one by one, in the order `changes()` yields them.
    auto & collected = set_query->changes;
    for (const auto & changed_setting : changes())
        collected.push_back(changed_setting);

    return set_query;
}
/// Verifies that each lower retention bound does not exceed its upper bound.
/// Throws SETTING_CONSTRAINT_VIOLATION if `min_bytes_to_keep` > `max_bytes_to_keep`
/// or if `min_rows_to_keep` > `max_rows_to_keep`. The byte bounds are checked first.
void MemorySettings::sanityCheck() const
{
    const bool bytes_bounds_inverted = min_bytes_to_keep > max_bytes_to_keep;
    if (bytes_bounds_inverted)
    {
        throw Exception(
            ErrorCodes::SETTING_CONSTRAINT_VIOLATION,
            "Setting `min_bytes_to_keep` cannot be higher than the `max_bytes_to_keep`. `min_bytes_to_keep`: {}, `max_bytes_to_keep`: {}",
            min_bytes_to_keep,
            max_bytes_to_keep);
    }

    const bool rows_bounds_inverted = min_rows_to_keep > max_rows_to_keep;
    if (rows_bounds_inverted)
    {
        throw Exception(
            ErrorCodes::SETTING_CONSTRAINT_VIOLATION,
            "Setting `min_rows_to_keep` cannot be higher than the `max_rows_to_keep`. `min_rows_to_keep`: {}, `max_rows_to_keep`: {}",
            min_rows_to_keep,
            max_rows_to_keep);
    }
}
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Core/BaseSettings.h>
#include <Parsers/ASTSetQuery.h>
namespace DB
@ -24,6 +25,8 @@ DECLARE_SETTINGS_TRAITS(memorySettingsTraits, MEMORY_SETTINGS)
/// Settings of a Memory table, built on top of the generic BaseSettings machinery.
struct MemorySettings : public BaseSettings<memorySettingsTraits>
{
/// Loads settings from the storage definition of a query.
/// NOTE(review): the implementation is not fully visible here; presumably it reads the SETTINGS clause - confirm.
void loadFromQuery(ASTStorage & storage_def);
/// Returns an AST (a non-standalone ASTSetQuery) holding the settings reported by `changes()`.
ASTPtr getSettingsChangesQuery();
/// Throws SETTING_CONSTRAINT_VIOLATION if a `min_*_to_keep` value is greater than the matching `max_*_to_keep` value.
void sanityCheck() const;
};
}

View File

@ -46,7 +46,6 @@ namespace ErrorCodes
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int CANNOT_RESTORE_TABLE;
extern const int NOT_IMPLEMENTED;
extern const int SETTING_CONSTRAINT_VIOLATION;
}
class MemorySink : public SinkToStorage
@ -76,7 +75,7 @@ public:
convertDynamicColumnsToTuples(block, storage_snapshot);
}
if (storage.compress)
if (storage.getMemorySettingsRef().compress)
{
Block compressed_block;
for (const auto & elem : block)
@ -106,15 +105,16 @@ public:
auto new_data = std::make_unique<Blocks>(*(storage.data.get()));
UInt64 new_total_rows = storage.total_size_rows.load(std::memory_order_relaxed) + inserted_rows;
UInt64 new_total_bytes = storage.total_size_bytes.load(std::memory_order_relaxed) + inserted_bytes;
const auto & memory_settings = storage.getMemorySettingsRef();
while (!new_data->empty()
&& ((storage.max_bytes_to_keep && new_total_bytes > storage.max_bytes_to_keep)
|| (storage.max_rows_to_keep && new_total_rows > storage.max_rows_to_keep)))
&& ((memory_settings.max_bytes_to_keep && new_total_bytes > memory_settings.max_bytes_to_keep)
|| (memory_settings.max_rows_to_keep && new_total_rows > memory_settings.max_rows_to_keep)))
{
Block oldest_block = new_data->front();
UInt64 rows_to_remove = oldest_block.rows();
UInt64 bytes_to_remove = oldest_block.allocatedBytes();
if (new_total_bytes - bytes_to_remove < storage.min_bytes_to_keep
|| new_total_rows - rows_to_remove < storage.min_rows_to_keep)
if (new_total_bytes - bytes_to_remove < memory_settings.min_bytes_to_keep
|| new_total_rows - rows_to_remove < memory_settings.min_rows_to_keep)
{
break; // stop - removing next block will put us under min_bytes / min_rows threshold
}
@ -145,15 +145,16 @@ StorageMemory::StorageMemory(
ColumnsDescription columns_description_,
ConstraintsDescription constraints_,
const String & comment,
const MemorySettings & settings)
: IStorage(table_id_), data(std::make_unique<const Blocks>()), compress(settings.compress),
min_rows_to_keep(settings.min_rows_to_keep), max_rows_to_keep(settings.max_rows_to_keep),
min_bytes_to_keep(settings.min_bytes_to_keep), max_bytes_to_keep(settings.max_bytes_to_keep)
const MemorySettings & memory_settings_)
: IStorage(table_id_)
, data(std::make_unique<const Blocks>())
, memory_settings(memory_settings_)
{
StorageInMemoryMetadata storage_metadata;
storage_metadata.setColumns(std::move(columns_description_));
storage_metadata.setConstraints(std::move(constraints_));
storage_metadata.setComment(comment);
storage_metadata.setSettingsChanges(memory_settings.getSettingsChangesQuery());
setInMemoryMetadata(storage_metadata);
}
@ -239,7 +240,7 @@ void StorageMemory::mutate(const MutationCommands & commands, ContextPtr context
Block block;
while (executor.pull(block))
{
if (compress)
if (memory_settings.compress)
for (auto & elem : block)
elem.column = elem.column->compress();
@ -294,6 +295,59 @@ void StorageMemory::truncate(
total_size_rows.store(0, std::memory_order_relaxed);
}
/// Applies ALTER commands to the table metadata.
///
/// For a settings-only ALTER (`params.isSettingsAlter()`), the new settings are first built on a copy
/// of the current ones and validated with `sanityCheck()` (which throws on an invalid min/max pair).
/// Then, if needed, the oldest blocks are dropped so that the stored data fits the new limits,
/// and finally the new settings replace `memory_settings`.
/// In all cases the new metadata is passed to the database via `alterTable()` and then installed
/// as the in-memory metadata of the storage.
///
/// NOTE(review): data trimming and the `memory_settings` update happen before `alterTable()`;
/// if `alterTable()` can throw, the in-memory state would already be changed - confirm this is acceptable.
void StorageMemory::alter(const DB::AlterCommands & params, DB::ContextPtr context, DB::IStorage::AlterLockHolder & /*alter_lock_holder*/)
{
auto table_id = getStorageID();
StorageInMemoryMetadata new_metadata = getInMemoryMetadata();
params.apply(new_metadata, context);
if (params.isSettingsAlter())
{
auto & settings_changes = new_metadata.settings_changes->as<ASTSetQuery &>();
/// Work on a copy so that the current settings stay untouched if validation throws.
auto changed_settings = memory_settings;
changed_settings.applyChanges(settings_changes.changes);
changed_settings.sanityCheck();
/// When max_bytes_to_keep or max_rows_to_keep becomes smaller than the old value, or when the old
/// value was 0, already stored data may exceed the new limits, so old blocks may need to be removed.
if (!memory_settings.max_bytes_to_keep || memory_settings.max_bytes_to_keep > changed_settings.max_bytes_to_keep
|| !memory_settings.max_rows_to_keep || memory_settings.max_rows_to_keep > changed_settings.max_rows_to_keep)
{
std::lock_guard lock(mutex);
/// Trim a private copy of the block list and publish it with `data.set()` below.
auto new_data = std::make_unique<Blocks>(*(data.get()));
UInt64 new_total_rows = total_size_rows.load(std::memory_order_relaxed);
UInt64 new_total_bytes = total_size_bytes.load(std::memory_order_relaxed);
/// Drop blocks from the front while any non-zero max limit is exceeded (a max of 0 is ignored here).
while (!new_data->empty()
&& ((changed_settings.max_bytes_to_keep && new_total_bytes > changed_settings.max_bytes_to_keep)
|| (changed_settings.max_rows_to_keep && new_total_rows > changed_settings.max_rows_to_keep)))
{
Block oldest_block = new_data->front();
UInt64 rows_to_remove = oldest_block.rows();
UInt64 bytes_to_remove = oldest_block.allocatedBytes();
/// NOTE(review): unsigned subtraction - assumes the counters are never smaller than the size of the front block; confirm.
if (new_total_bytes - bytes_to_remove < changed_settings.min_bytes_to_keep
|| new_total_rows - rows_to_remove < changed_settings.min_rows_to_keep)
{
break; // stop - removing next block will put us under min_bytes / min_rows threshold
}
// delete old block from current storage table
new_total_rows -= rows_to_remove;
new_total_bytes -= bytes_to_remove;
new_data->erase(new_data->begin());
}
data.set(std::move(new_data));
total_size_rows.store(new_total_rows, std::memory_order_relaxed);
total_size_bytes.store(new_total_bytes, std::memory_order_relaxed);
}
/// NOTE(review): this assignment is outside the scope of the `mutex` lock above, while MemorySink reads the
/// settings through getMemorySettingsRef(); verify that the ALTER lock rules out concurrent readers.
memory_settings = std::move(changed_settings);
}
DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata);
setInMemoryMetadata(new_metadata);
}
namespace
{
@ -499,7 +553,7 @@ void StorageMemory::restoreDataImpl(const BackupPtr & backup, const String & dat
while (auto block = block_in.read())
{
if (compress)
if (memory_settings.compress)
{
Block compressed_block;
for (const auto & elem : block)
@ -534,7 +588,8 @@ void StorageMemory::checkAlterIsPossible(const AlterCommands & commands, Context
{
if (command.type != AlterCommand::Type::ADD_COLUMN && command.type != AlterCommand::Type::MODIFY_COLUMN
&& command.type != AlterCommand::Type::DROP_COLUMN && command.type != AlterCommand::Type::COMMENT_COLUMN
&& command.type != AlterCommand::Type::COMMENT_TABLE && command.type != AlterCommand::Type::RENAME_COLUMN)
&& command.type != AlterCommand::Type::COMMENT_TABLE && command.type != AlterCommand::Type::RENAME_COLUMN
&& command.type != AlterCommand::Type::MODIFY_SETTING)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter of type '{}' is not supported by storage {}",
command.type, getName());
}
@ -566,9 +621,7 @@ void registerStorageMemory(StorageFactory & factory)
if (has_settings)
settings.loadFromQuery(*args.storage_def);
if (settings.min_bytes_to_keep > settings.max_bytes_to_keep
|| settings.min_rows_to_keep > settings.max_rows_to_keep)
throw Exception(ErrorCodes::SETTING_CONSTRAINT_VIOLATION, "Min. bytes / rows must be set with a max.");
settings.sanityCheck();
return std::make_shared<StorageMemory>(args.table_id, args.columns, args.constraints, args.comment, settings);
},

View File

@ -31,7 +31,7 @@ public:
ColumnsDescription columns_description_,
ConstraintsDescription constraints_,
const String & comment,
const MemorySettings & settings = MemorySettings());
const MemorySettings & memory_settings_ = MemorySettings());
String getName() const override { return "Memory"; }
@ -46,6 +46,8 @@ public:
StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const override;
const MemorySettings & getMemorySettingsRef() const { return memory_settings; }
void read(
QueryPlan & query_plan,
const Names & column_names,
@ -78,6 +80,7 @@ public:
void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional<ASTs> & partitions) override;
void checkAlterIsPossible(const AlterCommands & commands, ContextPtr local_context) const override;
void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & alter_lock_holder) override;
std::optional<UInt64> totalRows(const Settings &) const override;
std::optional<UInt64> totalBytes(const Settings &) const override;
@ -134,12 +137,7 @@ private:
std::atomic<size_t> total_size_bytes = 0;
std::atomic<size_t> total_size_rows = 0;
bool compress;
UInt64 min_rows_to_keep;
UInt64 max_rows_to_keep;
UInt64 min_bytes_to_keep;
UInt64 max_bytes_to_keep;
MemorySettings memory_settings;
friend class ReadFromMemoryStorageStep;
};

View File

@ -5675,7 +5675,7 @@ std::optional<QueryPipeline> StorageReplicatedMergeTree::distributedWriteFromClu
{
auto connection = std::make_shared<Connection>(
node.host_name, node.port, query_context->getGlobalContext()->getCurrentDatabase(),
node.user, node.password, ssh::SSHKey(), node.quota_key, node.cluster, node.cluster_secret,
node.user, node.password, SSHKey(), node.quota_key, node.cluster, node.cluster_secret,
"ParallelInsertSelectInititiator",
node.compression,
node.secure

View File

@ -361,10 +361,14 @@ QueryTreeNodePtr buildQueryTreeForShard(const PlannerContextPtr & planner_contex
{
auto & in_function_subquery_node = in_function_node->getArguments().getNodes().at(1);
auto in_function_node_type = in_function_subquery_node->getNodeType();
if (in_function_node_type != QueryTreeNodeType::QUERY && in_function_node_type != QueryTreeNodeType::UNION)
if (in_function_node_type != QueryTreeNodeType::QUERY && in_function_node_type != QueryTreeNodeType::UNION && in_function_node_type != QueryTreeNodeType::TABLE)
continue;
auto temporary_table_expression_node = executeSubqueryNode(in_function_subquery_node,
auto subquery_to_execute = in_function_subquery_node;
if (subquery_to_execute->as<TableNode>())
subquery_to_execute = buildSubqueryToReadColumnsFromTableExpression(std::move(subquery_to_execute), planner_context->getQueryContext());
auto temporary_table_expression_node = executeSubqueryNode(subquery_to_execute,
planner_context->getMutableQueryContext(),
global_in_or_join_node.subquery_depth);

View File

@ -1757,6 +1757,32 @@ def _upload_build_profile_data(
logging.error("Failed to insert binary_size_file for the build, continue")
def _add_build_to_version_history(
    pr_info: PRInfo,
    job_report: JobReport,
    version: str,
    docker_tag: str,
    ch_helper: ClickHouseHelper,
) -> None:
    """Insert one record describing a build into the `default.version_history` table.

    The record combines the job start time, the pull request number and URL, the
    commit SHA and URL, the git ref, the version and the docker tag. It is printed
    as a `::notice` line and then sent through `ch_helper.insert_event_into`.

    Raises AssertionError if the commit SHA, commit URL, head ref or version is empty.
    """
    # Fail loudly on missing inputs, so this logic cannot break silently
    required_fields_present = (
        pr_info.sha and pr_info.commit_html_url and pr_info.head_ref and version
    )
    assert required_fields_present

    record = {
        "check_start_time": job_report.start_time,
        "pull_request_number": pr_info.number,
        "pull_request_url": pr_info.pr_html_url,
        "commit_sha": pr_info.sha,
        "commit_url": pr_info.commit_html_url,
        "version": version,
        "docker_tag": docker_tag,
        "git_ref": pr_info.head_ref,
    }

    print(f"::notice ::Log Adding record to versions history: {record}")
    ch_helper.insert_event_into(
        db="default", table="version_history", event=record
    )
def _run_test(job_name: str, run_command: str) -> int:
assert (
run_command or CI_CONFIG.get_job_config(job_name).run_command
@ -2114,6 +2140,15 @@ def main() -> int:
ch_helper.insert_events_into(
db="default", table="checks", events=prepared_events
)
if "DockerServerImage" in args.job_name and indata is not None:
_add_build_to_version_history(
pr_info,
job_report,
indata["version"],
indata["build"],
ch_helper,
)
else:
# no job report
print(f"No job report for {[args.job_name]} - do nothing")

View File

@ -9,7 +9,7 @@ set -xeuo pipefail
echo "Running prepare script"
export DEBIAN_FRONTEND=noninteractive
export RUNNER_VERSION=2.313.0
export RUNNER_VERSION=2.315.0
export RUNNER_HOME=/home/ubuntu/actions-runner
deb_arch() {

View File

@ -42,6 +42,17 @@ def test_cluster(start_cluster):
)
def test_global_in(start_cluster):
    """GLOBAL IN with a plain table name on the right-hand side must return a row from both nodes."""
    # Recreate the single-row table `u` on node1
    for setup_query in (
        "DROP TABLE IF EXISTS u;",
        "CREATE TABLE u(uid Int16) ENGINE=Memory as select 0",
    ):
        node1.query(setup_query)

    response = node1.query(
        """SELECT hostName(), * FROM clusterAllReplicas("one_shard_two_nodes", system.one) where dummy GLOBAL IN u"""
    )
    returned_rows = set(response.splitlines())

    # Row order is not checked, only the set of rows
    assert returned_rows == {"node1\t0", "node2\t0"}
@pytest.mark.parametrize(
"cluster",
[

View File

@ -7,6 +7,7 @@ DROP TABLE IF EXISTS eligible_test2;
-- enable query cache session-wide but also force it individually in each of below statements
SET use_query_cache = true;
SET query_cache_system_table_handling = 'save';
-- check that SELECT statements create entries in the query cache ...
SELECT 1 SETTINGS use_query_cache = true;

View File

@ -2,6 +2,7 @@
-- Tag no-parallel: Messes with internal cache
SET allow_experimental_analyzer = 1;
SET query_cache_system_table_handling = 'save';
SYSTEM DROP QUERY CACHE;

View File

@ -12,11 +12,10 @@ SYSTEM STOP MERGES t_cache_sparse;
INSERT INTO t_cache_sparse SELECT number, number FROM numbers(10000);
INSERT INTO t_cache_sparse SELECT number, 0 FROM numbers(10000);
SET use_query_cache = 1;
SET max_threads = 1;
SELECT v FROM t_cache_sparse FORMAT Null;
SELECT v FROM t_cache_sparse FORMAT Null;
SELECT v FROM t_cache_sparse SETTINGS use_query_cache = 1, max_threads = 1 FORMAT Null;
SELECT v FROM t_cache_sparse SETTINGS use_query_cache = 1, max_threads = 1 FORMAT Null;
SELECT count() FROM system.query_cache WHERE query LIKE 'SELECT v FROM t_cache_sparse%';
DROP TABLE t_cache_sparse;

View File

@ -0,0 +1,13 @@
The Default for query_cache_system_table_handling is = throw
0
Check behavior of query_cache_system_table_handling = throw
0
Check behavior of query_cache_system_table_handling = save
0
1
Check behavior of query_cache_system_table_handling = ignore
0
0
Other tests
0
0

View File

@ -0,0 +1,64 @@
-- Tags: no-parallel
-- Tag no-parallel: Messes with internal cache
SYSTEM DROP QUERY CACHE;
SELECT 'The Default for query_cache_system_table_handling is = throw';
-- Test that the query cache rejects queries that involve system tables.
SELECT * FROM system.one SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE }
SELECT count(*) FROM system.query_cache;
SYSTEM DROP QUERY CACHE;
SELECT 'Check behavior of query_cache_system_table_handling = throw';
-- Test that the query cache rejects queries that involve system tables.
SELECT * FROM system.one SETTINGS use_query_cache = 1, query_cache_system_table_handling = 'throw'; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE }
SELECT count(*) FROM system.query_cache;
SYSTEM DROP QUERY CACHE;
SELECT 'Check behavior of query_cache_system_table_handling = save';
-- Test that the query cache saves the result of queries that involve system tables.
SELECT * FROM system.one SETTINGS use_query_cache = 1, query_cache_system_table_handling = 'save';
SELECT count(*) FROM system.query_cache;
SYSTEM DROP QUERY CACHE;
SELECT 'Check behavior of query_cache_system_table_handling = ignore';
-- Test that the query cache ignores the result of queries that involve system tables.
SELECT * FROM system.one SETTINGS use_query_cache = 1, query_cache_system_table_handling = 'ignore';
SELECT count(*) FROM system.query_cache;
SYSTEM DROP QUERY CACHE;
SELECT 'Other tests';
-- Edge case which doesn't work well due to conceptual reasons (QueryCache is AST-based), test it anyways to have it documented.
USE system;
SELECT * FROM one SETTINGS use_query_cache = 1; -- doesn't throw but should
-- This query uses system.zero internally. Since the query cache works at AST level it does not "see" system.zero and must not complain.
SELECT * SETTINGS use_query_cache = 1;
-- information_schema is also treated as a system table
SELECT * FROM information_schema.tables SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE }
SELECT * FROM INFORMATION_SCHEMA.TABLES SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE }
-- System tables can be "hidden" inside e.g. table functions
SELECT * FROM clusterAllReplicas('test_shard_localhost', system.one) SETTINGS use_query_cache = 1; -- {serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE }
SELECT * FROM clusterAllReplicas('test_shard_localhost', 'system.one') SETTINGS use_query_cache = 1; -- {serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE }
-- Criminal edge case that a user creates a table named "system". The query cache must not reject queries against it.
DROP TABLE IF EXISTS system;
CREATE TABLE system (c UInt64) ENGINE = Memory;
SElECT * FROM system SETTINGS use_query_cache = 1;
DROP TABLE system;
-- But queries against system.system are rejected.
DROP TABLE IF EXISTS system.system;
CREATE TABLE system.system (c UInt64) ENGINE = Memory;
SElECT * FROM system.system SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE }
DROP TABLE system.system;
-- Cleanup
SYSTEM DROP QUERY CACHE;

View File

@ -1,10 +1,16 @@
-- Tags: no-fasttest, no-parallel
-- Tests user authentication with SSH public keys
DROP USER IF EXISTS test_user_02867;
CREATE USER test_user_02867 IDENTIFIED WITH ssh_key BY KEY 'clickhouse' TYPE 'ssh-rsa'; -- { serverError LIBSSH_ERROR }
CREATE USER test_user_02867 IDENTIFIED WITH ssh_key BY KEY 'clickhouse' TYPE 'clickhouse'; -- { serverError LIBSSH_ERROR }
CREATE USER test_user_02867 IDENTIFIED WITH ssh_key BY KEY 'key1' TYPE 'ssh-rsa', KEY 'key2' TYPE 'ssh-rsa'; -- { serverError LIBSSH_ERROR }
-- negative tests
CREATE USER test_user_02867 IDENTIFIED WITH ssh_key BY KEY 'invalid_key' TYPE 'ssh-rsa'; -- { serverError LIBSSH_ERROR }
CREATE USER test_user_02867 IDENTIFIED WITH ssh_key BY KEY 'invalid_key' TYPE 'ssh-rsa', KEY 'invalid_key' TYPE 'ssh-rsa'; -- { serverError LIBSSH_ERROR }
CREATE USER test_user_02867 IDENTIFIED WITH ssh_key
BY KEY 'AAAAB3NzaC1yc2EAAAADAQABAAABgQCVTUso7/LQcBljfsHwyuL6fWfIvS3BaVpYB8lwf/ZylSOltBy6YlABtTU3mIb197d2DW99RcLKk174f5Zj5rUukXbV0fnufWvwd37fbb1eKM8zxBYvXs53EI5QBPZgKACIzMpYYZeJnAP0oZhUfWWtKXpy/SQ5CHiEIGD9RNYDL+uXZejMwC5r/+f2AmrATBo+Y+WJFZIvhj4uznFYvyvNTUz/YDvZCk+vwwIgiv4BpFCaZm2TeETTj6SvK567bZznLP5HXrkVbB5lhxjAkahc2w/Yjm//Fwto3xsMoJwROxJEU8L1kZ40QWPqjo7Tmr6C/hL2cKDNgWOEqrjLKQmh576s1+PfxwXpVPjLK4PHVSvuJLV88sn0iPdspLlKlDCdc7T9MqIrjJfxuhqnaoFQ7U+oBte8vkm1wGu76+WEC3iNWVAiIVZxLx9rUEsDqj3OovqfLiRsTmNLeY94p2asZjkx7rU48ZwuYN5XGafYsArPscj9Ve6RoRrof+5Q7cc='
TYPE 'invalid_algorithm'; -- { serverError LIBSSH_ERROR }
CREATE USER test_user_02867 IDENTIFIED WITH ssh_key
BY KEY 'AAAAB3NzaC1yc2EAAAADAQABAAABgQCVTUso7/LQcBljfsHwyuL6fWfIvS3BaVpYB8lwf/ZylSOltBy6YlABtTU3mIb197d2DW99RcLKk174f5Zj5rUukXbV0fnufWvwd37fbb1eKM8zxBYvXs53EI5QBPZgKACIzMpYYZeJnAP0oZhUfWWtKXpy/SQ5CHiEIGD9RNYDL+uXZejMwC5r/+f2AmrATBo+Y+WJFZIvhj4uznFYvyvNTUz/YDvZCk+vwwIgiv4BpFCaZm2TeETTj6SvK567bZznLP5HXrkVbB5lhxjAkahc2w/Yjm//Fwto3xsMoJwROxJEU8L1kZ40QWPqjo7Tmr6C/hL2cKDNgWOEqrjLKQmh576s1+PfxwXpVPjLK4PHVSvuJLV88sn0iPdspLlKlDCdc7T9MqIrjJfxuhqnaoFQ7U+oBte8vkm1wGu76+WEC3iNWVAiIVZxLx9rUEsDqj3OovqfLiRsTmNLeY94p2asZjkx7rU48ZwuYN5XGafYsArPscj9Ve6RoRrof+5Q7cc='
TYPE 'ssh-rsa';

View File

@ -0,0 +1,7 @@
select 0 as x
from remote('127.0.0.{1,2}', system.one)
group by x;
select 0 as x
from remote('127.0.0.{1,2}', system.one)
order by x;

View File

@ -0,0 +1,20 @@
TESTING MODIFY SMALLER BYTES
17408
16384
65536
TESTING MODIFY SMALLER ROWS
1100
1000
500
TESTING ADD SETTINGS
50
1000
1070
1020
1100
TESTING ADD SETTINGS
50
1000
1020
1100
TESTING INVALID SETTINGS

View File

@ -0,0 +1,76 @@
SET max_block_size = 65409; -- Default value
SELECT 'TESTING MODIFY SMALLER BYTES';
DROP TABLE IF EXISTS memory;
CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_bytes_to_keep = 8192, max_bytes_to_keep = 32768;
INSERT INTO memory SELECT * FROM numbers(0, 100); -- 1024 bytes
INSERT INTO memory SELECT * FROM numbers(0, 3000); -- 16384 bytes
SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 17408 in total
ALTER TABLE memory MODIFY SETTING min_bytes_to_keep = 4096, max_bytes_to_keep = 16384;
SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 16384 in total after deleting
INSERT INTO memory SELECT * FROM numbers(3000, 10000); -- 65536 bytes
SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase();
SELECT 'TESTING MODIFY SMALLER ROWS';
DROP TABLE IF EXISTS memory;
CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 200, max_rows_to_keep = 2000;
INSERT INTO memory SELECT * FROM numbers(0, 100); -- 100 rows
INSERT INTO memory SELECT * FROM numbers(100, 1000); -- 1000 rows
SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1100 in total
ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000;
SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1000 in total after deleting
INSERT INTO memory SELECT * FROM numbers(1000, 500); -- 500 rows
SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 500 in total after deleting
SELECT 'TESTING ADD SETTINGS';
DROP TABLE IF EXISTS memory;
CREATE TABLE memory (i UInt32) ENGINE = Memory;
INSERT INTO memory SELECT * FROM numbers(0, 50); -- 50 rows
SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 50 in total
INSERT INTO memory SELECT * FROM numbers(50, 950); -- 950 rows
SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1000 in total
INSERT INTO memory SELECT * FROM numbers(2000, 70); -- 70 rows
SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1070 in total
ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000;
SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1020 in total after deleting
INSERT INTO memory SELECT * FROM numbers(3000, 1100); -- 1100 rows
SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1100 in total after deleting
SELECT 'TESTING ADD SETTINGS';
DROP TABLE IF EXISTS memory;
CREATE TABLE memory (i UInt32) ENGINE = Memory;
ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000;
INSERT INTO memory SELECT * FROM numbers(0, 50); -- 50 rows
SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 50 in total
INSERT INTO memory SELECT * FROM numbers(50, 950); -- 950 rows
SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1000 in total
INSERT INTO memory SELECT * FROM numbers(2000, 70); -- 70 rows
SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1020 in total after deleting
INSERT INTO memory SELECT * FROM numbers(3000, 1100); -- 1100 rows
SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1100 in total after deleting
SELECT 'TESTING INVALID SETTINGS';
DROP TABLE IF EXISTS memory;
CREATE TABLE memory (i UInt32) ENGINE = Memory;
ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100; -- { serverError 452 }
ALTER TABLE memory MODIFY SETTING min_bytes_to_keep = 100; -- { serverError 452 }
ALTER TABLE memory MODIFY SETTING max_rows_to_keep = 1000;
ALTER TABLE memory MODIFY SETTING max_bytes_to_keep = 1000;
DROP TABLE memory;

View File

@ -0,0 +1,9 @@
(3,2,1)
(2,1,0)
(0,0,0)
(3,2,1)
(2,1,0)
(3,2,1)
(0,0,0)
(0,1,1)
(1,0,1)

View File

@ -0,0 +1,13 @@
-- Tests for the tuple integer-division and modulo functions.
-- Queries carrying a serverError hint must fail; every other query prints one tuple.

-- tupleIntDiv: tuple divided by tuple; a zero divisor is an error.
SELECT tupleIntDiv((15, 10, 5), (0, 0, 0)); -- { serverError ILLEGAL_DIVISION }
SELECT tupleIntDiv((15, 10, 5), (5, 5, 5));
-- Fractional divisors.
SELECT tupleIntDiv((15, 10, 5), (5.5, 5.5, 5.5));
-- tupleIntDivOrZero: same operation, but zero divisors are tolerated.
SELECT tupleIntDivOrZero((5, 10, 15), (0, 0, 0)); -- no error thrown for zero divisors
-- tupleIntDivByNumber: tuple divided by a single number; a zero divisor is an error.
SELECT tupleIntDivByNumber((15, 10, 5), 0); -- { serverError ILLEGAL_DIVISION }
SELECT tupleIntDivByNumber((15, 10, 5), 5);
-- Fractional numerators and a fractional divisor.
SELECT tupleIntDivByNumber((15.2, 10.7, 5.5), 5.8);
-- tupleIntDivOrZeroByNumber: by-number variant that tolerates a zero divisor.
SELECT tupleIntDivOrZeroByNumber((15, 10, 5), 5);
SELECT tupleIntDivOrZeroByNumber((15, 10, 5), 0); -- no error thrown for zero divisors
-- tupleModulo: a single zero among the divisors is enough to raise the error.
SELECT tupleModulo((15, 10, 5), (0, 3, 2)); -- { serverError ILLEGAL_DIVISION }
SELECT tupleModulo((15, 10, 5), (5, 3, 2));
-- tupleModuloByNumber: modulo by a single number; zero is an error.
SELECT tupleModuloByNumber((15, 10, 5), 0); -- { serverError ILLEGAL_DIVISION }
SELECT tupleModuloByNumber((15, 10, 5), 2);

View File

@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Prints how many times "MaterializingTransform" occurs in the EXPLAIN PIPELINE output
# of an INSERT ... SELECT into the file() table function executed with
# --max_insert_threads=4, then removes the data file used by the query.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# File name derived from the per-test unique name provided by the test environment.
DATA_FILE="data_$CLICKHOUSE_TEST_UNIQUE_NAME.csv"

# grep -o emits one line per match, so wc -l yields the number of occurrences.
$CLICKHOUSE_CLIENT --max_insert_threads=4 --query="
EXPLAIN PIPELINE INSERT INTO FUNCTION file('$DATA_FILE') SELECT * FROM numbers_mt(1000000) ORDER BY number DESC
" | grep -o MaterializingTransform | wc -l

# Ask for the _path the file() table function reports for the data file, so that the
# file can be removed afterwards.
DATA_FILE_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path from file('$DATA_FILE', 'One')")

# Quoted so that a path containing whitespace or glob characters reaches rm as one argument.
rm "$DATA_FILE_PATH"

View File

@ -452,6 +452,9 @@ Khanna
KittenHouse
Klickhouse
Kolmogorov
Konstantin
kostik
kostikConsistentHash
Korzeniewski
Kubernetes
LDAP
@ -554,6 +557,17 @@ Mongodb
mortonDecode
mortonEncode
MsgPack
multiSearchAllPositionsCaseInsensitive
multiSearchAllPositionsCaseInsensitiveUTF
multiSearchAnyCaseInsensitive
multiSearchAnyCaseInsensitiveUTF
multiSearchAnyUTF
multiSearchFirstIndexCaseInsensitive
multiSearchFirstIndexCaseInsensitiveUTF
multiSearchFirstIndexUTF
multiSearchFirstPositionCaseInsensitive
multiSearchFirstPositionCaseInsensitiveUTF
multiSearchFirstPositionUTF
MultiPolygon
Multiline
Multiqueries
@ -655,6 +669,7 @@ OTLP
OUTFILE
ObjectId
Observability
Oblakov
Octonica
Ok
OnTime
@ -860,6 +875,7 @@ Soundex
SpanKind
Spearman's
SquaredDistance
SquaredNorm
StartTLS
StartTime
StartupSystemTables
@ -1935,6 +1951,7 @@ mmap
mmapped
modularization
moduloOrZero
moduli
mongodb
monthName
moscow
@ -2646,6 +2663,12 @@ tupleMultiplyByNumber
tupleNegate
tuplePlus
tupleToNameValuePairs
tupleIntDiv
tupleIntDivByNumber
tupleIntDivOrZero
tupleIntDivOrZeroByNumber
tupleModulo
tupleModuloByNumber
turbostat
txt
typename
@ -2760,6 +2783,7 @@ wordShingleSimHashUTF
wordshingleMinHash
writability
wrt
wyHash
xcode
xeus
xkcd