Merge remote-tracking branch 'upstream/master' into HEAD

Anton Popov 2024-10-15 14:56:23 +00:00
commit 8738f57d4d
113 changed files with 930 additions and 729 deletions

View File

@ -15,7 +15,7 @@ assignees: ''
**Use case**
> A clear and concise description of what is the intended usage scenario is.
> A clear and concise description of what the intended usage scenario is.
**Describe the solution you'd like**

View File

@ -11,6 +11,10 @@ option (ARCH_NATIVE "Add -march=native compiler flag. This makes your binaries n
if (ARCH_NATIVE)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=native")
# Populate the ENABLE_ option flags. This is required for the build of some third-party dependencies, specifically snappy, which
# (somewhat weirdly) expects the respective SNAPPY_HAVE_ preprocessor variables to be populated, in addition to the microarchitecture
# feature flags being enabled in the compiler. This fixes the ARCH_NATIVE flag by automatically populating the ENABLE_ option flags
# according to the current CPU's capabilities, detected using clang.
if (ARCH_AMD64)
execute_process(
COMMAND sh -c "clang -E - -march=native -###"

View File

@ -196,7 +196,6 @@ When writing docs, you can use prepared templates. Copy the code of a template a
Templates:
- [Function](_description_templates/template-function.md)
- [Setting](_description_templates/template-setting.md)
- [Server Setting](_description_templates/template-server-setting.md)
- [Database or Table engine](_description_templates/template-engine.md)
- [System table](_description_templates/template-system-table.md)

View File

@ -1,27 +0,0 @@
## setting_name {#setting_name}
Description.
For the switch setting, use the typical phrase: “Enables or disables something ...”.
Possible values:
*For switcher setting:*
- 0 — Disabled.
- 1 — Enabled.
*For another setting (typical phrases):*
- Positive integer.
- 0 — Disabled or unlimited or something else.
Default value: `value`.
**Additional Info** (Optional)
The name of an additional section can be any, for example, **Usage**.
**See Also** (Optional)
- [link](#)

View File

@ -1,11 +0,0 @@
sudo apt-get install -y apt-transport-https ca-certificates dirmngr
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754
echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \
/etc/apt/sources.list.d/clickhouse.list
sudo apt-get update
sudo apt-get install -y clickhouse-server clickhouse-client
sudo service clickhouse-server start
clickhouse-client # or "clickhouse-client --password" if you've set up a password.

View File

@ -1,6 +0,0 @@
sudo yum install -y yum-utils
sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo
sudo yum install -y clickhouse-server clickhouse-client
sudo /etc/init.d/clickhouse-server start
clickhouse-client # or "clickhouse-client --password" if you set up a password.

View File

@ -1,32 +0,0 @@
LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \
grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1)
export LATEST_VERSION
case $(uname -m) in
x86_64) ARCH=amd64 ;;
aarch64) ARCH=arm64 ;;
*) echo "Unknown architecture $(uname -m)"; exit 1 ;;
esac
for PKG in clickhouse-common-static clickhouse-common-static-dbg clickhouse-server clickhouse-client
do
curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION-${ARCH}.tgz" \
|| curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION.tgz"
done
tar -xzvf "clickhouse-common-static-$LATEST_VERSION-${ARCH}.tgz" \
|| tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz"
sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh"
tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION-${ARCH}.tgz" \
|| tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz"
sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh"
tar -xzvf "clickhouse-server-$LATEST_VERSION-${ARCH}.tgz" \
|| tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz"
sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh" configure
sudo /etc/init.d/clickhouse-server start
tar -xzvf "clickhouse-client-$LATEST_VERSION-${ARCH}.tgz" \
|| tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz"
sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh"

View File

@ -63,7 +63,34 @@ Currently there are 3 ways to authenticate:
- `SAS Token` - Can be used by providing an `endpoint`, `connection_string` or `storage_account_url`. It is identified by the presence of '?' in the URL.
- `Workload Identity` - Can be used by providing an `endpoint` or `storage_account_url`. If `use_workload_identity` parameter is set in config, ([workload identity](https://github.com/Azure/azure-sdk-for-cpp/tree/main/sdk/identity/azure-identity#authenticate-azure-hosted-applications)) is used for authentication.
### Data cache {#data-cache}
`Azure` table engine supports data caching on local disk.
See filesystem cache configuration options and usage in this [section](/docs/en/operations/storing-data.md/#using-local-cache).
Caching is based on the path and ETag of the storage object, so ClickHouse will not read a stale cached version.
To enable caching, use the settings `filesystem_cache_name = '<name>'` and `enable_filesystem_cache = 1`.
```sql
SELECT *
FROM azureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;', 'test_container', 'test_table', 'CSV')
SETTINGS filesystem_cache_name = 'cache_for_azure', enable_filesystem_cache = 1;
```
There are two ways to define a cache in the configuration file.
1. Add the following section to the ClickHouse configuration file:
``` xml
<clickhouse>
<filesystem_caches>
<cache_for_azure>
<path>path to cache directory</path>
<max_size>10Gi</max_size>
</cache_for_azure>
</filesystem_caches>
</clickhouse>
```
2. Reuse the cache configuration (and therefore the cache storage) from the ClickHouse `storage_configuration` section, [described here](/docs/en/operations/storing-data.md/#using-local-cache)
## See also

View File

@ -48,6 +48,10 @@ Using named collections:
CREATE TABLE deltalake ENGINE=DeltaLake(deltalake_conf, filename = 'test_table')
```
### Data cache {#data-cache}
The `DeltaLake` table engine and table function support data caching in the same way as the `S3`, `AzureBlobStorage`, and `HDFS` storages. See [here](../../../engines/table-engines/integrations/s3.md#data-cache).
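For illustration, caching can be enabled per query; the sketch below assumes the named collection `deltalake_conf` from the example above and a cache named `cache_for_s3` that is defined in the server configuration (see the linked section):
```sql
-- Hypothetical cache name; the cache itself must be defined in the server configuration.
SELECT count()
FROM deltaLake(deltalake_conf, filename = 'test_table')
SETTINGS filesystem_cache_name = 'cache_for_s3', enable_filesystem_cache = 1;
```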
## See also
- [deltaLake table function](../../../sql-reference/table-functions/deltalake.md)

View File

@ -63,6 +63,10 @@ CREATE TABLE iceberg_table ENGINE=IcebergS3(iceberg_conf, filename = 'test_table
The `Iceberg` table engine is now an alias for `IcebergS3`.
### Data cache {#data-cache}
The `Iceberg` table engine and table function support data caching in the same way as the `S3`, `AzureBlobStorage`, and `HDFS` storages. See [here](../../../engines/table-engines/integrations/s3.md#data-cache).
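For illustration, a minimal sketch of enabling the cache for a query over the `icebergS3` table function (the bucket URL, credentials, and cache name are placeholders; the cache must be defined in the server configuration):
```sql
-- Hypothetical bucket, credentials, and cache name.
SELECT count()
FROM icebergS3('http://test.s3.amazonaws.com/clickhouse-bucket/test_table', 'test', '<password>')
SETTINGS filesystem_cache_name = 'cache_for_s3', enable_filesystem_cache = 1;
```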
## See also
- [iceberg table function](/docs/en/sql-reference/table-functions/iceberg.md)

View File

@ -4,12 +4,8 @@ sidebar_position: 138
sidebar_label: MySQL
---
import CloudAvailableBadge from '@theme/badges/CloudAvailableBadge';
# MySQL Table Engine
<CloudAvailableBadge />
The MySQL engine allows you to perform `SELECT` and `INSERT` queries on data that is stored on a remote MySQL server.
## Creating a Table {#creating-a-table}

View File

@ -26,6 +26,7 @@ SELECT * FROM s3_engine_table LIMIT 2;
│ two │ 2 │
└──────┴───────┘
```
## Create Table {#creating-a-table}
``` sql
@ -43,6 +44,37 @@ CREATE TABLE s3_engine_table (name String, value UInt32)
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will auto-detect compression by file extension.
### Data cache {#data-cache}
`S3` table engine supports data caching on local disk.
See filesystem cache configuration options and usage in this [section](/docs/en/operations/storing-data.md/#using-local-cache).
Caching is based on the path and ETag of the storage object, so ClickHouse will not read a stale cached version.
To enable caching, use the settings `filesystem_cache_name = '<name>'` and `enable_filesystem_cache = 1`.
```sql
SELECT *
FROM s3('http://minio:10000/clickhouse//test_3.csv', 'minioadmin', 'minioadminpassword', 'CSV')
SETTINGS filesystem_cache_name = 'cache_for_s3', enable_filesystem_cache = 1;
```
There are two ways to define a cache in the configuration file.
1. Add the following section to the ClickHouse configuration file:
``` xml
<clickhouse>
<filesystem_caches>
<cache_for_s3>
<path>path to cache directory</path>
<max_size>10Gi</max_size>
</cache_for_s3>
</filesystem_caches>
</clickhouse>
```
2. Reuse the cache configuration (and therefore the cache storage) from the ClickHouse `storage_configuration` section, [described here](/docs/en/operations/storing-data.md/#using-local-cache)
### PARTITION BY
`PARTITION BY` — Optional. In most cases you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression).
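As a rough sketch (hypothetical bucket and schema), a partition key can be combined with a `{_partition_id}` substitution in the object key so that each partition is written to its own file:
```sql
-- Sketch only: {_partition_id} in the path is replaced by the partition value on write.
CREATE TABLE s3_partitioned (name String, value UInt32, year UInt16)
ENGINE = S3('https://my-bucket.s3.amazonaws.com/data/file_{_partition_id}.csv', 'CSV')
PARTITION BY year;

INSERT INTO s3_partitioned VALUES ('one', 1, 2023), ('two', 2, 2024);
```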

View File

@ -31,6 +31,10 @@ The table must be enabled in the server configuration, see the `opentelemetry_sp
The tags or attributes are saved as two parallel arrays, containing the keys and values. Use [ARRAY JOIN](../sql-reference/statements/select/array-join.md) to work with them.
## Log-query-settings
ClickHouse allows you to log changes to query settings during query execution. When enabled, any modifications made to query settings will be recorded in the OpenTelemetry span log. This feature is particularly useful in production environments for tracking configuration changes that may affect query performance.
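For example, the following sketch (it assumes `opentelemetry_span_log` is enabled in the server configuration) records the settings used by a query and then inspects the most recent spans:
```sql
-- Record query settings in the span log, then look at the latest spans.
SET log_query_settings = 1;

SELECT count()
FROM system.numbers
LIMIT 1000000
SETTINGS max_threads = 2;

SELECT operation_name, start_time_us, finish_time_us
FROM system.opentelemetry_span_log
ORDER BY finish_time_us DESC
LIMIT 10;
```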
## Integration with monitoring systems
At the moment, there is no ready-made tool that can export the tracing data from ClickHouse to a monitoring system.

View File

@ -532,6 +532,14 @@ Default value: 0
Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()
## log_query_settings {#log-query-settings}
Type: Bool
Default value: 1
Log query settings into the query_log and opentelemetry_span_log.
## allow_nondeterministic_mutations {#allow_nondeterministic_mutations}
Type: Bool
@ -9721,6 +9729,10 @@ Default value: 15
The heartbeat interval in seconds to indicate watch query is alive.
## enforce_strict_identifier_format
If enabled, only allow identifiers containing alphanumeric characters and underscores.
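As a brief sketch (hypothetical table names), identifiers that contain anything other than alphanumeric characters and underscores are rejected when this setting is enabled:
```sql
SET enforce_strict_identifier_format = 1;

-- Allowed: only alphanumeric characters and underscores.
CREATE TABLE my_table_1 (id UInt64) ENGINE = Memory;

-- Rejected under this setting: the identifier contains a hyphen.
CREATE TABLE `my-table` (id UInt64) ENGINE = Memory;
```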
## workload {#workload}
Type: String

View File

@ -1,190 +0,0 @@
---
slug: /en/sql-reference/ansi
sidebar_position: 40
sidebar_label: ANSI Compatibility
title: "ANSI SQL Compatibility of ClickHouse SQL Dialect"
---
:::note
This article relies on Table 38, “Feature taxonomy and definition for mandatory features”, Annex F of [ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8).
:::
## Differences in Behaviour
The following table lists cases where a query feature works in ClickHouse but does not behave as specified in ANSI SQL.
| Feature ID | Feature Name | Difference |
|------------|-----------------------------|-----------------------------------------------------------------------------------------------------------|
| E011 | Numeric data types | Numeric literal with period is interpreted as approximate (`Float64`) instead of exact (`Decimal`) |
| E051-05 | Select items can be renamed | Item renames have a wider visibility scope than just the SELECT result |
| E141-01 | NOT NULL constraints | `NOT NULL` is implied for table columns by default |
| E011-04    | Arithmetic operators        | ClickHouse uses overflowing arithmetic instead of checked arithmetic, and changes the result data type based on custom rules |
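For instance, the first difference can be observed directly:
```sql
-- A numeric literal with a period is parsed as Float64, not Decimal.
SELECT toTypeName(1.5);                   -- Float64
SELECT toTypeName(toDecimal32('1.5', 1)); -- Decimal(9, 1)
```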
## Feature Status
| Feature ID | Feature Name | Status | Comment |
|------------|--------------------------------------------------------------------------------------------------------------------------|----------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **E011** | **Numeric data types** | <span class="text-warning">Partial</span> | |
| E011-01 | INTEGER and SMALLINT data types | <span class="text-success">Yes</span> | |
| E011-02    | REAL, DOUBLE PRECISION and FLOAT data types | <span class="text-success">Yes</span> | |
| E011-03 | DECIMAL and NUMERIC data types | <span class="text-success">Yes</span> | |
| E011-04 | Arithmetic operators | <span class="text-success">Yes</span> | |
| E011-05 | Numeric comparison | <span class="text-success">Yes</span> | |
| E011-06 | Implicit casting among the numeric data types | <span class="text-danger">No</span> | ANSI SQL allows arbitrary implicit cast between numeric types, while ClickHouse relies on functions having multiple overloads instead of implicit cast |
| **E021** | **Character string types** | <span class="text-warning">Partial</span> | |
| E021-01 | CHARACTER data type | <span class="text-success">Yes</span> | |
| E021-02 | CHARACTER VARYING data type | <span class="text-success">Yes</span> | |
| E021-03 | Character literals | <span class="text-success">Yes</span> | |
| E021-04 | CHARACTER_LENGTH function | <span class="text-warning">Partial</span> | No `USING` clause |
| E021-05 | OCTET_LENGTH function | <span class="text-danger">No</span> | `LENGTH` behaves similarly |
| E021-06 | SUBSTRING | <span class="text-warning">Partial</span> | No support for `SIMILAR` and `ESCAPE` clauses, no `SUBSTRING_REGEX` variant |
| E021-07 | Character concatenation | <span class="text-warning">Partial</span> | No `COLLATE` clause |
| E021-08 | UPPER and LOWER functions | <span class="text-success">Yes</span> | |
| E021-09 | TRIM function | <span class="text-success">Yes</span> | |
| E021-10 | Implicit casting among the fixed-length and variable-length character string types | <span class="text-warning">Partial</span> | ANSI SQL allows arbitrary implicit cast between string types, while ClickHouse relies on functions having multiple overloads instead of implicit cast |
| E021-11 | POSITION function | <span class="text-warning">Partial</span> | No support for `IN` and `USING` clauses, no `POSITION_REGEX` variant |
| E021-12 | Character comparison | <span class="text-success">Yes</span> | |
| **E031** | **Identifiers** | <span class="text-warning">Partial</span>| |
| E031-01 | Delimited identifiers | <span class="text-warning">Partial</span> | Unicode literal support is limited |
| E031-02 | Lower case identifiers | <span class="text-success">Yes</span> | |
| E031-03 | Trailing underscore | <span class="text-success">Yes</span> | |
| **E051** | **Basic query specification** | <span class="text-warning">Partial</span>| |
| E051-01 | SELECT DISTINCT | <span class="text-success">Yes</span> | |
| E051-02 | GROUP BY clause | <span class="text-success">Yes</span> | |
| E051-04 | GROUP BY can contain columns not in `<select list>` | <span class="text-success">Yes</span> | |
| E051-05 | Select items can be renamed | <span class="text-success">Yes</span> | |
| E051-06 | HAVING clause | <span class="text-success">Yes</span> | |
| E051-07 | Qualified \* in select list | <span class="text-success">Yes</span> | |
| E051-08 | Correlation name in the FROM clause | <span class="text-success">Yes</span> | |
| E051-09 | Rename columns in the FROM clause | <span class="text-danger">No</span> | |
| **E061** | **Basic predicates and search conditions** | <span class="text-warning">Partial</span> | |
| E061-01 | Comparison predicate | <span class="text-success">Yes</span> | |
| E061-02 | BETWEEN predicate | <span class="text-warning">Partial</span> | No `SYMMETRIC` and `ASYMMETRIC` clause |
| E061-03 | IN predicate with list of values | <span class="text-success">Yes</span> | |
| E061-04 | LIKE predicate | <span class="text-success">Yes</span> | |
| E061-05 | LIKE predicate: ESCAPE clause | <span class="text-danger">No</span> | |
| E061-06 | NULL predicate | <span class="text-success">Yes</span> | |
| E061-07 | Quantified comparison predicate | <span class="text-danger">No</span> | |
| E061-08 | EXISTS predicate | <span class="text-danger">No</span> | |
| E061-09 | Subqueries in comparison predicate | <span class="text-success">Yes</span> | |
| E061-11 | Subqueries in IN predicate | <span class="text-success">Yes</span> | |
| E061-12 | Subqueries in quantified comparison predicate | <span class="text-danger">No</span> | |
| E061-13 | Correlated subqueries | <span class="text-danger">No</span> | |
| E061-14 | Search condition | <span class="text-success">Yes</span> | |
| **E071** | **Basic query expressions** | <span class="text-warning">Partial</span> | |
| E071-01 | UNION DISTINCT table operator | <span class="text-success">Yes</span> | |
| E071-02 | UNION ALL table operator | <span class="text-success">Yes</span> | |
| E071-03 | EXCEPT DISTINCT table operator | <span class="text-danger">No</span> | |
| E071-05 | Columns combined via table operators need not have exactly the same data type | <span class="text-success">Yes</span> | |
| E071-06 | Table operators in subqueries | <span class="text-success">Yes</span> | |
| **E081** | **Basic privileges** | <span class="text-success">Yes</span> |
| E081-01 | SELECT privilege at the table level | <span class="text-success">Yes</span> |
| E081-02 | DELETE privilege | |
| E081-03 | INSERT privilege at the table level | <span class="text-success">Yes</span> |
| E081-04 | UPDATE privilege at the table level | <span class="text-success">Yes</span> |
| E081-05 | UPDATE privilege at the column level | |
| E081-06 | REFERENCES privilege at the table level | | |
| E081-07 | REFERENCES privilege at the column level | | |
| E081-08 | WITH GRANT OPTION | <span class="text-success">Yes</span> | |
| E081-09 | USAGE privilege | | |
| E081-10 | EXECUTE privilege | | |
| **E091** | **Set functions** |<span class="text-success">Yes</span> |
| E091-01 | AVG | <span class="text-success">Yes</span> | |
| E091-02 | COUNT | <span class="text-success">Yes</span> | |
| E091-03 | MAX | <span class="text-success">Yes</span> | |
| E091-04 | MIN | <span class="text-success">Yes</span> | |
| E091-05 | SUM | <span class="text-success">Yes</span> | |
| E091-06 | ALL quantifier | <span class="text-success">Yes</span> | |
| E091-07 | DISTINCT quantifier | <span class="text-success">Yes</span> | Not all aggregate functions supported |
| **E101** | **Basic data manipulation** | <span class="text-warning">Partial</span> | |
| E101-01 | INSERT statement | <span class="text-success">Yes</span> | Note: primary key in ClickHouse does not imply the `UNIQUE` constraint |
| E101-03    | Searched UPDATE statement   | <span class="text-warning">Partial</span> | There's an `ALTER UPDATE` statement for batch data modification |
| E101-04    | Searched DELETE statement   | <span class="text-warning">Partial</span> | There's an `ALTER DELETE` statement for batch data removal |
| **E111** | **Single row SELECT statement** | <span class="text-danger">No</span> | |
| **E121** | **Basic cursor support** | <span class="text-danger">No</span> | |
| E121-01 | DECLARE CURSOR | <span class="text-danger">No</span> | |
| E121-02 | ORDER BY columns need not be in select list | <span class="text-success">Yes</span> | |
| E121-03 | Value expressions in ORDER BY clause | <span class="text-success">Yes</span> | |
| E121-04 | OPEN statement | <span class="text-danger">No</span> | |
| E121-06 | Positioned UPDATE statement | <span class="text-danger">No</span> | |
| E121-07 | Positioned DELETE statement | <span class="text-danger">No</span> | |
| E121-08 | CLOSE statement | <span class="text-danger">No</span> | |
| E121-10 | FETCH statement: implicit NEXT | <span class="text-danger">No</span> | |
| E121-17 | WITH HOLD cursors | <span class="text-danger">No</span> | |
| **E131** | **Null value support (nulls in lieu of values)** | <span class="text-success">Yes</span> | Some restrictions apply |
| **E141** | **Basic integrity constraints** | <span class="text-warning">Partial</span> | |
| E141-01 | NOT NULL constraints | <span class="text-success">Yes</span> | Note: `NOT NULL` is implied for table columns by default |
| E141-02 | UNIQUE constraint of NOT NULL columns | <span class="text-danger">No</span> | |
| E141-03 | PRIMARY KEY constraints | <span class="text-warning">Partial</span> | |
| E141-04 | Basic FOREIGN KEY constraint with the NO ACTION default for both referential delete action and referential update action | <span class="text-danger">No</span> | |
| E141-06 | CHECK constraint | <span class="text-success">Yes</span> | |
| E141-07 | Column defaults | <span class="text-success">Yes</span> | |
| E141-08 | NOT NULL inferred on PRIMARY KEY | <span class="text-success">Yes</span> | |
| E141-10 | Names in a foreign key can be specified in any order | <span class="text-danger">No</span> | |
| **E151** | **Transaction support** | <span class="text-danger">No</span> | |
| E151-01 | COMMIT statement | <span class="text-danger">No</span> | |
| E151-02 | ROLLBACK statement | <span class="text-danger">No</span> | |
| **E152** | **Basic SET TRANSACTION statement** | <span class="text-danger">No</span> | |
| E152-01 | SET TRANSACTION statement: ISOLATION LEVEL SERIALIZABLE clause | <span class="text-danger">No</span> | |
| E152-02 | SET TRANSACTION statement: READ ONLY and READ WRITE clauses | <span class="text-danger">No</span> | |
| **E153** | **Updatable queries with subqueries** | <span class="text-success">Yes</span> | |
| **E161** | **SQL comments using leading double minus** | <span class="text-success">Yes</span> | |
| **E171** | **SQLSTATE support** | <span class="text-danger">No</span> | |
| **E182** | **Host language binding** | <span class="text-danger">No</span> | |
| **F031** | **Basic schema manipulation** | <span class="text-warning">Partial</span>| |
| F031-01 | CREATE TABLE statement to create persistent base tables | <span class="text-warning">Partial</span> | No `SYSTEM VERSIONING`, `ON COMMIT`, `GLOBAL`, `LOCAL`, `PRESERVE`, `DELETE`, `REF IS`, `WITH OPTIONS`, `UNDER`, `LIKE`, `PERIOD FOR` clauses and no support for user resolved data types |
| F031-02 | CREATE VIEW statement | <span class="text-warning">Partial</span> | No `RECURSIVE`, `CHECK`, `UNDER`, `WITH OPTIONS` clauses and no support for user resolved data types |
| F031-03 | GRANT statement | <span class="text-success">Yes</span> | |
| F031-04 | ALTER TABLE statement: ADD COLUMN clause | <span class="text-success">Yes</span> | No support for `GENERATED` clause and system time period |
| F031-13 | DROP TABLE statement: RESTRICT clause | <span class="text-danger">No</span> | |
| F031-16 | DROP VIEW statement: RESTRICT clause | <span class="text-danger">No</span> | |
| F031-19 | REVOKE statement: RESTRICT clause | <span class="text-danger">No</span> | |
| **F041** | **Basic joined table** | <span class="text-warning">Partial</span> | |
| F041-01 | Inner join (but not necessarily the INNER keyword) | <span class="text-success">Yes</span> | |
| F041-02 | INNER keyword | <span class="text-success">Yes</span> | |
| F041-03 | LEFT OUTER JOIN | <span class="text-success">Yes</span> | |
| F041-04 | RIGHT OUTER JOIN | <span class="text-success">Yes</span> | |
| F041-05 | Outer joins can be nested | <span class="text-success">Yes</span> | |
| F041-07 | The inner table in a left or right outer join can also be used in an inner join | <span class="text-success">Yes</span> | |
| F041-08 | All comparison operators are supported (rather than just =) | <span class="text-danger">No</span> | |
| **F051** | **Basic date and time** | <span class="text-warning">Partial</span> | |
| F051-01 | DATE data type (including support of DATE literal) | <span class="text-success">Yes</span> | |
| F051-02 | TIME data type (including support of TIME literal) with fractional seconds precision of at least 0 | <span class="text-danger">No</span> | |
| F051-03 | TIMESTAMP data type (including support of TIMESTAMP literal) with fractional seconds precision of at least 0 and 6 | <span class="text-success">Yes</span> | |
| F051-04 | Comparison predicate on DATE, TIME, and TIMESTAMP data types | <span class="text-success">Yes</span> | |
| F051-05 | Explicit CAST between datetime types and character string types | <span class="text-success">Yes</span> | |
| F051-06 | CURRENT_DATE | <span class="text-danger">No</span> | `today()` is similar |
| F051-07 | LOCALTIME | <span class="text-danger">No</span> | `now()` is similar |
| F051-08 | LOCALTIMESTAMP | <span class="text-danger">No</span> | |
| **F081** | **UNION and EXCEPT in views** | <span class="text-warning">Partial</span> | |
| **F131** | **Grouped operations** | <span class="text-warning">Partial</span> | |
| F131-01 | WHERE, GROUP BY, and HAVING clauses supported in queries with grouped views | <span class="text-success">Yes</span> | |
| F131-02 | Multiple tables supported in queries with grouped views | <span class="text-success">Yes</span> | |
| F131-03 | Set functions supported in queries with grouped views | <span class="text-success">Yes</span> | |
| F131-04 | Subqueries with GROUP BY and HAVING clauses and grouped views | <span class="text-success">Yes</span> | |
| F131-05 | Single row SELECT with GROUP BY and HAVING clauses and grouped views | <span class="text-danger">No</span> | |
| **F181** | **Multiple module support** | <span class="text-danger">No</span> | |
| **F201** | **CAST function** | <span class="text-success">Yes</span> | |
| **F221** | **Explicit defaults** | <span class="text-danger">No</span> | |
| **F261** | **CASE expression** | <span class="text-success">Yes</span> | |
| F261-01 | Simple CASE | <span class="text-success">Yes</span> | |
| F261-02 | Searched CASE | <span class="text-success">Yes</span> | |
| F261-03 | NULLIF | <span class="text-success">Yes</span> | |
| F261-04 | COALESCE | <span class="text-success">Yes</span> | |
| **F311** | **Schema definition statement** | <span class="text-warning">Partial</span> | |
| F311-01 | CREATE SCHEMA | <span class="text-warning">Partial</span> | See CREATE DATABASE |
| F311-02 | CREATE TABLE for persistent base tables | <span class="text-success">Yes</span> | |
| F311-03 | CREATE VIEW | <span class="text-success">Yes</span> | |
| F311-04 | CREATE VIEW: WITH CHECK OPTION | <span class="text-danger">No</span> | |
| F311-05 | GRANT statement | <span class="text-success">Yes</span> | |
| **F471** | **Scalar subquery values** | <span class="text-success">Yes</span> | |
| **F481** | **Expanded NULL predicate** | <span class="text-success">Yes</span> | |
| **F812** | **Basic flagging** | <span class="text-danger">No</span> | |
| **S011** | **Distinct data types** | | |
| **T321** | **Basic SQL-invoked routines** | <span class="text-danger">No</span> | |
| T321-01 | User-defined functions with no overloading | <span class="text-danger">No</span> | |
| T321-02 | User-defined stored procedures with no overloading | <span class="text-danger">No</span> | |
| T321-03 | Function invocation | <span class="text-danger">No</span> | |
| T321-04 | CALL statement | <span class="text-danger">No</span> | |
| T321-05 | RETURN statement | <span class="text-danger">No</span> | |
| **T631** | **IN predicate with one list element** | <span class="text-success">Yes</span> | |

View File

@ -5261,9 +5261,9 @@ SELECT toFixedString('foo', 8) AS s;
Result:
```response
┌─s─────────────┬─s_cut─
│ foo\0\0\0\0\0 │ foo │
└───────────────┴───────
┌─s─────────────┐
│ foo\0\0\0\0\0 │
└───────────────┘
```
## toStringCutToZero

View File

@ -1,2 +0,0 @@
# Just an empty yaml file. Keep it alone.
{}

View File

@ -33,7 +33,7 @@ sidebar_label: "Отличительные возможности ClickHouse"
## Поддержка SQL {#sql-support}
ClickHouse поддерживает [декларативный язык запросов на основе SQL](../sql-reference/index.md) и во [многих случаях](../sql-reference/ansi.mdx) совпадающий с SQL-стандартом.
ClickHouse поддерживает декларативный язык запросов SQL.
Поддерживаются [GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), подзапросы в секциях [FROM](../sql-reference/statements/select/from.md), [IN](../sql-reference/operators/in.md), [JOIN](../sql-reference/statements/select/join.md), [функции window](../sql-reference/window-functions/index.mdx), а также скалярные подзапросы.

View File

@ -1,10 +0,0 @@
---
slug: /ru/sql-reference/ansi
sidebar_position: 40
sidebar_label: ANSI Compatibility
title: "ANSI Compatibility"
---
import Content from '@site/docs/en/sql-reference/ansi.md';
<Content />

View File

@ -37,7 +37,7 @@ ClickHouse会使用服务器上一切可用的资源从而以最自然的方
## 支持SQL {#zhi-chi-sql}
ClickHouse支持一种[基于SQL的声明式查询语言](../sql-reference/index.md),它在许多情况下与[ANSI SQL标准](../sql-reference/ansi.md)相同
ClickHouse支持一种基于SQL的声明式查询语言。
支持的查询[GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), [FROM](../sql-reference/statements/select/from.md), [JOIN](../sql-reference/statements/select/join.md), [IN](../sql-reference/operators/in.md)以及非相关子查询。

View File

@ -1,191 +0,0 @@
---
slug: /zh/sql-reference/ansi
sidebar_position: 40
sidebar_label: "ANSI\u517C\u5BB9\u6027"
---
# ClickHouse SQL方言 与ANSI SQL的兼容性{#ansi-sql-compatibility-of-clickhouse-sql-dialect}
:::note
本文参考Annex G所著的[ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8)标准.
:::
## 行为差异 {#differences-in-behaviour}
下表列出了ClickHouse能够使用但与ANSI SQL规定有差异的查询特性。
| 功能ID | 功能名称 | 差异 |
| ------- | --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| E011 | 数值型数据类型 | 带小数点的数字被视为近似值 (`Float64`)而不是精确值 (`Decimal`) |
| E051-05 | SELECT 的列可以重命名 | 字段重命名的作用范围不限于进行重命名的SELECT子查询参考[表达式别名](https://clickhouse.com/docs/zh/sql-reference/syntax/#notes-on-usage) |
| E141-01 | NOT NULL非空约束 | ClickHouse表中每一列默认为`NOT NULL` |
| E011-04 | 算术运算符 | ClickHouse在运算时会进行溢出而不是四舍五入。此外会根据自定义规则修改结果数据类型参考[溢出检查](https://clickhouse.com/docs/zh/sql-reference/data-types/decimal/#yi-chu-jian-cha) |
## 功能状态 {#feature-status}
| 功能ID | 功能名称 | 状态 | 注释 |
| -------- | ---------------------------------------------------------------------------------------- | ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| **E011** | **数值型数据类型** | **部分**{.text-warning} | |
| E011-01 | INTEGER 整型和SMALLINT (小整型)数据类型 | 是 {.text-success} | |
| E011-02 | REAL 实数、DOUBLE PRECISION 双精度浮点数和FLOAT单精度浮点数数据类型数据类型 | 是 {.text-success} | |
| E011-03 | DECIMAL 精确数字和NUMERIC (精确数字)数据类型 | 是 {.text-success} | |
| E011-04 | 算术运算符 | 是 {.text-success} | |
| E011-05 | 数值比较 | 是 {.text-success} | |
| E011-06 | 数值数据类型之间的隐式转换 | 否 {.text-danger} | ANSI SQL允许在数值类型之间进行任意隐式转换而ClickHouse针对不同数据类型有对应的比较函数和类型转换函数 |
| **E021** | **字符串类型** | **部分**{.text-warning} | |
| E021-01 | CHARACTER (字符串)数据类型 | 是 {.text-success} | |
| E021-02 | CHARACTER VARYING (可变字符串)数据类型 | 是 {.text-success} | |
| E021-03 | 字符字面量 | 是 {.text-success} | |
| E021-04 | CHARACTER_LENGTH 函数 | 部分 {.text-warning} | 不支持 `using` 从句 |
| E021-05 | OCTET_LENGTH 函数 | 否 {.text-danger} | 使用 `LENGTH` 函数代替 |
| E021-06 | SUBSTRING | 部分 {.text-warning} | 不支持 `SIMILAR``ESCAPE` 从句,没有`SUBSTRING_REGEX` 函数 |
| E021-07 | 字符串拼接 | 部分 {.text-warning} | 不支持 `COLLATE` 从句 |
| E021-08 | 大小写转换 | 是 {.text-success} | |
| E021-09 | 裁剪字符串 | 是 {.text-success} | |
| E021-10 | 固定长度和可变长度字符串类型之间的隐式转换 | 部分 {.text-warning} | ANSI SQL允许在数据类型之间进行任意隐式转换而ClickHouse针对不同数据类型有对应的比较函数和类型转换函数 |
| E021-11 | POSITION 函数 | 部分 {.text-warning} | 不支持 `IN``USING` 从句,不支持`POSITION_REGEX`函数 |
| E021-12 | 字符串比较 | 是 {.text-success} | |
| **E031** | **标识符** | **部分**{.text-warning} | |
| E031-01 | 分隔标识符 | 部分 {.text-warning} | Unicode文字支持有限 |
| E031-02 | 小写标识符 | 是 {.text-success} | |
| E031-03 | 标识符最后加下划线 | 是 {.text-success} | |
| **E051** | **基本查询规范** | **部分**{.text-warning} | |
| E051-01 | SELECT DISTINCT | 是 {.text-success} | |
| E051-02 | GROUP BY 从句 | 是 {.text-success} | |
| E051-04 | GROUP BY 从句中的列可以包含不在 `<select list>`中出现的列 | 是 {.text-success} | |
| E051-05 | SELECT 的列可以重命名 | 是 {.text-success} | |
| E051-06 | HAVING 从句 | 是 {.text-success} | |
| E051-07 | SELECT 选择的列中允许出现\* | 是 {.text-success} | |
| E051-08 | FROM 从句中的关联名称 | 是 {.text-success} | |
| E051-09 | 重命名 FROM 从句中的列 | 否 {.text-danger} | |
| **E061** | **基本谓词和搜索条件** | **部分**{.text-warning} | |
| E061-01 | 比较谓词 | 是 {.text-success} | |
| E061-02 | BETWEEN 谓词 | 部分 {.text-warning} | 不支持 `SYMMETRIC``ASYMMETRIC` 从句 |
| E061-03 | IN 谓词后可接值列表 | 是 {.text-success} | |
| E061-04 | LIKE 谓词 | 是 {.text-success} | |
| E061-05 | LIKE 谓词后接 ESCAPE 从句 | 否 {.text-danger} | |
| E061-06 | NULL 谓词 | 是 {.text-success} | |
| E061-07 | 量化比较谓词ALL、SOME、ANY | 否 {.text-danger} | |
| E061-08 | EXISTS 谓词 | 否 {.text-danger} | |
| E061-09 | 比较谓词中的子查询 | 是 {.text-success} | |
| E061-11 | IN 谓词中的子查询 | 是 {.text-success} | |
| E061-12 | 量化比较谓词BETWEEN、IN、LIKE中的子查询 | 否 {.text-danger} | |
| E061-13 | 相关子查询 | 否 {.text-danger} | |
| E061-14 | 搜索条件 | 是 {.text-success} | |
| **E071** | **基本查询表达式** | **部分**{.text-warning} | |
| E071-01 | UNION DISTINCT 表运算符 | 是 {.text-success} | |
| E071-02 | UNION ALL 表运算符 | 是 {.text-success} | |
| E071-03 | EXCEPT DISTINCT 表运算符 | 否 {.text-danger} | |
| E071-05 | 通过表运算符组合的列不必具有完全相同的数据类型 | 是 {.text-success} | |
| E071-06 | 子查询中的表运算符 | 是 {.text-success} | |
| **E081** | **基本权限** | **是**{.text-success} | |
| E081-01 | 表级别的SELECT查询权限 | 是 {.text-success} | |
| E081-02 | DELETE删除权限 | 是 {.text-success} | |
| E081-03 | 表级别的INSERT插入权限 | 是 {.text-success} | |
| E081-04 | 表级别的UPDATE更新权限 | 是 {.text-success} | |
| E081-05 | 列级别的UPDATE更新权限 | 是 {.text-success} | |
| E081-06 | 表级别的REFERENCES引用权限 | 是 {.text-success} | |
| E081-07 | 列级别的REFERENCES引用权限 | 是 {.text-success} | |
| E081-08 | WITH GRANT OPTION | 是 {.text-success} | |
| E081-09 | USAGE使用权限 | 是 {.text-success} | |
| E081-10 | EXECUTE执行权限 | 是 {.text-success} | |
| **E091** | **集合函数** | **是**{.text-success} | |
| E091-01 | AVG | 是 {.text-success} | |
| E091-02 | COUNT | 是 {.text-success} | |
| E091-03 | MAX | 是 {.text-success} | |
| E091-04 | MIN | 是 {.text-success} | |
| E091-05 | SUM | 是 {.text-success} | |
| E091-06 | ALL修饰词 | 否。 {.text-danger} | |
| E091-07 | DISTINCT修饰词 | 是 {.text-success} | 并非所有聚合函数都支持该修饰词 |
| **E101** | **基本数据操作** | **部分**{.text-warning} | |
| E101-01 | INSERT插入语句 | 是 {.text-success} | 注ClickHouse中的主键并不隐含`UNIQUE` 约束 |
| E101-03 | 可指定范围的UPDATE更新语句 | 部分 {.text-warning} | `ALTER UPDATE` 语句用来批量更新数据 |
| E101-04 | 可指定范围的DELETE删除语句 | 部分 {.text-warning} | `ALTER DELETE` 语句用来批量删除数据 |
| **E111** | **返回一行的SELECT语句** | **否**{.text-danger} | |
| **E121** | **基本游标支持** | **否**{.text-danger} | |
| E121-01 | DECLARE CURSOR | 否 {.text-danger} | |
| E121-02 | ORDER BY 涉及的列不需要出现在SELECT的列中 | 是 {.text-success} | |
| E121-03 | ORDER BY 从句中的表达式 | 是 {.text-success} | |
| E121-04 | OPEN 语句 | 否 {.text-danger} | |
| E121-06 | 受游标位置控制的 UPDATE 语句 | 否 {.text-danger} | |
| E121-07 | 受游标位置控制的 DELETE 语句 | 否 {.text-danger} | |
| E121-08 | CLOSE 语句 | 否 {.text-danger} | |
| E121-10 | FETCH 语句中包含隐式NEXT | 否 {.text-danger} | |
| E121-17 | WITH HOLD 游标 | 否 {.text-danger} | |
| **E131** | **空值支持** | **是**{.text-success} | 有部分限制 |
| **E141** | **基本完整性约束** | **部分**{.text-warning} | |
| E141-01 | NOT NULL非空约束 | 是 {.text-success} | 注: 默认情况下ClickHouse表中的列隐含`NOT NULL`约束 |
| E141-02 | NOT NULL非空列的UNIQUE唯一约束 | 否 {.text-danger} | |
| E141-03 | 主键约束 | 部分 {.text-warning} | |
| E141-04 | 对于引用删除和引用更新操作基本的FOREIGN KEY外键约束默认不进行任何操作NO ACTION | 否 {.text-danger} | |
| E141-06 | CHECK检查约束 | 是 {.text-success} | |
| E141-07 | 列默认值 | 是 {.text-success} | |
| E141-08 | 在主键上推断非空 | 是 {.text-success} | |
| E141-10 | 可以按任何顺序指定外键中的名称 | 否 {.text-danger} | |
| **E151** | **事务支持** | **否**{.text-danger} | |
| E151-01 | COMMIT提交语句 | 否 {.text-danger} | |
| E151-02 | ROLLBACK回滚语句 | 否 {.text-danger} | |
| **E152** | **基本的SET TRANSACTION设置事务隔离级别语句** | **否**{.text-danger} | |
| E152-01 | SET TRANSACTION语句ISOLATION LEVEL SERIALIZABLE隔离级别为串行化从句 | 否 {.text-danger} | |
| E152-02 | SET TRANSACTION语句READ ONLY只读和READ WRITE读写从句 | 否 {.text-danger} | |
| **E153** | **具有子查询的可更新查询** | **是**{.text-success} | |
| **E161** | **使用“--”符号作为SQL注释** | **是**{.text-success} | |
| **E171** | **SQLSTATE支持** | **否**{.text-danger} | |
| **E182** | **主机语言绑定** | **否**{.text-danger} | |
| **F031** | **基本架构操作** | **部分**{.text-warning} | |
| F031-01 | 使用 CREATE TABLE 语句创建持久表 | 部分 {.text-warning} | 不支持 `SYSTEM VERSIONING`, `ON COMMIT`, `GLOBAL`, `LOCAL`, `PRESERVE`, `DELETE`, `REF IS`, `WITH OPTIONS`, `UNDER`, `LIKE`, `PERIOD FOR` 从句,不支持用户解析的数据类型 |
| F031-02 | CREATE VIEW创建视图语句 | 部分 {.text-warning} | 不支持 `RECURSIVE`, `CHECK`, `UNDER`, `WITH OPTIONS` 从句,不支持用户解析的数据类型 |
| F031-03 | GRANT授权语句 | 是 {.text-success} | |
| F031-04 | ALTER TABLE语句ADD COLUMN从句 | 是 {.text-success} | 不支持 `GENERATED` 从句和以系统时间做参数 |
| F031-13 | DROP TABLE语句RESTRICT从句 | 否 {.text-danger} | |
| F031-16 | DROP VIEW语句RESTRICT子句 | 否 {.text-danger} | |
| F031-19 | REVOKE语句RESTRICT子句 | 否 {.text-danger} | |
| **F041** | **基本连接关系** | **部分**{.text-warning} | |
| F041-01 | Inner join但不一定是INNER关键字) | 是 {.text-success} | |
| F041-02 | INNER 关键字 | 是 {.text-success} | |
| F041-03 | LEFT OUTER JOIN | 是 {.text-success} | |
| F041-04 | RIGHT OUTER JOIN | 是 {.text-success} | |
| F041-05 | 外连接可嵌套 | 是 {.text-success} | |
| F041-07 | 左外部连接或右外连接中的内部表也可用于内部联接 | 是 {.text-success} | |
| F041-08 | 支持所有比较运算符(而不仅仅是=) | 否 {.text-danger} | |
| **F051** | **基本日期和时间** | **部分**{.text-warning} | |
| F051-01 | DATE日期数据类型并支持用于表达日期的字面量) | 是 {.text-success} | |
| F051-02 | TIME时间数据类型并支持用于表达时间的字面量小数秒精度至少为0 | 否 {.text-danger} | |
| F051-03 | 时间戳数据类型并支持用于表达时间戳的字面量小数秒精度至少为0和6 | 是 {.text-danger} | |
| F051-04 | 日期、时间和时间戳数据类型的比较谓词 | 是 {.text-success} | |
| F051-05 | DateTime 类型和字符串形式表达的时间之间的显式转换 | 是 {.text-success} | |
| F051-06 | CURRENT_DATE | 否 {.text-danger} | 使用`today()`替代 |
| F051-07 | LOCALTIME | 否 {.text-danger} | 使用`now()`替代 |
| F051-08 | LOCALTIMESTAMP | 否 {.text-danger} | |
| **F081** | **视图的UNION和EXCEPT操作** | **部分**{.text-warning} | |
| **F131** | **分组操作** | **部分**{.text-warning} | |
| F131-01 | 在具有分组视图的查询中支持 WHERE、GROUP BY 和 HAVING 子句 | 是 {.text-success} | |
| F131-02 | 在分组视图中支持多张表 | 是 {.text-success} | |
| F131-03 | 分组视图的查询中支持集合函数 | 是 {.text-success} | |
| F131-04 | 带有 `GROUP BY``HAVING` 从句,以及分组视图的子查询 | 是 {.text-success} | |
| F131-05 | 带有 `GROUP BY``HAVING` 从句以及分组视图的仅返回1条记录的SELECT查询 | 否 {.text-danger} | |
| **F181** | **多模块支持** | **否**{.text-danger} | |
| **F201** | **CAST 函数** | **是**{.text-success} | |
| **F221** | **显式默认值** | **否**{.text-danger} | |
| **F261** | **CASE 表达式** | **是**{.text-success} | |
| F261-01 | 简单 CASE 表达式 | 是 {.text-success} | |
| F261-02 | 搜索型 CASE 表达式 | 是 {.text-success} | |
| F261-03 | NULLIF | 是 {.text-success} | |
| F261-04 | COALESCE | 是 {.text-success} | |
| **F311** | **架构定义语句** | **部分**{.text-warning} | |
| F311-01 | CREATE SCHEMA | 部分 {.text-warning} | 见`CREATE DATABASE` |
| F311-02 | 用于创建持久表的 CREATE TABLE | 是 {.text-success} | |
| F311-03 | CREATE VIEW | 是 {.text-success} | |
| F311-04 | CREATE VIEW: WITH CHECK OPTION | 否 {.text-danger} | |
| F311-05 | GRANT 语句 | 是 {.text-success} | |
| **F471** | **标量子查询** | **是**{.text-success} | |
| **F481** | **扩展 NULL 谓词** | **是**{.text-success} | |
| **F812** | **基本标志位** | **否**{.text-danger} |
| **S011** | **用于不重复数据的数据类型** | **否**{.text-danger} |
| **T321** | **基本的SQL调用例程** | **否**{.text-danger} | |
| T321-01 | 没有重载的用户定义函数 | 否{.text-danger} | |
| T321-02 | 没有重载的用户定义存储过程 | 否{.text-danger} | |
| T321-03 | 功能调用 | 否 {.text-danger} | |
| T321-04 | CALL 语句 | 否 {.text-danger} | |
| T321-05 | RETURN 语句 | 否 {.text-danger} | |
| **T631** | **IN 谓词后接一个列表** | **是**{.text-success} | |

View File

@ -1496,6 +1496,8 @@ try
NamedCollectionFactory::instance().loadIfNot();
FileCacheFactory::instance().loadDefaultCaches(config());
/// Initialize main config reloader.
std::string include_from_path = config().getString("include_from", "/etc/metrika.xml");

View File

@ -117,20 +117,20 @@ bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs)
}
void AuthenticationData::setPassword(const String & password_)
void AuthenticationData::setPassword(const String & password_, bool validate)
{
switch (type)
{
case AuthenticationType::PLAINTEXT_PASSWORD:
setPasswordHashBinary(Util::stringToDigest(password_));
setPasswordHashBinary(Util::stringToDigest(password_), validate);
return;
case AuthenticationType::SHA256_PASSWORD:
setPasswordHashBinary(Util::encodeSHA256(password_));
setPasswordHashBinary(Util::encodeSHA256(password_), validate);
return;
case AuthenticationType::DOUBLE_SHA1_PASSWORD:
setPasswordHashBinary(Util::encodeDoubleSHA1(password_));
setPasswordHashBinary(Util::encodeDoubleSHA1(password_), validate);
return;
case AuthenticationType::BCRYPT_PASSWORD:
@ -149,12 +149,12 @@ void AuthenticationData::setPassword(const String & password_)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "setPassword(): authentication type {} not supported", toString(type));
}
void AuthenticationData::setPasswordBcrypt(const String & password_, int workfactor_)
void AuthenticationData::setPasswordBcrypt(const String & password_, int workfactor_, bool validate)
{
if (type != AuthenticationType::BCRYPT_PASSWORD)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot specify bcrypt password for authentication type {}", toString(type));
setPasswordHashBinary(Util::encodeBcrypt(password_, workfactor_));
setPasswordHashBinary(Util::encodeBcrypt(password_, workfactor_), validate);
}
String AuthenticationData::getPassword() const
@ -165,7 +165,7 @@ String AuthenticationData::getPassword() const
}
void AuthenticationData::setPasswordHashHex(const String & hash)
void AuthenticationData::setPasswordHashHex(const String & hash, bool validate)
{
Digest digest;
digest.resize(hash.size() / 2);
@ -179,7 +179,7 @@ void AuthenticationData::setPasswordHashHex(const String & hash)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read password hash in hex, check for valid characters [0-9a-fA-F] and length");
}
setPasswordHashBinary(digest);
setPasswordHashBinary(digest, validate);
}
@ -195,7 +195,7 @@ String AuthenticationData::getPasswordHashHex() const
}
void AuthenticationData::setPasswordHashBinary(const Digest & hash)
void AuthenticationData::setPasswordHashBinary(const Digest & hash, bool validate)
{
switch (type)
{
@ -217,7 +217,7 @@ void AuthenticationData::setPasswordHashBinary(const Digest & hash)
case AuthenticationType::DOUBLE_SHA1_PASSWORD:
{
if (hash.size() != 20)
if (validate && hash.size() != 20)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Password hash for the 'DOUBLE_SHA1_PASSWORD' authentication type has length {} "
"but must be exactly 20 bytes.", hash.size());
@ -231,7 +231,7 @@ void AuthenticationData::setPasswordHashBinary(const Digest & hash)
/// However the library we use to encode it requires hash string to be 64 characters long,
/// so we also allow the hash of this length.
if (hash.size() != 59 && hash.size() != 60 && hash.size() != 64)
if (validate && hash.size() != 59 && hash.size() != 60 && hash.size() != 64)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Password hash for the 'BCRYPT_PASSWORD' authentication type has length {} "
"but must be 59 or 60 bytes.", hash.size());
@ -240,10 +240,13 @@ void AuthenticationData::setPasswordHashBinary(const Digest & hash)
resized.resize(64);
#if USE_BCRYPT
/// Verify that it is a valid hash
int ret = bcrypt_checkpw("", reinterpret_cast<const char *>(resized.data()));
if (ret == -1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Could not decode the provided hash with 'bcrypt_hash'");
if (validate)
{
/// Verify that it is a valid hash
int ret = bcrypt_checkpw("", reinterpret_cast<const char *>(resized.data()));
if (ret == -1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Could not decode the provided hash with 'bcrypt_hash'");
}
#endif
password_hash = hash;
@ -385,7 +388,7 @@ std::shared_ptr<ASTAuthenticationData> AuthenticationData::toAST() const
}
AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & query, ContextPtr context, bool check_password_rules)
AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & query, ContextPtr context, bool validate)
{
if (query.type && query.type == AuthenticationType::NO_PASSWORD)
return AuthenticationData();
@ -431,7 +434,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
if (!query.type && !context)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get default password type without context");
if (check_password_rules && !context)
if (validate && !context)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot check password complexity rules without context");
if (query.type == AuthenticationType::BCRYPT_PASSWORD && !context)
@ -448,13 +451,13 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
AuthenticationData auth_data(current_type);
if (check_password_rules)
if (validate)
context->getAccessControl().checkPasswordComplexityRules(value);
if (query.type == AuthenticationType::BCRYPT_PASSWORD)
{
int workfactor = context->getAccessControl().getBcryptWorkfactor();
auth_data.setPasswordBcrypt(value, workfactor);
auth_data.setPasswordBcrypt(value, workfactor, validate);
return auth_data;
}
@ -486,7 +489,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
#endif
}
auth_data.setPassword(value);
auth_data.setPassword(value, validate);
return auth_data;
}
@ -498,11 +501,11 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
if (query.type == AuthenticationType::BCRYPT_PASSWORD)
{
auth_data.setPasswordHashBinary(AuthenticationData::Util::stringToDigest(value));
auth_data.setPasswordHashBinary(AuthenticationData::Util::stringToDigest(value), validate);
return auth_data;
}
auth_data.setPasswordHashHex(value);
auth_data.setPasswordHashHex(value, validate);
if (query.type == AuthenticationType::SHA256_PASSWORD && args_size == 2)

View File

@ -31,17 +31,17 @@ public:
AuthenticationType getType() const { return type; }
/// Sets the password and encrypt it using the authentication type set in the constructor.
void setPassword(const String & password_);
void setPassword(const String & password_, bool validate);
/// Returns the password. Allowed to use only for Type::PLAINTEXT_PASSWORD.
String getPassword() const;
/// Sets the password as a string of hexadecimal digits.
void setPasswordHashHex(const String & hash);
void setPasswordHashHex(const String & hash, bool validate);
String getPasswordHashHex() const;
/// Sets the password in binary form.
void setPasswordHashBinary(const Digest & hash);
void setPasswordHashBinary(const Digest & hash, bool validate);
const Digest & getPasswordHashBinary() const { return password_hash; }
/// Sets the salt in String form.
@ -49,7 +49,7 @@ public:
String getSalt() const;
/// Sets the password using bcrypt hash with specified workfactor
void setPasswordBcrypt(const String & password_, int workfactor_);
void setPasswordBcrypt(const String & password_, int workfactor_, bool validate);
/// Sets the server name for authentication type LDAP.
const String & getLDAPServerName() const { return ldap_server_name; }
@ -77,7 +77,7 @@ public:
friend bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs);
friend bool operator !=(const AuthenticationData & lhs, const AuthenticationData & rhs) { return !(lhs == rhs); }
static AuthenticationData fromAST(const ASTAuthenticationData & query, ContextPtr context, bool check_password_rules);
static AuthenticationData fromAST(const ASTAuthenticationData & query, ContextPtr context, bool validate);
std::shared_ptr<ASTAuthenticationData> toAST() const;
struct Util

View File

@ -121,6 +121,7 @@ namespace
bool allow_no_password,
bool allow_plaintext_password)
{
const bool validate = true;
auto user = std::make_shared<User>();
user->setName(user_name);
String user_config = "users." + user_name;
@ -157,17 +158,17 @@ namespace
if (has_password_plaintext)
{
user->authentication_methods.emplace_back(AuthenticationType::PLAINTEXT_PASSWORD);
user->authentication_methods.back().setPassword(config.getString(user_config + ".password"));
user->authentication_methods.back().setPassword(config.getString(user_config + ".password"), validate);
}
else if (has_password_sha256_hex)
{
user->authentication_methods.emplace_back(AuthenticationType::SHA256_PASSWORD);
user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_sha256_hex"));
user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_sha256_hex"), validate);
}
else if (has_password_double_sha1_hex)
{
user->authentication_methods.emplace_back(AuthenticationType::DOUBLE_SHA1_PASSWORD);
user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_double_sha1_hex"));
user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_double_sha1_hex"), validate);
}
else if (has_ldap)
{

View File

@ -78,11 +78,6 @@ struct WindowFunction : public IAggregateFunctionHelper<WindowFunction>, public
}
String getName() const override { return name; }
void create(AggregateDataPtr __restrict) const override { }
void destroy(AggregateDataPtr __restrict) const noexcept override { }
bool hasTrivialDestructor() const override { return true; }
size_t sizeOfData() const override { return 0; }
size_t alignOfData() const override { return 1; }
void add(AggregateDataPtr __restrict, const IColumn **, size_t, Arena *) const override { fail(); }
void merge(AggregateDataPtr __restrict, ConstAggregateDataPtr, Arena *) const override { fail(); }
void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional<size_t>) const override { fail(); }
@ -90,6 +85,22 @@ struct WindowFunction : public IAggregateFunctionHelper<WindowFunction>, public
void insertResultInto(AggregateDataPtr __restrict, IColumn &, Arena *) const override { fail(); }
};
struct StatelessWindowFunction : public WindowFunction
{
StatelessWindowFunction(
const std::string & name_, const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_)
: WindowFunction(name_, argument_types_, parameters_, result_type_)
{
}
size_t sizeOfData() const override { return 0; }
size_t alignOfData() const override { return 1; }
void create(AggregateDataPtr __restrict) const override { }
void destroy(AggregateDataPtr __restrict) const noexcept override { }
bool hasTrivialDestructor() const override { return true; }
};
template <typename State>
struct StatefulWindowFunction : public WindowFunction
{
@ -100,7 +111,7 @@ struct StatefulWindowFunction : public WindowFunction
}
size_t sizeOfData() const override { return sizeof(State); }
size_t alignOfData() const override { return 1; }
size_t alignOfData() const override { return alignof(State); }
void create(AggregateDataPtr __restrict place) const override { new (place) State(); }

View File

@ -34,7 +34,7 @@ namespace ErrorCodes
namespace
{
void exctractJoinConditions(const QueryTreeNodePtr & node, QueryTreeNodes & equi_conditions, QueryTreeNodes & other)
void extractJoinConditions(const QueryTreeNodePtr & node, QueryTreeNodes & equi_conditions, QueryTreeNodes & other)
{
auto * func = node->as<FunctionNode>();
if (!func)
@ -52,7 +52,7 @@ void exctractJoinConditions(const QueryTreeNodePtr & node, QueryTreeNodes & equi
else if (func->getFunctionName() == "and")
{
for (const auto & arg : args)
exctractJoinConditions(arg, equi_conditions, other);
extractJoinConditions(arg, equi_conditions, other);
}
else
{
@ -118,7 +118,7 @@ public:
QueryTreeNodes equi_conditions;
QueryTreeNodes other_conditions;
exctractJoinConditions(where_condition, equi_conditions, other_conditions);
extractJoinConditions(where_condition, equi_conditions, other_conditions);
bool can_convert_cross_to_inner = false;
for (auto & condition : equi_conditions)
{

View File

@ -15,7 +15,7 @@ namespace DB
{
struct ConnectionParameters
{
std::string host;
String host;
UInt16 port{};
std::string default_database;
std::string user;
@ -30,8 +30,8 @@ struct ConnectionParameters
ConnectionTimeouts timeouts;
ConnectionParameters() = default;
ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host);
ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host, std::optional<UInt16> port);
ConnectionParameters(const Poco::Util::AbstractConfiguration & config, String host);
ConnectionParameters(const Poco::Util::AbstractConfiguration & config, String host, std::optional<UInt16> port);
static UInt16 getPortFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & connection_host);

View File

@ -291,9 +291,14 @@
M(CacheWarmerBytesInProgress, "Total size of remote file segments waiting to be asynchronously loaded into filesystem cache.") \
M(DistrCacheOpenedConnections, "Number of open connections to Distributed Cache") \
M(DistrCacheUsedConnections, "Number of currently used connections to Distributed Cache") \
M(DistrCacheAllocatedConnections, "Number of currently allocated connections to Distributed Cache connection pool") \
M(DistrCacheBorrowedConnections, "Number of currently borrowed connections to Distributed Cache connection pool") \
M(DistrCacheReadRequests, "Number of executed Read requests to Distributed Cache") \
M(DistrCacheWriteRequests, "Number of executed Write requests to Distributed Cache") \
M(DistrCacheServerConnections, "Number of open connections to ClickHouse server from Distributed Cache") \
M(DistrCacheRegisteredServers, "Number of distributed cache registered servers") \
M(DistrCacheRegisteredServersCurrentAZ, "Number of distributed cache registered servers in current az") \
M(DistrCacheServerS3CachedClients, "Number of distributed cache S3 cached clients") \
\
M(SchedulerIOReadScheduled, "Number of IO reads are being scheduled currently") \
M(SchedulerIOWriteScheduled, "Number of IO writes are being scheduled currently") \
@ -314,6 +319,20 @@
M(FilteringMarksWithSecondaryKeys, "Number of threads currently doing filtering of mark ranges by secondary keys") \
\
M(DiskS3NoSuchKeyErrors, "The number of `NoSuchKey` errors that occur when reading data from S3 cloud storage through ClickHouse disks.") \
\
M(SharedCatalogStateApplicationThreads, "Number of threads in the threadpool for state application in Shared Catalog.") \
M(SharedCatalogStateApplicationThreadsActive, "Number of active threads in the threadpool for state application in Shared Catalog.") \
M(SharedCatalogStateApplicationThreadsScheduled, "Number of queued or active jobs in the threadpool for state application in Shared Catalog.") \
\
M(SharedCatalogDropLocalThreads, "Number of threads in the threadpool for drop of local tables in Shared Catalog.") \
M(SharedCatalogDropLocalThreadsActive, "Number of active threads in the threadpool for drop of local tables in Shared Catalog.") \
M(SharedCatalogDropLocalThreadsScheduled, "Number of queued or active jobs in the threadpool for drop of local tables in Shared Catalog.") \
\
M(SharedCatalogDropZooKeeperThreads, "Number of threads in the threadpool for drop of object in ZooKeeper in Shared Catalog.") \
M(SharedCatalogDropZooKeeperThreadsActive, "Number of active threads in the threadpool for drop of object in ZooKeeper in Shared Catalog.") \
M(SharedCatalogDropZooKeeperThreadsScheduled, "Number of queued or active jobs in the threadpool for drop of object in ZooKeeper in Shared Catalog.") \
\
M(SharedDatabaseCatalogTablesInLocalDropDetachQueue, "Number of tables in the queue for local drop or detach in Shared Catalog.") \
#ifdef APPLY_FOR_EXTERNAL_METRICS
#define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M)

View File

@ -614,6 +614,7 @@
\
M(900, DISTRIBUTED_CACHE_ERROR) \
M(901, CANNOT_USE_DISTRIBUTED_CACHE) \
M(902, PROTOCOL_VERSION_MISMATCH) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \

View File

@ -49,11 +49,21 @@ static struct InitFiu
ONCE(smt_commit_write_zk_fail_before_op) \
ONCE(smt_commit_merge_change_version_before_op) \
ONCE(smt_merge_mutate_intention_freeze_in_destructor) \
ONCE(smt_add_part_sleep_after_add_before_commit) \
ONCE(smt_sleep_in_constructor) \
ONCE(meta_in_keeper_create_metadata_failure) \
ONCE(smt_insert_retry_timeout) \
ONCE(smt_insert_fake_hardware_error) \
ONCE(smt_sleep_after_hardware_in_insert) \
ONCE(smt_throw_keeper_exception_after_successful_insert) \
REGULAR(smt_dont_merge_first_part) \
REGULAR(smt_sleep_in_schedule_data_processing_job) \
REGULAR(cache_warmer_stall) \
REGULAR(check_table_query_delay_for_part) \
REGULAR(dummy_failpoint) \
REGULAR(prefetched_reader_pool_failpoint) \
REGULAR(shared_set_sleep_during_update) \
REGULAR(smt_outdated_parts_exception_response) \
PAUSEABLE_ONCE(replicated_merge_tree_insert_retry_pause) \
PAUSEABLE_ONCE(finish_set_quorum_failed_parts) \
PAUSEABLE_ONCE(finish_clean_quorum_failed_parts) \

View File

@ -241,6 +241,8 @@
M(MergeVerticalStageExecuteMilliseconds, "Total busy time spent for execution of vertical stage of background merges", ValueType::Milliseconds) \
M(MergeProjectionStageTotalMilliseconds, "Total time spent for projection stage of background merges", ValueType::Milliseconds) \
M(MergeProjectionStageExecuteMilliseconds, "Total busy time spent for execution of projection stage of background merges", ValueType::Milliseconds) \
M(MergePrewarmStageTotalMilliseconds, "Total time spent for prewarm stage of background merges", ValueType::Milliseconds) \
M(MergePrewarmStageExecuteMilliseconds, "Total busy time spent for execution of prewarm stage of background merges", ValueType::Milliseconds) \
\
M(MergingSortedMilliseconds, "Total time spent while merging sorted columns", ValueType::Milliseconds) \
M(AggregatingSortedMilliseconds, "Total time spent while aggregating sorted columns", ValueType::Milliseconds) \
@ -639,6 +641,8 @@ The server successfully detected this situation and will download merged part fr
M(MetadataFromKeeperBackgroundCleanupTransactions, "Number of times old transaction idempotency token was cleaned up by background task", ValueType::Number) \
M(MetadataFromKeeperBackgroundCleanupErrors, "Number of times an error was encountered in background cleanup task", ValueType::Number) \
\
M(SharedMergeTreeMetadataCacheHintLoadedFromCache, "Number of times metadata cache hint was found without going to Keeper", ValueType::Number) \
\
M(KafkaRebalanceRevocations, "Number of partition revocations (the first stage of consumer group rebalance)", ValueType::Number) \
M(KafkaRebalanceAssignments, "Number of partition assignments (the final stage of consumer group rebalance)", ValueType::Number) \
M(KafkaRebalanceErrors, "Number of failed consumer group rebalances", ValueType::Number) \
@ -742,29 +746,51 @@ The server successfully detected this situation and will download merged part fr
M(ConnectionPoolIsFullMicroseconds, "Total time spent waiting for a slot in connection pool.", ValueType::Microseconds) \
M(AsyncLoaderWaitMicroseconds, "Total time a query was waiting for async loader jobs.", ValueType::Microseconds) \
\
M(DistrCacheServerSwitches, "Number of server switches between distributed cache servers in read/write-through cache", ValueType::Number) \
M(DistrCacheReadMicroseconds, "Time spent reading from distributed cache", ValueType::Microseconds) \
M(DistrCacheFallbackReadMicroseconds, "Time spend reading from fallback buffer instead of distribted cache", ValueType::Microseconds) \
M(DistrCachePrecomputeRangesMicroseconds, "Time spent to precompute read ranges", ValueType::Microseconds) \
M(DistrCacheNextImplMicroseconds, "Time spend in ReadBufferFromDistributedCache::nextImpl", ValueType::Microseconds) \
M(DistrCacheOpenedConnections, "The number of open connections to distributed cache", ValueType::Number) \
M(DistrCacheReusedConnections, "The number of reused connections to distributed cache", ValueType::Number) \
M(DistrCacheHoldConnections, "The number of used connections to distributed cache", ValueType::Number) \
M(DistrCacheServerSwitches, "Distributed Cache read buffer event. Number of server switches between distributed cache servers in read/write-through cache", ValueType::Number) \
M(DistrCacheReadMicroseconds, "Distributed Cache read buffer event. Time spent reading from distributed cache", ValueType::Microseconds) \
M(DistrCacheFallbackReadMicroseconds, "Distributed Cache read buffer event. Time spent reading from fallback buffer instead of distributed cache", ValueType::Microseconds) \
M(DistrCachePrecomputeRangesMicroseconds, "Distributed Cache read buffer event. Time spent to precompute read ranges", ValueType::Microseconds) \
M(DistrCacheNextImplMicroseconds, "Distributed Cache read buffer event. Time spent in ReadBufferFromDistributedCache::nextImpl", ValueType::Microseconds) \
M(DistrCacheStartRangeMicroseconds, "Distributed Cache read buffer event. Time spent to start a new read range with distributed cache", ValueType::Microseconds) \
M(DistrCacheIgnoredBytesWhileWaitingProfileEvents, "Distributed Cache read buffer event. Ignored bytes while waiting for profile events in distributed cache", ValueType::Number) \
M(DistrCacheRangeChange, "Distributed Cache read buffer event. Number of times we changed read range because of seek/last_position change", ValueType::Number) \
\
M(DistrCacheGetResponseMicroseconds, "Time spend to wait for response from distributed cache", ValueType::Microseconds) \
M(DistrCacheStartRangeMicroseconds, "Time spent to start a new read range with distributed cache", ValueType::Microseconds) \
M(DistrCacheLockRegistryMicroseconds, "Time spent to take DistributedCacheRegistry lock", ValueType::Microseconds) \
M(DistrCacheUnusedPackets, "Number of skipped unused packets from distributed cache", ValueType::Number) \
M(DistrCachePackets, "Total number of packets received from distributed cache", ValueType::Number) \
M(DistrCacheUnusedPacketsBytes, "The number of bytes in Data packets which were ignored", ValueType::Bytes) \
M(DistrCacheRegistryUpdateMicroseconds, "Time spent updating distributed cache registry", ValueType::Microseconds) \
M(DistrCacheRegistryUpdates, "Number of distributed cache registry updates", ValueType::Number) \
M(DistrCacheGetResponseMicroseconds, "Distributed Cache client event. Time spent waiting for a response from distributed cache", ValueType::Microseconds) \
M(DistrCacheReadErrors, "Distributed Cache client event. Number of distributed cache errors during read", ValueType::Number) \
M(DistrCacheMakeRequestErrors, "Distributed Cache client event. Number of distributed cache errors when making a request", ValueType::Number) \
M(DistrCacheReceiveResponseErrors, "Distributed Cache client event. Number of distributed cache errors when receiving a response to a request", ValueType::Number) \
\
M(DistrCacheConnectMicroseconds, "The time spent to connect to distributed cache", ValueType::Microseconds) \
M(DistrCacheConnectAttempts, "The number of connection attempts to distributed cache", ValueType::Number) \
M(DistrCacheGetClient, "Number of client access times", ValueType::Number) \
M(DistrCachePackets, "Distributed Cache client event. Total number of packets received from distributed cache", ValueType::Number) \
M(DistrCachePacketsBytes, "Distributed Cache client event. The number of bytes in Data packets which were not ignored", ValueType::Bytes) \
M(DistrCacheUnusedPackets, "Distributed Cache client event. Number of skipped unused packets from distributed cache", ValueType::Number) \
M(DistrCacheUnusedPacketsBytes, "Distributed Cache client event. The number of bytes in Data packets which were ignored", ValueType::Bytes) \
M(DistrCacheUnusedPacketsBufferAllocations, "Distributed Cache client event. The number of extra buffer allocations in case we could not reuse existing buffer", ValueType::Number) \
\
M(DistrCacheServerProcessRequestMicroseconds, "Time spent processing request on DistributedCache server side", ValueType::Microseconds) \
M(DistrCacheLockRegistryMicroseconds, "Distributed Cache registry event. Time spent to take DistributedCacheRegistry lock", ValueType::Microseconds) \
M(DistrCacheRegistryUpdateMicroseconds, "Distributed Cache registry event. Time spent updating distributed cache registry", ValueType::Microseconds) \
M(DistrCacheRegistryUpdates, "Distributed Cache registry event. Number of distributed cache registry updates", ValueType::Number) \
M(DistrCacheHashRingRebuilds, "Distributed Cache registry event. Number of distributed cache hash ring rebuilds", ValueType::Number) \
\
M(DistrCacheReadBytesFromCache, "Distributed Cache read buffer event. Bytes read from distributed cache", ValueType::Bytes) \
M(DistrCacheReadBytesFromFallbackBuffer, "Distributed Cache read buffer event. Bytes read from fallback buffer", ValueType::Number) \
\
M(DistrCacheRangeResetBackward, "Distributed Cache read buffer event. Number of times we reset the read range backward because of seek/last_position change", ValueType::Number) \
M(DistrCacheRangeResetForward, "Distributed Cache read buffer event. Number of times we reset the read range forward because of seek/last_position change", ValueType::Number) \
\
M(DistrCacheOpenedConnections, "Distributed Cache connection event. The number of open connections to distributed cache", ValueType::Number) \
M(DistrCacheReusedConnections, "Distributed Cache connection event. The number of reused connections to distributed cache", ValueType::Number) \
M(DistrCacheOpenedConnectionsBypassingPool, "Distributed Cache connection event. The number of open connections to distributed cache bypassing pool", ValueType::Number) \
M(DistrCacheConnectMicroseconds, "Distributed Cache connection event. The time spent to connect to distributed cache", ValueType::Microseconds) \
M(DistrCacheConnectAttempts, "Distributed Cache connection event. The number of connection attempts to distributed cache", ValueType::Number) \
M(DistrCacheGetClientMicroseconds, "Distributed Cache connection event. Time spent getting client for distributed cache", ValueType::Microseconds) \
\
M(DistrCacheServerProcessRequestMicroseconds, "Distributed Cache server event. Time spent processing request on DistributedCache server side", ValueType::Microseconds) \
M(DistrCacheServerStartRequestPackets, "Distributed Cache server event. Number of StartRequest packets in DistributedCacheServer", ValueType::Number) \
M(DistrCacheServerContinueRequestPackets, "Distributed Cache server event. Number of ContinueRequest packets in DistributedCacheServer", ValueType::Number) \
M(DistrCacheServerEndRequestPackets, "Distributed Cache server event. Number of EndRequest packets in DistributedCacheServer", ValueType::Number) \
M(DistrCacheServerAckRequestPackets, "Distributed Cache server event. Number of AckRequest packets in DistributedCacheServer", ValueType::Number) \
M(DistrCacheServerNewS3CachedClients, "Distributed Cache server event. The number of new cached S3 clients", ValueType::Number) \
M(DistrCacheServerReusedS3CachedClients, "Distributed Cache server event. The number of reused cached S3 clients", ValueType::Number) \
\
M(LogTest, "Number of log messages with level Test", ValueType::Number) \
M(LogTrace, "Number of log messages with level Trace", ValueType::Number) \
@ -788,15 +814,38 @@ The server successfully detected this situation and will download merged part fr
M(InterfacePostgreSQLReceiveBytes, "Number of bytes received through PostgreSQL interfaces", ValueType::Bytes) \
\
M(ParallelReplicasUsedCount, "Number of replicas used to execute a query with task-based parallel replicas", ValueType::Number) \
M(ParallelReplicasAvailableCount, "Number of replicas available to execute a query with task-based parallel replicas", ValueType::Number) \
M(ParallelReplicasUnavailableCount, "Number of replicas which was chosen, but found to be unavailable during query execution with task-based parallel replicas", ValueType::Number) \
\
M(SharedMergeTreeVirtualPartsUpdates, "Virtual parts update count", ValueType::Number) \
M(SharedMergeTreeVirtualPartsUpdatesByLeader, "Virtual parts updates by leader", ValueType::Number) \
M(SharedMergeTreeVirtualPartsUpdateMicroseconds, "Virtual parts update microseconds", ValueType::Microseconds) \
M(SharedMergeTreeVirtualPartsUpdatesFromZooKeeper, "Virtual parts updates count from ZooKeeper", ValueType::Number) \
M(SharedMergeTreeVirtualPartsUpdatesFromZooKeeperMicroseconds, "Virtual parts updates from ZooKeeper microseconds", ValueType::Microseconds) \
M(SharedMergeTreeVirtualPartsUpdatesPeerNotFound, "Virtual parts updates from peer failed because no peer was found", ValueType::Number) \
M(SharedMergeTreeVirtualPartsUpdatesFromPeer, "Virtual parts updates count from peer", ValueType::Number) \
M(SharedMergeTreeVirtualPartsUpdatesFromPeerMicroseconds, "Virtual parts updates from peer microseconds", ValueType::Microseconds) \
M(SharedMergeTreeVirtualPartsUpdatesForMergesOrStatus, "Virtual parts updates from non-default background job", ValueType::Number) \
M(SharedMergeTreeVirtualPartsUpdatesLeaderFailedElection, "Virtual parts updates leader election failed", ValueType::Number) \
M(SharedMergeTreeVirtualPartsUpdatesLeaderSuccessfulElection, "Virtual parts updates leader election successful", ValueType::Number) \
M(SharedMergeTreeMergeMutationAssignmentAttempt, "How many times we tried to assign merge or mutation", ValueType::Number) \
M(SharedMergeTreeMergeMutationAssignmentFailedWithNothingToDo, "How many times we tried to assign merge or mutation and failed because there was nothing to merge", ValueType::Number) \
M(SharedMergeTreeMergeMutationAssignmentFailedWithConflict, "How many times we tried to assign merge or mutation and failed because of conflict in Keeper", ValueType::Number) \
M(SharedMergeTreeMergeMutationAssignmentSuccessful, "How many times we successfully assigned a merge or mutation", ValueType::Number) \
M(SharedMergeTreeMergePartsMovedToOudated, "How many parts moved to outdated directory", ValueType::Number) \
M(SharedMergeTreeMergePartsMovedToCondemned, "How many parts moved to condemned directory", ValueType::Number) \
M(SharedMergeTreeOutdatedPartsConfirmationRequest, "How many ZooKeeper requests were used to confirm outdated parts", ValueType::Number) \
M(SharedMergeTreeOutdatedPartsConfirmationInvocations, "How many invocations were made to confirm outdated parts", ValueType::Number) \
M(SharedMergeTreeOutdatedPartsHTTPRequest, "How many HTTP requests were sent to confirm outdated parts", ValueType::Number) \
M(SharedMergeTreeOutdatedPartsHTTPResponse, "How many HTTP responses were sent to confirm outdated parts", ValueType::Number) \
M(SharedMergeTreeCondemnedPartsKillRequest, "How many ZooKeeper requests were used to remove condemned parts", ValueType::Number) \
M(SharedMergeTreeCondemnedPartsLockConfict, "How many times we failed to acquire the lock because of a conflict", ValueType::Number) \
M(SharedMergeTreeCondemnedPartsRemoved, "How many condemned parts were removed", ValueType::Number) \
M(KeeperLogsEntryReadFromLatestCache, "Number of log entries in Keeper being read from latest logs cache", ValueType::Number) \
M(KeeperLogsEntryReadFromCommitCache, "Number of log entries in Keeper being read from commit logs cache", ValueType::Number) \
M(KeeperLogsEntryReadFromFile, "Number of log entries in Keeper being read directly from the changelog file", ValueType::Number) \
M(KeeperLogsPrefetchedEntries, "Number of log entries in Keeper being prefetched from the changelog file", ValueType::Number) \
\
M(ParallelReplicasAvailableCount, "Number of replicas available to execute a query with task-based parallel replicas", ValueType::Number) \
M(ParallelReplicasUnavailableCount, "Number of replicas which were chosen, but found to be unavailable during query execution with task-based parallel replicas", ValueType::Number) \
\
M(StorageConnectionsCreated, "Number of created connections for storages", ValueType::Number) \
M(StorageConnectionsReused, "Number of reused connections for storages", ValueType::Number) \
M(StorageConnectionsReset, "Number of reset connections for storages", ValueType::Number) \
@ -828,6 +877,9 @@ The server successfully detected this situation and will download merged part fr
M(ReadWriteBufferFromHTTPRequestsSent, "Number of HTTP requests sent by ReadWriteBufferFromHTTP", ValueType::Number) \
M(ReadWriteBufferFromHTTPBytes, "Total size of payload bytes received and sent by ReadWriteBufferFromHTTP. Doesn't include HTTP headers.", ValueType::Bytes) \
\
M(SharedDatabaseCatalogFailedToApplyState, "Number of failures to apply new state in SharedDatabaseCatalog", ValueType::Number) \
M(SharedDatabaseCatalogStateApplicationMicroseconds, "Total time spent on application of new state in SharedDatabaseCatalog", ValueType::Microseconds) \
\
M(GWPAsanAllocateSuccess, "Number of successful allocations done by GWPAsan", ValueType::Number) \
M(GWPAsanAllocateFailed, "Number of failed allocations done by GWPAsan (i.e. filled pool)", ValueType::Number) \
M(GWPAsanFree, "Number of free operations done by GWPAsan", ValueType::Number) \

View File

@ -38,6 +38,9 @@ namespace ProfileEvents
};
Timer(Counters & counters_, Event timer_event_, Resolution resolution_);
Timer(Counters & counters_, Event timer_event_, Event counter_event, Resolution resolution_);
Timer(Timer && other) noexcept
: counters(other.counters), timer_event(std::move(other.timer_event)), watch(std::move(other.watch)), resolution(std::move(other.resolution))
{}
~Timer() { end(); }
void cancel() { watch.reset(); }
void restart() { watch.restart(); }

View File

@ -159,6 +159,8 @@ public:
const std::string & getLastKeeperErrorMessage() const { return keeper_error.message; }
/// action will be called only once and only after latest failed retry
/// NOTE: this will be called only if the retries finish with a Keeper exception;
/// if it is some other exception, this function will not be called.
void actionAfterLastFailedRetry(std::function<void()> f) { action_after_last_failed_retry = std::move(f); }
const std::string & getName() const { return name; }
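To make the contract in the comment above concrete, here is a minimal, self-contained sketch (not the ClickHouse implementation; all names are illustrative) of a retry loop in which the registered action runs at most once, only after the last failed retry, and only when that final failure is the tracked Keeper-style exception:

#include <functional>
#include <iostream>
#include <stdexcept>

struct FakeKeeperException : std::runtime_error { using std::runtime_error::runtime_error; };

// Runs `body` up to `max_retries` times. `action` fires at most once, and only if the
// final attempt fails with FakeKeeperException; any other exception propagates
// immediately without invoking the action.
void retryLoop(int max_retries, const std::function<void()> & body, const std::function<void()> & action)
{
    for (int attempt = 1; attempt <= max_retries; ++attempt)
    {
        try
        {
            body();
            return;            // success: the action is never called
        }
        catch (const FakeKeeperException &)
        {
            if (attempt == max_retries)
            {
                action();      // only after the latest failed retry
                throw;
            }
        }
    }
}

int main()
{
    try
    {
        retryLoop(3,
            [] { throw FakeKeeperException("keeper unavailable"); },
            [] { std::cout << "cleanup after last failed retry\n"; });
    }
    catch (const FakeKeeperException &)
    {
        std::cout << "final error propagated to the caller\n";
    }
    return 0;
}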

View File

@ -2700,7 +2700,7 @@ The maximum read speed in bytes per second for particular backup on server. Zero
Log query performance statistics into the query_log, query_thread_log and query_views_log.
)", 0) \
M(Bool, log_query_settings, true, R"(
Log query settings into the query_log.
Log query settings into the query_log and OpenTelemetry span log.
)", 0) \
M(Bool, log_query_threads, false, R"(
Setting up query threads logging.
@ -4812,6 +4812,9 @@ Max attempts to read with backoff
)", 0) \
M(Bool, enable_filesystem_cache, true, R"(
Use cache for remote filesystem. This setting does not turn on/off cache for disks (must be done via disk config), but allows bypassing the cache for some queries if intended
)", 0) \
M(String, filesystem_cache_name, "", R"(
Filesystem cache name to use for stateless table engines or data lakes
)", 0) \
M(Bool, enable_filesystem_cache_on_write_operations, false, R"(
Write into cache on write operations. To actually work, this setting also needs to be enabled in the disk config
@ -5498,8 +5501,8 @@ Replace external dictionary sources to Null on restore. Useful for testing purpo
M(Bool, create_if_not_exists, false, R"(
Enable `IF NOT EXISTS` for `CREATE` statement by default. If either this setting or `IF NOT EXISTS` is specified and a table with the provided name already exists, no exception will be thrown.
)", 0) \
M(Bool, enable_secure_identifiers, false, R"(
If enabled, only allow secure identifiers which contain only underscore and alphanumeric characters
M(Bool, enforce_strict_identifier_format, false, R"(
If enabled, only allow identifiers containing alphanumeric characters and underscores.
)", 0) \
M(Bool, mongodb_throw_on_unsupported_query, true, R"(
If enabled, MongoDB tables will return an error when a MongoDB query cannot be built. Otherwise, ClickHouse reads the full table and processes it locally. This option does not apply to the legacy implementation or when 'allow_experimental_analyzer=0'.
@ -6199,6 +6202,16 @@ std::vector<std::string_view> Settings::getUnchangedNames() const
return setting_names;
}
std::vector<std::string_view> Settings::getChangedNames() const
{
std::vector<std::string_view> setting_names;
for (const auto & setting : impl->allChanged())
{
setting_names.emplace_back(setting.getName());
}
return setting_names;
}
void Settings::dumpToSystemSettingsColumns(MutableColumnsAndConstraints & params) const
{
MutableColumns & res_columns = params.res_columns;

View File

@ -134,6 +134,7 @@ struct Settings
std::vector<std::string_view> getAllRegisteredNames() const;
std::vector<std::string_view> getChangedAndObsoleteNames() const;
std::vector<std::string_view> getUnchangedNames() const;
std::vector<std::string_view> getChangedNames() const;
void dumpToSystemSettingsColumns(MutableColumnsAndConstraints & params) const;
void dumpToMapColumn(IColumn * column, bool changed_only = true) const;

View File

@ -68,17 +68,18 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
},
{"24.10",
{
{"enforce_strict_identifier_format", false, false, "New setting."},
{"enable_parsing_to_custom_serialization", false, true, "New setting"},
{"mongodb_throw_on_unsupported_query", false, true, "New setting."},
{"enable_parallel_replicas", false, false, "Parallel replicas with read tasks became the Beta tier feature."},
{"parallel_replicas_mode", "read_tasks", "read_tasks", "This setting was introduced as a part of making parallel replicas feature Beta"},
{"filesystem_cache_name", "", "", "Filesystem cache name to use for stateless table engines or data lakes"},
{"restore_replace_external_dictionary_source_to_null", false, false, "New setting."},
{"show_create_query_identifier_quoting_rule", "when_necessary", "when_necessary", "New setting."},
{"show_create_query_identifier_quoting_style", "Backticks", "Backticks", "New setting."},
{"output_format_native_write_json_as_string", false, false, "Add new setting to allow write JSON column as single String column in Native format"},
{"output_format_binary_write_json_as_string", false, false, "Add new setting to write values of JSON type as JSON string in RowBinary output format"},
{"input_format_binary_read_json_as_string", false, false, "Add new setting to read values of JSON type as JSON string in RowBinary input format"},
{"enable_secure_identifiers", false, false, "New setting."},
{"min_free_disk_bytes_to_perform_insert", 0, 0, "New setting."},
{"min_free_disk_ratio_to_perform_insert", 0.0, 0.0, "New setting."},
{"cloud_mode_database_engine", 1, 1, "A setting for ClickHouse Cloud"},

View File

@ -31,7 +31,7 @@ CachedObjectStorage::CachedObjectStorage(
FileCache::Key CachedObjectStorage::getCacheKey(const std::string & path) const
{
return cache->createKeyForPath(path);
return FileCacheKey::fromPath(path);
}
ObjectStorageKey
@ -71,7 +71,7 @@ std::unique_ptr<ReadBufferFromFileBase> CachedObjectStorage::readObject( /// NOL
{
if (cache->isInitialized())
{
auto cache_key = cache->createKeyForPath(object.remote_path);
auto cache_key = FileCacheKey::fromPath(object.remote_path);
auto global_context = Context::getGlobalContextInstance();
auto modified_read_settings = read_settings.withNestedBuffer();

View File

@ -110,7 +110,7 @@ private:
};
const google::protobuf::Descriptor *
ProtobufSchemas::DescriptorHolder
ProtobufSchemas::getMessageTypeForFormatSchema(const FormatSchemaInfo & info, WithEnvelope with_envelope, const String & google_protos_path)
{
std::lock_guard lock(mutex);
@ -119,10 +119,10 @@ ProtobufSchemas::getMessageTypeForFormatSchema(const FormatSchemaInfo & info, Wi
it = importers
.emplace(
info.schemaDirectory(),
std::make_unique<ImporterWithSourceTree>(info.schemaDirectory(), google_protos_path, with_envelope))
std::make_shared<ImporterWithSourceTree>(info.schemaDirectory(), google_protos_path, with_envelope))
.first;
auto * importer = it->second.get();
return importer->import(info.schemaPath(), info.messageName());
return DescriptorHolder(it->second, importer->import(info.schemaPath(), info.messageName()));
}
}

View File

@ -57,14 +57,31 @@ public:
// Clear cached protobuf schemas
void clear();
/// Parses the format schema, then parses the corresponding proto file, and returns the descriptor of the message type.
/// The function never returns nullptr, it throws an exception if it cannot load or parse the file.
const google::protobuf::Descriptor *
class ImporterWithSourceTree;
struct DescriptorHolder
{
DescriptorHolder(std::shared_ptr<ImporterWithSourceTree> importer_, const google::protobuf::Descriptor * message_descriptor_)
: importer(std::move(importer_))
, message_descriptor(message_descriptor_)
{}
private:
std::shared_ptr<ImporterWithSourceTree> importer;
public:
const google::protobuf::Descriptor * message_descriptor;
};
/// Parses the format schema, then parses the corresponding proto file, and
/// returns a holder (since the descriptor is only valid while the
/// ImporterWithSourceTree is valid):
///
/// {ImporterWithSourceTree, protobuf::Descriptor - descriptor of the message type}.
///
/// The function always returns a valid message descriptor; it throws an exception if it cannot load or parse the file.
DescriptorHolder
getMessageTypeForFormatSchema(const FormatSchemaInfo & info, WithEnvelope with_envelope, const String & google_protos_path);
private:
class ImporterWithSourceTree;
std::unordered_map<String, std::unique_ptr<ImporterWithSourceTree>> importers;
std::unordered_map<String, std::shared_ptr<ImporterWithSourceTree>> importers;
std::mutex mutex;
};
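The holder exists because of a lifetime dependency: the message descriptor is owned by the importer, so the caller must keep the importer alive for as long as it dereferences the descriptor. A minimal stand-alone sketch of that pattern with placeholder types (not the actual protobuf or ClickHouse classes):

#include <iostream>
#include <memory>
#include <string>

struct FakeImporter                          // stands in for ImporterWithSourceTree
{
    std::string descriptor = "MessageType";  // stands in for the descriptor pool it owns
    const std::string * import() const { return &descriptor; }
};

struct Holder
{
    std::shared_ptr<FakeImporter> importer;  // keeps the owner of the descriptor alive
    const std::string * descriptor;          // raw pointer into data owned by `importer`
};

Holder getDescriptor()
{
    auto importer = std::make_shared<FakeImporter>();
    return {importer, importer->import()};   // returning only the bare pointer would dangle once the importer died
}

int main()
{
    Holder holder = getDescriptor();
    std::cout << *holder.descriptor << "\n"; // safe: the holder still owns the importer
    return 0;
}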

View File

@ -3864,26 +3864,32 @@ std::unique_ptr<ProtobufSerializer> ProtobufSerializer::create(
const Strings & column_names,
const DataTypes & data_types,
std::vector<size_t> & missing_column_indices,
const google::protobuf::Descriptor & message_descriptor,
const ProtobufSchemas::DescriptorHolder & descriptor,
bool with_length_delimiter,
bool with_envelope,
bool flatten_google_wrappers,
ProtobufReader & reader)
{
return ProtobufSerializerBuilder(reader).buildMessageSerializer(column_names, data_types, missing_column_indices, message_descriptor, with_length_delimiter, with_envelope, flatten_google_wrappers);
return ProtobufSerializerBuilder(reader).buildMessageSerializer(
column_names, data_types, missing_column_indices,
*descriptor.message_descriptor,
with_length_delimiter, with_envelope, flatten_google_wrappers);
}
std::unique_ptr<ProtobufSerializer> ProtobufSerializer::create(
const Strings & column_names,
const DataTypes & data_types,
const google::protobuf::Descriptor & message_descriptor,
const ProtobufSchemas::DescriptorHolder & descriptor,
bool with_length_delimiter,
bool with_envelope,
bool defaults_for_nullable_google_wrappers,
ProtobufWriter & writer)
{
std::vector<size_t> missing_column_indices;
return ProtobufSerializerBuilder(writer).buildMessageSerializer(column_names, data_types, missing_column_indices, message_descriptor, with_length_delimiter, with_envelope, defaults_for_nullable_google_wrappers);
return ProtobufSerializerBuilder(writer).buildMessageSerializer(
column_names, data_types, missing_column_indices,
*descriptor.message_descriptor,
with_length_delimiter, with_envelope, defaults_for_nullable_google_wrappers);
}
NamesAndTypesList protobufSchemaToCHSchema(const google::protobuf::Descriptor * message_descriptor, bool skip_unsupported_fields)

View File

@ -4,7 +4,8 @@
#if USE_PROTOBUF
# include <Columns/IColumn.h>
#include <Core/NamesAndTypes.h>
# include <Core/NamesAndTypes.h>
# include <Formats/ProtobufSchemas.h>
namespace google::protobuf { class Descriptor; }
@ -39,7 +40,7 @@ public:
const Strings & column_names,
const DataTypes & data_types,
std::vector<size_t> & missing_column_indices,
const google::protobuf::Descriptor & message_descriptor,
const ProtobufSchemas::DescriptorHolder & descriptor,
bool with_length_delimiter,
bool with_envelope,
bool flatten_google_wrappers,
@ -48,7 +49,7 @@ public:
static std::unique_ptr<ProtobufSerializer> create(
const Strings & column_names,
const DataTypes & data_types,
const google::protobuf::Descriptor & message_descriptor,
const ProtobufSchemas::DescriptorHolder & descriptor,
bool with_length_delimiter,
bool with_envelope,
bool defaults_for_nullable_google_wrappers,

View File

@ -122,11 +122,6 @@ FileCache::FileCache(const std::string & cache_name, const FileCacheSettings & s
query_limit = std::make_unique<FileCacheQueryLimit>();
}
FileCache::Key FileCache::createKeyForPath(const String & path)
{
return Key(path);
}
const FileCache::UserInfo & FileCache::getCommonUser()
{
static UserInfo user(getCommonUserID(), 0);
@ -1083,7 +1078,7 @@ void FileCache::freeSpaceRatioKeepingThreadFunc()
if (eviction_candidates.size() > 0)
{
LOG_TRACE(log, "Current usage {}/{} in size, {}/{} in elements count "
"(trying to keep size ration at {} and elements ratio at {}). "
"(trying to keep size ratio at {} and elements ratio at {}). "
"Collected {} eviction candidates, "
"skipped {} candidates while iterating",
main_priority->getSize(lock), size_limit,
@ -1168,7 +1163,7 @@ void FileCache::removeFileSegment(const Key & key, size_t offset, const UserID &
void FileCache::removePathIfExists(const String & path, const UserID & user_id)
{
removeKeyIfExists(createKeyForPath(path), user_id);
removeKeyIfExists(Key::fromPath(path), user_id);
}
void FileCache::removeAllReleasable(const UserID & user_id)

View File

@ -88,8 +88,6 @@ public:
const String & getBasePath() const;
static Key createKeyForPath(const String & path);
static const UserInfo & getCommonUser();
static const UserInfo & getInternalUser();

View File

@ -1,5 +1,6 @@
#include "FileCacheFactory.h"
#include "FileCache.h"
#include <Poco/Util/AbstractConfiguration.h>
namespace DB
{
@ -43,6 +44,16 @@ FileCacheFactory::CacheByName FileCacheFactory::getAll()
return caches_by_name;
}
FileCachePtr FileCacheFactory::get(const std::string & cache_name)
{
std::lock_guard lock(mutex);
auto it = caches_by_name.find(cache_name);
if (it == caches_by_name.end())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no cache by name `{}`", cache_name);
return it->second->cache;
}
FileCachePtr FileCacheFactory::getOrCreate(
const std::string & cache_name,
const FileCacheSettings & file_cache_settings,
@ -202,4 +213,20 @@ void FileCacheFactory::clear()
caches_by_name.clear();
}
void FileCacheFactory::loadDefaultCaches(const Poco::Util::AbstractConfiguration & config)
{
Poco::Util::AbstractConfiguration::Keys cache_names;
config.keys(FILECACHE_DEFAULT_CONFIG_PATH, cache_names);
auto * log = &Poco::Logger::get("FileCacheFactory");
LOG_DEBUG(log, "Will load {} caches from default cache config", cache_names.size());
for (const auto & name : cache_names)
{
FileCacheSettings settings;
const auto & config_path = fmt::format("{}.{}", FILECACHE_DEFAULT_CONFIG_PATH, name);
settings.loadFromConfig(config, config_path);
auto cache = getOrCreate(name, settings, config_path);
cache->initialize();
LOG_DEBUG(log, "Loaded cache `{}` from default cache config", name);
}
}
}
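Taken together, loadDefaultCaches() creates and initializes every cache declared under the filesystem_caches section of the server configuration at startup, and the new get() later retrieves one of them by name (which is what the filesystem_cache_name query setting refers to), throwing if no such cache exists. A rough, self-contained sketch of that registry-by-name pattern with placeholder types:

#include <iostream>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>

struct FakeCache { std::string name; };
using FakeCachePtr = std::shared_ptr<FakeCache>;

class FakeCacheFactory
{
public:
    FakeCachePtr getOrCreate(const std::string & name)
    {
        auto it = caches_by_name.find(name);
        if (it == caches_by_name.end())
            it = caches_by_name.emplace(name, std::make_shared<FakeCache>(FakeCache{name})).first;
        return it->second;
    }

    FakeCachePtr get(const std::string & name) const
    {
        auto it = caches_by_name.find(name);
        if (it == caches_by_name.end())
            throw std::runtime_error("There is no cache by name `" + name + "`");
        return it->second;
    }

    /// Corresponds to loading every entry under the "filesystem_caches" config section.
    void loadDefaults(const std::map<std::string, std::string> & config_sections)
    {
        for (const auto & entry : config_sections)
            getOrCreate(entry.first);
    }

private:
    std::map<std::string, FakeCachePtr> caches_by_name;
};

int main()
{
    FakeCacheFactory factory;
    factory.loadDefaults({{"cache_for_s3", "<settings...>"}, {"cache_for_datalakes", "<settings...>"}});
    std::cout << factory.get("cache_for_s3")->name << "\n";  // lookup by the name a query would pass
    return 0;
}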

View File

@ -44,6 +44,8 @@ public:
const FileCacheSettings & file_cache_settings,
const std::string & config_path);
FileCachePtr get(const std::string & cache_name);
FileCachePtr create(
const std::string & cache_name,
const FileCacheSettings & file_cache_settings,
@ -53,8 +55,12 @@ public:
FileCacheDataPtr getByName(const std::string & cache_name);
void loadDefaultCaches(const Poco::Util::AbstractConfiguration & config);
void updateSettingsFromConfig(const Poco::Util::AbstractConfiguration & config);
void remove(FileCachePtr cache);
void clear();
private:

View File

@ -12,11 +12,6 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
FileCacheKey::FileCacheKey(const std::string & path)
: key(sipHash128(path.data(), path.size()))
{
}
FileCacheKey::FileCacheKey(const UInt128 & key_)
: key(key_)
{
@ -32,6 +27,16 @@ FileCacheKey FileCacheKey::random()
return FileCacheKey(UUIDHelpers::generateV4().toUnderType());
}
FileCacheKey FileCacheKey::fromPath(const std::string & path)
{
return FileCacheKey(sipHash128(path.data(), path.size()));
}
FileCacheKey FileCacheKey::fromKey(const UInt128 & key)
{
return FileCacheKey(key);
}
FileCacheKey FileCacheKey::fromKeyString(const std::string & key_str)
{
if (key_str.size() != 32)

View File

@ -14,16 +14,16 @@ struct FileCacheKey
FileCacheKey() = default;
explicit FileCacheKey(const std::string & path);
explicit FileCacheKey(const UInt128 & key_);
static FileCacheKey random();
static FileCacheKey fromPath(const std::string & path);
static FileCacheKey fromKey(const UInt128 & key);
static FileCacheKey fromKeyString(const std::string & key_str);
bool operator==(const FileCacheKey & other) const { return key == other.key; }
bool operator<(const FileCacheKey & other) const { return key < other.key; }
static FileCacheKey fromKeyString(const std::string & key_str);
private:
explicit FileCacheKey(const UInt128 & key_);
};
using FileCacheKeyAndOffset = std::pair<FileCacheKey, size_t>;
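With this change the path constructor is gone and the UInt128 constructor is private, so callers must go through the named factories (fromPath, fromKey, fromKeyString, random), which makes it explicit whether a key is derived by hashing a path or taken verbatim. A minimal sketch of this named-constructor idiom using standard-library stand-ins (std::hash instead of sipHash128, uint64_t instead of UInt128):

#include <cstdint>
#include <functional>
#include <iostream>
#include <string>

struct Key
{
    static Key fromPath(const std::string & path) { return Key(std::hash<std::string>{}(path)); }  // key derived by hashing
    static Key fromKey(uint64_t raw) { return Key(raw); }                                           // key taken verbatim

    bool operator==(const Key & other) const { return value == other.value; }

    uint64_t value;

private:
    explicit Key(uint64_t value_) : value(value_) {}  // private: construction only through the factories
};

int main()
{
    Key a = Key::fromPath("/data/file.parquet");
    Key b = Key::fromKey(42);
    std::cout << a.value << " " << b.value << " " << (a == b) << "\n";
    return 0;
}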

View File

@ -15,10 +15,12 @@ static constexpr size_t FILECACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024;
static constexpr double FILECACHE_DEFAULT_FREE_SPACE_SIZE_RATIO = 0; /// Disabled.
static constexpr double FILECACHE_DEFAULT_FREE_SPACE_ELEMENTS_RATIO = 0; /// Disabled.
static constexpr int FILECACHE_DEFAULT_FREE_SPACE_REMOVE_BATCH = 10;
static constexpr auto FILECACHE_DEFAULT_CONFIG_PATH = "filesystem_caches";
class FileCache;
using FileCachePtr = std::shared_ptr<FileCache>;
struct FileCacheSettings;
struct FileCacheKey;
}

View File

@ -5,6 +5,7 @@
#include <Common/MemoryTrackerBlockerInThread.h>
#include <Common/SensitiveDataMasker.h>
#include <Common/FailPoint.h>
#include <Common/FieldVisitorToString.h>
#include <Interpreters/AsynchronousInsertQueue.h>
#include <Interpreters/Cache/QueryCache.h>
@ -155,7 +156,7 @@ namespace Setting
extern const SettingsBool use_query_cache;
extern const SettingsBool wait_for_async_insert;
extern const SettingsSeconds wait_for_async_insert_timeout;
extern const SettingsBool enable_secure_identifiers;
extern const SettingsBool enforce_strict_identifier_format;
}
namespace ErrorCodes
@ -565,6 +566,25 @@ void logQueryFinish(
query_span->addAttributeIfNotZero("clickhouse.written_rows", elem.written_rows);
query_span->addAttributeIfNotZero("clickhouse.written_bytes", elem.written_bytes);
query_span->addAttributeIfNotZero("clickhouse.memory_usage", elem.memory_usage);
if (context)
{
std::string user_name = context->getUserName();
query_span->addAttribute("clickhouse.user", user_name);
}
if (settings[Setting::log_query_settings])
{
auto changed_settings_names = settings.getChangedNames();
for (const auto & name : changed_settings_names)
{
Field value = settings.get(name);
String value_str = convertFieldToString(value);
query_span->addAttribute(fmt::format("clickhouse.setting.{}", name), value_str);
}
}
query_span->finish();
}
}
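With log_query_settings enabled, each changed setting is attached to the query span as an attribute named clickhouse.setting.<name>. A rough stand-alone illustration of that shape, where a plain map stands in for the Settings object and a vector of pairs stands in for the OpenTelemetry span (the setting names and values here are hypothetical):

#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main()
{
    // Stand-in for Settings::getChangedNames() plus Settings::get(name) converted to strings.
    std::map<std::string, std::string> changed_settings = {
        {"max_threads", "8"},
        {"enable_filesystem_cache", "1"},
    };

    // Stand-in for the OpenTelemetry query span's attribute list.
    std::vector<std::pair<std::string, std::string>> span_attributes;
    for (const auto & [name, value] : changed_settings)
        span_attributes.emplace_back("clickhouse.setting." + name, value);

    for (const auto & [key, value] : span_attributes)
        std::cout << key << " = " << value << "\n";
    return 0;
}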
@ -999,12 +1019,12 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
InterpreterSetQuery::applySettingsFromQuery(ast, context);
validateAnalyzerSettings(ast, settings[Setting::allow_experimental_analyzer]);
if (settings[Setting::enable_secure_identifiers])
if (settings[Setting::enforce_strict_identifier_format])
{
WriteBufferFromOwnString buf;
IAST::FormatSettings enable_secure_identifiers_settings(buf, true);
enable_secure_identifiers_settings.enable_secure_identifiers = true;
ast->format(enable_secure_identifiers_settings);
IAST::FormatSettings enforce_strict_identifier_format_settings(buf, true);
enforce_strict_identifier_format_settings.enforce_strict_identifier_format = true;
ast->format(enforce_strict_identifier_format_settings);
}
if (auto * insert_query = ast->as<ASTInsertQuery>())

View File

@ -372,7 +372,7 @@ TEST_F(FileCacheTest, LRUPolicy)
std::cerr << "Step 1\n";
auto cache = DB::FileCache("1", settings);
cache.initialize();
auto key = DB::FileCache::createKeyForPath("key1");
auto key = DB::FileCacheKey::fromPath("key1");
auto get_or_set = [&](size_t offset, size_t size)
{
@ -736,7 +736,7 @@ TEST_F(FileCacheTest, LRUPolicy)
auto cache2 = DB::FileCache("2", settings);
cache2.initialize();
auto key = DB::FileCache::createKeyForPath("key1");
auto key = DB::FileCacheKey::fromPath("key1");
/// Get [2, 29]
assertEqual(
@ -755,7 +755,7 @@ TEST_F(FileCacheTest, LRUPolicy)
fs::create_directories(settings2.base_path);
auto cache2 = DB::FileCache("3", settings2);
cache2.initialize();
auto key = DB::FileCache::createKeyForPath("key1");
auto key = DB::FileCacheKey::fromPath("key1");
/// Get [0, 24]
assertEqual(
@ -770,7 +770,7 @@ TEST_F(FileCacheTest, LRUPolicy)
auto cache = FileCache("4", settings);
cache.initialize();
const auto key = FileCache::createKeyForPath("key10");
const auto key = FileCacheKey::fromPath("key10");
const auto key_path = cache.getKeyPath(key, user);
cache.removeAllReleasable(user.user_id);
@ -794,7 +794,7 @@ TEST_F(FileCacheTest, LRUPolicy)
auto cache = DB::FileCache("5", settings);
cache.initialize();
const auto key = FileCache::createKeyForPath("key10");
const auto key = FileCacheKey::fromPath("key10");
const auto key_path = cache.getKeyPath(key, user);
cache.removeAllReleasable(user.user_id);
@ -833,7 +833,7 @@ TEST_F(FileCacheTest, writeBuffer)
segment_settings.kind = FileSegmentKind::Ephemeral;
segment_settings.unbounded = true;
auto cache_key = FileCache::createKeyForPath(key);
auto cache_key = FileCacheKey::fromPath(key);
auto holder = cache.set(cache_key, 0, 3, segment_settings, user);
/// The same is done in TemporaryDataOnDisk::createStreamToCacheFile.
std::filesystem::create_directories(cache.getKeyPath(cache_key, user));
@ -961,7 +961,7 @@ TEST_F(FileCacheTest, temporaryData)
const auto user = FileCache::getCommonUser();
auto tmp_data_scope = std::make_shared<TemporaryDataOnDiskScope>(nullptr, &file_cache, TemporaryDataOnDiskSettings{});
auto some_data_holder = file_cache.getOrSet(FileCache::createKeyForPath("some_data"), 0, 5_KiB, 5_KiB, CreateFileSegmentSettings{}, 0, user);
auto some_data_holder = file_cache.getOrSet(FileCacheKey::fromPath("some_data"), 0, 5_KiB, 5_KiB, CreateFileSegmentSettings{}, 0, user);
{
ASSERT_EQ(some_data_holder->size(), 5);
@ -1103,7 +1103,7 @@ TEST_F(FileCacheTest, CachedReadBuffer)
auto cache = std::make_shared<DB::FileCache>("8", settings);
cache->initialize();
auto key = cache->createKeyForPath(file_path);
auto key = DB::FileCacheKey::fromPath(file_path);
const auto user = FileCache::getCommonUser();
{
@ -1219,7 +1219,7 @@ TEST_F(FileCacheTest, SLRUPolicy)
{
auto cache = DB::FileCache(std::to_string(++file_cache_name), settings);
cache.initialize();
auto key = FileCache::createKeyForPath("key1");
auto key = FileCacheKey::fromPath("key1");
auto add_range = [&](size_t offset, size_t size)
{
@ -1342,7 +1342,7 @@ TEST_F(FileCacheTest, SLRUPolicy)
std::string data1(15, '*');
auto file1 = write_file("test1", data1);
auto key1 = cache->createKeyForPath(file1);
auto key1 = DB::FileCacheKey::fromPath(file1);
read_and_check(file1, key1, data1);
@ -1358,7 +1358,7 @@ TEST_F(FileCacheTest, SLRUPolicy)
std::string data2(10, '*');
auto file2 = write_file("test2", data2);
auto key2 = cache->createKeyForPath(file2);
auto key2 = DB::FileCacheKey::fromPath(file2);
read_and_check(file2, key2, data2);

View File

@ -8,6 +8,7 @@
#include <Poco/String.h>
#include <Common/SensitiveDataMasker.h>
#include <Common/SipHash.h>
#include <Common/StringUtils.h>
#include <algorithm>
namespace DB
@ -265,14 +266,14 @@ void IAST::FormatSettings::writeIdentifier(const String & name, bool ambiguous)
void IAST::FormatSettings::checkIdentifier(const String & name) const
{
if (enable_secure_identifiers)
if (enforce_strict_identifier_format)
{
bool is_secure_identifier = std::all_of(name.begin(), name.end(), [](char ch) { return std::isalnum(ch) || ch == '_'; });
if (!is_secure_identifier)
bool is_word_char_identifier = std::all_of(name.begin(), name.end(), isWordCharASCII);
if (!is_word_char_identifier)
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Not a secure identifier: `{}`, a secure identifier must contain only underscore and alphanumeric characters",
"Identifier '{}' contains characters other than alphanumeric and cannot be when enforce_strict_identifier_format is enabled",
name);
}
}
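Concretely, the check accepts only ASCII letters, digits, and underscores. A small stand-alone approximation using std::isalnum in the default "C" locale as a stand-in for ClickHouse's isWordCharASCII:

#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>

// Mirrors the std::all_of check above; std::isalnum in the default "C" locale approximates
// ClickHouse's isWordCharASCII.
static bool isStrictIdentifier(const std::string & name)
{
    return std::all_of(name.begin(), name.end(), [](unsigned char ch) { return std::isalnum(ch) || ch == '_'; });
}

int main()
{
    std::cout << isStrictIdentifier("total_sales_2024") << "\n"; // 1: letters, digits and underscores only
    std::cout << isStrictIdentifier("total-sales") << "\n";      // 0: '-' would be rejected
    std::cout << isStrictIdentifier("naïve") << "\n";            // 0: non-ASCII bytes would be rejected
    return 0;
}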

View File

@ -202,7 +202,7 @@ public:
char nl_or_ws; /// Newline or whitespace.
LiteralEscapingStyle literal_escaping_style;
bool print_pretty_type_names;
bool enable_secure_identifiers;
bool enforce_strict_identifier_format;
explicit FormatSettings(
WriteBuffer & ostr_,
@ -213,7 +213,7 @@ public:
bool show_secrets_ = true,
LiteralEscapingStyle literal_escaping_style_ = LiteralEscapingStyle::Regular,
bool print_pretty_type_names_ = false,
bool enable_secure_identifiers_ = false)
bool enforce_strict_identifier_format_ = false)
: ostr(ostr_)
, one_line(one_line_)
, hilite(hilite_)
@ -223,7 +223,7 @@ public:
, nl_or_ws(one_line ? ' ' : '\n')
, literal_escaping_style(literal_escaping_style_)
, print_pretty_type_names(print_pretty_type_names_)
, enable_secure_identifiers(enable_secure_identifiers_)
, enforce_strict_identifier_format(enforce_strict_identifier_format_)
{
}
@ -237,7 +237,7 @@ public:
, nl_or_ws(other.nl_or_ws)
, literal_escaping_style(other.literal_escaping_style)
, print_pretty_type_names(other.print_pretty_type_names)
, enable_secure_identifiers(other.enable_secure_identifiers)
, enforce_strict_identifier_format(other.enforce_strict_identifier_format)
{
}

View File

@ -23,7 +23,7 @@ ProtobufListInputFormat::ProtobufListInputFormat(
header_.getNames(),
header_.getDataTypes(),
missing_column_indices,
*ProtobufSchemas::instance().getMessageTypeForFormatSchema(
ProtobufSchemas::instance().getMessageTypeForFormatSchema(
schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::Yes, google_protos_path),
/* with_length_delimiter = */ true,
/* with_envelope = */ true,
@ -93,9 +93,9 @@ ProtobufListSchemaReader::ProtobufListSchemaReader(const FormatSettings & format
NamesAndTypesList ProtobufListSchemaReader::readSchema()
{
const auto * message_descriptor
= ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info, ProtobufSchemas::WithEnvelope::Yes, google_protos_path);
return protobufSchemaToCHSchema(message_descriptor, skip_unsupported_fields);
auto descriptor = ProtobufSchemas::instance().getMessageTypeForFormatSchema(
schema_info, ProtobufSchemas::WithEnvelope::Yes, google_protos_path);
return protobufSchemaToCHSchema(descriptor.message_descriptor, skip_unsupported_fields);
}
void registerInputFormatProtobufList(FormatFactory & factory)

View File

@ -20,7 +20,7 @@ ProtobufListOutputFormat::ProtobufListOutputFormat(
, serializer(ProtobufSerializer::create(
header_.getNames(),
header_.getDataTypes(),
*ProtobufSchemas::instance().getMessageTypeForFormatSchema(
ProtobufSchemas::instance().getMessageTypeForFormatSchema(
schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::Yes, google_protos_path),
/* with_length_delimiter = */ true,
/* with_envelope = */ true,

View File

@ -19,7 +19,7 @@ ProtobufRowInputFormat::ProtobufRowInputFormat(
bool flatten_google_wrappers_,
const String & google_protos_path)
: IRowInputFormat(header_, in_, params_)
, message_descriptor(ProtobufSchemas::instance().getMessageTypeForFormatSchema(
, descriptor(ProtobufSchemas::instance().getMessageTypeForFormatSchema(
schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::No, google_protos_path))
, with_length_delimiter(with_length_delimiter_)
, flatten_google_wrappers(flatten_google_wrappers_)
@ -33,7 +33,7 @@ void ProtobufRowInputFormat::createReaderAndSerializer()
getPort().getHeader().getNames(),
getPort().getHeader().getDataTypes(),
missing_column_indices,
*message_descriptor,
descriptor,
with_length_delimiter,
/* with_envelope = */ false,
flatten_google_wrappers,
@ -132,9 +132,9 @@ ProtobufSchemaReader::ProtobufSchemaReader(const FormatSettings & format_setting
NamesAndTypesList ProtobufSchemaReader::readSchema()
{
const auto * message_descriptor
= ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info, ProtobufSchemas::WithEnvelope::No, google_protos_path);
return protobufSchemaToCHSchema(message_descriptor, skip_unsupported_fields);
auto descriptor = ProtobufSchemas::instance().getMessageTypeForFormatSchema(
schema_info, ProtobufSchemas::WithEnvelope::No, google_protos_path);
return protobufSchemaToCHSchema(descriptor.message_descriptor, skip_unsupported_fields);
}
void registerProtobufSchemaReader(FormatFactory & factory)

View File

@ -6,7 +6,7 @@
# include <Processors/Formats/IRowInputFormat.h>
# include <Processors/Formats/ISchemaReader.h>
# include <Formats/FormatSchemaInfo.h>
# include <google/protobuf/descriptor.h>
# include <Formats/ProtobufSchemas.h>
namespace DB
{
@ -57,7 +57,7 @@ private:
std::vector<size_t> missing_column_indices;
std::unique_ptr<ProtobufSerializer> serializer;
const google::protobuf::Descriptor * message_descriptor;
const ProtobufSchemas::DescriptorHolder descriptor;
bool with_length_delimiter;
bool flatten_google_wrappers;
};

View File

@ -27,7 +27,7 @@ ProtobufRowOutputFormat::ProtobufRowOutputFormat(
, serializer(ProtobufSerializer::create(
header_.getNames(),
header_.getDataTypes(),
*ProtobufSchemas::instance().getMessageTypeForFormatSchema(
ProtobufSchemas::instance().getMessageTypeForFormatSchema(
schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::No, settings_.protobuf.google_protos_path),
with_length_delimiter_,
/* with_envelope = */ false,

View File

@ -365,11 +365,11 @@ public:
/// Set limits for current storage.
/// Different limits may be applied to different storages, we need to keep it per processor.
/// This method is need to be override only for sources.
/// This method needs to be overridden only for sources.
virtual void setStorageLimits(const std::shared_ptr<const StorageLimitsList> & /*storage_limits*/) {}
/// This method is called for every processor without input ports.
/// Processor can return a new progress for the last read operation.
/// Processor can return new progress for the last read operation.
/// You should zero internal counters in the call, in order to make it idempotent.
virtual std::optional<ReadProgress> getReadProgress() { return std::nullopt; }
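The idempotency note means a source should report only the progress accumulated since the previous call and then zero its counters, so calling the method twice in a row does not double-count. A hedged sketch with a simplified progress struct (the real ReadProgress is defined differently):

#include <cstddef>
#include <iostream>
#include <optional>

struct FakeReadProgress { std::size_t read_rows = 0; std::size_t read_bytes = 0; };  // simplified stand-in

class FakeSource
{
public:
    void onChunkRead(std::size_t rows, std::size_t bytes)
    {
        pending.read_rows += rows;
        pending.read_bytes += bytes;
    }

    // Reports progress accumulated since the last call and resets the counters,
    // so repeated calls count each row/byte exactly once.
    std::optional<FakeReadProgress> getReadProgress()
    {
        if (pending.read_rows == 0 && pending.read_bytes == 0)
            return std::nullopt;
        FakeReadProgress result = pending;
        pending = {};
        return result;
    }

private:
    FakeReadProgress pending;
};

int main()
{
    FakeSource source;
    source.onChunkRead(100, 4096);
    auto first = source.getReadProgress();   // 100 rows / 4096 bytes
    auto second = source.getReadProgress();  // std::nullopt: the counters were zeroed
    std::cout << (first ? first->read_rows : 0) << " " << second.has_value() << "\n";
    return 0;
}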

View File

@ -1495,11 +1495,10 @@ void WindowTransform::work()
}
}
struct WindowFunctionRank final : public WindowFunction
struct WindowFunctionRank final : public StatelessWindowFunction
{
WindowFunctionRank(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: WindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeUInt64>())
WindowFunctionRank(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_)
: StatelessWindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeUInt64>())
{}
bool allocatesMemoryInArena() const override { return false; }
@ -1514,11 +1513,10 @@ struct WindowFunctionRank final : public WindowFunction
}
};
struct WindowFunctionDenseRank final : public WindowFunction
struct WindowFunctionDenseRank final : public StatelessWindowFunction
{
WindowFunctionDenseRank(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: WindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeUInt64>())
WindowFunctionDenseRank(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_)
: StatelessWindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeUInt64>())
{}
bool allocatesMemoryInArena() const override { return false; }
@ -1716,7 +1714,7 @@ struct WindowFunctionExponentialTimeDecayedSum final : public StatefulWindowFunc
const Float64 decay_length;
};
struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction
struct WindowFunctionExponentialTimeDecayedMax final : public StatelessWindowFunction
{
static constexpr size_t ARGUMENT_VALUE = 0;
static constexpr size_t ARGUMENT_TIME = 1;
@ -1731,9 +1729,8 @@ struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction
return applyVisitor(FieldVisitorConvertToNumber<Float64>(), parameters_[0]);
}
WindowFunctionExponentialTimeDecayedMax(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: WindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeFloat64>())
WindowFunctionExponentialTimeDecayedMax(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_)
: StatelessWindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeFloat64>())
, decay_length(getDecayLength(parameters_, name_))
{
if (argument_types.size() != 2)
@ -1991,11 +1988,10 @@ struct WindowFunctionExponentialTimeDecayedAvg final : public StatefulWindowFunc
const Float64 decay_length;
};
struct WindowFunctionRowNumber final : public WindowFunction
struct WindowFunctionRowNumber final : public StatelessWindowFunction
{
WindowFunctionRowNumber(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: WindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeUInt64>())
WindowFunctionRowNumber(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_)
: StatelessWindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeUInt64>())
{}
bool allocatesMemoryInArena() const override { return false; }
@ -2273,13 +2269,12 @@ public:
// ClickHouse-specific variant of lag/lead that respects the window frame.
template <bool is_lead>
struct WindowFunctionLagLeadInFrame final : public WindowFunction
struct WindowFunctionLagLeadInFrame final : public StatelessWindowFunction
{
FunctionBasePtr func_cast = nullptr;
WindowFunctionLagLeadInFrame(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: WindowFunction(name_, argument_types_, parameters_, createResultType(argument_types_, name_))
WindowFunctionLagLeadInFrame(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_)
: StatelessWindowFunction(name_, argument_types_, parameters_, createResultType(argument_types_, name_))
{
if (!parameters.empty())
{
@ -2427,11 +2422,10 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction
}
};
struct WindowFunctionNthValue final : public WindowFunction
struct WindowFunctionNthValue final : public StatelessWindowFunction
{
WindowFunctionNthValue(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: WindowFunction(name_, argument_types_, parameters_, createResultType(name_, argument_types_))
WindowFunctionNthValue(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_)
: StatelessWindowFunction(name_, argument_types_, parameters_, createResultType(name_, argument_types_))
{
if (!parameters.empty())
{

View File

@ -14,6 +14,7 @@
#include <IO/ReadBufferFromFileBase.h>
#include <IO/ReadHelpers.h>
#include <Storages/ObjectStorage/DataLakes/Common.h>
#include <Storages/ObjectStorage/StorageObjectStorageSource.h>
#include <Processors/Formats/Impl/ArrowBufferedStreams.h>
#include <Processors/Formats/Impl/ParquetBlockInputFormat.h>
@ -185,7 +186,8 @@ struct DeltaLakeMetadataImpl
std::set<String> & result)
{
auto read_settings = context->getReadSettings();
auto buf = object_storage->readObject(StoredObject(metadata_file_path), read_settings);
StorageObjectStorageSource::ObjectInfo object_info(metadata_file_path);
auto buf = StorageObjectStorageSource::createReadBuffer(object_info, object_storage, context, log);
char c;
while (!buf->eof())
@ -492,7 +494,8 @@ struct DeltaLakeMetadataImpl
String json_str;
auto read_settings = context->getReadSettings();
auto buf = object_storage->readObject(StoredObject(last_checkpoint_file), read_settings);
StorageObjectStorageSource::ObjectInfo object_info(last_checkpoint_file);
auto buf = StorageObjectStorageSource::createReadBuffer(object_info, object_storage, context, log);
readJSONObjectPossiblyInvalid(json_str, *buf);
const JSON json(json_str);
@ -557,7 +560,8 @@ struct DeltaLakeMetadataImpl
LOG_TRACE(log, "Using checkpoint file: {}", checkpoint_path.string());
auto read_settings = context->getReadSettings();
auto buf = object_storage->readObject(StoredObject(checkpoint_path), read_settings);
StorageObjectStorageSource::ObjectInfo object_info(checkpoint_path);
auto buf = StorageObjectStorageSource::createReadBuffer(object_info, object_storage, context, log);
auto format_settings = getFormatSettings(context);
/// Force nullable, because this parquet file for some reason does not have nullable

View File

@ -26,6 +26,7 @@
#include <Processors/Formats/Impl/AvroRowInputFormat.h>
#include <Storages/ObjectStorage/DataLakes/IcebergMetadata.h>
#include <Storages/ObjectStorage/DataLakes/Common.h>
#include <Storages/ObjectStorage/StorageObjectStorageSource.h>
#include <Poco/JSON/Array.h>
#include <Poco/JSON/Object.h>
@ -387,9 +388,13 @@ DataLakeMetadataPtr IcebergMetadata::create(
ContextPtr local_context)
{
const auto [metadata_version, metadata_file_path] = getMetadataFileAndVersion(object_storage, *configuration);
LOG_DEBUG(getLogger("IcebergMetadata"), "Parse metadata {}", metadata_file_path);
auto read_settings = local_context->getReadSettings();
auto buf = object_storage->readObject(StoredObject(metadata_file_path), read_settings);
auto log = getLogger("IcebergMetadata");
LOG_DEBUG(log, "Parse metadata {}", metadata_file_path);
StorageObjectStorageSource::ObjectInfo object_info(metadata_file_path);
auto buf = StorageObjectStorageSource::createReadBuffer(object_info, object_storage, local_context, log);
String json_str;
readJSONObjectPossiblyInvalid(json_str, *buf);
@ -456,8 +461,8 @@ Strings IcebergMetadata::getDataFiles() const
LOG_TEST(log, "Collect manifest files from manifest list {}", manifest_list_file);
auto context = getContext();
auto read_settings = context->getReadSettings();
auto manifest_list_buf = object_storage->readObject(StoredObject(manifest_list_file), read_settings);
StorageObjectStorageSource::ObjectInfo object_info(manifest_list_file);
auto manifest_list_buf = StorageObjectStorageSource::createReadBuffer(object_info, object_storage, context, log);
auto manifest_list_file_reader = std::make_unique<avro::DataFileReaderBase>(std::make_unique<AvroInputStreamReadBufferAdapter>(*manifest_list_buf));
auto data_type = AvroSchemaReader::avroNodeToDataType(manifest_list_file_reader->dataSchema().root()->leafAt(0));
@ -487,7 +492,8 @@ Strings IcebergMetadata::getDataFiles() const
{
LOG_TEST(log, "Process manifest file {}", manifest_file);
auto buffer = object_storage->readObject(StoredObject(manifest_file), read_settings);
StorageObjectStorageSource::ObjectInfo manifest_object_info(manifest_file);
auto buffer = StorageObjectStorageSource::createReadBuffer(manifest_object_info, object_storage, context, log);
auto manifest_file_reader = std::make_unique<avro::DataFileReaderBase>(std::make_unique<AvroInputStreamReadBufferAdapter>(*buffer));
/// Manifest file should always have table schema in avro file metadata. By now we don't support tables with evolved schema,

View File

@ -150,7 +150,7 @@ std::unique_ptr<ReadBuffer> ReadBufferIterator::recreateLastReadBuffer()
auto context = getContext();
const auto & path = current_object_info->isArchive() ? current_object_info->getPathToArchive() : current_object_info->getPath();
auto impl = object_storage->readObject(StoredObject(path), context->getReadSettings());
auto impl = StorageObjectStorageSource::createReadBuffer(*current_object_info, object_storage, context, getLogger("ReadBufferIterator"));
const auto compression_method = chooseCompressionMethod(current_object_info->getFileName(), configuration->compression_method);
const auto zstd_window = static_cast<int>(context->getSettingsRef()[Setting::zstd_window_log_max]);
@ -276,11 +276,7 @@ ReadBufferIterator::Data ReadBufferIterator::next()
else
{
compression_method = chooseCompressionMethod(filename, configuration->compression_method);
read_buf = object_storage->readObject(
StoredObject(current_object_info->getPath()),
getContext()->getReadSettings(),
{},
current_object_info->metadata->size_bytes);
read_buf = StorageObjectStorageSource::createReadBuffer(*current_object_info, object_storage, getContext(), getLogger("ReadBufferIterator"));
}
if (!query_settings.skip_empty_files || !read_buf->eof())

View File

@ -7,6 +7,9 @@
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Transforms/ExtractColumnsTransform.h>
#include <IO/ReadBufferFromFileBase.h>
#include <Interpreters/Cache/FileCacheFactory.h>
#include <Interpreters/Cache/FileCache.h>
#include <Disks/IO/CachedOnDiskReadBufferFromFile.h>
#include <IO/Archives/createArchiveReader.h>
#include <Formats/FormatFactory.h>
#include <Disks/IO/AsynchronousBoundedReadBuffer.h>
@ -37,6 +40,7 @@ namespace Setting
extern const SettingsUInt64 max_download_buffer_size;
extern const SettingsMaxThreads max_threads;
extern const SettingsBool use_cache_for_count_from_files;
extern const SettingsString filesystem_cache_name;
}
namespace ErrorCodes
@@ -420,44 +424,110 @@ std::future<StorageObjectStorageSource::ReaderHolder> StorageObjectStorageSource
return create_reader_scheduler([=, this] { return createReader(); }, Priority{});
}
std::unique_ptr<ReadBuffer> StorageObjectStorageSource::createReadBuffer(
const ObjectInfo & object_info, const ObjectStoragePtr & object_storage, const ContextPtr & context_, const LoggerPtr & log)
std::unique_ptr<ReadBufferFromFileBase> StorageObjectStorageSource::createReadBuffer(
ObjectInfo & object_info, const ObjectStoragePtr & object_storage, const ContextPtr & context_, const LoggerPtr & log)
{
const auto & settings = context_->getSettingsRef();
const auto & read_settings = context_->getReadSettings();
const auto filesystem_cache_name = settings[Setting::filesystem_cache_name].value;
bool use_cache = read_settings.enable_filesystem_cache
&& !filesystem_cache_name.empty()
&& (object_storage->getType() == ObjectStorageType::Azure
|| object_storage->getType() == ObjectStorageType::S3);
if (!object_info.metadata)
{
if (!use_cache)
{
return object_storage->readObject(StoredObject(object_info.getPath()), read_settings);
}
object_info.metadata = object_storage->getObjectMetadata(object_info.getPath());
}
const auto & object_size = object_info.metadata->size_bytes;
auto read_settings = context_->getReadSettings().adjustBufferSize(object_size);
auto modified_read_settings = read_settings.adjustBufferSize(object_size);
/// FIXME: Changing this setting to default value breaks something around parquet reading
read_settings.remote_read_min_bytes_for_seek = read_settings.remote_fs_buffer_size;
modified_read_settings.remote_read_min_bytes_for_seek = modified_read_settings.remote_fs_buffer_size;
/// User's object may change, don't cache it.
read_settings.enable_filesystem_cache = false;
read_settings.use_page_cache_for_disks_without_file_cache = false;
const bool object_too_small = object_size <= 2 * context_->getSettingsRef()[Setting::max_download_buffer_size];
const bool use_prefetch = object_too_small
&& read_settings.remote_fs_method == RemoteFSReadMethod::threadpool
&& read_settings.remote_fs_prefetch;
if (use_prefetch)
read_settings.remote_read_buffer_use_external_buffer = true;
auto impl = object_storage->readObject(StoredObject(object_info.getPath(), "", object_size), read_settings);
modified_read_settings.use_page_cache_for_disks_without_file_cache = false;
// Create a read buffer that will prefetch the first ~1 MB of the file.
// When reading lots of tiny files, this prefetching almost doubles the throughput.
// For bigger files, parallel reading is more useful.
if (!use_prefetch)
const bool object_too_small = object_size <= 2 * context_->getSettingsRef()[Setting::max_download_buffer_size];
const bool use_prefetch = object_too_small
&& modified_read_settings.remote_fs_method == RemoteFSReadMethod::threadpool
&& modified_read_settings.remote_fs_prefetch;
/// FIXME: Use async buffer if use_cache,
/// because CachedOnDiskReadBufferFromFile does not work as an independent buffer currently.
const bool use_async_buffer = use_prefetch || use_cache;
if (use_async_buffer)
modified_read_settings.remote_read_buffer_use_external_buffer = true;
std::unique_ptr<ReadBufferFromFileBase> impl;
if (use_cache)
{
if (object_info.metadata->etag.empty())
{
LOG_WARNING(log, "Cannot use filesystem cache, no etag specified");
}
else
{
SipHash hash;
hash.update(object_info.getPath());
hash.update(object_info.metadata->etag);
const auto cache_key = FileCacheKey::fromKey(hash.get128());
auto cache = FileCacheFactory::instance().get(filesystem_cache_name);
auto read_buffer_creator = [path = object_info.getPath(), object_size, modified_read_settings, object_storage]()
{
return object_storage->readObject(StoredObject(path, "", object_size), modified_read_settings);
};
impl = std::make_unique<CachedOnDiskReadBufferFromFile>(
object_info.getPath(),
cache_key,
cache,
FileCache::getCommonUser(),
read_buffer_creator,
modified_read_settings,
std::string(CurrentThread::getQueryId()),
object_size,
/* allow_seeks */true,
/* use_external_buffer */true,
/* read_until_position */std::nullopt,
context_->getFilesystemCacheLog());
LOG_TEST(log, "Using filesystem cache `{}` (path: {}, etag: {}, hash: {})",
filesystem_cache_name, object_info.getPath(),
object_info.metadata->etag, toString(hash.get128()));
}
}
if (!impl)
impl = object_storage->readObject(StoredObject(object_info.getPath(), "", object_size), modified_read_settings);
if (!use_async_buffer)
return impl;
LOG_TRACE(log, "Downloading object of size {} with initial prefetch", object_size);
auto & reader = context_->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER);
impl = std::make_unique<AsynchronousBoundedReadBuffer>(
std::move(impl), reader, read_settings,
std::move(impl), reader, modified_read_settings,
context_->getAsyncReadCounters(),
context_->getFilesystemReadPrefetchesLog());
impl->setReadUntilEnd();
impl->prefetch(DEFAULT_PREFETCH_PRIORITY);
if (use_prefetch)
{
impl->setReadUntilEnd();
impl->prefetch(DEFAULT_PREFETCH_PRIORITY);
}
return impl;
}
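An aside, not part of the commit: the cache key above is a hash of the object path together with its etag, so an object that is overwritten (and therefore gets a new etag) maps to a fresh cache entry instead of serving stale bytes; that is also why the code refuses to use the cache when no etag is available. A minimal Python sketch of the same idea, with hashlib.sha256 standing in for the SipHash-128 used in the code and made-up paths and etags:
import hashlib
def object_cache_key(path: str, etag: str) -> str:
    # Key depends on both path and etag, mirroring createReadBuffer above:
    # a changed object never aliases an older cache entry.
    h = hashlib.sha256()
    h.update(path.encode())
    h.update(etag.encode())
    return h.hexdigest()[:32]
print(object_cache_key("root/table/data.parquet", '"etag-v1"'))  # first upload
print(object_cache_key("root/table/data.parquet", '"etag-v2"'))  # after overwrite: different key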
@@ -787,8 +857,7 @@ StorageObjectStorageSource::ArchiveIterator::createArchiveReader(ObjectInfoPtr o
/* path_to_archive */object_info->getPath(),
/* archive_read_function */[=, this]()
{
StoredObject stored_object(object_info->getPath(), "", size);
return object_storage->readObject(stored_object, getContext()->getReadSettings());
return StorageObjectStorageSource::createReadBuffer(*object_info, object_storage, getContext(), logger);
},
/* archive_size */size);
}

View File

@@ -66,6 +66,11 @@ public:
const ObjectInfo & object_info,
bool include_connection_info = true);
static std::unique_ptr<ReadBufferFromFileBase> createReadBuffer(
ObjectInfo & object_info,
const ObjectStoragePtr & object_storage,
const ContextPtr & context_,
const LoggerPtr & log);
protected:
const String name;
ObjectStoragePtr object_storage;
@@ -135,11 +140,6 @@ protected:
ReaderHolder createReader();
std::future<ReaderHolder> createReaderAsync();
static std::unique_ptr<ReadBuffer> createReadBuffer(
const ObjectInfo & object_info,
const ObjectStoragePtr & object_storage,
const ContextPtr & context_,
const LoggerPtr & log);
void addNumRowsToCache(const ObjectInfo & object_info, size_t num_rows);
void lazyInitialize();

View File

@@ -401,7 +401,7 @@ Chunk SystemRemoteDataPathsSource::generate()
if (cache)
{
auto cache_paths = cache->tryGetCachePaths(cache->createKeyForPath(object.remote_path));
auto cache_paths = cache->tryGetCachePaths(FileCacheKey::fromPath(object.remote_path));
col_cache_paths->insert(Array(cache_paths.begin(), cache_paths.end()));
}
else

View File

@@ -292,6 +292,7 @@ def clickhouse_execute_http(
"http_receive_timeout": timeout,
"http_send_timeout": timeout,
"output_format_parallel_formatting": 0,
"max_rows_to_read": 0, # Some queries read from system.text_log which might get too big
}
if settings is not None:
params.update(settings)

View File

@@ -19,7 +19,7 @@
<type>cache</type>
<disk>s3_disk</disk>
<path>s3_cache/</path>
<max_size>104857600</max_size>
<max_size>209715200</max_size>
<max_file_segment_size>5Mi</max_file_segment_size>
<cache_on_write_operations>1</cache_on_write_operations>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>

View File

@@ -1,7 +1,7 @@
services:
postgres1:
image: postgres
command: ["postgres", "-c", "wal_level=logical", "-c", "max_replication_slots=4", "-c", "logging_collector=on", "-c", "log_directory=/postgres/logs", "-c", "log_statement=all", "-c", "max_connections=200"]
command: ["postgres", "-c", "wal_level=logical", "-c", "max_replication_slots=4", "-c", "logging_collector=on", "-c", "log_directory=/postgres/logs", "-c", "log_statement=all", "-c", "max_connections=200", "-c", "statement_timeout=180000"] # statement_timeout: avoid test timeout because of postgres getting unavailable
restart: always
expose:
- ${POSTGRES_PORT:-5432}

View File

@@ -1412,7 +1412,7 @@ def test_parallel_read(cluster):
res = azure_query(
node,
f"select count() from azureBlobStorage('{connection_string}', 'cont', 'test_parallel_read.parquet')",
f"select count() from azureBlobStorage('{connection_string}', 'cont', 'test_parallel_read.parquet') settings remote_filesystem_read_method='read'",
)
assert int(res) == 10000
assert_logs_contain_with_retry(node, "AzureBlobStorage readBigAt read bytes")

View File

@@ -0,0 +1,8 @@
<clickhouse>
<filesystem_caches>
<cache1>
<max_size>1Gi</max_size>
<path>cache1</path>
</cache1>
</filesystem_caches>
</clickhouse>
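For context, an illustration that is not part of the commit: a cache declared like this is opted into per query via the new filesystem_cache_name setting exercised by the tests below. A minimal sketch over the HTTP interface from Python; the server address, bucket URL, and cache name are placeholders:
import urllib.parse
import urllib.request
# Placeholder endpoint and object URL; point these at a real server and bucket.
CLICKHOUSE_URL = "http://localhost:8123/"
QUERY = (
    "SELECT count() FROM s3('http://minio:9000/bucket/data.tsv', 'TSV', 'x UInt64') "
    "SETTINGS filesystem_cache_name = 'cache1', enable_filesystem_cache = 1"
)
with urllib.request.urlopen(CLICKHOUSE_URL + "?" + urllib.parse.urlencode({"query": QUERY})) as resp:
    print(resp.read().decode())  # prints the row count served through cache1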

View File

@@ -5,6 +5,7 @@ import os
import random
import string
import time
import uuid
from datetime import datetime
import delta
@@ -70,7 +71,10 @@ def started_cluster():
cluster = ClickHouseCluster(__file__, with_spark=True)
cluster.add_instance(
"node1",
main_configs=["configs/config.d/named_collections.xml"],
main_configs=[
"configs/config.d/named_collections.xml",
"configs/config.d/filesystem_caches.xml",
],
user_configs=["configs/users.d/users.xml"],
with_minio=True,
stay_alive=True,
@@ -826,3 +830,64 @@ def test_complex_types(started_cluster):
f"SELECT metadata FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/root/{table_name}' , 'minio', 'minio123')"
)
)
@pytest.mark.parametrize("storage_type", ["s3"])
def test_filesystem_cache(started_cluster, storage_type):
instance = started_cluster.instances["node1"]
spark = started_cluster.spark_session
minio_client = started_cluster.minio_client
TABLE_NAME = randomize_table_name("test_filesystem_cache")
bucket = started_cluster.minio_bucket
if not minio_client.bucket_exists(bucket):
minio_client.make_bucket(bucket)
parquet_data_path = create_initial_data_file(
started_cluster,
instance,
"SELECT number, toString(number) FROM numbers(100)",
TABLE_NAME,
)
write_delta_from_file(spark, parquet_data_path, f"/{TABLE_NAME}")
upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "")
create_delta_table(instance, TABLE_NAME, bucket=bucket)
query_id = f"{TABLE_NAME}-{uuid.uuid4()}"
instance.query(
f"SELECT * FROM {TABLE_NAME} SETTINGS filesystem_cache_name = 'cache1'",
query_id=query_id,
)
instance.query("SYSTEM FLUSH LOGS")
count = int(
instance.query(
f"SELECT ProfileEvents['CachedReadBufferCacheWriteBytes'] FROM system.query_log WHERE query_id = '{query_id}' AND type = 'QueryFinish'"
)
)
assert 0 < int(
instance.query(
f"SELECT ProfileEvents['S3GetObject'] FROM system.query_log WHERE query_id = '{query_id}' AND type = 'QueryFinish'"
)
)
query_id = f"{TABLE_NAME}-{uuid.uuid4()}"
instance.query(
f"SELECT * FROM {TABLE_NAME} SETTINGS filesystem_cache_name = 'cache1'",
query_id=query_id,
)
instance.query("SYSTEM FLUSH LOGS")
assert count == int(
instance.query(
f"SELECT ProfileEvents['CachedReadBufferReadFromCacheBytes'] FROM system.query_log WHERE query_id = '{query_id}' AND type = 'QueryFinish'"
)
)
assert 0 == int(
instance.query(
f"SELECT ProfileEvents['S3GetObject'] FROM system.query_log WHERE query_id = '{query_id}' AND type = 'QueryFinish'"
)
)

View File

@@ -0,0 +1,8 @@
<clickhouse>
<filesystem_caches>
<cache1>
<max_size>1Gi</max_size>
<path>cache1</path>
</cache1>
</filesystem_caches>
</clickhouse>

View File

@@ -72,7 +72,10 @@ def started_cluster():
with_hdfs = False
cluster.add_instance(
"node1",
main_configs=["configs/config.d/named_collections.xml"],
main_configs=[
"configs/config.d/named_collections.xml",
"configs/config.d/filesystem_caches.xml",
],
user_configs=["configs/users.d/users.xml"],
with_minio=True,
with_azurite=True,
@@ -870,3 +873,66 @@ def test_restart_broken_s3(started_cluster):
)
assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100
@pytest.mark.parametrize("storage_type", ["s3"])
def test_filesystem_cache(started_cluster, storage_type):
instance = started_cluster.instances["node1"]
spark = started_cluster.spark_session
TABLE_NAME = "test_filesystem_cache_" + storage_type + "_" + get_uuid_str()
write_iceberg_from_df(
spark,
generate_data(spark, 0, 10),
TABLE_NAME,
mode="overwrite",
format_version="1",
partition_by="a",
)
default_upload_directory(
started_cluster,
storage_type,
f"/iceberg_data/default/{TABLE_NAME}/",
f"/iceberg_data/default/{TABLE_NAME}/",
)
create_iceberg_table(storage_type, instance, TABLE_NAME, started_cluster)
query_id = f"{TABLE_NAME}-{uuid.uuid4()}"
instance.query(
f"SELECT * FROM {TABLE_NAME} SETTINGS filesystem_cache_name = 'cache1'",
query_id=query_id,
)
instance.query("SYSTEM FLUSH LOGS")
count = int(
instance.query(
f"SELECT ProfileEvents['CachedReadBufferCacheWriteBytes'] FROM system.query_log WHERE query_id = '{query_id}' AND type = 'QueryFinish'"
)
)
assert 0 < int(
instance.query(
f"SELECT ProfileEvents['S3GetObject'] FROM system.query_log WHERE query_id = '{query_id}' AND type = 'QueryFinish'"
)
)
query_id = f"{TABLE_NAME}-{uuid.uuid4()}"
instance.query(
f"SELECT * FROM {TABLE_NAME} SETTINGS filesystem_cache_name = 'cache1'",
query_id=query_id,
)
instance.query("SYSTEM FLUSH LOGS")
assert count == int(
instance.query(
f"SELECT ProfileEvents['CachedReadBufferReadFromCacheBytes'] FROM system.query_log WHERE query_id = '{query_id}' AND type = 'QueryFinish'"
)
)
assert 0 == int(
instance.query(
f"SELECT ProfileEvents['S3GetObject'] FROM system.query_log WHERE query_id = '{query_id}' AND type = 'QueryFinish'"
)
)

View File

@@ -0,0 +1,8 @@
<clickhouse>
<filesystem_caches>
<cache1>
<max_size>1Gi</max_size>
<path>cache1</path>
</cache1>
</filesystem_caches>
</clickhouse>

View File

@@ -56,6 +56,7 @@ def started_cluster():
"configs/named_collections.xml",
"configs/schema_cache.xml",
"configs/blob_log.xml",
"configs/filesystem_caches.xml",
],
user_configs=[
"configs/access.xml",
@@ -2394,3 +2395,61 @@ def test_respect_object_existence_on_partitioned_write(started_cluster):
)
assert int(result) == 44
def test_filesystem_cache(started_cluster):
id = uuid.uuid4()
bucket = started_cluster.minio_bucket
instance = started_cluster.instances["dummy"]
table_name = f"test_filesystem_cache-{uuid.uuid4()}"
instance.query(
f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{table_name}.tsv', auto, 'x UInt64') select number from numbers(100) SETTINGS s3_truncate_on_insert=1"
)
query_id = f"{table_name}-{uuid.uuid4()}"
instance.query(
f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{table_name}.tsv') SETTINGS filesystem_cache_name = 'cache1', enable_filesystem_cache=1",
query_id=query_id,
)
instance.query("SYSTEM FLUSH LOGS")
count = int(
instance.query(
f"SELECT ProfileEvents['CachedReadBufferCacheWriteBytes'] FROM system.query_log WHERE query_id = '{query_id}' AND type = 'QueryFinish'"
)
)
assert count == 290
assert 0 < int(
instance.query(
f"SELECT ProfileEvents['S3GetObject'] FROM system.query_log WHERE query_id = '{query_id}' AND type = 'QueryFinish'"
)
)
instance.query("SYSTEM DROP SCHEMA CACHE")
query_id = f"{table_name}-{uuid.uuid4()}"
instance.query(
f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{table_name}.tsv') SETTINGS filesystem_cache_name = 'cache1', enable_filesystem_cache=1",
query_id=query_id,
)
instance.query("SYSTEM FLUSH LOGS")
assert count * 2 == int(
instance.query(
f"SELECT ProfileEvents['CachedReadBufferReadFromCacheBytes'] FROM system.query_log WHERE query_id = '{query_id}' AND type = 'QueryFinish'"
)
)
assert 0 == int(
instance.query(
f"SELECT ProfileEvents['CachedReadBufferCacheWriteBytes'] FROM system.query_log WHERE query_id = '{query_id}' AND type = 'QueryFinish'"
)
)
assert 0 == int(
instance.query(
f"SELECT ProfileEvents['S3GetObject'] FROM system.query_log WHERE query_id = '{query_id}' AND type = 'QueryFinish'"
)
)

View File

@@ -9,7 +9,7 @@ system flush logs;
drop table if exists logs;
create view logs as select * from system.text_log where now() - toIntervalMinute(120) < event_time;
SET max_rows_to_read = 0;
SET max_rows_to_read = 0; -- system.text_log can be really big
-- Check that we don't have too many messages formatted with fmt::runtime or strings concatenation.
-- 0.001 threshold should be always enough, the value was about 0.00025

View File

@@ -144,8 +144,7 @@ echo 9
$CLICKHOUSE_CLIENT \
--server_logs_file=/dev/null \
--query="SELECT if( count() > 0, 'text_log non empty', 'text_log empty') FROM system.text_log WHERE event_date >= yesterday() and message like '%find_me%';
select * from system.text_log where event_date >= yesterday() and message like '%TOPSECRET=TOPSECRET%';" --ignore-error
select * from system.text_log where event_date >= yesterday() and message like '%TOPSECRET=TOPSECRET%' SETTINGS max_rows_to_read = 0" --ignore-error
echo 'finish'
rm -f "$tmp_file" >/dev/null 2>&1
rm -f "$tmp_file2" >/dev/null 2>&1

View File

@@ -6,12 +6,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
${CLICKHOUSE_CLIENT} --query="SELECT 6103"
for (( i=1; i <= 50; i++ ))
for (( i=1; i <= 50; i++ ))
do
${CLICKHOUSE_CLIENT} --query="SYSTEM FLUSH LOGS"
sleep 0.1;
if [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() > 0 FROM system.text_log WHERE position(system.text_log.message, 'SELECT 6103') > 0 AND event_date >= yesterday()") == 1 ]]; then echo 1; exit; fi;
if [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() > 0 FROM system.text_log WHERE position(system.text_log.message, 'SELECT 6103') > 0 AND event_date >= yesterday() SETTINGS max_rows_to_read = 0") == 1 ]]; then echo 1; exit; fi;
done;

View File

@@ -1,5 +1,7 @@
-- Tags: zookeeper
SET max_rows_to_read = 0; -- system.text_log can be really big
create table rmt1 (d DateTime, n int) engine=ReplicatedMergeTree('/test/01165/{database}/rmt', '1') order by n partition by toYYYYMMDD(d);
create table rmt2 (d DateTime, n int) engine=ReplicatedMergeTree('/test/01165/{database}/rmt', '2') order by n partition by toYYYYMMDD(d);

View File

@@ -2,5 +2,6 @@ SeLeCt 'ab
cd' /* hello */ -- world
, 1;
SET max_rows_to_read = 0; -- system.text_log can be really big
SYSTEM FLUSH LOGS;
SELECT extract(message, 'SeL.+?;') FROM system.text_log WHERE event_date >= yesterday() AND message LIKE '%SeLeCt \'ab\n%' ORDER BY event_time DESC LIMIT 1 FORMAT TSVRaw;

View File

@@ -30,7 +30,7 @@ $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 --max_block_siz
# Now wait for cleanup thread
for _ in {1..60}; do
$CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS"
[[ $($CLICKHOUSE_CLIENT --query "SELECT sum(toUInt32(extract(message, 'Removed (\d+) old log entries'))) FROM system.text_log WHERE event_date >= yesterday() AND logger_name LIKE '%' || '$CLICKHOUSE_DATABASE' || '%r1%(ReplicatedMergeTreeCleanupThread)%' AND message LIKE '%Removed % old log entries%'") -gt $((SCALE - 10)) ]] && break;
[[ $($CLICKHOUSE_CLIENT --query "SELECT sum(toUInt32(extract(message, 'Removed (\d+) old log entries'))) FROM system.text_log WHERE event_date >= yesterday() AND logger_name LIKE '%' || '$CLICKHOUSE_DATABASE' || '%r1%(ReplicatedMergeTreeCleanupThread)%' AND message LIKE '%Removed % old log entries%' SETTINGS max_rows_to_read = 0") -gt $((SCALE - 10)) ]] && break;
sleep 1
done

View File

@@ -72,6 +72,6 @@ ${CLICKHOUSE_CLIENT} --query_id "$query_id" --query "select i from simple where
# We have to search the server's error log because the following warning message
# is generated during pipeline destruction and thus is not sent to the client.
${CLICKHOUSE_CLIENT} --query "system flush logs"
if [[ $(${CLICKHOUSE_CLIENT} --query "select count() > 0 from system.text_log where query_id = '$query_id' and level = 'Warning' and message like '%We have query_id removed but it\'s not recorded. This is a bug%' format TSVRaw") == 1 ]]; then echo "We have query_id removed but it's not recorded. This is a bug." >&2; exit 1; fi
if [[ $(${CLICKHOUSE_CLIENT} --query "select count() > 0 from system.text_log where query_id = '$query_id' and level = 'Warning' and message like '%We have query_id removed but it\'s not recorded. This is a bug%' format TSVRaw SETTINGS max_rows_to_read = 0") == 1 ]]; then echo "We have query_id removed but it's not recorded. This is a bug." >&2; exit 1; fi
${CLICKHOUSE_CLIENT} --query "drop table simple"

View File

@@ -35,7 +35,7 @@ $CLICKHOUSE_CLIENT -m -q "
-- OPTIMIZE TABLE x FINAL will be done in background
-- attach to it's log, via table UUID in query_id (see merger/mutator code).
create materialized view this_text_log engine=Memory() as
select * from system.text_log where query_id like '%${ttl_02262_uuid}%';
select * from system.text_log where query_id like '%${ttl_02262_uuid}%' SETTINGS max_rows_to_read = 0;
optimize table ttl_02262 final;
system flush logs;

View File

@@ -2,3 +2,5 @@
{"query":"show databases format Null\n "}
{"query":"insert into opentelemetry_test values","read_rows":"3","written_rows":"3"}
{"query":"select * from opentelemetry_test format Null\n ","read_rows":"3","written_rows":""}
{"query":"SELECT * FROM opentelemetry_test FORMAT Null\n ","read_rows":"3","written_rows":""}
{"min_compress_block_size":"present","max_block_size":"present","max_execution_time":"present"}

View File

@@ -44,6 +44,37 @@ ${CLICKHOUSE_CLIENT} -q "
;"
}
function check_query_settings()
{
result=$(${CLICKHOUSE_CLIENT} -q "
SYSTEM FLUSH LOGS;
SELECT attribute['clickhouse.setting.min_compress_block_size'],
attribute['clickhouse.setting.max_block_size'],
attribute['clickhouse.setting.max_execution_time']
FROM system.opentelemetry_span_log
WHERE finish_date >= yesterday()
AND operation_name = 'query'
AND attribute['clickhouse.query_id'] = '${1}'
FORMAT JSONEachRow;
")
local min_present="not found"
local max_present="not found"
local execution_time_present="not found"
if [[ $result == *"min_compress_block_size"* ]]; then
min_present="present"
fi
if [[ $result == *"max_block_size"* ]]; then
max_present="present"
fi
if [[ $result == *"max_execution_time"* ]]; then
execution_time_present="present"
fi
echo "{\"min_compress_block_size\":\"$min_present\",\"max_block_size\":\"$max_present\",\"max_execution_time\":\"$execution_time_present\"}"
}
#
# Set up
#
@@ -73,6 +104,11 @@ query_id=$(${CLICKHOUSE_CLIENT} -q "select generateUUIDv4()");
execute_query $query_id 'select * from opentelemetry_test format Null'
check_query_span $query_id
# Test 5: A normal select query with a setting
query_id=$(${CLICKHOUSE_CLIENT} -q "SELECT generateUUIDv4() SETTINGS max_execution_time=3600")
execute_query "$query_id" 'SELECT * FROM opentelemetry_test FORMAT Null'
check_query_span "$query_id"
check_query_settings "$query_id" "max_execution_time"
#
# Tear down

View File

@@ -64,5 +64,6 @@ drop table rmt;
drop table rmt2;
system flush logs;
SET max_rows_to_read = 0; -- system.text_log can be really big
select count() > 0 from system.text_log where yesterday() <= event_date and logger_name like '%' || currentDatabase() || '%' and message like '%Removing % parts from filesystem (concurrently): Parts:%';
select count() > 1, countDistinct(thread_id) > 1 from system.text_log where yesterday() <= event_date and logger_name like '%' || currentDatabase() || '%' and message like '%Removing % parts in blocks range%';

View File

@@ -99,4 +99,4 @@ $CLICKHOUSE_CLIENT -q 'system flush logs'
$CLICKHOUSE_CLIENT -q "select count() > 0 from system.text_log where event_date >= yesterday() and query_id like '$TEST_MARK%' and (
message_format_string in ('Unexpected end of file while reading chunk header of HTTP chunked data', 'Unexpected EOF, got {} of {} bytes',
'Query was cancelled or a client has unexpectedly dropped the connection') or
message like '%Connection reset by peer%' or message like '%Broken pipe, while writing to socket%')"
message like '%Connection reset by peer%' or message like '%Broken pipe, while writing to socket%') SETTINGS max_rows_to_read = 0"

View File

@@ -15,6 +15,7 @@ alter table t materialize projection p_norm settings mutations_sync = 1;
SYSTEM FLUSH LOGS;
SET max_rows_to_read = 0; -- system.text_log can be really big
SELECT * FROM system.text_log WHERE event_time >= now() - 30 and level == 'Error' and message like '%BAD_DATA_PART_NAME%'and message like '%p_norm%';
DROP TABLE IF EXISTS t;

View File

@@ -12,5 +12,5 @@
query_id=$(echo "select queryID() from (select sum(s), k from remote('127.0.0.{1,2}', view(select sum(number) s, bitAnd(number, 3) k from numbers_mt(1000000) group by k)) group by k) limit 1 settings group_by_two_level_threshold=1, max_threads=3, prefer_localhost_replica=1" | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- 2>&1)
${CLICKHOUSE_CLIENT} --query="system flush logs"
${CLICKHOUSE_CLIENT} --query="select count() from system.text_log where event_date >= yesterday() and query_id = '${query_id}' and message like '%Converting aggregation data to two-level%'" | grep -P '^6$' && break;
${CLICKHOUSE_CLIENT} --query="select count() from system.text_log where event_date >= yesterday() and query_id = '${query_id}' and message like '%Converting aggregation data to two-level%' SETTINGS max_rows_to_read = 0" | grep -P '^6$' && break;
done

View File

@@ -48,6 +48,7 @@ $CLICKHOUSE_CLIENT --query "
SELECT 'id_' || splitByChar('_', query_id)[1] AS id FROM system.text_log
WHERE query_id LIKE '%$query_id_suffix' AND message LIKE '%$message%'
ORDER BY id
SETTINGS max_rows_to_read = 0
"
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS t_async_insert_fallback"

View File

@@ -30,6 +30,7 @@ SELECT count() FROM 02581_trips SETTINGS select_sequential_consistency = 1;
DELETE FROM 02581_trips WHERE id IN (SELECT (number*10 + 9)::UInt32 FROM numbers(10000000)) SETTINGS lightweight_deletes_sync = 2;
SELECT count(), _part from 02581_trips WHERE description = '' GROUP BY _part ORDER BY _part SETTINGS select_sequential_consistency=1;
SET max_rows_to_read = 0; -- system.text_log can be really big
SYSTEM FLUSH LOGS;
-- Check that in every mutation there were parts that built sets (log messages like 'Created Set with 10000000 entries from 10000000 rows in 0.388989187 sec.' )
-- and parts that shared sets (log messages like 'Got set from cache in 0.388930505 sec.' )

View File

@@ -58,6 +58,7 @@ WHERE
SETTINGS mutations_sync=2;
SELECT count() from 02581_trips WHERE description = '';
SET max_rows_to_read = 0; -- system.text_log can be really big
SYSTEM FLUSH LOGS;
-- Check that in every mutation there were parts that built sets (log messages like 'Created Set with 10000000 entries from 10000000 rows in 0.388989187 sec.' )
-- and parts that shared sets (log messages like 'Got set from cache in 0.388930505 sec.' )

View File

@@ -30,5 +30,5 @@ ORDER BY column;
DROP TABLE t_sparse_columns_clear SYNC;
SYSTEM FLUSH LOGS;
SET max_rows_to_read = 0; -- system.text_log can be really big
SELECT count(), groupArray(message) FROM system.text_log WHERE logger_name LIKE '%' || currentDatabase() || '.t_sparse_columns_clear' || '%' AND level = 'Error';

View File

@@ -1,5 +1,6 @@
-- Tags: no-parallel
SET max_rows_to_read = 0; -- system.text_log can be really big
SELECT 'Hello', throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }
SYSTEM FLUSH LOGS;

View File

@@ -1,2 +1,3 @@
SYSTEM FLUSH LOGS;
SET max_rows_to_read = 0; -- system.text_log can be really big
SELECT count() > 0 FROM system.text_log WHERE event_date >= yesterday() AND message LIKE '%Starting ClickHouse%';

View File

@@ -8,6 +8,7 @@ SELECT count() FROM clusterAllReplicas('test_cluster_two_shard_three_replicas_lo
SYSTEM FLUSH LOGS;
SET enable_parallel_replicas=0;
SET max_rows_to_read = 0; -- system.text_log can be really big
SELECT count() > 0 FROM system.text_log
WHERE query_id in (select query_id from system.query_log where current_database = currentDatabase() AND log_comment = '02875_190aed82-2423-413b-ad4c-24dcca50f65b')
AND message LIKE '%Parallel reading from replicas is disabled for cluster%';

View File

@@ -8,6 +8,7 @@ SELECT count() FROM remote('127.0.0.{1..6}', currentDatabase(), tt) settings log
SYSTEM FLUSH LOGS;
SET enable_parallel_replicas=0;
SET max_rows_to_read = 0; -- system.text_log can be really big
SELECT count() > 0 FROM system.text_log
WHERE query_id in (select query_id from system.query_log where current_database = currentDatabase() AND log_comment = '02875_89f3c39b-1919-48cb-b66e-ef9904e73146')
AND message LIKE '%Parallel reading from replicas is disabled for cluster%';

View File

@@ -34,6 +34,7 @@ SELECT count() FROM checksums_r3;
SYSTEM FLUSH LOGS;
SET max_rows_to_read = 0; -- system.text_log can be really big
SELECT * FROM system.text_log WHERE event_time >= now() - INTERVAL 120 SECOND and level == 'Error' and message like '%CHECKSUM_DOESNT_MATCH%' and logger_name like ('%' || currentDatabase() || '%checksums_r%');
DROP TABLE IF EXISTS checksums_r3;

View File

@@ -21,6 +21,7 @@ SELECT count(), min(k), max(k), avg(k) FROM t1 SETTINGS log_comment='02898_defau
-- check logs
SYSTEM FLUSH LOGS;
SET max_rows_to_read = 0; -- system.text_log can be really big
SELECT count() > 0 FROM system.text_log
WHERE query_id in (select query_id from system.query_log where current_database = currentDatabase() AND log_comment='02898_default_190aed82-2423-413b-ad4c-24dcca50f65b')
AND message LIKE '%Total rows to read: 3000%' SETTINGS enable_parallel_replicas=0;

Some files were not shown because too many files have changed in this diff