Merge remote-tracking branch 'origin/master' into pr-enable-local-plan

This commit is contained in:
Igor Nikonov 2024-10-20 18:37:35 +00:00
commit fee4df8a28
533 changed files with 8050 additions and 15997 deletions

View File

@ -545,7 +545,7 @@ endif()
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND NOT ENABLE_FUZZING
AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64))
AND OMIT_HEAVY_DEBUG_SYMBOLS AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64))
set(CHECK_LARGE_OBJECT_SIZES_DEFAULT ON)
else ()
set(CHECK_LARGE_OBJECT_SIZES_DEFAULT OFF)

View File

@ -11,6 +11,9 @@ if (GLIBC_COMPATIBILITY)
if (ARCH_AARCH64)
list (APPEND glibc_compatibility_sources musl/aarch64/syscall.s musl/aarch64/longjmp.s)
set (musl_arch_include_dir musl/aarch64)
# Disable getauxval in aarch64. ARM glibc minimum requirement for the project is 2.18 and getauxval is present
# in 2.16. Having a custom one introduces issues with sanitizers
list (REMOVE_ITEM glibc_compatibility_sources musl/getauxval.c)
elseif (ARCH_AMD64)
list (APPEND glibc_compatibility_sources musl/x86_64/syscall.s musl/x86_64/longjmp.s)
set (musl_arch_include_dir musl/x86_64)
@ -18,7 +21,7 @@ if (GLIBC_COMPATIBILITY)
message (FATAL_ERROR "glibc_compatibility can only be used on x86_64 or aarch64.")
endif ()
if (SANITIZE STREQUAL thread)
if (SANITIZE STREQUAL thread AND ARCH_AMD64)
# Disable TSAN instrumentation that conflicts with re-exec due to high ASLR entropy using getauxval
# See longer comment in __auxv_init_procfs
# In the case of tsan we need to make sure getauxval is not instrumented as that would introduce tsan

View File

@ -1,4 +1,21 @@
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_DEBUG=0") # More checks in debug build.
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
# Enable libcxx debug mode: https://releases.llvm.org/15.0.0/projects/libcxx/docs/DesignDocs/DebugMode.html
# The docs say the debug mode violates complexity guarantees, so do this only for Debug builds.
# set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_ENABLE_DEBUG_MODE=1")
# ^^ Crashes the database upon startup, needs investigation.
# Besides that, the implementation looks like a poor man's MSAN specific to libcxx. Since CI tests MSAN
# anyways, we can keep the debug mode disabled.
# Libcxx also provides extra assertions:
# --> https://releases.llvm.org/15.0.0/projects/libcxx/docs/UsingLibcxx.html#assertions-mode
# These look orthogonal to the debug mode but the debug mode enables them implicitly:
# --> https://github.com/llvm/llvm-project/blob/release/15.x/libcxx/include/__assert#L29
# They are cheap and straightforward, so enable them in debug builds:
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_ENABLE_ASSERTIONS=1")
# TODO Once we upgrade to LLVM 18+, reconsider all of the above as they introduced "hardening modes":
# https://libcxx.llvm.org/Hardening.html
endif ()
add_subdirectory(contrib/libcxxabi-cmake)
add_subdirectory(contrib/libcxx-cmake)

View File

@ -1,6 +1,9 @@
set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
set(ABSL_COMMON_INCLUDE_DIRS "${ABSL_ROOT_DIR}")
# To avoid errors "'X' does not refer to a value" while using `offsetof` function.
set(CMAKE_CXX_STANDARD 17)
# This is a minimized version of the function definition in CMake/AbseilHelpers.cmake
#

View File

@ -5,6 +5,9 @@ if(NOT ENABLE_PROTOBUF)
return()
endif()
# To avoid errors "'X' does not refer to a value" while using `offsetof` function.
set(CMAKE_CXX_STANDARD 17)
set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf/src")
if(OS_FREEBSD AND SANITIZE STREQUAL "address")
# ../contrib/protobuf/src/google/protobuf/arena_impl.h:45:10: fatal error: 'sanitizer/asan_interface.h' file not found

View File

@ -6,6 +6,8 @@ if(NOT ENABLE_GRPC)
return()
endif()
set(CMAKE_CXX_STANDARD 17)
set(_gRPC_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/grpc")
set(_gRPC_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/grpc")

View File

@ -22,7 +22,7 @@
# limitations under the License.
# We want to use C++23, but GRPC is not ready
set (CMAKE_CXX_STANDARD 20)
set (CMAKE_CXX_STANDARD 17)
set(_gRPC_ZLIB_INCLUDE_DIR "")
set(_gRPC_ZLIB_LIBRARIES ch_contrib::zlib)

2
contrib/libhdfs3 vendored

@ -1 +1 @@
Subproject commit 0d04201c45359f0d0701fb1e8297d25eff7cfecf
Subproject commit de6f1e0750aa3670a603cbfeddf5df3de1097687

View File

@ -0,0 +1,5 @@
#!/bin/bash
# Creates the file /tmp/.openldap-initialized (or refreshes its timestamp if it
# already exists) and does nothing else.
# NOTE(review): presumably run before the OpenLDAP container's own startup logic
# so that it finds this file already present - confirm against the linked issue.

# Stop at the first failing command so a failed `touch` is not silently ignored.
set -e
# workaround for https://github.com/bitnami/containers/issues/73310
touch /tmp/.openldap-initialized

View File

@ -1,4 +1,4 @@
aiohttp==3.9.5
aiohttp==3.10.2
aiosignal==1.3.1
astroid==3.1.0
async-timeout==4.0.3
@ -6,12 +6,12 @@ attrs==23.2.0
black==24.4.2
boto3==1.34.131
botocore==1.34.131
certifi==2024.6.2
certifi==2024.07.04
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
codespell==2.2.1
cryptography==42.0.8
cryptography==43.0.1
Deprecated==1.2.14
dill==0.3.8
flake8==4.0.1
@ -19,7 +19,6 @@ frozenlist==1.4.1
idna==3.7
isort==5.13.2
jmespath==1.0.1
jwt==1.3.1
mccabe==0.6.1
multidict==6.0.5
mypy==1.8.0
@ -27,13 +26,12 @@ mypy-extensions==1.0.0
packaging==24.1
pathspec==0.9.0
pip==24.1.1
pipdeptree==2.23.0
platformdirs==4.2.2
pycodestyle==2.8.0
pycparser==2.22
pyflakes==2.4.0
PyGithub==2.3.0
PyJWT==2.8.0
PyJWT==2.9.0
pylint==3.1.0
PyNaCl==1.5.0
python-dateutil==2.9.0.post0
@ -42,7 +40,7 @@ PyYAML==6.0.1
rapidfuzz==3.9.3
requests==2.32.3
s3transfer==0.10.1
setuptools==59.6.0
setuptools==70.0.0
six==1.16.0
thefuzz==0.22.1
tomli==2.0.1
@ -52,7 +50,7 @@ types-requests==2.32.0.20240622
typing_extensions==4.12.2
unidiff==0.7.5
urllib3==2.2.2
wheel==0.37.1
wheel==0.38.1
wrapt==1.16.0
yamllint==1.26.3
yarl==1.9.4

View File

@ -196,7 +196,6 @@ When writing docs, you can use prepared templates. Copy the code of a template a
Templates:
- [Function](_description_templates/template-function.md)
- [Setting](_description_templates/template-setting.md)
- [Server Setting](_description_templates/template-server-setting.md)
- [Database or Table engine](_description_templates/template-engine.md)
- [System table](_description_templates/template-system-table.md)

View File

@ -1,27 +0,0 @@
## setting_name {#setting_name}
Description.
For the switch setting, use the typical phrase: “Enables or disables something ...”.
Possible values:
*For switcher setting:*
- 0 — Disabled.
- 1 — Enabled.
*For another setting (typical phrases):*
- Positive integer.
- 0 — Disabled or unlimited or something else.
Default value: `value`.
**Additional Info** (Optional)
The name of an additional section can be any, for example, **Usage**.
**See Also** (Optional)
- [link](#)

View File

@ -1,11 +0,0 @@
sudo apt-get install -y apt-transport-https ca-certificates dirmngr
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754
echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \
/etc/apt/sources.list.d/clickhouse.list
sudo apt-get update
sudo apt-get install -y clickhouse-server clickhouse-client
sudo service clickhouse-server start
clickhouse-client # or "clickhouse-client --password" if you've set up a password.

View File

@ -1,6 +0,0 @@
sudo yum install -y yum-utils
sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo
sudo yum install -y clickhouse-server clickhouse-client
sudo /etc/init.d/clickhouse-server start
clickhouse-client # or "clickhouse-client --password" if you set up a password.

View File

@ -1,32 +0,0 @@
LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \
grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1)
export LATEST_VERSION
case $(uname -m) in
x86_64) ARCH=amd64 ;;
aarch64) ARCH=arm64 ;;
*) echo "Unknown architecture $(uname -m)"; exit 1 ;;
esac
for PKG in clickhouse-common-static clickhouse-common-static-dbg clickhouse-server clickhouse-client
do
curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION-${ARCH}.tgz" \
|| curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION.tgz"
done
tar -xzvf "clickhouse-common-static-$LATEST_VERSION-${ARCH}.tgz" \
|| tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz"
sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh"
tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION-${ARCH}.tgz" \
|| tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz"
sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh"
tar -xzvf "clickhouse-server-$LATEST_VERSION-${ARCH}.tgz" \
|| tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz"
sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh" configure
sudo /etc/init.d/clickhouse-server start
tar -xzvf "clickhouse-client-$LATEST_VERSION-${ARCH}.tgz" \
|| tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz"
sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh"

View File

@ -14,7 +14,12 @@ then
HAS_SSE42=$(grep sse4_2 /proc/cpuinfo)
if [ "${HAS_SSE42}" ]
then
if ldd --version 2>&1 | grep -q musl
then
DIR="amd64musl"
else
DIR="amd64"
fi
else
DIR="amd64compat"
fi

View File

@ -7,315 +7,5 @@ sidebar_position: 70
# [experimental] MaterializedMySQL
:::note
This database engine is experimental. To use it, set `allow_experimental_database_materialized_mysql` to 1 in your configuration files or by using the `SET` command:
```sql
SET allow_experimental_database_materialized_mysql=1
```
This database engine is obsolete and cannot be used.
:::
Creates a ClickHouse database with all the tables existing in MySQL, and all the data in those tables. The ClickHouse server works as MySQL replica. It reads `binlog` and performs DDL and DML queries.
## Creating a Database {#creating-a-database}
``` sql
CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
[TABLE OVERRIDE table1 (...), TABLE OVERRIDE table2 (...)]
```
**Engine Parameters**
- `host:port` — MySQL server endpoint.
- `database` — MySQL database name.
- `user` — MySQL user.
- `password` — User password.
## Engine Settings
### max_rows_in_buffer
`max_rows_in_buffer` — Maximum number of rows that data is allowed to cache in memory (for single table and the cache data unable to query). When this number is exceeded, the data will be materialized. Default: `65 505`.
### max_bytes_in_buffer
`max_bytes_in_buffer` — Maximum number of bytes that data is allowed to cache in memory (for single table and the cache data unable to query). When this number is exceeded, the data will be materialized. Default: `1 048 576`.
### max_flush_data_time
`max_flush_data_time` — Maximum number of milliseconds that data is allowed to cache in memory (for database and the cache data unable to query). When this time is exceeded, the data will be materialized. Default: `1000`.
### max_wait_time_when_mysql_unavailable
`max_wait_time_when_mysql_unavailable` — Retry interval when MySQL is not available (milliseconds). Negative value disables retry. Default: `1000`.
### allows_query_when_mysql_lost
`allows_query_when_mysql_lost` — Allows to query a materialized table when MySQL is lost. Default: `0` (`false`).
### allow_startup_database_without_connection_to_mysql
`allow_startup_database_without_connection_to_mysql` — Allow to create and attach database without available connection to MySQL. Default: `0` (`false`).
### materialized_mysql_tables_list
`materialized_mysql_tables_list` — a comma-separated list of mysql database tables, which will be replicated by MaterializedMySQL database engine. Default value: empty list — means whole tables will be replicated.
```sql
CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***')
SETTINGS
allows_query_when_mysql_lost=true,
max_wait_time_when_mysql_unavailable=10000;
```
## Settings on MySQL-server Side
For `MaterializedMySQL` to work correctly, there are a few mandatory `MySQL`-side configuration settings that must be set:
### default_authentication_plugin
`default_authentication_plugin = mysql_native_password` since `MaterializedMySQL` can only authorize with this method.
### gtid_mode
`gtid_mode = on` since GTID-based logging is mandatory for correct `MaterializedMySQL` replication.
:::note
While turning on `gtid_mode` you should also specify `enforce_gtid_consistency = on`.
:::
## Virtual Columns {#virtual-columns}
When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree](/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md) tables are used with virtual `_sign` and `_version` columns.
### \_version
`_version` — Transaction counter. Type [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
### \_sign
`_sign` — Deletion mark. Type [Int8](/docs/en/sql-reference/data-types/int-uint.md). Possible values:
- `1` — Row is not deleted,
- `-1` — Row is deleted.
## Data Types Support {#data_types-support}
| MySQL | ClickHouse |
|-------------------------|--------------------------------------------------------------|
| TINY | [Int8](/docs/en/sql-reference/data-types/int-uint.md) |
| SHORT | [Int16](/docs/en/sql-reference/data-types/int-uint.md) |
| INT24 | [Int32](/docs/en/sql-reference/data-types/int-uint.md) |
| LONG | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) |
| LONGLONG | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) |
| FLOAT | [Float32](/docs/en/sql-reference/data-types/float.md) |
| DOUBLE | [Float64](/docs/en/sql-reference/data-types/float.md) |
| DECIMAL, NEWDECIMAL | [Decimal](/docs/en/sql-reference/data-types/decimal.md) |
| DATE, NEWDATE | [Date](/docs/en/sql-reference/data-types/date.md) |
| DATETIME, TIMESTAMP | [DateTime](/docs/en/sql-reference/data-types/datetime.md) |
| DATETIME2, TIMESTAMP2 | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
| YEAR | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) |
| TIME | [Int64](/docs/en/sql-reference/data-types/int-uint.md) |
| ENUM | [Enum](/docs/en/sql-reference/data-types/enum.md) |
| STRING | [String](/docs/en/sql-reference/data-types/string.md) |
| VARCHAR, VAR_STRING | [String](/docs/en/sql-reference/data-types/string.md) |
| BLOB | [String](/docs/en/sql-reference/data-types/string.md) |
| GEOMETRY | [String](/docs/en/sql-reference/data-types/string.md) |
| BINARY | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| BIT | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) |
| SET | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) |
[Nullable](/docs/en/sql-reference/data-types/nullable.md) is supported.
The data of TIME type in MySQL is converted to microseconds in ClickHouse.
Other types are not supported. If MySQL table contains a column of such type, ClickHouse throws an exception and stops replication.
## Specifics and Recommendations {#specifics-and-recommendations}
### Compatibility Restrictions {#compatibility-restrictions}
Apart from the data type limitations, there are a few restrictions compared to `MySQL` databases that must be resolved before replication is possible:
- Each table in `MySQL` should contain `PRIMARY KEY`.
- Replication will not work for tables that contain rows with `ENUM` field values out of range (as specified in the `ENUM` signature).
### DDL Queries {#ddl-queries}
MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([ALTER](/docs/en/sql-reference/statements/alter/index.md), [CREATE](/docs/en/sql-reference/statements/create/index.md), [DROP](/docs/en/sql-reference/statements/drop.md), [RENAME](/docs/en/sql-reference/statements/rename.md)). If ClickHouse cannot parse some DDL query, the query is ignored.
### Data Replication {#data-replication}
`MaterializedMySQL` does not support direct `INSERT`, `DELETE` and `UPDATE` queries. However, they are supported in terms of data replication:
- MySQL `INSERT` query is converted into `INSERT` with `_sign=1`.
- MySQL `DELETE` query is converted into `INSERT` with `_sign=-1`.
- MySQL `UPDATE` query is converted into `INSERT` with `_sign=-1` and `INSERT` with `_sign=1` if the primary key has been changed, or
`INSERT` with `_sign=1` if not.
### Selecting from MaterializedMySQL Tables {#select}
`SELECT` query from `MaterializedMySQL` tables has some specifics:
- If `_version` is not specified in the `SELECT` query, the
[FINAL](/docs/en/sql-reference/statements/select/from.md/#select-from-final) modifier is used, so only rows with
`MAX(_version)` are returned for each primary key value.
- If `_sign` is not specified in the `SELECT` query, `WHERE _sign=1` is used by default. So the deleted rows are not
included into the result set.
- The result includes columns comments in case they exist in MySQL database tables.
### Index Conversion {#index-conversion}
MySQL `PRIMARY KEY` and `INDEX` clauses are converted into `ORDER BY` tuples in ClickHouse tables.
ClickHouse has only one physical order, which is determined by `ORDER BY` clause. To create a new physical order, use
[materialized views](/docs/en/sql-reference/statements/create/view.md/#materialized).
**Notes**
- Rows with `_sign=-1` are not deleted physically from the tables.
- Cascade `UPDATE/DELETE` queries are not supported by the `MaterializedMySQL` engine, as they are not visible in the
MySQL binlog.
- Replication can be easily broken.
- Manual operations on database and tables are forbidden.
- `MaterializedMySQL` is affected by the [optimize_on_insert](/docs/en/operations/settings/settings.md/#optimize-on-insert)
setting. Data is merged in the corresponding table in the `MaterializedMySQL` database when a table in the MySQL
server changes.
### Table Overrides {#table-overrides}
Table overrides can be used to customize the ClickHouse DDL queries, allowing you to make schema optimizations for your
application. This is especially useful for controlling partitioning, which is important for the overall performance of
MaterializedMySQL.
These are the schema conversion manipulations you can do with table overrides for MaterializedMySQL:
* Modify column type. Must be compatible with the original type, or replication will fail. For example,
you can modify a UInt32 column to UInt64, but you can not modify a String column to Array(String).
* Modify [column TTL](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#mergetree-column-ttl).
* Modify [column compression codec](/docs/en/sql-reference/statements/create/table.md/#codecs).
* Add [ALIAS columns](/docs/en/sql-reference/statements/create/table.md/#alias).
* Add [skipping indexes](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-data_skipping-indexes). Note that you need to enable `use_skip_indexes_if_final` setting to make them work (MaterializedMySQL is using `SELECT ... FINAL` by default)
* Add [projections](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#projections). Note that projection optimizations are
disabled when using `SELECT ... FINAL` (which MaterializedMySQL does by default), so their utility is limited here.
`INDEX ... TYPE hypothesis` as [described in the v21.12 blog post](https://clickhouse.com/blog/en/2021/clickhouse-v21.12-released/)
may be more useful in this case.
* Modify [PARTITION BY](/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key/)
* Modify [ORDER BY](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#mergetree-query-clauses)
* Modify [PRIMARY KEY](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#mergetree-query-clauses)
* Add [SAMPLE BY](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#mergetree-query-clauses)
* Add [table TTL](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#mergetree-query-clauses)
```sql
CREATE DATABASE db_name ENGINE = MaterializedMySQL(...)
[SETTINGS ...]
[TABLE OVERRIDE table_name (
[COLUMNS (
[col_name [datatype] [ALIAS expr] [CODEC(...)] [TTL expr], ...]
[INDEX index_name expr TYPE indextype[(...)] GRANULARITY val, ...]
[PROJECTION projection_name (SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]), ...]
)]
[ORDER BY expr]
[PRIMARY KEY expr]
[PARTITION BY expr]
[SAMPLE BY expr]
[TTL expr]
), ...]
```
Example:
```sql
CREATE DATABASE db_name ENGINE = MaterializedMySQL(...)
TABLE OVERRIDE table1 (
COLUMNS (
userid UUID,
category LowCardinality(String),
timestamp DateTime CODEC(Delta, Default)
)
PARTITION BY toYear(timestamp)
),
TABLE OVERRIDE table2 (
COLUMNS (
client_ip String TTL created + INTERVAL 72 HOUR
)
SAMPLE BY ip_hash
)
```
The `COLUMNS` list is sparse; existing columns are modified as specified, extra ALIAS columns are added. It is not
possible to add ordinary or MATERIALIZED columns. Modified columns with a different type must be assignable from the
original type. There is currently no validation of this or similar issues when the `CREATE DATABASE` query executes, so
extra care needs to be taken.
You may specify overrides for tables that do not exist yet.
:::important
It is easy to break replication with table overrides if not used with care. For example:
* If an ALIAS column is added with a table override, and a column with the same name is later added to the source
MySQL table, the converted ALTER TABLE query in ClickHouse will fail and replication stops.
* It is currently possible to add overrides that reference nullable columns where not-nullable are required, such as in
`ORDER BY` or `PARTITION BY`. This will cause CREATE TABLE queries that will fail, also causing replication to stop.
:::
## Examples of Use {#examples-of-use}
Queries in MySQL:
``` sql
mysql> CREATE DATABASE db;
mysql> CREATE TABLE db.test (a INT PRIMARY KEY, b INT);
mysql> INSERT INTO db.test VALUES (1, 11), (2, 22);
mysql> DELETE FROM db.test WHERE a=1;
mysql> ALTER TABLE db.test ADD COLUMN c VARCHAR(16);
mysql> UPDATE db.test SET c='Wow!', b=222;
mysql> SELECT * FROM test;
```
```text
┌─a─┬───b─┬─c────┐
│ 2 │ 222 │ Wow! │
└───┴─────┴──────┘
```
Database in ClickHouse, exchanging data with the MySQL server:
The database and the table created:
``` sql
CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***');
SHOW TABLES FROM mysql;
```
``` text
┌─name─┐
│ test │
└──────┘
```
After inserting data:
``` sql
SELECT * FROM mysql.test;
```
``` text
┌─a─┬──b─┐
│ 1 │ 11 │
│ 2 │ 22 │
└───┴────┘
```
After deleting data, adding the column and updating:
``` sql
SELECT * FROM mysql.test;
```
``` text
┌─a─┬───b─┬─c────┐
│ 2 │ 222 │ Wow! │
└───┴─────┴──────┘
```

View File

@ -9,7 +9,7 @@ sidebar_label: MongoDB
The MongoDB engine is a read-only table engine which allows reading data from a remote [MongoDB](https://www.mongodb.com/) collection.
Only MongoDB v3.6+ servers are supported.
[Seed list(`mongodb**+srv**`)](https://www.mongodb.com/docs/manual/reference/glossary/#std-term-seed-list) is not yet supported.
[Seed list(`mongodb+srv`)](https://www.mongodb.com/docs/manual/reference/glossary/#std-term-seed-list) is not yet supported.
:::note
If you run into problems, please report the issue and try using [the legacy implementation](../../../operations/server-configuration-parameters/settings.md#use_legacy_mongodb_integration).

View File

@ -31,6 +31,10 @@ The table must be enabled in the server configuration, see the `opentelemetry_sp
The tags or attributes are saved as two parallel arrays, containing the keys and values. Use [ARRAY JOIN](../sql-reference/statements/select/array-join.md) to work with them.
## Log-query-settings
ClickHouse allows you to log changes to query settings during query execution. When enabled, any modifications made to query settings will be recorded in the OpenTelemetry span log. This feature is particularly useful in production environments for tracking configuration changes that may affect query performance.
## Integration with monitoring systems
At the moment, there is no ready tool that can export the tracing data from ClickHouse to a monitoring system.

View File

@ -2,7 +2,6 @@
title: "Settings Overview"
sidebar_position: 1
slug: /en/operations/settings/
pagination_next: en/operations/settings/settings
---
# Settings Overview

View File

@ -1,176 +0,0 @@
# The MySQL Binlog Client
The MySQL Binlog Client provides a mechanism in ClickHouse to share the binlog from a MySQL instance among multiple [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md) databases. This avoids consuming unnecessary bandwidth and CPU when replicating more than one schema/database.
The implementation is resilient against crashes and disk issues. The executed GTID sets of the binlog itself and of the consuming databases are persisted only after the data they describe has been safely persisted as well. The implementation also tolerates re-doing aborted operations (at-least-once delivery).
# Settings
## use_binlog_client
Forces reuse of an existing MySQL binlog connection, or creates a new one if none exists. The connection is defined by `user:pass@host:port`.
Default value: 0
**Example**
```sql
-- create MaterializedMySQL databases that read the events from the binlog client
CREATE DATABASE db1 ENGINE = MaterializedMySQL('host:port', 'db1', 'user', 'password') SETTINGS use_binlog_client=1
CREATE DATABASE db2 ENGINE = MaterializedMySQL('host:port', 'db2', 'user', 'password') SETTINGS use_binlog_client=1
CREATE DATABASE db3 ENGINE = MaterializedMySQL('host:port', 'db3', 'user2', 'password2') SETTINGS use_binlog_client=1
```
Databases `db1` and `db2` will use the same binlog connection, since they use the same `user:pass@host:port`. Database `db3` will use separate binlog connection.
## max_bytes_in_binlog_queue
Defines the limit of bytes in the binlog events queue. If the number of bytes in the queue exceeds this limit, reading new events from MySQL stops until space for new events is freed. This bounds memory usage: a very high value could consume all available memory, while a very low value could make the databases wait for new events.
Default value: 67108864
**Example**
```sql
CREATE DATABASE db1 ENGINE = MaterializedMySQL('host:port', 'db1', 'user', 'password') SETTINGS use_binlog_client=1, max_bytes_in_binlog_queue=33554432
CREATE DATABASE db2 ENGINE = MaterializedMySQL('host:port', 'db2', 'user', 'password') SETTINGS use_binlog_client=1
```
If database `db1` is unable to consume binlog events fast enough and the size of the events queue exceeds `33554432` bytes, reading of new events from MySQL is postponed until `db1`
consumes the events and releases some space.
NOTE: This also impacts `db2`, which will be waiting for new events too, since both databases share the same connection.
## max_milliseconds_to_wait_in_binlog_queue
Defines the maximum number of milliseconds to wait when `max_bytes_in_binlog_queue` is exceeded. After that, the database is detached from the current binlog connection and a new connection is established for it, so that other databases do not have to wait for this database.
Default value: 10000
**Example**
```sql
CREATE DATABASE db1 ENGINE = MaterializedMySQL('host:port', 'db1', 'user', 'password') SETTINGS use_binlog_client=1, max_bytes_in_binlog_queue=33554432, max_milliseconds_to_wait_in_binlog_queue=1000
CREATE DATABASE db2 ENGINE = MaterializedMySQL('host:port', 'db2', 'user', 'password') SETTINGS use_binlog_client=1
```
If the event queue of database `db1` is full, the binlog connection will wait for `1000` ms, and if the database is still not able to consume the events, it will be detached from the connection and a separate connection will be created for it.
NOTE: If the database `db1` has been detached from the shared connection and created new one, after the binlog connections for `db1` and `db2` have the same positions they will be merged to one. And `db1` and `db2` will use the same connection again.
## max_bytes_in_binlog_dispatcher_buffer
Defines the max bytes in the binlog dispatcher's buffer before it is flushed to attached binlog. The events from MySQL binlog connection are buffered before sending to attached databases. It increases the events throughput from the binlog to databases.
Default value: 1048576
## max_flush_milliseconds_in_binlog_dispatcher
Defines the max milliseconds in the binlog dispatcher's buffer to wait before it is flushed to attached binlog. If there are no events received from MySQL binlog connection for a while, after some time buffered events should be sent to the attached databases.
Default value: 1000
# Design
## The Binlog Events Dispatcher
Currently each MaterializedMySQL database opens its own connection to MySQL to subscribe to binlog events. There is a need to have only one connection and _dispatch_ the binlog events to all databases that replicate from the same MySQL instance.
## Each MaterializedMySQL Database Has Its Own Event Queue
To prevent slowing down other instances there should be an _event queue_ per MaterializedMySQL database to handle the events independently of the speed of other instances. The dispatcher reads an event from the binlog, and sends it to every MaterializedMySQL database that needs it. Each database handles its events in separate threads.
## Catching up
If several databases have the same binlog position, they can use the same dispatcher. If a newly created database (or one that has been detached for some time) requests events that have been already processed, we need to create another communication _channel_ to the binlog. We do this by creating another temporary dispatcher for such databases. When the new dispatcher _catches up with_ the old one, the new/temporary dispatcher is not needed anymore and all databases getting events from this dispatcher can be moved to the old one.
## Memory Limit
There is a _memory limit_ to control event queue memory consumption per MySQL Client. If a database is not able to handle events fast enough, and the event queue is getting full, we have the following options:
1. The dispatcher is blocked until the slowest database frees up space for new events. All other databases are waiting for the slowest one. (Preferred)
2. The dispatcher is _never_ blocked, but suspends incremental sync for the slow database and continues dispatching events to remained databases.
## Performance
A lot of CPU can be saved by not processing every event in every database. The binlog contains events for all databases, so it is wasteful to distribute row events to a database that will not process them, especially if there are a lot of databases. This requires some sort of per-database binlog filtering and buffering.
Currently all events are sent to all MaterializedMySQL databases but parsing the event which consumes CPU is up to the database.
# Detailed Design
1. If a client (e.g. database) wants to read a stream of the events from MySQL binlog, it creates a connection to remote binlog by host/user/password and _executed GTID set_ params.
2. If another client wants to read the events from the binlog but for different _executed GTID set_, it is **not** possible to reuse existing connection to MySQL, then need to create another connection to the same remote binlog. (_This is how it is implemented today_).
3. When these 2 connections get the same binlog positions, they read the same events. It is logical to drop duplicate connection and move all its users out. And now one connection dispatches binlog events to several clients. Obviously only connections to the same binlog should be merged.
## Classes
1. One connection can send (or dispatch) events to several clients and might be called `BinlogEventsDispatcher`.
2. Several dispatchers grouped by _user:password@host:port_ in `BinlogClient`. Since they point to the same binlog.
3. The clients should communicate only with public API from `BinlogClient`. The result of using `BinlogClient` is an object that implements `IBinlog` to read events from. This implementation of `IBinlog` must be compatible with old implementation `MySQLFlavor` -> when replacing old implementation by new one, the behavior must not be changed.
## SQL
```sql
-- create MaterializedMySQL databases that read the events from the binlog client
CREATE DATABASE db1_client1 ENGINE = MaterializedMySQL('host:port', 'db', 'user', 'password') SETTINGS use_binlog_client=1, max_bytes_in_binlog_queue=1024;
CREATE DATABASE db2_client1 ENGINE = MaterializedMySQL('host:port', 'db', 'user', 'password') SETTINGS use_binlog_client=1;
CREATE DATABASE db3_client1 ENGINE = MaterializedMySQL('host:port', 'db2', 'user', 'password') SETTINGS use_binlog_client=1;
CREATE DATABASE db4_client2 ENGINE = MaterializedMySQL('host2:port', 'db', 'user', 'password') SETTINGS use_binlog_client=1;
CREATE DATABASE db5_client3 ENGINE = MaterializedMySQL('host:port', 'db', 'user1', 'password') SETTINGS use_binlog_client=1;
CREATE DATABASE db6_old ENGINE = MaterializedMySQL('host:port', 'db', 'user1', 'password') SETTINGS use_binlog_client=0;
```
Databases `db1_client1`, `db2_client1` and `db3_client1` share one instance of `BinlogClient` since they have the same params. `BinlogClient` will create 3 connections to MySQL server thus 3 instances of `BinlogEventsDispatcher`, but if these connections would have the same binlog position, they should be merged to one connection. Means all clients will be moved to one dispatcher and others will be closed. Databases `db4_client2` and `db5_client3` would use 2 different independent `BinlogClient` instances. Database `db6_old` will use old implementation. NOTE: By default `use_binlog_client` is disabled. Setting `max_bytes_in_binlog_queue` defines the max allowed bytes in the binlog queue. By default, it is `1073741824` bytes. If number of bytes exceeds this limit, the dispatching will be stopped until the space will be freed for new events.
## Binlog Table Structure
To see the status of the all `BinlogClient` instances there is `system.mysql_binlogs` system table. It shows the list of all created and _alive_ `IBinlog` instances with information about its `BinlogEventsDispatcher` and `BinlogClient`.
Example:
```
SELECT * FROM system.mysql_binlogs FORMAT Vertical
Row 1:
──────
binlog_client_name: root@127.0.0.1:3306
name: test_Clickhouse1
mysql_binlog_name: binlog.001154
mysql_binlog_pos: 7142294
mysql_binlog_timestamp: 1660082447
mysql_binlog_executed_gtid_set: a9d88f83-c14e-11ec-bb36-244bfedf7766:1-30523304
dispatcher_name: Applier
dispatcher_mysql_binlog_name: binlog.001154
dispatcher_mysql_binlog_pos: 7142294
dispatcher_mysql_binlog_timestamp: 1660082447
dispatcher_mysql_binlog_executed_gtid_set: a9d88f83-c14e-11ec-bb36-244bfedf7766:1-30523304
size: 0
bytes: 0
max_bytes: 0
```
### Tests
Unit tests:
```
$ ./unit_tests_dbms --gtest_filter=MySQLBinlog.*
```
Integration tests:
```
$ pytest -s -vv test_materialized_mysql_database/test.py::test_binlog_client
```
Dumps events from the file
```
$ ./utils/check-mysql-binlog/check-mysql-binlog --binlog binlog.001392
```
Dumps events from the server
```
$ ./utils/check-mysql-binlog/check-mysql-binlog --host 127.0.0.1 --port 3306 --user root --password pass --gtid a9d88f83-c14e-11ec-bb36-244bfedf7766:1-30462856
```

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -737,6 +737,14 @@ Number of sessions (connections) to ZooKeeper. Should be no more than one, becau
Number of watches (event subscriptions) in ZooKeeper.
### ConcurrencyControlAcquired
Total number of acquired CPU slots.
### ConcurrencyControlSoftLimit
Value of soft limit on number of CPU slots.
**See Also**
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.

View File

@ -177,6 +177,26 @@ When you are ready to insert your files into ClickHouse, startup a ClickHouse se
:::
## Format Conversions
You can use `clickhouse-local` for converting data between different formats. Example:
``` bash
$ clickhouse-local --input-format JSONLines --output-format CSV --query "SELECT * FROM table" < data.json > data.csv
```
Formats are auto-detected from file extensions:
``` bash
$ clickhouse-local --query "SELECT * FROM table" < data.json > data.csv
```
As a shortcut, you can write it using the `--copy` argument:
``` bash
$ clickhouse-local --copy < data.json > data.csv
```
## Usage {#usage}
By default `clickhouse-local` has access to data of a ClickHouse server on the same host, and it does not depend on the server's configuration. It also supports loading server configuration using `--config-file` argument. For temporary data, a unique temporary data directory is created by default.

View File

@ -261,9 +261,10 @@ windowFunnel(window, [mode, [mode, ... ]])(timestamp, cond1, cond2, ..., condN)
- `window` — Length of the sliding window, it is the time interval between the first and the last condition. The unit of `window` depends on the `timestamp` itself and varies. Determined using the expression `timestamp of cond1 <= timestamp of cond2 <= ... <= timestamp of condN <= timestamp of cond1 + window`.
- `mode` — It is an optional argument. One or more modes can be set.
- `'strict_deduplication'` — If the same condition holds for the sequence of events, then such repeating event interrupts further processing.
- `'strict_deduplication'` — If the same condition holds for the sequence of events, then such repeating event interrupts further processing. Note: it may work unexpectedly if several conditions hold for the same event.
- `'strict_order'` — Don't allow interventions of other events. E.g. in the case of `A->B->D->C`, it stops finding `A->B->C` at the `D` and the max event level is 2.
- `'strict_increase'` — Apply conditions only to events with strictly increasing timestamps.
- `'strict_once'` — Count each event only once in the chain even if it meets the condition several times.
**Returned value**

View File

@ -1,190 +0,0 @@
---
slug: /en/sql-reference/ansi
sidebar_position: 40
sidebar_label: ANSI Compatibility
title: "ANSI SQL Compatibility of ClickHouse SQL Dialect"
---
:::note
This article relies on Table 38, “Feature taxonomy and definition for mandatory features”, Annex F of [ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8).
:::
## Differences in Behaviour
The following table lists cases where a query feature works in ClickHouse but does not behave as specified in ANSI SQL.
| Feature ID | Feature Name | Difference |
|------------|-----------------------------|-----------------------------------------------------------------------------------------------------------|
| E011 | Numeric data types | Numeric literal with period is interpreted as approximate (`Float64`) instead of exact (`Decimal`) |
| E051-05 | Select items can be renamed | Item renames have a wider visibility scope than just the SELECT result |
| E141-01 | NOT NULL constraints | `NOT NULL` is implied for table columns by default |
| E011-04 | Arithmetic operators | ClickHouse overflows instead of checked arithmetic and changes the result data type based on custom rules |
## Feature Status
| Feature ID | Feature Name | Status | Comment |
|------------|--------------------------------------------------------------------------------------------------------------------------|----------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **E011** | **Numeric data types** | <span class="text-warning">Partial</span> | |
| E011-01 | INTEGER and SMALLINT data types | <span class="text-success">Yes</span> | |
| E011-02 | REAL, DOUBLE PRECISION and FLOAT data types | <span class="text-success">Yes</span> | |
| E011-03 | DECIMAL and NUMERIC data types | <span class="text-success">Yes</span> | |
| E011-04 | Arithmetic operators | <span class="text-success">Yes</span> | |
| E011-05 | Numeric comparison | <span class="text-success">Yes</span> | |
| E011-06 | Implicit casting among the numeric data types | <span class="text-danger">No</span> | ANSI SQL allows arbitrary implicit cast between numeric types, while ClickHouse relies on functions having multiple overloads instead of implicit cast |
| **E021** | **Character string types** | <span class="text-warning">Partial</span> | |
| E021-01 | CHARACTER data type | <span class="text-success">Yes</span> | |
| E021-02 | CHARACTER VARYING data type | <span class="text-success">Yes</span> | |
| E021-03 | Character literals | <span class="text-success">Yes</span> | |
| E021-04 | CHARACTER_LENGTH function | <span class="text-warning">Partial</span> | No `USING` clause |
| E021-05 | OCTET_LENGTH function | <span class="text-danger">No</span> | `LENGTH` behaves similarly |
| E021-06 | SUBSTRING | <span class="text-warning">Partial</span> | No support for `SIMILAR` and `ESCAPE` clauses, no `SUBSTRING_REGEX` variant |
| E021-07 | Character concatenation | <span class="text-warning">Partial</span> | No `COLLATE` clause |
| E021-08 | UPPER and LOWER functions | <span class="text-success">Yes</span> | |
| E021-09 | TRIM function | <span class="text-success">Yes</span> | |
| E021-10 | Implicit casting among the fixed-length and variable-length character string types | <span class="text-warning">Partial</span> | ANSI SQL allows arbitrary implicit cast between string types, while ClickHouse relies on functions having multiple overloads instead of implicit cast |
| E021-11 | POSITION function | <span class="text-warning">Partial</span> | No support for `IN` and `USING` clauses, no `POSITION_REGEX` variant |
| E021-12 | Character comparison | <span class="text-success">Yes</span> | |
| **E031** | **Identifiers** | <span class="text-warning">Partial</span>| |
| E031-01 | Delimited identifiers | <span class="text-warning">Partial</span> | Unicode literal support is limited |
| E031-02 | Lower case identifiers | <span class="text-success">Yes</span> | |
| E031-03 | Trailing underscore | <span class="text-success">Yes</span> | |
| **E051** | **Basic query specification** | <span class="text-warning">Partial</span>| |
| E051-01 | SELECT DISTINCT | <span class="text-success">Yes</span> | |
| E051-02 | GROUP BY clause | <span class="text-success">Yes</span> | |
| E051-04 | GROUP BY can contain columns not in `<select list>` | <span class="text-success">Yes</span> | |
| E051-05 | Select items can be renamed | <span class="text-success">Yes</span> | |
| E051-06 | HAVING clause | <span class="text-success">Yes</span> | |
| E051-07 | Qualified \* in select list | <span class="text-success">Yes</span> | |
| E051-08 | Correlation name in the FROM clause | <span class="text-success">Yes</span> | |
| E051-09 | Rename columns in the FROM clause | <span class="text-danger">No</span> | |
| **E061** | **Basic predicates and search conditions** | <span class="text-warning">Partial</span> | |
| E061-01 | Comparison predicate | <span class="text-success">Yes</span> | |
| E061-02 | BETWEEN predicate | <span class="text-warning">Partial</span> | No `SYMMETRIC` and `ASYMMETRIC` clause |
| E061-03 | IN predicate with list of values | <span class="text-success">Yes</span> | |
| E061-04 | LIKE predicate | <span class="text-success">Yes</span> | |
| E061-05 | LIKE predicate: ESCAPE clause | <span class="text-danger">No</span> | |
| E061-06 | NULL predicate | <span class="text-success">Yes</span> | |
| E061-07 | Quantified comparison predicate | <span class="text-danger">No</span> | |
| E061-08 | EXISTS predicate | <span class="text-danger">No</span> | |
| E061-09 | Subqueries in comparison predicate | <span class="text-success">Yes</span> | |
| E061-11 | Subqueries in IN predicate | <span class="text-success">Yes</span> | |
| E061-12 | Subqueries in quantified comparison predicate | <span class="text-danger">No</span> | |
| E061-13 | Correlated subqueries | <span class="text-danger">No</span> | |
| E061-14 | Search condition | <span class="text-success">Yes</span> | |
| **E071** | **Basic query expressions** | <span class="text-warning">Partial</span> | |
| E071-01 | UNION DISTINCT table operator | <span class="text-success">Yes</span> | |
| E071-02 | UNION ALL table operator | <span class="text-success">Yes</span> | |
| E071-03 | EXCEPT DISTINCT table operator | <span class="text-danger">No</span> | |
| E071-05 | Columns combined via table operators need not have exactly the same data type | <span class="text-success">Yes</span> | |
| E071-06 | Table operators in subqueries | <span class="text-success">Yes</span> | |
| **E081** | **Basic privileges** | <span class="text-success">Yes</span> |
| E081-01 | SELECT privilege at the table level | <span class="text-success">Yes</span> |
| E081-02 | DELETE privilege | |
| E081-03 | INSERT privilege at the table level | <span class="text-success">Yes</span> |
| E081-04 | UPDATE privilege at the table level | <span class="text-success">Yes</span> |
| E081-05 | UPDATE privilege at the column level | |
| E081-06 | REFERENCES privilege at the table level | | |
| E081-07 | REFERENCES privilege at the column level | | |
| E081-08 | WITH GRANT OPTION | <span class="text-success">Yes</span> | |
| E081-09 | USAGE privilege | | |
| E081-10 | EXECUTE privilege | | |
| **E091** | **Set functions** |<span class="text-success">Yes</span> |
| E091-01 | AVG | <span class="text-success">Yes</span> | |
| E091-02 | COUNT | <span class="text-success">Yes</span> | |
| E091-03 | MAX | <span class="text-success">Yes</span> | |
| E091-04 | MIN | <span class="text-success">Yes</span> | |
| E091-05 | SUM | <span class="text-success">Yes</span> | |
| E091-06 | ALL quantifier | <span class="text-success">Yes</span> | |
| E091-07 | DISTINCT quantifier | <span class="text-success">Yes</span> | Not all aggregate functions supported |
| **E101** | **Basic data manipulation** | <span class="text-warning">Partial</span> | |
| E101-01 | INSERT statement | <span class="text-success">Yes</span> | Note: primary key in ClickHouse does not imply the `UNIQUE` constraint |
| E101-03 | Searched UPDATE statement | <span class="text-warning">Partial</span> | There's an `ALTER UPDATE` statement for batch data modification |
| E101-04 | Searched DELETE statement | <span class="text-warning">Partial</span> | There's an `ALTER DELETE` statement for batch data removal |
| **E111** | **Single row SELECT statement** | <span class="text-danger">No</span> | |
| **E121** | **Basic cursor support** | <span class="text-danger">No</span> | |
| E121-01 | DECLARE CURSOR | <span class="text-danger">No</span> | |
| E121-02 | ORDER BY columns need not be in select list | <span class="text-success">Yes</span> | |
| E121-03 | Value expressions in ORDER BY clause | <span class="text-success">Yes</span> | |
| E121-04 | OPEN statement | <span class="text-danger">No</span> | |
| E121-06 | Positioned UPDATE statement | <span class="text-danger">No</span> | |
| E121-07 | Positioned DELETE statement | <span class="text-danger">No</span> | |
| E121-08 | CLOSE statement | <span class="text-danger">No</span> | |
| E121-10 | FETCH statement: implicit NEXT | <span class="text-danger">No</span> | |
| E121-17 | WITH HOLD cursors | <span class="text-danger">No</span> | |
| **E131** | **Null value support (nulls in lieu of values)** | <span class="text-success">Yes</span> | Some restrictions apply |
| **E141** | **Basic integrity constraints** | <span class="text-warning">Partial</span> | |
| E141-01 | NOT NULL constraints | <span class="text-success">Yes</span> | Note: `NOT NULL` is implied for table columns by default |
| E141-02 | UNIQUE constraint of NOT NULL columns | <span class="text-danger">No</span> | |
| E141-03 | PRIMARY KEY constraints | <span class="text-warning">Partial</span> | |
| E141-04 | Basic FOREIGN KEY constraint with the NO ACTION default for both referential delete action and referential update action | <span class="text-danger">No</span> | |
| E141-06 | CHECK constraint | <span class="text-success">Yes</span> | |
| E141-07 | Column defaults | <span class="text-success">Yes</span> | |
| E141-08 | NOT NULL inferred on PRIMARY KEY | <span class="text-success">Yes</span> | |
| E141-10 | Names in a foreign key can be specified in any order | <span class="text-danger">No</span> | |
| **E151** | **Transaction support** | <span class="text-danger">No</span> | |
| E151-01 | COMMIT statement | <span class="text-danger">No</span> | |
| E151-02 | ROLLBACK statement | <span class="text-danger">No</span> | |
| **E152** | **Basic SET TRANSACTION statement** | <span class="text-danger">No</span> | |
| E152-01 | SET TRANSACTION statement: ISOLATION LEVEL SERIALIZABLE clause | <span class="text-danger">No</span> | |
| E152-02 | SET TRANSACTION statement: READ ONLY and READ WRITE clauses | <span class="text-danger">No</span> | |
| **E153** | **Updatable queries with subqueries** | <span class="text-success">Yes</span> | |
| **E161** | **SQL comments using leading double minus** | <span class="text-success">Yes</span> | |
| **E171** | **SQLSTATE support** | <span class="text-danger">No</span> | |
| **E182** | **Host language binding** | <span class="text-danger">No</span> | |
| **F031** | **Basic schema manipulation** | <span class="text-warning">Partial</span>| |
| F031-01 | CREATE TABLE statement to create persistent base tables | <span class="text-warning">Partial</span> | No `SYSTEM VERSIONING`, `ON COMMIT`, `GLOBAL`, `LOCAL`, `PRESERVE`, `DELETE`, `REF IS`, `WITH OPTIONS`, `UNDER`, `LIKE`, `PERIOD FOR` clauses and no support for user resolved data types |
| F031-02 | CREATE VIEW statement | <span class="text-warning">Partial</span> | No `RECURSIVE`, `CHECK`, `UNDER`, `WITH OPTIONS` clauses and no support for user resolved data types |
| F031-03 | GRANT statement | <span class="text-success">Yes</span> | |
| F031-04 | ALTER TABLE statement: ADD COLUMN clause | <span class="text-success">Yes</span> | No support for `GENERATED` clause and system time period |
| F031-13 | DROP TABLE statement: RESTRICT clause | <span class="text-danger">No</span> | |
| F031-16 | DROP VIEW statement: RESTRICT clause | <span class="text-danger">No</span> | |
| F031-19 | REVOKE statement: RESTRICT clause | <span class="text-danger">No</span> | |
| **F041** | **Basic joined table** | <span class="text-warning">Partial</span> | |
| F041-01 | Inner join (but not necessarily the INNER keyword) | <span class="text-success">Yes</span> | |
| F041-02 | INNER keyword | <span class="text-success">Yes</span> | |
| F041-03 | LEFT OUTER JOIN | <span class="text-success">Yes</span> | |
| F041-04 | RIGHT OUTER JOIN | <span class="text-success">Yes</span> | |
| F041-05 | Outer joins can be nested | <span class="text-success">Yes</span> | |
| F041-07 | The inner table in a left or right outer join can also be used in an inner join | <span class="text-success">Yes</span> | |
| F041-08 | All comparison operators are supported (rather than just =) | <span class="text-danger">No</span> | |
| **F051** | **Basic date and time** | <span class="text-warning">Partial</span> | |
| F051-01 | DATE data type (including support of DATE literal) | <span class="text-success">Yes</span> | |
| F051-02 | TIME data type (including support of TIME literal) with fractional seconds precision of at least 0 | <span class="text-danger">No</span> | |
| F051-03 | TIMESTAMP data type (including support of TIMESTAMP literal) with fractional seconds precision of at least 0 and 6 | <span class="text-success">Yes</span> | |
| F051-04 | Comparison predicate on DATE, TIME, and TIMESTAMP data types | <span class="text-success">Yes</span> | |
| F051-05 | Explicit CAST between datetime types and character string types | <span class="text-success">Yes</span> | |
| F051-06 | CURRENT_DATE | <span class="text-danger">No</span> | `today()` is similar |
| F051-07 | LOCALTIME | <span class="text-danger">No</span> | `now()` is similar |
| F051-08 | LOCALTIMESTAMP | <span class="text-danger">No</span> | |
| **F081** | **UNION and EXCEPT in views** | <span class="text-warning">Partial</span> | |
| **F131** | **Grouped operations** | <span class="text-warning">Partial</span> | |
| F131-01 | WHERE, GROUP BY, and HAVING clauses supported in queries with grouped views | <span class="text-success">Yes</span> | |
| F131-02 | Multiple tables supported in queries with grouped views | <span class="text-success">Yes</span> | |
| F131-03 | Set functions supported in queries with grouped views | <span class="text-success">Yes</span> | |
| F131-04 | Subqueries with GROUP BY and HAVING clauses and grouped views | <span class="text-success">Yes</span> | |
| F131-05 | Single row SELECT with GROUP BY and HAVING clauses and grouped views | <span class="text-danger">No</span> | |
| **F181** | **Multiple module support** | <span class="text-danger">No</span> | |
| **F201** | **CAST function** | <span class="text-success">Yes</span> | |
| **F221** | **Explicit defaults** | <span class="text-danger">No</span> | |
| **F261** | **CASE expression** | <span class="text-success">Yes</span> | |
| F261-01 | Simple CASE | <span class="text-success">Yes</span> | |
| F261-02 | Searched CASE | <span class="text-success">Yes</span> | |
| F261-03 | NULLIF | <span class="text-success">Yes</span> | |
| F261-04 | COALESCE | <span class="text-success">Yes</span> | |
| **F311** | **Schema definition statement** | <span class="text-warning">Partial</span> | |
| F311-01 | CREATE SCHEMA | <span class="text-warning">Partial</span> | See CREATE DATABASE |
| F311-02 | CREATE TABLE for persistent base tables | <span class="text-success">Yes</span> | |
| F311-03 | CREATE VIEW | <span class="text-success">Yes</span> | |
| F311-04 | CREATE VIEW: WITH CHECK OPTION | <span class="text-danger">No</span> | |
| F311-05 | GRANT statement | <span class="text-success">Yes</span> | |
| **F471** | **Scalar subquery values** | <span class="text-success">Yes</span> | |
| **F481** | **Expanded NULL predicate** | <span class="text-success">Yes</span> | |
| **F812** | **Basic flagging** | <span class="text-danger">No</span> | |
| **S011** | **Distinct data types** | | |
| **T321** | **Basic SQL-invoked routines** | <span class="text-danger">No</span> | |
| T321-01 | User-defined functions with no overloading | <span class="text-danger">No</span> | |
| T321-02 | User-defined stored procedures with no overloading | <span class="text-danger">No</span> | |
| T321-03 | Function invocation | <span class="text-danger">No</span> | |
| T321-04 | CALL statement | <span class="text-danger">No</span> | |
| T321-05 | RETURN statement | <span class="text-danger">No</span> | |
| **T631** | **IN predicate with one list element** | <span class="text-success">Yes</span> | |

View File

@ -2933,7 +2933,42 @@ The same as today() - 1.
## timeSlot
Rounds the time to the half hour.
Rounds the time to the start of a half-hour interval.
**Syntax**
```sql
timeSlot(time[, time_zone])
```
**Arguments**
- `time` — Time to round to the start of a half-an-hour length interval. [DateTime](../data-types/datetime.md)/[Date32](../data-types/date32.md)/[DateTime64](../data-types/datetime64.md).
- `time_zone` — A String type const value or an expression representing the time zone. [String](../data-types/string.md).
:::note
Though this function can take values of the extended types `Date32` and `DateTime64` as an argument, passing it a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results.
:::
**Return type**
- Returns the time rounded to the start of a half-an-hour length interval. [DateTime](../data-types/datetime.md).
**Example**
Query:
```sql
SELECT timeSlot(toDateTime('2000-01-02 03:04:05', 'UTC'));
```
Result:
```response
┌─timeSlot(toDateTime('2000-01-02 03:04:05', 'UTC'))─┐
│ 2000-01-02 03:00:00 │
└────────────────────────────────────────────────────┘
```
## toYYYYMM

View File

@ -244,24 +244,42 @@ SELECT IPv6CIDRToRange(toIPv6('2001:0db8:0000:85a3:0000:0000:ac1f:8001'), 32);
└────────────────────────────────────────────────────────────────────────┘
```
## toIPv4(string)
## toIPv4
An alias to `IPv4StringToNum()` that takes a string form of IPv4 address and returns value of [IPv4](../data-types/ipv4.md) type, which is binary equal to value returned by `IPv4StringToNum()`.
Like [`IPv4StringToNum`](#ipv4stringtonums) but takes a string form of IPv4 address and returns value of [IPv4](../data-types/ipv4.md) type.
**Syntax**
```sql
toIPv4(string)
```
**Arguments**
- `string` — IPv4 address. [String](../data-types/string.md).
**Returned value**
- `string` converted to the IPv4 address. [IPv4](../data-types/ipv4.md).
**Examples**
Query:
``` sql
WITH
'171.225.130.45' as IPv4_string
SELECT
toTypeName(IPv4StringToNum(IPv4_string)),
toTypeName(toIPv4(IPv4_string))
SELECT toIPv4('171.225.130.45');
```
Result:
``` text
┌─toTypeName(IPv4StringToNum(IPv4_string))─┬─toTypeName(toIPv4(IPv4_string))─┐
│ UInt32 │ IPv4 │
└──────────────────────────────────────────┴─────────────────────────────────┘
┌─toIPv4('171.225.130.45')─┐
171.225.130.45
└──────────────────────────┘
```
Query:
``` sql
WITH
'171.225.130.45' as IPv4_string
@ -270,91 +288,124 @@ SELECT
hex(toIPv4(IPv4_string))
```
Result:
``` text
┌─hex(IPv4StringToNum(IPv4_string))─┬─hex(toIPv4(IPv4_string))─┐
│ ABE1822D │ ABE1822D │
└───────────────────────────────────┴──────────────────────────┘
```
## toIPv4OrDefault(string)
## toIPv4OrDefault
Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns `0.0.0.0` (0 IPv4).
Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns `0.0.0.0` (0 IPv4), or the provided IPv4 default.
**Syntax**
```sql
toIPv4OrDefault(value)
toIPv4OrDefault(string[, default])
```
**Arguments**
- `value` — A string-encoded IPv4 address. [String](../data-types/string.md)
- `string` — IP address. [String](../data-types/string.md).
- `default` (optional) — The value to return if `string` has an invalid format. [IPv4](../data-types/ipv4.md).
**Returned value**
- `value` converted to an IPv4 address. [IPv4](../data-types/ipv4.md).
- `string` converted to an IPv4 address, or the default if `string` has an invalid format. [IPv4](../data-types/ipv4.md).
**Example**
Query:
```sql
WITH
'::ffff:127.0.0.1' AS valid_IPv6_string,
'fe80:2030:31:24' AS invalid_IPv6_string
SELECT
toIPv4OrDefault('192.168.0.1') AS s1,
toIPv4OrDefault('192.168.0') AS s2
toIPv4OrDefault(valid_IPv6_string) AS valid,
toIPv4OrDefault(invalid_IPv6_string) AS default,
toIPv4OrDefault(invalid_IPv6_string, toIPv4('1.1.1.1')) AS provided_default;
```
Result:
```response
┌─s1──────────┬─s2──────┐
192.168.0.1 │ 0.0.0.0
└─────────────┴─────────┘
┌─valid───┬─default─┬─provided_default─┐
0.0.0.0 │ 0.0.0.0 │ 1.1.1.1
└─────────┴─────────┴──────────────────┘
```
## toIPv4OrNull(string)
## toIPv4OrNull
Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns null.
Same as [`toIPv4`](#toipv4), but if the IPv4 address has an invalid format, it returns null.
**Syntax**
```sql
toIPv4OrNull(value)
toIPv4OrNull(string)
```
**Arguments**
- `value` — A string-encoded IPv4 address. [String](../data-types/string.md)
- `string` — IP address. [String](../data-types/string.md).
**Returned value**
- `value` converted to an IPv4 address. [IPv4](../data-types/ipv4.md).
- `string` converted to an IPv4 address, or null if `string` is an invalid address. [IPv4](../data-types/ipv4.md).
**Example**
Query:
```sql
SELECT
toIPv4OrNull('192.168.0.1') AS s1,
toIPv4OrNull('192.168.0') AS s2
``` sql
WITH 'fe80:2030:31:24' AS invalid_IPv6_string
SELECT toIPv4OrNull(invalid_IPv6_string);
```
Result:
```response
┌─s1──────────┬─s2───┐
│ 192.168.0.1 │ ᴺᵁᴸᴸ │
└───────────────────┘
``` text
┌─toIPv4OrNull(invalid_IPv6_string)─┐
│ ᴺᵁᴸᴸ
└───────────────────────────────────┘
```
## toIPv6OrDefault(string)
## toIPv4OrZero
Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns `::` (0 IPv6).
Same as [`toIPv4`](#toipv4), but if the IPv4 address has an invalid format, it returns `0.0.0.0`.
## toIPv6OrNull(string)
**Syntax**
Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null.
```sql
toIPv4OrZero(string)
```
**Arguments**
- `string` — IP address. [String](../data-types/string.md).
**Returned value**
- `string` converted to an IPv4 address, or `0.0.0.0` if `string` is an invalid address. [IPv4](../data-types/ipv4.md).
**Example**
Query:
``` sql
WITH 'Not an IP address' AS invalid_IPv6_string
SELECT toIPv4OrZero(invalid_IPv6_string);
```
Result:
``` text
┌─toIPv4OrZero(invalid_IPv6_string)─┐
│ 0.0.0.0 │
└───────────────────────────────────┘
```
## toIPv6
@ -371,7 +422,7 @@ toIPv6(string)
**Argument**
- `string` — IP address. [String](../data-types/string.md)
- `string` — IP address. [String](../data-types/string.md).
**Returned value**
@ -410,6 +461,117 @@ Result:
└─────────────────────┘
```
## toIPv6OrDefault
Same as [`toIPv6`](#toipv6), but if the IPv6 address has an invalid format, it returns `::` (0 IPv6) or the provided IPv6 default.
**Syntax**
```sql
toIPv6OrDefault(string[, default])
```
**Arguments**
- `string` — IP address. [String](../data-types/string.md).
- `default` (optional) — The value to return if `string` has an invalid format. [IPv6](../data-types/ipv6.md).
**Returned value**
- IPv6 address [IPv6](../data-types/ipv6.md), otherwise `::` or the provided optional default if `string` has an invalid format.
**Example**
Query:
``` sql
WITH
'127.0.0.1' AS valid_IPv4_string,
'127.0.0.1.6' AS invalid_IPv4_string
SELECT
toIPv6OrDefault(valid_IPv4_string) AS valid,
toIPv6OrDefault(invalid_IPv4_string) AS default,
toIPv6OrDefault(invalid_IPv4_string, toIPv6('1.1.1.1')) AS provided_default
```
Result:
``` text
┌─valid────────────┬─default─┬─provided_default─┐
│ ::ffff:127.0.0.1 │ :: │ ::ffff:1.1.1.1 │
└──────────────────┴─────────┴──────────────────┘
```
## toIPv6OrNull
Same as [`toIPv6`](#toipv6), but if the IPv6 address has an invalid format, it returns null.
**Syntax**
```sql
toIPv6OrNull(string)
```
**Argument**
- `string` — IP address. [String](../data-types/string.md).
**Returned value**
- IPv6 address, or null if `string` has an invalid format. [IPv6](../data-types/ipv6.md).
**Example**
Query:
``` sql
WITH '127.0.0.1.6' AS invalid_IPv4_string
SELECT toIPv6OrNull(invalid_IPv4_string);
```
Result:
``` text
┌─toIPv6OrNull(invalid_IPv4_string)─┐
│ ᴺᵁᴸᴸ │
└───────────────────────────────────┘
```
## toIPv6OrZero
Same as [`toIPv6`](#toipv6), but if the IPv6 address has an invalid format, it returns `::`.
**Syntax**
```sql
toIPv6OrZero(string)
```
**Argument**
- `string` — IP address. [String](../data-types/string.md).
**Returned value**
- IPv6 address, or `::` if `string` has an invalid format. [IPv6](../data-types/ipv6.md).
**Example**
Query:
``` sql
WITH '127.0.0.1.6' AS invalid_IPv4_string
SELECT toIPv6OrZero(invalid_IPv4_string);
```
Result:
``` text
┌─toIPv6OrZero(invalid_IPv4_string)─┐
│ :: │
└───────────────────────────────────┘
```
## IPv6StringToNumOrDefault(s)
Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns 0.

View File

@ -4390,3 +4390,37 @@ Result:
1. │ ['{ArraySizes}','{ArrayElements, TupleElement(keys), Regular}','{ArrayElements, TupleElement(values), Regular}'] │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
## globalVariable
Takes a constant string argument and returns the value of the global variable with that name. It is intended for compatibility with MySQL.
**Syntax**
```sql
globalVariable(name)
```
**Arguments**
- `name` — Global variable name. [String](../data-types/string.md).
**Returned value**
- Returns the value of variable `name`.
**Example**
Query:
```sql
SELECT globalVariable('max_allowed_packet');
```
Result:
```response
┌─globalVariable('max_allowed_packet')─┐
│ 67108864 │
└──────────────────────────────────────┘
```

View File

@ -1,2 +0,0 @@
# Just an empty yaml file. Keep it alone.
{}

View File

@ -7,190 +7,3 @@ sidebar_label: "[experimental] MaterializedMySQL"
# [экспериментальный] MaterializedMySQL {#materialized-mysql}
**Это экспериментальный движок, который не следует использовать в продакшене.**
Создает базу данных ClickHouse со всеми таблицами, существующими в MySQL, и всеми данными в этих таблицах.
Сервер ClickHouse работает как реплика MySQL. Он читает файл binlog и выполняет DDL- и DML-запросы.
## Создание базы данных {#creating-a-database}
``` sql
CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
```
**Параметры движка**
- `host:port` — адрес сервера MySQL.
- `database` — имя базы данных на удалённом сервере.
- `user` — пользователь MySQL.
- `password` — пароль пользователя.
**Настройки движка**
- `max_rows_in_buffer` — максимальное количество строк, содержимое которых может кешироваться в памяти (для одной таблицы и данных кеша, которые невозможно запросить). При превышении количества строк, данные будут материализованы. Значение по умолчанию: `65 505`.
- `max_bytes_in_buffer` — максимальное количество байтов, которое разрешено кешировать в памяти (для одной таблицы и данных кеша, которые невозможно запросить). При превышении количества байтов, данные будут материализованы. Значение по умолчанию: `1 048 576`.
- `max_rows_in_buffers` — максимальное количество строк, содержимое которых может кешироваться в памяти (для базы данных и данных кеша, которые невозможно запросить). При превышении количества строк, данные будут материализованы. Значение по умолчанию: `65 505`.
- `max_bytes_in_buffers` — максимальное количество байтов, которое разрешено кешировать данным в памяти (для базы данных и данных кеша, которые невозможно запросить). При превышении количества байтов, данные будут материализованы. Значение по умолчанию: `1 048 576`.
- `max_flush_data_time` — максимальное время в миллисекундах, в течение которого разрешено кешировать данные в памяти (для базы данных и данных кеша, которые невозможно запросить). При превышении указанного периода, данные будут материализованы. Значение по умолчанию: `1000`.
- `max_wait_time_when_mysql_unavailable` — интервал между повторными попытками, если MySQL недоступен. Указывается в миллисекундах. Отрицательное значение отключает повторные попытки. Значение по умолчанию: `1000`.
- `allows_query_when_mysql_lost` — признак, разрешен ли запрос к материализованной таблице при потере соединения с MySQL. Значение по умолчанию: `0` (`false`).
```sql
CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***')
SETTINGS
allows_query_when_mysql_lost=true,
max_wait_time_when_mysql_unavailable=10000;
```
**Настройки на стороне MySQL-сервера**
Для правильной работы `MaterializedMySQL` следует обязательно указать на сервере MySQL следующие параметры конфигурации:
- `default_authentication_plugin = mysql_native_password` — `MaterializedMySQL` может авторизоваться только с помощью этого метода.
- `gtid_mode = on` — ведение журнала на основе GTID является обязательным для обеспечения правильной репликации.
:::note Внимание
При включении `gtid_mode` вы также должны указать `enforce_gtid_consistency = on`.
:::
## Виртуальные столбцы {#virtual-columns}
При работе с движком баз данных `MaterializedMySQL` используются таблицы семейства [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) с виртуальными столбцами `_sign` и `_version`.
- `_version` — счетчик транзакций. Тип [UInt64](../../sql-reference/data-types/int-uint.md).
- `_sign` — метка удаления. Тип [Int8](../../sql-reference/data-types/int-uint.md). Возможные значения:
- `1` — строка не удалена,
- `-1` — строка удалена.
## Поддержка типов данных {#data_types-support}
| MySQL | ClickHouse |
|-------------------------|--------------------------------------------------------------|
| TINY | [Int8](../../sql-reference/data-types/int-uint.md) |
| SHORT | [Int16](../../sql-reference/data-types/int-uint.md) |
| INT24 | [Int32](../../sql-reference/data-types/int-uint.md) |
| LONG | [UInt32](../../sql-reference/data-types/int-uint.md) |
| LONGLONG | [UInt64](../../sql-reference/data-types/int-uint.md) |
| FLOAT | [Float32](../../sql-reference/data-types/float.md) |
| DOUBLE | [Float64](../../sql-reference/data-types/float.md) |
| DECIMAL, NEWDECIMAL | [Decimal](../../sql-reference/data-types/decimal.md) |
| DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) |
| DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) |
| DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) |
| ENUM | [Enum](../../sql-reference/data-types/enum.md) |
| STRING | [String](../../sql-reference/data-types/string.md) |
| VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) |
| BLOB | [String](../../sql-reference/data-types/string.md) |
| BINARY | [FixedString](../../sql-reference/data-types/fixedstring.md) |
Тип [Nullable](../../sql-reference/data-types/nullable.md) поддерживается.
Другие типы не поддерживаются. Если таблица MySQL содержит столбец другого типа, ClickHouse выдаст исключение "Неподдерживаемый тип данных" ("Unhandled data type") и остановит репликацию.
## Особенности и рекомендации {#specifics-and-recommendations}
### Ограничения совместимости {#compatibility-restrictions}
Кроме ограничений на типы данных, существует несколько ограничений по сравнению с базами данных MySQL, которые следует решить до того, как станет возможной репликация:
- Каждая таблица в MySQL должна содержать `PRIMARY KEY`.
- Репликация для таблиц, содержащих строки со значениями полей `ENUM` вне диапазона значений (определяется размерностью `ENUM`), не будет работать.
### DDL-запросы {#ddl-queries}
DDL-запросы в MySQL конвертируются в соответствующие DDL-запросы в ClickHouse ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). Если ClickHouse не может конвертировать какой-либо DDL-запрос, он его игнорирует.
### Репликация данных {#data-replication}
Данные являются неизменяемыми со стороны пользователя ClickHouse, но автоматически обновляются путём репликации следующих запросов из MySQL:
- Запрос `INSERT` конвертируется в ClickHouse в `INSERT` с `_sign=1`.
- Запрос `DELETE` конвертируется в ClickHouse в `INSERT` с `_sign=-1`.
- Запрос `UPDATE` конвертируется в ClickHouse в `INSERT` с `_sign=-1` и `INSERT` с `_sign=1`.
### Выборка из таблиц движка MaterializedMySQL {#select}
Запрос `SELECT` из таблиц движка `MaterializedMySQL` имеет некоторую специфику:
- Если в запросе `SELECT` напрямую не указан столбец `_version`, то используется модификатор [FINAL](../../sql-reference/statements/select/from.md#select-from-final). Таким образом, выбираются только строки с `MAX(_version)`.
- Если в запросе `SELECT` напрямую не указан столбец `_sign`, то по умолчанию используется `WHERE _sign=1`. Таким образом, удаленные строки не включаются в результирующий набор.
- Результат включает комментарии к столбцам, если они существуют в таблицах базы данных MySQL.
### Конвертация индексов {#index-conversion}
Секции `PRIMARY KEY` и `INDEX` в MySQL конвертируются в кортежи `ORDER BY` в таблицах ClickHouse.
В таблицах ClickHouse данные физически хранятся в том порядке, который определяется секцией `ORDER BY`. Чтобы физически перегруппировать данные, используйте [материализованные представления](../../sql-reference/statements/create/view.md#materialized).
**Примечание**
- Строки с `_sign=-1` физически не удаляются из таблиц.
- Каскадные запросы `UPDATE/DELETE` не поддерживаются движком `MaterializedMySQL`.
- Репликация может быть легко нарушена.
- Прямые операции изменения данных в таблицах и базах данных `MaterializedMySQL` запрещены.
- На работу `MaterializedMySQL` влияет настройка [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert). Когда таблица на MySQL сервере меняется, происходит слияние данных в соответствующей таблице в базе данных `MaterializedMySQL`.
## Примеры использования {#examples-of-use}
Запросы в MySQL:
``` sql
mysql> CREATE DATABASE db;
mysql> CREATE TABLE db.test (a INT PRIMARY KEY, b INT);
mysql> INSERT INTO db.test VALUES (1, 11), (2, 22);
mysql> DELETE FROM db.test WHERE a=1;
mysql> ALTER TABLE db.test ADD COLUMN c VARCHAR(16);
mysql> UPDATE db.test SET c='Wow!', b=222;
mysql> SELECT * FROM test;
```
```text
+---+------+------+
| a | b | c |
+---+------+------+
| 2 | 222 | Wow! |
+---+------+------+
```
База данных в ClickHouse, обмен данными с сервером MySQL:
База данных и созданная таблица:
``` sql
CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***');
SHOW TABLES FROM mysql;
```
``` text
┌─name─┐
│ test │
└──────┘
```
После вставки данных:
``` sql
SELECT * FROM mysql.test;
```
``` text
┌─a─┬──b─┐
│ 1 │ 11 │
│ 2 │ 22 │
└───┴────┘
```
После удаления данных, добавления столбца и обновления:
``` sql
SELECT * FROM mysql.test;
```
``` text
┌─a─┬───b─┬─c────┐
│ 2 │ 222 │ Wow! │
└───┴─────┴──────┘
```

View File

@ -33,7 +33,7 @@ sidebar_label: "Отличительные возможности ClickHouse"
## Поддержка SQL {#sql-support}
ClickHouse поддерживает [декларативный язык запросов на основе SQL](../sql-reference/index.md) и во [многих случаях](../sql-reference/ansi.mdx) совпадающий с SQL-стандартом.
ClickHouse поддерживает декларативный язык запросов SQL.
Поддерживаются [GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), подзапросы в секциях [FROM](../sql-reference/statements/select/from.md), [IN](../sql-reference/operators/in.md), [JOIN](../sql-reference/statements/select/join.md), [функции window](../sql-reference/window-functions/index.mdx), а также скалярные подзапросы.

View File

@ -30,7 +30,7 @@ sidebar_label: "Настройки пользователей"
<profile>profile_name</profile>
<quota>default</quota>
<default_database>default<default_database>
<default_database>default</default_database>
<databases>
<database_name>
<table_name>

View File

@ -1,10 +0,0 @@
---
slug: /ru/sql-reference/ansi
sidebar_position: 40
sidebar_label: ANSI Compatibility
title: "ANSI Compatibility"
---
import Content from '@site/docs/en/sql-reference/ansi.md';
<Content />

View File

@ -37,7 +37,7 @@ ClickHouse会使用服务器上一切可用的资源从而以最自然的方
## 支持SQL {#zhi-chi-sql}
ClickHouse支持一种[基于SQL的声明式查询语言](../sql-reference/index.md),它在许多情况下与[ANSI SQL标准](../sql-reference/ansi.md)相同
ClickHouse支持一种基于SQL的声明式查询语言。
支持的查询[GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), [FROM](../sql-reference/statements/select/from.md), [JOIN](../sql-reference/statements/select/join.md), [IN](../sql-reference/operators/in.md)以及非相关子查询。

View File

@ -1,191 +0,0 @@
---
slug: /zh/sql-reference/ansi
sidebar_position: 40
sidebar_label: "ANSI\u517C\u5BB9\u6027"
---
# ClickHouse SQL方言 与ANSI SQL的兼容性{#ansi-sql-compatibility-of-clickhouse-sql-dialect}
:::note
本文参考Annex G所著的[ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8)标准.
:::
## 行为差异 {#differences-in-behaviour}
下表列出了ClickHouse能够使用但与ANSI SQL规定有差异的查询特性。
| 功能ID | 功能名称 | 差异 |
| ------- | --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| E011 | 数值型数据类型 | 带小数点的数字被视为近似值 (`Float64`)而不是精确值 (`Decimal`) |
| E051-05 | SELECT 的列可以重命名 | 字段重命名的作用范围不限于进行重命名的SELECT子查询参考[表达式别名](https://clickhouse.com/docs/zh/sql-reference/syntax/#notes-on-usage) |
| E141-01 | NOT NULL非空约束 | ClickHouse表中每一列默认为`NOT NULL` |
| E011-04 | 算术运算符 | ClickHouse在运算时会进行溢出而不是四舍五入。此外会根据自定义规则修改结果数据类型参考[溢出检查](https://clickhouse.com/docs/zh/sql-reference/data-types/decimal/#yi-chu-jian-cha) |
## 功能状态 {#feature-status}
| 功能ID | 功能名称 | 状态 | 注释 |
| -------- | ---------------------------------------------------------------------------------------- | ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| **E011** | **数值型数据类型** | **部分**{.text-warning} | |
| E011-01 | INTEGER 整型和SMALLINT (小整型)数据类型 | 是 {.text-success} | |
| E011-02 | REAL 实数、DOUBLE PRECISION 双精度浮点数和FLOAT单精度浮点数数据类型数据类型 | 是 {.text-success} | |
| E011-03 | DECIMAL 精确数字和NUMERIC (精确数字)数据类型 | 是 {.text-success} | |
| E011-04 | 算术运算符 | 是 {.text-success} | |
| E011-05 | 数值比较 | 是 {.text-success} | |
| E011-06 | 数值数据类型之间的隐式转换 | 否 {.text-danger} | ANSI SQL允许在数值类型之间进行任意隐式转换而ClickHouse针对不同数据类型有对应的比较函数和类型转换函数 |
| **E021** | **字符串类型** | **部分**{.text-warning} | |
| E021-01 | CHARACTER (字符串)数据类型 | 是 {.text-success} | |
| E021-02 | CHARACTER VARYING (可变字符串)数据类型 | 是 {.text-success} | |
| E021-03 | 字符字面量 | 是 {.text-success} | |
| E021-04 | CHARACTER_LENGTH 函数 | 部分 {.text-warning} | 不支持 `using` 从句 |
| E021-05 | OCTET_LENGTH 函数 | 否 {.text-danger} | 使用 `LENGTH` 函数代替 |
| E021-06 | SUBSTRING | 部分 {.text-warning} | 不支持 `SIMILAR` 和 `ESCAPE` 从句,没有`SUBSTRING_REGEX` 函数 |
| E021-07 | 字符串拼接 | 部分 {.text-warning} | 不支持 `COLLATE` 从句 |
| E021-08 | 大小写转换 | 是 {.text-success} | |
| E021-09 | 裁剪字符串 | 是 {.text-success} | |
| E021-10 | 固定长度和可变长度字符串类型之间的隐式转换 | 部分 {.text-warning} | ANSI SQL允许在数据类型之间进行任意隐式转换而ClickHouse针对不同数据类型有对应的比较函数和类型转换函数 |
| E021-11 | POSITION 函数 | 部分 {.text-warning} | 不支持 `IN` 和 `USING` 从句,不支持`POSITION_REGEX`函数 |
| E021-12 | 字符串比较 | 是 {.text-success} | |
| **E031** | **标识符** | **部分**{.text-warning} | |
| E031-01 | 分隔标识符 | 部分 {.text-warning} | Unicode文字支持有限 |
| E031-02 | 小写标识符 | 是 {.text-success} | |
| E031-03 | 标识符最后加下划线 | 是 {.text-success} | |
| **E051** | **基本查询规范** | **部分**{.text-warning} | |
| E051-01 | SELECT DISTINCT | 是 {.text-success} | |
| E051-02 | GROUP BY 从句 | 是 {.text-success} | |
| E051-04 | GROUP BY 从句中的列可以包含不在 `<select list>`中出现的列 | 是 {.text-success} | |
| E051-05 | SELECT 的列可以重命名 | 是 {.text-success} | |
| E051-06 | HAVING 从句 | 是 {.text-success} | |
| E051-07 | SELECT 选择的列中允许出现\* | 是 {.text-success} | |
| E051-08 | FROM 从句中的关联名称 | 是 {.text-success} | |
| E051-09 | 重命名 FROM 从句中的列 | 否 {.text-danger} | |
| **E061** | **基本谓词和搜索条件** | **部分**{.text-warning} | |
| E061-01 | 比较谓词 | 是 {.text-success} | |
| E061-02 | BETWEEN 谓词 | 部分 {.text-warning} | 不支持 `SYMMETRIC` 和 `ASYMMETRIC` 从句 |
| E061-03 | IN 谓词后可接值列表 | 是 {.text-success} | |
| E061-04 | LIKE 谓词 | 是 {.text-success} | |
| E061-05 | LIKE 谓词后接 ESCAPE 从句 | 否 {.text-danger} | |
| E061-06 | NULL 谓词 | 是 {.text-success} | |
| E061-07 | 量化比较谓词ALL、SOME、ANY | 否 {.text-danger} | |
| E061-08 | EXISTS 谓词 | 否 {.text-danger} | |
| E061-09 | 比较谓词中的子查询 | 是 {.text-success} | |
| E061-11 | IN 谓词中的子查询 | 是 {.text-success} | |
| E061-12 | 量化比较谓词BETWEEN、IN、LIKE中的子查询 | 否 {.text-danger} | |
| E061-13 | 相关子查询 | 否 {.text-danger} | |
| E061-14 | 搜索条件 | 是 {.text-success} | |
| **E071** | **基本查询表达式** | **部分**{.text-warning} | |
| E071-01 | UNION DISTINCT 表运算符 | 是 {.text-success} | |
| E071-02 | UNION ALL 表运算符 | 是 {.text-success} | |
| E071-03 | EXCEPT DISTINCT 表运算符 | 否 {.text-danger} | |
| E071-05 | 通过表运算符组合的列不必具有完全相同的数据类型 | 是 {.text-success} | |
| E071-06 | 子查询中的表运算符 | 是 {.text-success} | |
| **E081** | **基本权限** | **是**{.text-success} | |
| E081-01 | 表级别的SELECT查询权限 | 是 {.text-success} | |
| E081-02 | DELETE删除权限 | 是 {.text-success} | |
| E081-03 | 表级别的INSERT插入权限 | 是 {.text-success} | |
| E081-04 | 表级别的UPDATE更新权限 | 是 {.text-success} | |
| E081-05 | 列级别的UPDATE更新权限 | 是 {.text-success} | |
| E081-06 | 表级别的REFERENCES引用权限 | 是 {.text-success} | |
| E081-07 | 列级别的REFERENCES引用权限 | 是 {.text-success} | |
| E081-08 | WITH GRANT OPTION | 是 {.text-success} | |
| E081-09 | USAGE使用权限 | 是 {.text-success} | |
| E081-10 | EXECUTE执行权限 | 是 {.text-success} | |
| **E091** | **集合函数** | **是**{.text-success} | |
| E091-01 | AVG | 是 {.text-success} | |
| E091-02 | COUNT | 是 {.text-success} | |
| E091-03 | MAX | 是 {.text-success} | |
| E091-04 | MIN | 是 {.text-success} | |
| E091-05 | SUM | 是 {.text-success} | |
| E091-06 | ALL修饰词 | 否。 {.text-danger} | |
| E091-07 | DISTINCT修饰词 | 是 {.text-success} | 并非所有聚合函数都支持该修饰词 |
| **E101** | **基本数据操作** | **部分**{.text-warning} | |
| E101-01 | INSERT插入语句 | 是 {.text-success} | 注ClickHouse中的主键并不隐含`UNIQUE` 约束 |
| E101-03 | 可指定范围的UPDATE更新语句 | 部分 {.text-warning} | `ALTER UPDATE` 语句用来批量更新数据 |
| E101-04 | 可指定范围的DELETE删除语句 | 部分 {.text-warning} | `ALTER DELETE` 语句用来批量删除数据 |
| **E111** | **返回一行的SELECT语句** | **否**{.text-danger} | |
| **E121** | **基本游标支持** | **否**{.text-danger} | |
| E121-01 | DECLARE CURSOR | 否 {.text-danger} | |
| E121-02 | ORDER BY 涉及的列不需要出现在SELECT的列中 | 是 {.text-success} | |
| E121-03 | ORDER BY 从句中的表达式 | 是 {.text-success} | |
| E121-04 | OPEN 语句 | 否 {.text-danger} | |
| E121-06 | 受游标位置控制的 UPDATE 语句 | 否 {.text-danger} | |
| E121-07 | 受游标位置控制的 DELETE 语句 | 否 {.text-danger} | |
| E121-08 | CLOSE 语句 | 否 {.text-danger} | |
| E121-10 | FETCH 语句中包含隐式NEXT | 否 {.text-danger} | |
| E121-17 | WITH HOLD 游标 | 否 {.text-danger} | |
| **E131** | **空值支持** | **是**{.text-success} | 有部分限制 |
| **E141** | **基本完整性约束** | **部分**{.text-warning} | |
| E141-01 | NOT NULL非空约束 | 是 {.text-success} | 注: 默认情况下ClickHouse表中的列隐含`NOT NULL`约束 |
| E141-02 | NOT NULL非空列的UNIQUE唯一约束 | 否 {.text-danger} | |
| E141-03 | 主键约束 | 部分 {.text-warning} | |
| E141-04 | 对于引用删除和引用更新操作基本的FOREIGN KEY外键约束默认不进行任何操作NO ACTION | 否 {.text-danger} | |
| E141-06 | CHECK检查约束 | 是 {.text-success} | |
| E141-07 | 列默认值 | 是 {.text-success} | |
| E141-08 | 在主键上推断非空 | 是 {.text-success} | |
| E141-10 | 可以按任何顺序指定外键中的名称 | 否 {.text-danger} | |
| **E151** | **事务支持** | **否**{.text-danger} | |
| E151-01 | COMMIT提交语句 | 否 {.text-danger} | |
| E151-02 | ROLLBACK回滚语句 | 否 {.text-danger} | |
| **E152** | **基本的SET TRANSACTION设置事务隔离级别语句** | **否**{.text-danger} | |
| E152-01 | SET TRANSACTION语句ISOLATION LEVEL SERIALIZABLE隔离级别为串行化从句 | 否 {.text-danger} | |
| E152-02 | SET TRANSACTION语句READ ONLY只读和READ WRITE读写从句 | 否 {.text-danger} | |
| **E153** | **具有子查询的可更新查询** | **是**{.text-success} | |
| **E161** | **使用“--”符号作为SQL注释** | **是**{.text-success} | |
| **E171** | **SQLSTATE支持** | **否**{.text-danger} | |
| **E182** | **主机语言绑定** | **否**{.text-danger} | |
| **F031** | **基本架构操作** | **部分**{.text-warning} | |
| F031-01 | 使用 CREATE TABLE 语句创建持久表 | 部分 {.text-warning} | 不支持 `SYSTEM VERSIONING`, `ON COMMIT`, `GLOBAL`, `LOCAL`, `PRESERVE`, `DELETE`, `REF IS`, `WITH OPTIONS`, `UNDER`, `LIKE`, `PERIOD FOR` 从句,不支持用户解析的数据类型 |
| F031-02 | CREATE VIEW创建视图语句 | 部分 {.text-warning} | 不支持 `RECURSIVE`, `CHECK`, `UNDER`, `WITH OPTIONS` 从句,不支持用户解析的数据类型 |
| F031-03 | GRANT授权语句 | 是 {.text-success} | |
| F031-04 | ALTER TABLE语句ADD COLUMN从句 | 是 {.text-success} | 不支持 `GENERATED` 从句和以系统时间做参数 |
| F031-13 | DROP TABLE语句RESTRICT从句 | 否 {.text-danger} | |
| F031-16 | DROP VIEW语句RESTRICT子句 | 否 {.text-danger} | |
| F031-19 | REVOKE语句RESTRICT子句 | 否 {.text-danger} | |
| **F041** | **基本连接关系** | **部分**{.text-warning} | |
| F041-01 | Inner join但不一定是INNER关键字) | 是 {.text-success} | |
| F041-02 | INNER 关键字 | 是 {.text-success} | |
| F041-03 | LEFT OUTER JOIN | 是 {.text-success} | |
| F041-04 | RIGHT OUTER JOIN | 是 {.text-success} | |
| F041-05 | 外连接可嵌套 | 是 {.text-success} | |
| F041-07 | 左外部连接或右外连接中的内部表也可用于内部联接 | 是 {.text-success} | |
| F041-08 | 支持所有比较运算符(而不仅仅是=) | 否 {.text-danger} | |
| **F051** | **基本日期和时间** | **部分**{.text-warning} | |
| F051-01 | DATE日期数据类型并支持用于表达日期的字面量) | 是 {.text-success} | |
| F051-02 | TIME时间数据类型并支持用于表达时间的字面量小数秒精度至少为0 | 否 {.text-danger} | |
| F051-03 | 时间戳数据类型并支持用于表达时间戳的字面量小数秒精度至少为0和6 | 是 {.text-danger} | |
| F051-04 | 日期、时间和时间戳数据类型的比较谓词 | 是 {.text-success} | |
| F051-05 | DateTime 类型和字符串形式表达的时间之间的显式转换 | 是 {.text-success} | |
| F051-06 | CURRENT_DATE | 否 {.text-danger} | 使用`today()`替代 |
| F051-07 | LOCALTIME | 否 {.text-danger} | 使用`now()`替代 |
| F051-08 | LOCALTIMESTAMP | 否 {.text-danger} | |
| **F081** | **视图的UNION和EXCEPT操作** | **部分**{.text-warning} | |
| **F131** | **分组操作** | **部分**{.text-warning} | |
| F131-01 | 在具有分组视图的查询中支持 WHERE、GROUP BY 和 HAVING 子句 | 是 {.text-success} | |
| F131-02 | 在分组视图中支持多张表 | 是 {.text-success} | |
| F131-03 | 分组视图的查询中支持集合函数 | 是 {.text-success} | |
| F131-04 | 带有 `GROUP BY` 和 `HAVING` 从句,以及分组视图的子查询 | 是 {.text-success} | |
| F131-05 | 带有 `GROUP BY` 和 `HAVING` 从句以及分组视图的仅返回1条记录的SELECT查询 | 否 {.text-danger} | |
| **F181** | **多模块支持** | **否**{.text-danger} | |
| **F201** | **CAST 函数** | **是**{.text-success} | |
| **F221** | **显式默认值** | **否**{.text-danger} | |
| **F261** | **CASE 表达式** | **是**{.text-success} | |
| F261-01 | 简单 CASE 表达式 | 是 {.text-success} | |
| F261-02 | 搜索型 CASE 表达式 | 是 {.text-success} | |
| F261-03 | NULLIF | 是 {.text-success} | |
| F261-04 | COALESCE | 是 {.text-success} | |
| **F311** | **架构定义语句** | **部分**{.text-warning} | |
| F311-01 | CREATE SCHEMA | 部分 {.text-warning} | 见`CREATE DATABASE` |
| F311-02 | 用于创建持久表的 CREATE TABLE | 是 {.text-success} | |
| F311-03 | CREATE VIEW | 是 {.text-success} | |
| F311-04 | CREATE VIEW: WITH CHECK OPTION | 否 {.text-danger} | |
| F311-05 | GRANT 语句 | 是 {.text-success} | |
| **F471** | **标量子查询** | **是**{.text-success} | |
| **F481** | **扩展 NULL 谓词** | **是**{.text-success} | |
| **F812** | **基本标志位** | **否**{.text-danger} |
| **S011** | **用于不重复数据的数据类型** | **否**{.text-danger} |
| **T321** | **基本的SQL调用例程** | **否**{.text-danger} | |
| T321-01 | 没有重载的用户定义函数 | 否{.text-danger} | |
| T321-02 | 没有重载的用户定义存储过程 | 否{.text-danger} | |
| T321-03 | 功能调用 | 否 {.text-danger} | |
| T321-04 | CALL 语句 | 否 {.text-danger} | |
| T321-05 | RETURN 语句 | 否 {.text-danger} | |
| **T631** | **IN 谓词后接一个列表** | **是**{.text-success} | |

View File

@ -512,6 +512,7 @@ void Client::connect()
{
std::cout << "Connected to " << server_name << " server version " << server_version << "." << std::endl << std::endl;
#ifndef CLICKHOUSE_CLOUD
auto client_version_tuple = std::make_tuple(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH);
auto server_version_tuple = std::make_tuple(server_version_major, server_version_minor, server_version_patch);
@ -527,6 +528,7 @@ void Client::connect()
<< "It may indicate that the server is out of date and can be upgraded." << std::endl
<< std::endl;
}
#endif
}
if (!client_context->getSettingsRef()[Setting::use_client_time_zone])

View File

@ -38,18 +38,10 @@ public:
String path_to = disk_to.getRelativeFromRoot(getValueFromCommandLineOptionsThrow<String>(options, "path-to"));
bool recursive = options.count("recursive");
if (!disk_from.getDisk()->exists(path_from))
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"cannot stat '{}' on disk '{}': No such file or directory",
path_from,
disk_from.getDisk()->getName());
}
if (disk_from.getDisk()->isFile(path_from))
if (disk_from.getDisk()->existsFile(path_from))
{
auto target_location = getTargetLocation(path_from, disk_to, path_to);
if (!disk_to.getDisk()->exists(target_location) || disk_to.getDisk()->isFile(target_location))
if (!disk_to.getDisk()->existsDirectory(target_location))
{
disk_from.getDisk()->copyFile(
path_from,
@ -65,7 +57,7 @@ public:
ErrorCodes::BAD_ARGUMENTS, "cannot overwrite directory {} with non-directory {}", target_location, path_from);
}
}
else if (disk_from.getDisk()->isDirectory(path_from))
else if (disk_from.getDisk()->existsDirectory(path_from))
{
if (!recursive)
{
@ -73,11 +65,11 @@ public:
}
auto target_location = getTargetLocation(path_from, disk_to, path_to);
if (disk_to.getDisk()->isFile(target_location))
if (disk_to.getDisk()->existsFile(target_location))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot overwrite non-directory {} with directory {}", path_to, target_location);
}
if (!disk_to.getDisk()->exists(target_location))
if (!disk_to.getDisk()->existsDirectory(target_location))
{
disk_to.getDisk()->createDirectory(target_location);
}
@ -89,6 +81,14 @@ public:
/* write_settings= */ {},
/* cancellation_hook= */ {});
}
else
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"cannot stat '{}' on disk '{}': No such file or directory",
path_from,
disk_from.getDisk()->getName());
}
}
};

View File

@ -30,21 +30,21 @@ public:
String path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow<String>(options, "path-from"));
String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow<String>(options, "path-to"));
if (disk.getDisk()->isFile(path_from))
if (disk.getDisk()->existsFile(path_from))
{
disk.getDisk()->moveFile(path_from, path_to);
}
else if (disk.getDisk()->isDirectory(path_from))
else if (disk.getDisk()->existsDirectory(path_from))
{
auto target_location = getTargetLocation(path_from, disk, path_to);
if (!disk.getDisk()->exists(target_location))
if (!disk.getDisk()->existsDirectory(target_location))
{
disk.getDisk()->createDirectory(target_location);
disk.getDisk()->moveDirectory(path_from, target_location);
}
else
{
if (disk.getDisk()->isFile(target_location))
if (disk.getDisk()->existsFile(target_location))
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS, "cannot overwrite non-directory '{}' with directory '{}'", target_location, path_from);
@ -57,7 +57,7 @@ public:
disk.getDisk()->moveDirectory(path_from, target_location);
}
}
else if (!disk.getDisk()->exists(path_from))
else
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,

View File

@ -28,11 +28,7 @@ public:
auto disk = client.getCurrentDiskWithPath();
const String & path = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow<String>(options, "path"));
bool recursive = options.count("recursive");
if (!disk.getDisk()->exists(path))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} on disk {} doesn't exist", path, disk.getDisk()->getName());
}
if (disk.getDisk()->isDirectory(path))
if (disk.getDisk()->existsDirectory(path))
{
if (!recursive)
{
@ -41,10 +37,12 @@ public:
disk.getDisk()->removeRecursive(path);
}
else
else if (disk.getDisk()->existsFile(path))
{
disk.getDisk()->removeFileIfExists(path);
}
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} on disk {} doesn't exist", path, disk.getDisk()->getName());
}
};

View File

@ -1,6 +1,7 @@
#include <Interpreters/Context.h>
#include "ICommand.h"
#include <IO/ReadBufferFromEmptyFile.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/copyData.h>
@ -36,7 +37,11 @@ public:
return std::make_unique<ReadBufferFromFileDescriptor>(STDIN_FILENO);
String relative_path_from = disk.getRelativeFromRoot(path_from.value());
return disk.getDisk()->readFile(relative_path_from, getReadSettings());
auto res = disk.getDisk()->readFileIfExists(relative_path_from, getReadSettings());
if (res)
return res;
/// For backward compatibility.
return std::make_unique<ReadBufferFromEmptyFile>();
}();
auto out = disk.getDisk()->writeFile(path_to);

View File

@ -29,7 +29,7 @@ DiskWithPath::DiskWithPath(DiskPtr disk_, std::optional<String> path_) : disk(di
}
String relative_path = normalizePathAndGetAsRelative(path);
if (disk->isDirectory(relative_path) || (relative_path.empty() && (disk->isDirectory("/"))))
if (disk->existsDirectory(relative_path) || (relative_path.empty() && (disk->existsDirectory("/"))))
{
return;
}

View File

@ -33,7 +33,7 @@ public:
bool isDirectory(const String & any_path) const
{
return disk->isDirectory(getRelativeFromRoot(any_path)) || (getRelativeFromRoot(any_path).empty() && (disk->isDirectory("/")));
return disk->existsDirectory(getRelativeFromRoot(any_path)) || (getRelativeFromRoot(any_path).empty() && (disk->existsDirectory("/")));
}
std::vector<String> listAllFilesByPath(const String & any_path) const;

View File

@ -90,7 +90,7 @@ protected:
String getTargetLocation(const String & path_from, DiskWithPath & disk_to, const String & path_to)
{
if (!disk_to.getDisk()->isDirectory(path_to))
if (!disk_to.getDisk()->existsDirectory(path_to))
{
return path_to;
}

View File

@ -594,7 +594,10 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ
catch (...)
{
tryLogCurrentException(log);
out.cancel();
}
return;
}
try

View File

@ -77,6 +77,42 @@ namespace Setting
extern const SettingsLocalFSReadMethod storage_file_read_method;
}
namespace ServerSetting
{
extern const ServerSettingsDouble cache_size_to_ram_max_ratio;
extern const ServerSettingsUInt64 compiled_expression_cache_elements_size;
extern const ServerSettingsUInt64 compiled_expression_cache_size;
extern const ServerSettingsUInt64 database_catalog_drop_table_concurrency;
extern const ServerSettingsString default_database;
extern const ServerSettingsString index_mark_cache_policy;
extern const ServerSettingsUInt64 index_mark_cache_size;
extern const ServerSettingsDouble index_mark_cache_size_ratio;
extern const ServerSettingsString index_uncompressed_cache_policy;
extern const ServerSettingsUInt64 index_uncompressed_cache_size;
extern const ServerSettingsDouble index_uncompressed_cache_size_ratio;
extern const ServerSettingsUInt64 io_thread_pool_queue_size;
extern const ServerSettingsString mark_cache_policy;
extern const ServerSettingsUInt64 mark_cache_size;
extern const ServerSettingsDouble mark_cache_size_ratio;
extern const ServerSettingsUInt64 max_active_parts_loading_thread_pool_size;
extern const ServerSettingsUInt64 max_io_thread_pool_free_size;
extern const ServerSettingsUInt64 max_io_thread_pool_size;
extern const ServerSettingsUInt64 max_outdated_parts_loading_thread_pool_size;
extern const ServerSettingsUInt64 max_parts_cleaning_thread_pool_size;
extern const ServerSettingsUInt64 max_server_memory_usage;
extern const ServerSettingsDouble max_server_memory_usage_to_ram_ratio;
extern const ServerSettingsUInt64 max_thread_pool_free_size;
extern const ServerSettingsUInt64 max_thread_pool_size;
extern const ServerSettingsUInt64 max_unexpected_parts_loading_thread_pool_size;
extern const ServerSettingsUInt64 mmap_cache_size;
extern const ServerSettingsBool show_addresses_in_stack_traces;
extern const ServerSettingsUInt64 thread_pool_queue_size;
extern const ServerSettingsString uncompressed_cache_policy;
extern const ServerSettingsUInt64 uncompressed_cache_size;
extern const ServerSettingsDouble uncompressed_cache_size_ratio;
extern const ServerSettingsBool use_legacy_mongodb_integration;
}
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
@ -157,9 +193,9 @@ void LocalServer::initialize(Poco::Util::Application & self)
server_settings.loadSettingsFromConfig(config());
GlobalThreadPool::initialize(
server_settings.max_thread_pool_size,
server_settings.max_thread_pool_free_size,
server_settings.thread_pool_queue_size);
server_settings[ServerSetting::max_thread_pool_size],
server_settings[ServerSetting::max_thread_pool_free_size],
server_settings[ServerSetting::thread_pool_queue_size]);
#if USE_AZURE_BLOB_STORAGE
/// See the explanation near the same line in Server.cpp
@ -170,17 +206,17 @@ void LocalServer::initialize(Poco::Util::Application & self)
#endif
getIOThreadPool().initialize(
server_settings.max_io_thread_pool_size,
server_settings.max_io_thread_pool_free_size,
server_settings.io_thread_pool_queue_size);
server_settings[ServerSetting::max_io_thread_pool_size],
server_settings[ServerSetting::max_io_thread_pool_free_size],
server_settings[ServerSetting::io_thread_pool_queue_size]);
const size_t active_parts_loading_threads = server_settings.max_active_parts_loading_thread_pool_size;
const size_t active_parts_loading_threads = server_settings[ServerSetting::max_active_parts_loading_thread_pool_size];
getActivePartsLoadingThreadPool().initialize(
active_parts_loading_threads,
0, // We don't need any threads one all the parts will be loaded
active_parts_loading_threads);
const size_t outdated_parts_loading_threads = server_settings.max_outdated_parts_loading_thread_pool_size;
const size_t outdated_parts_loading_threads = server_settings[ServerSetting::max_outdated_parts_loading_thread_pool_size];
getOutdatedPartsLoadingThreadPool().initialize(
outdated_parts_loading_threads,
0, // We don't need any threads one all the parts will be loaded
@ -188,7 +224,7 @@ void LocalServer::initialize(Poco::Util::Application & self)
getOutdatedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads);
const size_t unexpected_parts_loading_threads = server_settings.max_unexpected_parts_loading_thread_pool_size;
const size_t unexpected_parts_loading_threads = server_settings[ServerSetting::max_unexpected_parts_loading_thread_pool_size];
getUnexpectedPartsLoadingThreadPool().initialize(
unexpected_parts_loading_threads,
0, // We don't need any threads once all the parts will be loaded
@ -196,16 +232,16 @@ void LocalServer::initialize(Poco::Util::Application & self)
getUnexpectedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads);
const size_t cleanup_threads = server_settings.max_parts_cleaning_thread_pool_size;
const size_t cleanup_threads = server_settings[ServerSetting::max_parts_cleaning_thread_pool_size];
getPartsCleaningThreadPool().initialize(
cleanup_threads,
0, // We don't need any threads once all the parts will be deleted
cleanup_threads);
getDatabaseCatalogDropTablesThreadPool().initialize(
server_settings.database_catalog_drop_table_concurrency,
server_settings[ServerSetting::database_catalog_drop_table_concurrency],
0, // We don't need any threads if there are no DROP queries.
server_settings.database_catalog_drop_table_concurrency);
server_settings[ServerSetting::database_catalog_drop_table_concurrency]);
}
@ -296,9 +332,14 @@ void LocalServer::tryInitPath()
global_context->setUserFilesPath(""); /// user's files are everywhere
std::string user_scripts_path = getClientConfiguration().getString("user_scripts_path", fs::path(path) / "user_scripts/");
std::string user_scripts_path = getClientConfiguration().getString("user_scripts_path", fs::path(path) / "user_scripts" / "");
global_context->setUserScriptsPath(user_scripts_path);
/// Set path for filesystem caches
String filesystem_caches_path(getClientConfiguration().getString("filesystem_caches_path", fs::path(path) / "cache" / ""));
if (!filesystem_caches_path.empty())
global_context->setFilesystemCachesPath(filesystem_caches_path);
/// top_level_domains_lists
const std::string & top_level_domains_path = getClientConfiguration().getString("top_level_domains_path", fs::path(path) / "top_level_domains/");
if (!top_level_domains_path.empty())
@ -470,7 +511,7 @@ try
UseSSL use_ssl;
thread_status.emplace();
StackTrace::setShowAddresses(server_settings.show_addresses_in_stack_traces);
StackTrace::setShowAddresses(server_settings[ServerSetting::show_addresses_in_stack_traces]);
setupSignalHandler();
@ -507,10 +548,10 @@ try
/// Don't initialize DateLUT
registerFunctions();
registerAggregateFunctions();
registerTableFunctions(server_settings.use_legacy_mongodb_integration);
registerTableFunctions(server_settings[ServerSetting::use_legacy_mongodb_integration]);
registerDatabases();
registerStorages(server_settings.use_legacy_mongodb_integration);
registerDictionaries(server_settings.use_legacy_mongodb_integration);
registerStorages(server_settings[ServerSetting::use_legacy_mongodb_integration]);
registerDictionaries(server_settings[ServerSetting::use_legacy_mongodb_integration]);
registerDisks(/* global_skip_access_check= */ true);
registerFormats();
@ -659,8 +700,8 @@ void LocalServer::processConfig()
const size_t physical_server_memory = getMemoryAmount();
size_t max_server_memory_usage = server_settings.max_server_memory_usage;
double max_server_memory_usage_to_ram_ratio = server_settings.max_server_memory_usage_to_ram_ratio;
size_t max_server_memory_usage = server_settings[ServerSetting::max_server_memory_usage];
double max_server_memory_usage_to_ram_ratio = server_settings[ServerSetting::max_server_memory_usage_to_ram_ratio];
size_t default_max_server_memory_usage = static_cast<size_t>(physical_server_memory * max_server_memory_usage_to_ram_ratio);
@ -689,12 +730,12 @@ void LocalServer::processConfig()
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
const double cache_size_to_ram_max_ratio = server_settings.cache_size_to_ram_max_ratio;
const double cache_size_to_ram_max_ratio = server_settings[ServerSetting::cache_size_to_ram_max_ratio];
const size_t max_cache_size = static_cast<size_t>(physical_server_memory * cache_size_to_ram_max_ratio);
String uncompressed_cache_policy = server_settings.uncompressed_cache_policy;
size_t uncompressed_cache_size = server_settings.uncompressed_cache_size;
double uncompressed_cache_size_ratio = server_settings.uncompressed_cache_size_ratio;
String uncompressed_cache_policy = server_settings[ServerSetting::uncompressed_cache_policy];
size_t uncompressed_cache_size = server_settings[ServerSetting::uncompressed_cache_size];
double uncompressed_cache_size_ratio = server_settings[ServerSetting::uncompressed_cache_size_ratio];
if (uncompressed_cache_size > max_cache_size)
{
uncompressed_cache_size = max_cache_size;
@ -702,9 +743,9 @@ void LocalServer::processConfig()
}
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size, uncompressed_cache_size_ratio);
String mark_cache_policy = server_settings.mark_cache_policy;
size_t mark_cache_size = server_settings.mark_cache_size;
double mark_cache_size_ratio = server_settings.mark_cache_size_ratio;
String mark_cache_policy = server_settings[ServerSetting::mark_cache_policy];
size_t mark_cache_size = server_settings[ServerSetting::mark_cache_size];
double mark_cache_size_ratio = server_settings[ServerSetting::mark_cache_size_ratio];
if (!mark_cache_size)
LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation.");
if (mark_cache_size > max_cache_size)
@ -714,9 +755,9 @@ void LocalServer::processConfig()
}
global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio);
String index_uncompressed_cache_policy = server_settings.index_uncompressed_cache_policy;
size_t index_uncompressed_cache_size = server_settings.index_uncompressed_cache_size;
double index_uncompressed_cache_size_ratio = server_settings.index_uncompressed_cache_size_ratio;
String index_uncompressed_cache_policy = server_settings[ServerSetting::index_uncompressed_cache_policy];
size_t index_uncompressed_cache_size = server_settings[ServerSetting::index_uncompressed_cache_size];
double index_uncompressed_cache_size_ratio = server_settings[ServerSetting::index_uncompressed_cache_size_ratio];
if (index_uncompressed_cache_size > max_cache_size)
{
index_uncompressed_cache_size = max_cache_size;
@ -724,9 +765,9 @@ void LocalServer::processConfig()
}
global_context->setIndexUncompressedCache(index_uncompressed_cache_policy, index_uncompressed_cache_size, index_uncompressed_cache_size_ratio);
String index_mark_cache_policy = server_settings.index_mark_cache_policy;
size_t index_mark_cache_size = server_settings.index_mark_cache_size;
double index_mark_cache_size_ratio = server_settings.index_mark_cache_size_ratio;
String index_mark_cache_policy = server_settings[ServerSetting::index_mark_cache_policy];
size_t index_mark_cache_size = server_settings[ServerSetting::index_mark_cache_size];
double index_mark_cache_size_ratio = server_settings[ServerSetting::index_mark_cache_size_ratio];
if (index_mark_cache_size > max_cache_size)
{
index_mark_cache_size = max_cache_size;
@ -734,7 +775,7 @@ void LocalServer::processConfig()
}
global_context->setIndexMarkCache(index_mark_cache_policy, index_mark_cache_size, index_mark_cache_size_ratio);
size_t mmap_cache_size = server_settings.mmap_cache_size;
size_t mmap_cache_size = server_settings[ServerSetting::mmap_cache_size];
if (mmap_cache_size > max_cache_size)
{
mmap_cache_size = max_cache_size;
@ -746,8 +787,8 @@ void LocalServer::processConfig()
global_context->setQueryCache(0, 0, 0, 0);
#if USE_EMBEDDED_COMPILER
size_t compiled_expression_cache_max_size_in_bytes = server_settings.compiled_expression_cache_size;
size_t compiled_expression_cache_max_elements = server_settings.compiled_expression_cache_elements_size;
size_t compiled_expression_cache_max_size_in_bytes = server_settings[ServerSetting::compiled_expression_cache_size];
size_t compiled_expression_cache_max_elements = server_settings[ServerSetting::compiled_expression_cache_elements_size];
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements);
#endif
@ -767,7 +808,7 @@ void LocalServer::processConfig()
/// We load temporary database first, because projections need it.
DatabaseCatalog::instance().initializeAndLoadTemporaryDatabase();
std::string default_database = server_settings.default_database;
std::string default_database = server_settings[ServerSetting::default_database];
DatabaseCatalog::instance().attachDatabase(default_database, createClickHouseLocalDatabaseOverlay(default_database, global_context));
global_context->setCurrentDatabase(default_database);
@ -853,6 +894,7 @@ void LocalServer::addOptions(OptionsDescription & options_description)
{
options_description.main_description->add_options()
("table,N", po::value<std::string>(), "name of the initial table")
("copy", "shortcut for format conversion, equivalent to: --query 'SELECT * FROM table'")
/// If structure argument is omitted then initial query is not generated
("structure,S", po::value<std::string>(), "structure of the initial table (list of column and type names)")
@ -925,6 +967,12 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp
getClientConfiguration().setString("send_logs_level", options["send_logs_level"].as<std::string>());
if (options.count("wait_for_suggestions_to_load"))
getClientConfiguration().setBool("wait_for_suggestions_to_load", true);
if (options.count("copy"))
{
if (!queries.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--copy' and '--query' cannot be specified at the same time");
queries.emplace_back("SELECT * FROM table");
}
}
void LocalServer::readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &)

View File

@ -164,6 +164,123 @@ namespace MergeTreeSetting
extern const MergeTreeSettingsBool allow_remote_fs_zero_copy_replication;
}
/// Server-level settings referenced from this file.
/// Every entry is only declared here (`extern`), so the objects themselves are defined in another translation unit.
/// A value is read by indexing a ServerSettings object with one of these names,
/// e.g. `server_settings[ServerSetting::max_thread_pool_size]`.
/// NOTE(review): the suffix of each type name (Bool, UInt64, Double, String, ...) presumably reflects the
/// type of the stored value - confirm against the definitions.
namespace ServerSetting
{
extern const ServerSettingsUInt32 asynchronous_heavy_metrics_update_period_s;
extern const ServerSettingsUInt32 asynchronous_metrics_update_period_s;
extern const ServerSettingsBool async_insert_queue_flush_on_shutdown;
extern const ServerSettingsUInt64 async_insert_threads;
extern const ServerSettingsBool async_load_databases;
extern const ServerSettingsUInt64 background_buffer_flush_schedule_pool_size;
extern const ServerSettingsUInt64 background_common_pool_size;
extern const ServerSettingsUInt64 background_distributed_schedule_pool_size;
extern const ServerSettingsUInt64 background_fetches_pool_size;
extern const ServerSettingsFloat background_merges_mutations_concurrency_ratio;
extern const ServerSettingsString background_merges_mutations_scheduling_policy;
extern const ServerSettingsUInt64 background_message_broker_schedule_pool_size;
extern const ServerSettingsUInt64 background_move_pool_size;
extern const ServerSettingsUInt64 background_pool_size;
extern const ServerSettingsUInt64 background_schedule_pool_size;
extern const ServerSettingsUInt64 backups_io_thread_pool_queue_size;
extern const ServerSettingsDouble cache_size_to_ram_max_ratio;
extern const ServerSettingsDouble cannot_allocate_thread_fault_injection_probability;
extern const ServerSettingsUInt64 cgroups_memory_usage_observer_wait_time;
extern const ServerSettingsUInt64 compiled_expression_cache_elements_size;
extern const ServerSettingsUInt64 compiled_expression_cache_size;
extern const ServerSettingsUInt64 concurrent_threads_soft_limit_num;
extern const ServerSettingsUInt64 concurrent_threads_soft_limit_ratio_to_cores;
extern const ServerSettingsUInt64 config_reload_interval_ms;
extern const ServerSettingsUInt64 database_catalog_drop_table_concurrency;
extern const ServerSettingsString default_database;
extern const ServerSettingsBool disable_internal_dns_cache;
extern const ServerSettingsUInt64 disk_connections_soft_limit;
extern const ServerSettingsUInt64 disk_connections_store_limit;
extern const ServerSettingsUInt64 disk_connections_warn_limit;
extern const ServerSettingsBool dns_allow_resolve_names_to_ipv4;
extern const ServerSettingsBool dns_allow_resolve_names_to_ipv6;
extern const ServerSettingsUInt64 dns_cache_max_entries;
extern const ServerSettingsInt32 dns_cache_update_period;
extern const ServerSettingsUInt32 dns_max_consecutive_failures;
extern const ServerSettingsBool enable_azure_sdk_logging;
extern const ServerSettingsBool format_alter_operations_with_parentheses;
extern const ServerSettingsUInt64 global_profiler_cpu_time_period_ns;
extern const ServerSettingsUInt64 global_profiler_real_time_period_ns;
extern const ServerSettingsDouble gwp_asan_force_sample_probability;
extern const ServerSettingsUInt64 http_connections_soft_limit;
extern const ServerSettingsUInt64 http_connections_store_limit;
extern const ServerSettingsUInt64 http_connections_warn_limit;
extern const ServerSettingsString index_mark_cache_policy;
extern const ServerSettingsUInt64 index_mark_cache_size;
extern const ServerSettingsDouble index_mark_cache_size_ratio;
extern const ServerSettingsString index_uncompressed_cache_policy;
extern const ServerSettingsUInt64 index_uncompressed_cache_size;
extern const ServerSettingsDouble index_uncompressed_cache_size_ratio;
extern const ServerSettingsUInt64 io_thread_pool_queue_size;
extern const ServerSettingsSeconds keep_alive_timeout;
extern const ServerSettingsString mark_cache_policy;
extern const ServerSettingsUInt64 mark_cache_size;
extern const ServerSettingsDouble mark_cache_size_ratio;
extern const ServerSettingsUInt64 max_active_parts_loading_thread_pool_size;
extern const ServerSettingsUInt64 max_backups_io_thread_pool_free_size;
extern const ServerSettingsUInt64 max_backups_io_thread_pool_size;
extern const ServerSettingsUInt64 max_concurrent_insert_queries;
extern const ServerSettingsUInt64 max_concurrent_queries;
extern const ServerSettingsUInt64 max_concurrent_select_queries;
extern const ServerSettingsInt32 max_connections;
extern const ServerSettingsUInt64 max_database_num_to_warn;
extern const ServerSettingsUInt32 max_database_replicated_create_table_thread_pool_size;
extern const ServerSettingsUInt64 max_dictionary_num_to_warn;
extern const ServerSettingsUInt64 max_io_thread_pool_free_size;
extern const ServerSettingsUInt64 max_io_thread_pool_size;
extern const ServerSettingsUInt64 max_keep_alive_requests;
extern const ServerSettingsUInt64 max_outdated_parts_loading_thread_pool_size;
extern const ServerSettingsUInt64 max_partition_size_to_drop;
extern const ServerSettingsUInt64 max_part_num_to_warn;
extern const ServerSettingsUInt64 max_parts_cleaning_thread_pool_size;
extern const ServerSettingsUInt64 max_server_memory_usage;
extern const ServerSettingsDouble max_server_memory_usage_to_ram_ratio;
extern const ServerSettingsUInt64 max_table_num_to_warn;
extern const ServerSettingsUInt64 max_table_size_to_drop;
extern const ServerSettingsUInt64 max_temporary_data_on_disk_size;
extern const ServerSettingsUInt64 max_thread_pool_free_size;
extern const ServerSettingsUInt64 max_thread_pool_size;
extern const ServerSettingsUInt64 max_unexpected_parts_loading_thread_pool_size;
extern const ServerSettingsUInt64 max_view_num_to_warn;
extern const ServerSettingsUInt64 max_waiting_queries;
extern const ServerSettingsUInt64 memory_worker_period_ms;
extern const ServerSettingsUInt64 merges_mutations_memory_usage_soft_limit;
extern const ServerSettingsDouble merges_mutations_memory_usage_to_ram_ratio;
extern const ServerSettingsString merge_workload;
extern const ServerSettingsUInt64 mmap_cache_size;
extern const ServerSettingsString mutation_workload;
extern const ServerSettingsUInt64 page_cache_chunk_size;
extern const ServerSettingsUInt64 page_cache_mmap_size;
extern const ServerSettingsUInt64 page_cache_size;
extern const ServerSettingsBool page_cache_use_madv_free;
extern const ServerSettingsBool page_cache_use_transparent_huge_pages;
extern const ServerSettingsBool prepare_system_log_tables_on_startup;
extern const ServerSettingsBool show_addresses_in_stack_traces;
extern const ServerSettingsBool shutdown_wait_backups_and_restores;
extern const ServerSettingsUInt64 shutdown_wait_unfinished;
extern const ServerSettingsBool shutdown_wait_unfinished_queries;
extern const ServerSettingsUInt64 storage_connections_soft_limit;
extern const ServerSettingsUInt64 storage_connections_store_limit;
extern const ServerSettingsUInt64 storage_connections_warn_limit;
extern const ServerSettingsUInt64 tables_loader_background_pool_size;
extern const ServerSettingsUInt64 tables_loader_foreground_pool_size;
extern const ServerSettingsString temporary_data_in_cache;
extern const ServerSettingsUInt64 thread_pool_queue_size;
extern const ServerSettingsString tmp_policy;
extern const ServerSettingsUInt64 total_memory_profiler_sample_max_allocation_size;
extern const ServerSettingsUInt64 total_memory_profiler_sample_min_allocation_size;
extern const ServerSettingsUInt64 total_memory_profiler_step;
extern const ServerSettingsDouble total_memory_tracker_sample_probability;
extern const ServerSettingsString uncompressed_cache_policy;
extern const ServerSettingsUInt64 uncompressed_cache_size;
extern const ServerSettingsDouble uncompressed_cache_size_ratio;
extern const ServerSettingsBool use_legacy_mongodb_integration;
}
}
namespace CurrentMetrics
@ -672,7 +789,7 @@ static void initializeAzureSDKLogger(
[[ maybe_unused ]] int server_logs_level)
{
#if USE_AZURE_BLOB_STORAGE
if (!server_settings.enable_azure_sdk_logging)
if (!server_settings[ServerSetting::enable_azure_sdk_logging])
return;
using AzureLogsLevel = Azure::Core::Diagnostics::Logger::Level;
@ -746,9 +863,9 @@ try
ServerSettings server_settings;
server_settings.loadSettingsFromConfig(config());
ASTAlterCommand::setFormatAlterCommandsWithParentheses(server_settings.format_alter_operations_with_parentheses);
ASTAlterCommand::setFormatAlterCommandsWithParentheses(server_settings[ServerSetting::format_alter_operations_with_parentheses]);
StackTrace::setShowAddresses(server_settings.show_addresses_in_stack_traces);
StackTrace::setShowAddresses(server_settings[ServerSetting::show_addresses_in_stack_traces]);
#if USE_HDFS
/// This will point libhdfs3 to the right location for its config.
@ -794,10 +911,10 @@ try
registerInterpreters();
registerFunctions();
registerAggregateFunctions();
registerTableFunctions(server_settings.use_legacy_mongodb_integration);
registerTableFunctions(server_settings[ServerSetting::use_legacy_mongodb_integration]);
registerDatabases();
registerStorages(server_settings.use_legacy_mongodb_integration);
registerDictionaries(server_settings.use_legacy_mongodb_integration);
registerStorages(server_settings[ServerSetting::use_legacy_mongodb_integration]);
registerDictionaries(server_settings[ServerSetting::use_legacy_mongodb_integration]);
registerDisks(/* global_skip_access_check= */ false);
registerFormats();
registerRemoteFileMetadatas();
@ -893,37 +1010,37 @@ try
// nodes (`from_zk`), because ZooKeeper interface uses the pool. We will
// ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well.
GlobalThreadPool::initialize(
server_settings.max_thread_pool_size,
server_settings.max_thread_pool_free_size,
server_settings.thread_pool_queue_size,
has_trace_collector ? server_settings.global_profiler_real_time_period_ns : 0,
has_trace_collector ? server_settings.global_profiler_cpu_time_period_ns : 0);
server_settings[ServerSetting::max_thread_pool_size],
server_settings[ServerSetting::max_thread_pool_free_size],
server_settings[ServerSetting::thread_pool_queue_size],
has_trace_collector ? server_settings[ServerSetting::global_profiler_real_time_period_ns] : 0,
has_trace_collector ? server_settings[ServerSetting::global_profiler_cpu_time_period_ns] : 0);
if (has_trace_collector)
{
global_context->createTraceCollector();
/// Set up server-wide memory profiler (for total memory tracker).
if (server_settings.total_memory_profiler_step)
total_memory_tracker.setProfilerStep(server_settings.total_memory_profiler_step);
if (server_settings[ServerSetting::total_memory_profiler_step])
total_memory_tracker.setProfilerStep(server_settings[ServerSetting::total_memory_profiler_step]);
if (server_settings.total_memory_tracker_sample_probability > 0.0)
total_memory_tracker.setSampleProbability(server_settings.total_memory_tracker_sample_probability);
if (server_settings[ServerSetting::total_memory_tracker_sample_probability] > 0.0)
total_memory_tracker.setSampleProbability(server_settings[ServerSetting::total_memory_tracker_sample_probability]);
if (server_settings.total_memory_profiler_sample_min_allocation_size)
total_memory_tracker.setSampleMinAllocationSize(server_settings.total_memory_profiler_sample_min_allocation_size);
if (server_settings[ServerSetting::total_memory_profiler_sample_min_allocation_size])
total_memory_tracker.setSampleMinAllocationSize(server_settings[ServerSetting::total_memory_profiler_sample_min_allocation_size]);
if (server_settings.total_memory_profiler_sample_max_allocation_size)
total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size);
if (server_settings[ServerSetting::total_memory_profiler_sample_max_allocation_size])
total_memory_tracker.setSampleMaxAllocationSize(server_settings[ServerSetting::total_memory_profiler_sample_max_allocation_size]);
}
Poco::ThreadPool server_pool(
/* minCapacity */3,
/* maxCapacity */server_settings.max_connections,
/* maxCapacity */server_settings[ServerSetting::max_connections],
/* idleTime */60,
/* stackSize */POCO_THREAD_STACK_SIZE,
server_settings.global_profiler_real_time_period_ns,
server_settings.global_profiler_cpu_time_period_ns);
server_settings[ServerSetting::global_profiler_real_time_period_ns],
server_settings[ServerSetting::global_profiler_cpu_time_period_ns]);
std::mutex servers_lock;
std::vector<ProtocolServerAdapter> servers;
@ -937,13 +1054,13 @@ try
LOG_INFO(log, "Background threads finished in {} ms", watch.elapsedMilliseconds());
});
MemoryWorker memory_worker(global_context->getServerSettings().memory_worker_period_ms);
MemoryWorker memory_worker(global_context->getServerSettings()[ServerSetting::memory_worker_period_ms]);
/// This object will periodically calculate some metrics.
ServerAsynchronousMetrics async_metrics(
global_context,
server_settings.asynchronous_metrics_update_period_s,
server_settings.asynchronous_heavy_metrics_update_period_s,
server_settings[ServerSetting::asynchronous_metrics_update_period_s],
server_settings[ServerSetting::asynchronous_heavy_metrics_update_period_s],
[&]() -> std::vector<ProtocolServerMetrics>
{
std::vector<ProtocolServerMetrics> metrics;
@ -996,7 +1113,7 @@ try
LOG_INFO(log, "Closed all listening sockets.");
if (current_connections > 0)
current_connections = waitServersToFinish(servers_to_start_before_tables, servers_lock, server_settings.shutdown_wait_unfinished);
current_connections = waitServersToFinish(servers_to_start_before_tables, servers_lock, server_settings[ServerSetting::shutdown_wait_unfinished]);
if (current_connections)
LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections);
@ -1033,47 +1150,47 @@ try
#endif
getIOThreadPool().initialize(
server_settings.max_io_thread_pool_size,
server_settings.max_io_thread_pool_free_size,
server_settings.io_thread_pool_queue_size);
server_settings[ServerSetting::max_io_thread_pool_size],
server_settings[ServerSetting::max_io_thread_pool_free_size],
server_settings[ServerSetting::io_thread_pool_queue_size]);
getBackupsIOThreadPool().initialize(
server_settings.max_backups_io_thread_pool_size,
server_settings.max_backups_io_thread_pool_free_size,
server_settings.backups_io_thread_pool_queue_size);
server_settings[ServerSetting::max_backups_io_thread_pool_size],
server_settings[ServerSetting::max_backups_io_thread_pool_free_size],
server_settings[ServerSetting::backups_io_thread_pool_queue_size]);
getActivePartsLoadingThreadPool().initialize(
server_settings.max_active_parts_loading_thread_pool_size,
server_settings[ServerSetting::max_active_parts_loading_thread_pool_size],
0, // We don't need any threads once all the parts will be loaded
server_settings.max_active_parts_loading_thread_pool_size);
server_settings[ServerSetting::max_active_parts_loading_thread_pool_size]);
getOutdatedPartsLoadingThreadPool().initialize(
server_settings.max_outdated_parts_loading_thread_pool_size,
server_settings[ServerSetting::max_outdated_parts_loading_thread_pool_size],
0, // We don't need any threads once all the parts will be loaded
server_settings.max_outdated_parts_loading_thread_pool_size);
server_settings[ServerSetting::max_outdated_parts_loading_thread_pool_size]);
/// It could grow if we need to synchronously wait until all the data parts will be loaded.
getOutdatedPartsLoadingThreadPool().setMaxTurboThreads(
server_settings.max_active_parts_loading_thread_pool_size
server_settings[ServerSetting::max_active_parts_loading_thread_pool_size]
);
getUnexpectedPartsLoadingThreadPool().initialize(
server_settings.max_unexpected_parts_loading_thread_pool_size,
server_settings[ServerSetting::max_unexpected_parts_loading_thread_pool_size],
0, // We don't need any threads once all the parts will be loaded
server_settings.max_unexpected_parts_loading_thread_pool_size);
server_settings[ServerSetting::max_unexpected_parts_loading_thread_pool_size]);
/// It could grow if we need to synchronously wait until all the data parts will be loaded.
getUnexpectedPartsLoadingThreadPool().setMaxTurboThreads(
server_settings.max_active_parts_loading_thread_pool_size
server_settings[ServerSetting::max_active_parts_loading_thread_pool_size]
);
getPartsCleaningThreadPool().initialize(
server_settings.max_parts_cleaning_thread_pool_size,
server_settings[ServerSetting::max_parts_cleaning_thread_pool_size],
0, // We don't need any threads once all the parts will be deleted
server_settings.max_parts_cleaning_thread_pool_size);
server_settings[ServerSetting::max_parts_cleaning_thread_pool_size]);
auto max_database_replicated_create_table_thread_pool_size = server_settings.max_database_replicated_create_table_thread_pool_size
? server_settings.max_database_replicated_create_table_thread_pool_size
auto max_database_replicated_create_table_thread_pool_size = server_settings[ServerSetting::max_database_replicated_create_table_thread_pool_size]
? server_settings[ServerSetting::max_database_replicated_create_table_thread_pool_size]
: getNumberOfCPUCoresToUse();
getDatabaseReplicatedCreateTablesThreadPool().initialize(
max_database_replicated_create_table_thread_pool_size,
@ -1081,9 +1198,9 @@ try
max_database_replicated_create_table_thread_pool_size);
getDatabaseCatalogDropTablesThreadPool().initialize(
server_settings.database_catalog_drop_table_concurrency,
server_settings[ServerSetting::database_catalog_drop_table_concurrency],
0, // We don't need any threads if there are no DROP queries.
server_settings.database_catalog_drop_table_concurrency);
server_settings[ServerSetting::database_catalog_drop_table_concurrency]);
/// Initialize global local cache for remote filesystem.
if (config().has("local_cache_for_remote_fs"))
@ -1320,18 +1437,18 @@ try
LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::serverTimezoneInstance().getTimeZone());
/// Storage with temporary data for processing of heavy queries.
if (!server_settings.tmp_policy.value.empty())
if (!server_settings[ServerSetting::tmp_policy].value.empty())
{
global_context->setTemporaryStoragePolicy(server_settings.tmp_policy, server_settings.max_temporary_data_on_disk_size);
global_context->setTemporaryStoragePolicy(server_settings[ServerSetting::tmp_policy], server_settings[ServerSetting::max_temporary_data_on_disk_size]);
}
else if (!server_settings.temporary_data_in_cache.value.empty())
else if (!server_settings[ServerSetting::temporary_data_in_cache].value.empty())
{
global_context->setTemporaryStorageInCache(server_settings.temporary_data_in_cache, server_settings.max_temporary_data_on_disk_size);
global_context->setTemporaryStorageInCache(server_settings[ServerSetting::temporary_data_in_cache], server_settings[ServerSetting::max_temporary_data_on_disk_size]);
}
else
{
std::string temporary_path = config().getString("tmp_path", path / "tmp/");
global_context->setTemporaryStoragePath(temporary_path, server_settings.max_temporary_data_on_disk_size);
global_context->setTemporaryStoragePath(temporary_path, server_settings[ServerSetting::max_temporary_data_on_disk_size]);
}
/** Directory with 'flags': files indicating temporary settings for the server set by system administrator.
@ -1420,11 +1537,11 @@ try
/// Set up caches.
const size_t max_cache_size = static_cast<size_t>(physical_server_memory * server_settings.cache_size_to_ram_max_ratio);
const size_t max_cache_size = static_cast<size_t>(physical_server_memory * server_settings[ServerSetting::cache_size_to_ram_max_ratio]);
String uncompressed_cache_policy = server_settings.uncompressed_cache_policy;
size_t uncompressed_cache_size = server_settings.uncompressed_cache_size;
double uncompressed_cache_size_ratio = server_settings.uncompressed_cache_size_ratio;
String uncompressed_cache_policy = server_settings[ServerSetting::uncompressed_cache_policy];
size_t uncompressed_cache_size = server_settings[ServerSetting::uncompressed_cache_size];
double uncompressed_cache_size_ratio = server_settings[ServerSetting::uncompressed_cache_size_ratio];
if (uncompressed_cache_size > max_cache_size)
{
uncompressed_cache_size = max_cache_size;
@ -1432,9 +1549,9 @@ try
}
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size, uncompressed_cache_size_ratio);
String mark_cache_policy = server_settings.mark_cache_policy;
size_t mark_cache_size = server_settings.mark_cache_size;
double mark_cache_size_ratio = server_settings.mark_cache_size_ratio;
String mark_cache_policy = server_settings[ServerSetting::mark_cache_policy];
size_t mark_cache_size = server_settings[ServerSetting::mark_cache_size];
double mark_cache_size_ratio = server_settings[ServerSetting::mark_cache_size_ratio];
if (mark_cache_size > max_cache_size)
{
mark_cache_size = max_cache_size;
@ -1442,16 +1559,16 @@ try
}
global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio);
size_t page_cache_size = server_settings.page_cache_size;
size_t page_cache_size = server_settings[ServerSetting::page_cache_size];
if (page_cache_size != 0)
global_context->setPageCache(
server_settings.page_cache_chunk_size, server_settings.page_cache_mmap_size,
page_cache_size, server_settings.page_cache_use_madv_free,
server_settings.page_cache_use_transparent_huge_pages);
server_settings[ServerSetting::page_cache_chunk_size], server_settings[ServerSetting::page_cache_mmap_size],
page_cache_size, server_settings[ServerSetting::page_cache_use_madv_free],
server_settings[ServerSetting::page_cache_use_transparent_huge_pages]);
String index_uncompressed_cache_policy = server_settings.index_uncompressed_cache_policy;
size_t index_uncompressed_cache_size = server_settings.index_uncompressed_cache_size;
double index_uncompressed_cache_size_ratio = server_settings.index_uncompressed_cache_size_ratio;
String index_uncompressed_cache_policy = server_settings[ServerSetting::index_uncompressed_cache_policy];
size_t index_uncompressed_cache_size = server_settings[ServerSetting::index_uncompressed_cache_size];
double index_uncompressed_cache_size_ratio = server_settings[ServerSetting::index_uncompressed_cache_size_ratio];
if (index_uncompressed_cache_size > max_cache_size)
{
index_uncompressed_cache_size = max_cache_size;
@ -1459,9 +1576,9 @@ try
}
global_context->setIndexUncompressedCache(index_uncompressed_cache_policy, index_uncompressed_cache_size, index_uncompressed_cache_size_ratio);
String index_mark_cache_policy = server_settings.index_mark_cache_policy;
size_t index_mark_cache_size = server_settings.index_mark_cache_size;
double index_mark_cache_size_ratio = server_settings.index_mark_cache_size_ratio;
String index_mark_cache_policy = server_settings[ServerSetting::index_mark_cache_policy];
size_t index_mark_cache_size = server_settings[ServerSetting::index_mark_cache_size];
double index_mark_cache_size_ratio = server_settings[ServerSetting::index_mark_cache_size_ratio];
if (index_mark_cache_size > max_cache_size)
{
index_mark_cache_size = max_cache_size;
@ -1469,7 +1586,7 @@ try
}
global_context->setIndexMarkCache(index_mark_cache_policy, index_mark_cache_size, index_mark_cache_size_ratio);
size_t mmap_cache_size = server_settings.mmap_cache_size;
size_t mmap_cache_size = server_settings[ServerSetting::mmap_cache_size];
if (mmap_cache_size > max_cache_size)
{
mmap_cache_size = max_cache_size;
@ -1489,8 +1606,8 @@ try
global_context->setQueryCache(query_cache_max_size_in_bytes, query_cache_max_entries, query_cache_query_cache_max_entry_size_in_bytes, query_cache_max_entry_size_in_rows);
#if USE_EMBEDDED_COMPILER
size_t compiled_expression_cache_max_size_in_bytes = server_settings.compiled_expression_cache_size;
size_t compiled_expression_cache_max_elements = server_settings.compiled_expression_cache_elements_size;
size_t compiled_expression_cache_max_size_in_bytes = server_settings[ServerSetting::compiled_expression_cache_size];
size_t compiled_expression_cache_max_elements = server_settings[ServerSetting::compiled_expression_cache_elements_size];
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements);
#endif
@ -1509,7 +1626,7 @@ try
std::optional<CgroupsMemoryUsageObserver> cgroups_memory_usage_observer;
try
{
auto wait_time = server_settings.cgroups_memory_usage_observer_wait_time;
auto wait_time = server_settings[ServerSetting::cgroups_memory_usage_observer_wait_time];
if (wait_time != 0)
cgroups_memory_usage_observer.emplace(std::chrono::seconds(wait_time));
}
@ -1552,8 +1669,8 @@ try
ServerSettings new_server_settings;
new_server_settings.loadSettingsFromConfig(*config);
size_t max_server_memory_usage = new_server_settings.max_server_memory_usage;
double max_server_memory_usage_to_ram_ratio = new_server_settings.max_server_memory_usage_to_ram_ratio;
size_t max_server_memory_usage = new_server_settings[ServerSetting::max_server_memory_usage];
double max_server_memory_usage_to_ram_ratio = new_server_settings[ServerSetting::max_server_memory_usage_to_ram_ratio];
size_t current_physical_server_memory = getMemoryAmount(); /// With cgroups, the amount of memory available to the server can be changed dynamically.
size_t default_max_server_memory_usage = static_cast<size_t>(current_physical_server_memory * max_server_memory_usage_to_ram_ratio);
@ -1583,9 +1700,9 @@ try
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
size_t merges_mutations_memory_usage_soft_limit = new_server_settings.merges_mutations_memory_usage_soft_limit;
size_t merges_mutations_memory_usage_soft_limit = new_server_settings[ServerSetting::merges_mutations_memory_usage_soft_limit];
size_t default_merges_mutations_server_memory_usage = static_cast<size_t>(current_physical_server_memory * new_server_settings.merges_mutations_memory_usage_to_ram_ratio);
size_t default_merges_mutations_server_memory_usage = static_cast<size_t>(current_physical_server_memory * new_server_settings[ServerSetting::merges_mutations_memory_usage_to_ram_ratio]);
if (merges_mutations_memory_usage_soft_limit == 0)
{
merges_mutations_memory_usage_soft_limit = default_merges_mutations_server_memory_usage;
@ -1593,7 +1710,7 @@ try
" ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)",
formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit),
formatReadableSizeWithBinarySuffix(current_physical_server_memory),
new_server_settings.merges_mutations_memory_usage_to_ram_ratio);
new_server_settings[ServerSetting::merges_mutations_memory_usage_to_ram_ratio]);
}
else if (merges_mutations_memory_usage_soft_limit > default_merges_mutations_server_memory_usage)
{
@ -1602,7 +1719,7 @@ try
" ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)",
formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit),
formatReadableSizeWithBinarySuffix(current_physical_server_memory),
new_server_settings.merges_mutations_memory_usage_to_ram_ratio);
new_server_settings[ServerSetting::merges_mutations_memory_usage_to_ram_ratio]);
}
LOG_INFO(log, "Merges and mutations memory limit is set to {}",
@ -1635,31 +1752,32 @@ try
global_context->setRemoteHostFilter(*config);
global_context->setHTTPHeaderFilter(*config);
global_context->setMaxTableSizeToDrop(new_server_settings.max_table_size_to_drop);
global_context->setMaxPartitionSizeToDrop(new_server_settings.max_partition_size_to_drop);
global_context->setMaxTableNumToWarn(new_server_settings.max_table_num_to_warn);
global_context->setMaxViewNumToWarn(new_server_settings.max_view_num_to_warn);
global_context->setMaxDictionaryNumToWarn(new_server_settings.max_dictionary_num_to_warn);
global_context->setMaxDatabaseNumToWarn(new_server_settings.max_database_num_to_warn);
global_context->setMaxPartNumToWarn(new_server_settings.max_part_num_to_warn);
global_context->setMaxTableSizeToDrop(new_server_settings[ServerSetting::max_table_size_to_drop]);
global_context->setMaxPartitionSizeToDrop(new_server_settings[ServerSetting::max_partition_size_to_drop]);
global_context->setMaxTableNumToWarn(new_server_settings[ServerSetting::max_table_num_to_warn]);
global_context->setMaxViewNumToWarn(new_server_settings[ServerSetting::max_view_num_to_warn]);
global_context->setMaxDictionaryNumToWarn(new_server_settings[ServerSetting::max_dictionary_num_to_warn]);
global_context->setMaxDatabaseNumToWarn(new_server_settings[ServerSetting::max_database_num_to_warn]);
global_context->setMaxPartNumToWarn(new_server_settings[ServerSetting::max_part_num_to_warn]);
/// Only for system.server_settings
global_context->setConfigReloaderInterval(new_server_settings.config_reload_interval_ms);
global_context->setConfigReloaderInterval(new_server_settings[ServerSetting::config_reload_interval_ms]);
SlotCount concurrent_threads_soft_limit = UnlimitedSlots;
if (new_server_settings.concurrent_threads_soft_limit_num > 0 && new_server_settings.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit)
concurrent_threads_soft_limit = new_server_settings.concurrent_threads_soft_limit_num;
if (new_server_settings.concurrent_threads_soft_limit_ratio_to_cores > 0)
if (new_server_settings[ServerSetting::concurrent_threads_soft_limit_num] > 0 && new_server_settings[ServerSetting::concurrent_threads_soft_limit_num] < concurrent_threads_soft_limit)
concurrent_threads_soft_limit = new_server_settings[ServerSetting::concurrent_threads_soft_limit_num];
if (new_server_settings[ServerSetting::concurrent_threads_soft_limit_ratio_to_cores] > 0)
{
auto value = new_server_settings.concurrent_threads_soft_limit_ratio_to_cores * getNumberOfCPUCoresToUse();
auto value = new_server_settings[ServerSetting::concurrent_threads_soft_limit_ratio_to_cores] * getNumberOfCPUCoresToUse();
if (value > 0 && value < concurrent_threads_soft_limit)
concurrent_threads_soft_limit = value;
}
ConcurrencyControl::instance().setMaxConcurrency(concurrent_threads_soft_limit);
LOG_INFO(log, "ConcurrencyControl limit is set to {}", concurrent_threads_soft_limit);
global_context->getProcessList().setMaxSize(new_server_settings.max_concurrent_queries);
global_context->getProcessList().setMaxInsertQueriesAmount(new_server_settings.max_concurrent_insert_queries);
global_context->getProcessList().setMaxSelectQueriesAmount(new_server_settings.max_concurrent_select_queries);
global_context->getProcessList().setMaxWaitingQueriesAmount(new_server_settings.max_waiting_queries);
global_context->getProcessList().setMaxSize(new_server_settings[ServerSetting::max_concurrent_queries]);
global_context->getProcessList().setMaxInsertQueriesAmount(new_server_settings[ServerSetting::max_concurrent_insert_queries]);
global_context->getProcessList().setMaxSelectQueriesAmount(new_server_settings[ServerSetting::max_concurrent_select_queries]);
global_context->getProcessList().setMaxWaitingQueriesAmount(new_server_settings[ServerSetting::max_waiting_queries]);
if (config->has("keeper_server"))
global_context->updateKeeperConfiguration(*config);
@ -1670,72 +1788,72 @@ try
/// This is done for backward compatibility.
if (global_context->areBackgroundExecutorsInitialized())
{
auto new_pool_size = new_server_settings.background_pool_size;
auto new_ratio = new_server_settings.background_merges_mutations_concurrency_ratio;
auto new_pool_size = new_server_settings[ServerSetting::background_pool_size];
auto new_ratio = new_server_settings[ServerSetting::background_merges_mutations_concurrency_ratio];
global_context->getMergeMutateExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, static_cast<size_t>(new_pool_size * new_ratio));
global_context->getMergeMutateExecutor()->updateSchedulingPolicy(new_server_settings.background_merges_mutations_scheduling_policy.toString());
global_context->getMergeMutateExecutor()->updateSchedulingPolicy(new_server_settings[ServerSetting::background_merges_mutations_scheduling_policy].toString());
}
if (global_context->areBackgroundExecutorsInitialized())
{
auto new_pool_size = new_server_settings.background_move_pool_size;
auto new_pool_size = new_server_settings[ServerSetting::background_move_pool_size];
global_context->getMovesExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
}
if (global_context->areBackgroundExecutorsInitialized())
{
auto new_pool_size = new_server_settings.background_fetches_pool_size;
auto new_pool_size = new_server_settings[ServerSetting::background_fetches_pool_size];
global_context->getFetchesExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
}
if (global_context->areBackgroundExecutorsInitialized())
{
auto new_pool_size = new_server_settings.background_common_pool_size;
auto new_pool_size = new_server_settings[ServerSetting::background_common_pool_size];
global_context->getCommonExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
}
global_context->getBufferFlushSchedulePool().increaseThreadsCount(new_server_settings.background_buffer_flush_schedule_pool_size);
global_context->getSchedulePool().increaseThreadsCount(new_server_settings.background_schedule_pool_size);
global_context->getMessageBrokerSchedulePool().increaseThreadsCount(new_server_settings.background_message_broker_schedule_pool_size);
global_context->getDistributedSchedulePool().increaseThreadsCount(new_server_settings.background_distributed_schedule_pool_size);
global_context->getBufferFlushSchedulePool().increaseThreadsCount(new_server_settings[ServerSetting::background_buffer_flush_schedule_pool_size]);
global_context->getSchedulePool().increaseThreadsCount(new_server_settings[ServerSetting::background_schedule_pool_size]);
global_context->getMessageBrokerSchedulePool().increaseThreadsCount(new_server_settings[ServerSetting::background_message_broker_schedule_pool_size]);
global_context->getDistributedSchedulePool().increaseThreadsCount(new_server_settings[ServerSetting::background_distributed_schedule_pool_size]);
global_context->getAsyncLoader().setMaxThreads(TablesLoaderForegroundPoolId, new_server_settings.tables_loader_foreground_pool_size);
global_context->getAsyncLoader().setMaxThreads(TablesLoaderBackgroundLoadPoolId, new_server_settings.tables_loader_background_pool_size);
global_context->getAsyncLoader().setMaxThreads(TablesLoaderBackgroundStartupPoolId, new_server_settings.tables_loader_background_pool_size);
global_context->getAsyncLoader().setMaxThreads(TablesLoaderForegroundPoolId, new_server_settings[ServerSetting::tables_loader_foreground_pool_size]);
global_context->getAsyncLoader().setMaxThreads(TablesLoaderBackgroundLoadPoolId, new_server_settings[ServerSetting::tables_loader_background_pool_size]);
global_context->getAsyncLoader().setMaxThreads(TablesLoaderBackgroundStartupPoolId, new_server_settings[ServerSetting::tables_loader_background_pool_size]);
getIOThreadPool().reloadConfiguration(
new_server_settings.max_io_thread_pool_size,
new_server_settings.max_io_thread_pool_free_size,
new_server_settings.io_thread_pool_queue_size);
new_server_settings[ServerSetting::max_io_thread_pool_size],
new_server_settings[ServerSetting::max_io_thread_pool_free_size],
new_server_settings[ServerSetting::io_thread_pool_queue_size]);
getBackupsIOThreadPool().reloadConfiguration(
new_server_settings.max_backups_io_thread_pool_size,
new_server_settings.max_backups_io_thread_pool_free_size,
new_server_settings.backups_io_thread_pool_queue_size);
new_server_settings[ServerSetting::max_backups_io_thread_pool_size],
new_server_settings[ServerSetting::max_backups_io_thread_pool_free_size],
new_server_settings[ServerSetting::backups_io_thread_pool_queue_size]);
getActivePartsLoadingThreadPool().reloadConfiguration(
new_server_settings.max_active_parts_loading_thread_pool_size,
new_server_settings[ServerSetting::max_active_parts_loading_thread_pool_size],
0, // We don't need any threads once all the parts will be loaded
new_server_settings.max_active_parts_loading_thread_pool_size);
new_server_settings[ServerSetting::max_active_parts_loading_thread_pool_size]);
getOutdatedPartsLoadingThreadPool().reloadConfiguration(
new_server_settings.max_outdated_parts_loading_thread_pool_size,
new_server_settings[ServerSetting::max_outdated_parts_loading_thread_pool_size],
0, // We don't need any threads once all the parts will be loaded
new_server_settings.max_outdated_parts_loading_thread_pool_size);
new_server_settings[ServerSetting::max_outdated_parts_loading_thread_pool_size]);
/// It could grow if we need to synchronously wait until all the data parts will be loaded.
getOutdatedPartsLoadingThreadPool().setMaxTurboThreads(
new_server_settings.max_active_parts_loading_thread_pool_size
new_server_settings[ServerSetting::max_active_parts_loading_thread_pool_size]
);
getPartsCleaningThreadPool().reloadConfiguration(
new_server_settings.max_parts_cleaning_thread_pool_size,
new_server_settings[ServerSetting::max_parts_cleaning_thread_pool_size],
0, // We don't need any threads one all the parts will be deleted
new_server_settings.max_parts_cleaning_thread_pool_size);
new_server_settings[ServerSetting::max_parts_cleaning_thread_pool_size]);
global_context->setMergeWorkload(new_server_settings.merge_workload);
global_context->setMutationWorkload(new_server_settings.mutation_workload);
global_context->setMergeWorkload(new_server_settings[ServerSetting::merge_workload]);
global_context->setMutationWorkload(new_server_settings[ServerSetting::mutation_workload]);
if (config->has("resources"))
{
@ -1780,28 +1898,28 @@ try
HTTPConnectionPools::instance().setLimits(
HTTPConnectionPools::Limits{
new_server_settings.disk_connections_soft_limit,
new_server_settings.disk_connections_warn_limit,
new_server_settings.disk_connections_store_limit,
new_server_settings[ServerSetting::disk_connections_soft_limit],
new_server_settings[ServerSetting::disk_connections_warn_limit],
new_server_settings[ServerSetting::disk_connections_store_limit],
},
HTTPConnectionPools::Limits{
new_server_settings.storage_connections_soft_limit,
new_server_settings.storage_connections_warn_limit,
new_server_settings.storage_connections_store_limit,
new_server_settings[ServerSetting::storage_connections_soft_limit],
new_server_settings[ServerSetting::storage_connections_warn_limit],
new_server_settings[ServerSetting::storage_connections_store_limit],
},
HTTPConnectionPools::Limits{
new_server_settings.http_connections_soft_limit,
new_server_settings.http_connections_warn_limit,
new_server_settings.http_connections_store_limit,
new_server_settings[ServerSetting::http_connections_soft_limit],
new_server_settings[ServerSetting::http_connections_warn_limit],
new_server_settings[ServerSetting::http_connections_store_limit],
});
DNSResolver::instance().setFilterSettings(new_server_settings.dns_allow_resolve_names_to_ipv4, new_server_settings.dns_allow_resolve_names_to_ipv6);
DNSResolver::instance().setFilterSettings(new_server_settings[ServerSetting::dns_allow_resolve_names_to_ipv4], new_server_settings[ServerSetting::dns_allow_resolve_names_to_ipv6]);
if (global_context->isServerCompletelyStarted())
CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings.cannot_allocate_thread_fault_injection_probability);
CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings[ServerSetting::cannot_allocate_thread_fault_injection_probability]);
#if USE_GWP_ASAN
GWPAsan::setForceSampleProbability(new_server_settings.gwp_asan_force_sample_probability);
GWPAsan::setForceSampleProbability(new_server_settings[ServerSetting::gwp_asan_force_sample_probability]);
#endif
ProfileEvents::increment(ProfileEvents::MainConfigLoads);
@ -1997,7 +2115,7 @@ try
});
/// Limit on total number of concurrently executed queries.
global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries);
global_context->getProcessList().setMaxSize(server_settings[ServerSetting::max_concurrent_queries]);
/// Load global settings from default_profile and system_profile.
global_context->setDefaultProfiles(config());
@ -2006,12 +2124,12 @@ try
/// This is needed to load proper values of background_pool_size etc.
global_context->initializeBackgroundExecutorsIfNeeded();
if (server_settings.async_insert_threads)
if (server_settings[ServerSetting::async_insert_threads])
{
global_context->setAsynchronousInsertQueue(std::make_shared<AsynchronousInsertQueue>(
global_context,
server_settings.async_insert_threads,
server_settings.async_insert_queue_flush_on_shutdown));
server_settings[ServerSetting::async_insert_threads],
server_settings[ServerSetting::async_insert_queue_flush_on_shutdown]));
}
/// Set path for format schema files
@ -2047,7 +2165,7 @@ try
/// context is destroyed.
/// In addition this object has to be created before the loading of the tables.
std::unique_ptr<DNSCacheUpdater> dns_cache_updater;
if (server_settings.disable_internal_dns_cache)
if (server_settings[ServerSetting::disable_internal_dns_cache])
{
/// Disable DNS caching at all
DNSResolver::instance().setDisableCacheFlag();
@ -2055,11 +2173,11 @@ try
}
else
{
DNSResolver::instance().setCacheMaxEntries(server_settings.dns_cache_max_entries);
DNSResolver::instance().setCacheMaxEntries(server_settings[ServerSetting::dns_cache_max_entries]);
/// Initialize a watcher periodically updating DNS cache
dns_cache_updater = std::make_unique<DNSCacheUpdater>(
global_context, server_settings.dns_cache_update_period, server_settings.dns_max_consecutive_failures);
global_context, server_settings[ServerSetting::dns_cache_update_period], server_settings[ServerSetting::dns_max_consecutive_failures]);
}
if (dns_cache_updater)
@ -2067,7 +2185,7 @@ try
/// Set current database name before loading tables and databases because
/// system logs may copy global context.
std::string default_database = server_settings.default_database.toString();
std::string default_database = server_settings[ServerSetting::default_database].toString();
global_context->setCurrentDatabaseNameInGlobalContext(default_database);
LOG_INFO(log, "Loading metadata from {}", path_str);
@ -2103,7 +2221,7 @@ try
waitLoad(TablesLoaderForegroundPoolId, system_startup_tasks);
/// Startup scripts can depend on the system log tables.
if (config().has("startup_scripts") && !server_settings.prepare_system_log_tables_on_startup.changed)
if (config().has("startup_scripts") && !server_settings[ServerSetting::prepare_system_log_tables_on_startup].changed)
global_context->setServerSetting("prepare_system_log_tables_on_startup", true);
/// After attaching system databases we can initialize system log.
@ -2123,7 +2241,7 @@ try
database_catalog.loadMarkedAsDroppedTables();
database_catalog.createBackgroundTasks();
/// Then, load remaining databases (some of them may be loaded asynchronously)
load_metadata_tasks = loadMetadata(global_context, default_database, server_settings.async_load_databases);
load_metadata_tasks = loadMetadata(global_context, default_database, server_settings[ServerSetting::async_load_databases]);
/// If we need to convert database engines, disable async tables loading
convertDatabasesEnginesIfNeed(load_metadata_tasks, global_context);
database_catalog.startupBackgroundTasks();
@ -2274,11 +2392,11 @@ try
startup_watch.stop();
ProfileEvents::increment(ProfileEvents::ServerStartupMilliseconds, startup_watch.elapsedMilliseconds());
CannotAllocateThreadFaultInjector::setFaultProbability(server_settings.cannot_allocate_thread_fault_injection_probability);
CannotAllocateThreadFaultInjector::setFaultProbability(server_settings[ServerSetting::cannot_allocate_thread_fault_injection_probability]);
#if USE_GWP_ASAN
GWPAsan::initFinished();
GWPAsan::setForceSampleProbability(server_settings.gwp_asan_force_sample_probability);
GWPAsan::setForceSampleProbability(server_settings[ServerSetting::gwp_asan_force_sample_probability]);
#endif
try
@ -2328,15 +2446,15 @@ try
/// Wait for unfinished backups and restores.
/// This must be done after closing listening sockets (no more backups/restores) but before ProcessList::killAllQueries
/// (because killAllQueries() will cancel all running backups/restores).
if (server_settings.shutdown_wait_backups_and_restores)
if (server_settings[ServerSetting::shutdown_wait_backups_and_restores])
global_context->waitAllBackupsAndRestores();
/// Killing remaining queries.
if (!server_settings.shutdown_wait_unfinished_queries)
if (!server_settings[ServerSetting::shutdown_wait_unfinished_queries])
global_context->getProcessList().killAllQueries();
if (current_connections)
current_connections = waitServersToFinish(servers, servers_lock, server_settings.shutdown_wait_unfinished);
current_connections = waitServersToFinish(servers, servers_lock, server_settings[ServerSetting::shutdown_wait_unfinished]);
if (current_connections)
LOG_WARNING(log, "Closed connections. But {} remain."
@ -2475,8 +2593,8 @@ void Server::createServers(
Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams;
http_params->setTimeout(settings[Setting::http_receive_timeout]);
http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout);
http_params->setMaxKeepAliveRequests(static_cast<int>(global_context->getServerSettings().max_keep_alive_requests));
http_params->setKeepAliveTimeout(global_context->getServerSettings()[ServerSetting::keep_alive_timeout]);
http_params->setMaxKeepAliveRequests(static_cast<int>(global_context->getServerSettings()[ServerSetting::max_keep_alive_requests]));
Poco::Util::AbstractConfiguration::Keys protocols;
config.keys("protocols", protocols);
@ -2732,7 +2850,7 @@ void Server::createInterserverServers(
Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams;
http_params->setTimeout(settings[Setting::http_receive_timeout]);
http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout);
http_params->setKeepAliveTimeout(global_context->getServerSettings()[ServerSetting::keep_alive_timeout]);
/// Now iterate over interserver_listen_hosts
for (const auto & interserver_listen_host : interserver_listen_hosts)

View File

@ -33,6 +33,12 @@ namespace DB
{
struct Settings;
namespace ServerSetting
{
extern const ServerSettingsGroupArrayActionWhenLimitReached aggregate_function_group_array_action_when_limit_is_reached;
extern const ServerSettingsUInt64 aggregate_function_group_array_max_element_size;
}
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
@ -746,7 +752,7 @@ inline AggregateFunctionPtr createAggregateFunctionGroupArrayImpl(const DataType
/// Returns the `aggregate_function_group_array_max_element_size` server setting read from the
/// global context instance, or the constant 0xFFFFFF when no global context instance is available.
size_t getMaxArraySize()
{
if (auto context = Context::getGlobalContextInstance())
/// NOTE(review): the two consecutive `return context->...` lines below look like the removed/added pair
/// of a diff rendering (member access vs. `[ServerSetting::...]` lookup) - confirm that only one of them
/// is meant to be present in the real file.
return context->getServerSettings().aggregate_function_group_array_max_element_size;
return context->getServerSettings()[ServerSetting::aggregate_function_group_array_max_element_size];
/// Fallback used when there is no global context instance.
return 0xFFFFFF;
}
@ -754,7 +760,7 @@ size_t getMaxArraySize()
bool discardOnLimitReached()
{
if (auto context = Context::getGlobalContextInstance())
return context->getServerSettings().aggregate_function_group_array_action_when_limit_is_reached
return context->getServerSettings()[ServerSetting::aggregate_function_group_array_action_when_limit_is_reached]
== GroupArrayActionWhenLimitReached::DISCARD;
return false;

View File

@ -28,11 +28,36 @@ namespace ErrorCodes
namespace
{
constexpr size_t max_events = 32;
constexpr size_t MAX_EVENTS = 32;
/// Brings `events_list` into sorted order, given that it consists of two adjacent ranges:
/// the first `prefix_size` elements (the prefix) followed by all remaining elements (the suffix).
/// `prefix_sorted` / `suffix_sorted` tell which of the two ranges is already ordered, so only
/// the unordered range has to be sorted before both ranges are merged in place.
template <typename T>
void mergeEventsList(T & events_list, size_t prefix_size, bool prefix_sorted, bool suffix_sorted)
{
    const auto first = std::begin(events_list);
    const auto last = std::end(events_list);

    /// Nothing is ordered yet: one sort over the whole container is enough
    if (!prefix_sorted && !suffix_sorted)
    {
        std::stable_sort(first, last);
        return;
    }

    /// At least one of the ranges is already ordered: sort the other one (if needed) and merge
    const auto boundary = std::next(first, prefix_size);

    if (!prefix_sorted)
        std::stable_sort(first, boundary);

    if (!suffix_sorted)
        std::stable_sort(boundary, last);

    std::inplace_merge(first, boundary, last);
}
template <typename T>
struct AggregateFunctionWindowFunnelData
{
static constexpr bool strict_once_enabled = false;
using TimestampEvent = std::pair<T, UInt8>;
using TimestampEvents = PODArrayWithStackMemory<TimestampEvent, 64>;
@ -66,24 +91,7 @@ struct AggregateFunctionWindowFunnelData
events_list.insert(std::begin(other.events_list), std::end(other.events_list));
/// either sort whole container or do so partially merging ranges afterwards
if (!sorted && !other.sorted)
std::stable_sort(std::begin(events_list), std::end(events_list));
else
{
const auto begin = std::begin(events_list);
const auto middle = std::next(begin, size);
const auto end = std::end(events_list);
if (!sorted)
std::stable_sort(begin, middle);
if (!other.sorted)
std::stable_sort(middle, end);
std::inplace_merge(begin, middle, end);
}
mergeEventsList(events_list, size, sorted, other.sorted);
sorted = true;
}
@ -133,6 +141,131 @@ struct AggregateFunctionWindowFunnelData
}
};
template <typename T>
struct AggregateFunctionWindowFunnelStrictOnceData
{
static constexpr bool strict_once_enabled = true;
struct TimestampEvent
{
T timestamp;
UInt8 event_type;
UInt64 unique_id;
TimestampEvent(T timestamp_, UInt8 event_type_, UInt64 unique_id_)
: timestamp(timestamp_), event_type(event_type_), unique_id(unique_id_) {}
bool operator<(const TimestampEvent & other) const
{
return std::tie(timestamp, event_type, unique_id) < std::tie(other.timestamp, other.event_type, other.unique_id);
}
bool operator<=(const TimestampEvent & other) const
{
return std::tie(timestamp, event_type, unique_id) <= std::tie(other.timestamp, other.event_type, other.unique_id);
}
};
using TimestampEvents = PODArrayWithStackMemory<TimestampEvent, 64>;
TimestampEvents events_list;
/// Next unique identifier for events
/// Used to distinguish events with the same timestamp that matches several conditions.
UInt64 next_unique_id = 1;
bool sorted = true;
size_t size() const
{
return events_list.size();
}
void advanceId()
{
++next_unique_id;
}
void add(T timestamp, UInt8 event_type)
{
TimestampEvent new_event(timestamp, event_type, next_unique_id);
/// Check if the new event maintains the sorted order
if (sorted && !events_list.empty())
sorted = events_list.back() <= new_event;
events_list.push_back(new_event);
}
void merge(const AggregateFunctionWindowFunnelStrictOnceData & other)
{
if (other.events_list.empty())
return;
const auto current_size = events_list.size();
UInt64 new_next_unique_id = next_unique_id;
events_list.reserve(current_size + other.events_list.size());
for (auto other_event : other.events_list)
{
/// Assign unique IDs to the new events to prevent conflicts
other_event.unique_id += next_unique_id;
new_next_unique_id = std::max(new_next_unique_id, other_event.unique_id + 1);
events_list.push_back(other_event);
}
next_unique_id = new_next_unique_id;
mergeEventsList(events_list, current_size, sorted, other.sorted);
sorted = true;
}
void sort()
{
if (!sorted)
{
std::stable_sort(std::begin(events_list), std::end(events_list));
sorted = true;
}
}
void serialize(WriteBuffer & buf) const
{
writeBinary(sorted, buf);
writeBinary(events_list.size(), buf);
for (const auto & event : events_list)
{
writeBinary(event.timestamp, buf);
writeBinary(event.event_type, buf);
writeBinary(event.unique_id, buf);
}
}
void deserialize(ReadBuffer & buf)
{
readBinary(sorted, buf);
size_t events_size;
readBinary(events_size, buf);
if (events_size > 100'000'000) /// Arbitrary limit to prevent excessive memory allocation
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large size of the state of windowFunnel");
events_list.clear();
events_list.reserve(events_size);
T timestamp;
UInt8 event_type;
UInt64 unique_id = 0;
for (size_t i = 0; i < events_size; ++i)
{
readBinary(timestamp, buf);
readBinary(event_type, buf);
readBinary(unique_id, buf);
next_unique_id = std::max(next_unique_id, unique_id + 1);
events_list.emplace_back(timestamp, event_type, unique_id);
}
}
};
/** Calculates the max event level in a sliding window.
* The max size of events is 32, that's enough for funnel analytics
*
@ -160,22 +293,15 @@ private:
/// The level path must be 1---2---3---...---check_events_size, find the max event level that satisfied the path in the sliding window.
/// If found, returns the max event level, else return 0.
/// The algorithm works in O(n) time, but the overall function works in O(n * log(n)) due to sorting.
UInt8 getEventLevel(Data & data) const
UInt8 getEventLevelNonStrictOnce(const AggregateFunctionWindowFunnelData<T>::TimestampEvents & events_list) const
{
if (data.size() == 0)
return 0;
if (!strict_order && events_size == 1)
return 1;
data.sort();
/// events_timestamp stores the timestamp of the first and previous i-th level event happen within time window
std::vector<std::optional<std::pair<UInt64, UInt64>>> events_timestamp(events_size);
bool first_event = false;
for (size_t i = 0; i < data.events_list.size(); ++i)
for (size_t i = 0; i < events_list.size(); ++i)
{
const T & timestamp = data.events_list[i].first;
const auto & event_idx = data.events_list[i].second - 1;
const T & timestamp = events_list[i].first;
const auto & event_idx = events_list[i].second - 1;
if (strict_order && event_idx == -1)
{
if (first_event)
@ -189,7 +315,7 @@ private:
}
else if (strict_deduplication && events_timestamp[event_idx].has_value())
{
return data.events_list[i - 1].second;
return events_list[i - 1].second;
}
else if (strict_order && first_event && !events_timestamp[event_idx - 1].has_value())
{
@ -222,6 +348,126 @@ private:
return 0;
}
/// Scans `events_list` in order and returns the funnel level reached:
/// - `events_size` as soon as a sequence covering every level is completed;
/// - otherwise the highest level that has at least one tracked sequence, or 0 if there is none;
/// - in the strict_deduplication / strict_order branches below, the scan stops early with the level computed there.
/// Each tracked sequence remembers the unique ids it has consumed, and an event whose unique id is
/// already in a sequence's path does not extend that sequence.
/// NOTE(review): the list is presumably already sorted by the caller - confirm at the call site.
UInt8 getEventLevelStrictOnce(const AggregateFunctionWindowFunnelStrictOnceData<T>::TimestampEvents & events_list) const
{
/// One partially matched sequence: the timestamps of its first and of its most recent matched event,
/// plus the unique ids of the events matched at each level so far.
/// NOTE(review): neither constructor initializes `event_path`; the code below only reads the indices
/// that were written while the sequence was built.
struct EventMatchTimeWindow
{
UInt64 first_timestamp;
UInt64 last_timestamp;
std::array<UInt64, MAX_EVENTS> event_path;
EventMatchTimeWindow() = default;
EventMatchTimeWindow(UInt64 first_ts, UInt64 last_ts)
: first_timestamp(first_ts), last_timestamp(last_ts) {}
};
/// We track all possible event sequences up to the current event.
/// It's required because one event can meet several conditions.
/// For example: for events 'start', 'a', 'b', 'a', 'end'.
/// The second occurrence of 'a' should be counted only once in one sequence.
/// However, we do not know in advance if the next event will be 'b' or 'end', so we try to keep both paths.
/// event_sequences[k] holds the sequences whose most recent matched event is of level k (0-based).
std::vector<std::list<EventMatchTimeWindow>> event_sequences(events_size);
bool has_first_event = false;
for (size_t i = 0; i < events_list.size(); ++i)
{
const auto & current_event = events_list[i];
auto timestamp = current_event.timestamp;
/// 0-based level of the event; -1 for event_type 0
Int64 event_idx = current_event.event_type - 1;
UInt64 unique_id = current_event.unique_id;
/// strict_order with event_type 0 (presumably a row that matched no condition - TODO confirm):
/// skipped before the first level-0 event, ends the scan after it.
if (strict_order && event_idx == -1)
{
if (has_first_event)
break;
else
continue;
}
else if (event_idx == 0)
{
/// A first-level event always starts a new sequence with itself as the only path element
auto & event_seq = event_sequences[0].emplace_back(timestamp, timestamp);
event_seq.event_path[0] = unique_id;
has_first_event = true;
}
else if (strict_deduplication && !event_sequences[event_idx].empty())
{
/// This level has been reached before: stop and report the type of the preceding event
return events_list[i - 1].event_type;
}
else if (strict_order && has_first_event && event_sequences[event_idx - 1].empty())
{
/// The previous level has no sequence yet: report the index of the first level without any sequence
for (size_t event = 0; event < event_sequences.size(); ++event)
{
if (event_sequences[event].empty())
return event;
}
}
else if (!event_sequences[event_idx - 1].empty())
{
/// Try to extend every sequence of the previous level with the current event
auto & prev_level = event_sequences[event_idx - 1];
for (auto it = prev_level.begin(); it != prev_level.end();)
{
auto first_ts = it->first_timestamp;
bool time_matched = timestamp <= first_ts + window;
if (!time_matched && prev_level.size() > 1)
{
// Remove old events that are out of the window, but keep at least one
it = prev_level.erase(it);
continue;
}
/// Copy of the path, so that the sequence of the previous level stays untouched
auto prev_path = it->event_path;
chassert(event_idx > 0);
/// Ensure the unique_id hasn't been used in the path already
for (size_t j = 0; j < static_cast<size_t>(event_idx); ++j)
{
if (!time_matched)
break;
time_matched = prev_path[j] != unique_id;
}
/// strict_increase: the event must be strictly later than the last event of the sequence
if (time_matched && strict_increase)
time_matched = it->last_timestamp < timestamp;
if (time_matched)
{
/// Record the extended sequence at the current level; it keeps the original first timestamp
prev_path[event_idx] = unique_id;
auto & new_seq = event_sequences[event_idx].emplace_back(first_ts, timestamp);
new_seq.event_path = std::move(prev_path);
/// The last level has been reached: the whole funnel is matched
if (event_idx + 1 == events_size)
return events_size;
}
++it;
}
}
}
/// No complete funnel: report the highest level that has at least one sequence
for (size_t event = event_sequences.size(); event > 0; --event)
{
if (!event_sequences[event - 1].empty())
return event;
}
return 0;
}
/// Returns the funnel level reached by the events accumulated in `data`.
///
/// Two cases are answered without looking at individual events:
///  - an empty state yields 0;
///  - a funnel with a single condition and no strict_order yields 1.
/// Otherwise the state is sorted and handed to the matcher selected at compile time
/// by `Data::strict_once_enabled`.
UInt8 getEventLevel(Data & data) const
{
    const bool no_events = data.size() == 0;
    if (no_events)
        return 0;

    const bool single_step_funnel = events_size == 1 && !strict_order;
    if (single_step_funnel)
        return 1;

    /// data.sort() runs before either matcher is invoked.
    data.sort();

    /// The `else` is required: only the branch matching this Data type may be instantiated.
    if constexpr (!Data::strict_once_enabled)
        return getEventLevelNonStrictOnce(data.events_list);
    else
        return getEventLevelStrictOnce(data.events_list);
}
public:
String getName() const override
{
@ -246,6 +492,9 @@ public:
strict_order = true;
else if (option == "strict_increase")
strict_increase = true;
else if (option == "strict_once")
/// Checked in factory
chassert(Data::strict_once_enabled);
else if (option == "strict")
throw Exception(ErrorCodes::BAD_ARGUMENTS, "strict is replaced with strict_deduplication in Aggregate function {}", getName());
else
@ -272,6 +521,9 @@ public:
if (strict_order && !has_event)
this->data(place).add(timestamp, 0);
if constexpr (Data::strict_once_enabled)
this->data(place).advanceId();
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
@ -296,7 +548,6 @@ public:
};
template <template <typename> class Data>
AggregateFunctionPtr
createAggregateFunctionWindowFunnel(const std::string & name, const DataTypes & arguments, const Array & params, const Settings *)
{
@ -309,7 +560,7 @@ createAggregateFunctionWindowFunnel(const std::string & name, const DataTypes &
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Aggregate function {} requires one timestamp argument and at least one event condition.", name);
if (arguments.size() > max_events + 1)
if (arguments.size() > MAX_EVENTS + 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Too many event arguments for aggregate function {}", name);
for (const auto i : collections::range(1, arguments.size()))
@ -321,16 +572,29 @@ createAggregateFunctionWindowFunnel(const std::string & name, const DataTypes &
cond_arg->getName(), toString(i + 1), name);
}
AggregateFunctionPtr res(createWithUnsignedIntegerType<AggregateFunctionWindowFunnel, Data>(*arguments[0], arguments, params));
bool strict_once = params.size() > 1 && std::any_of(params.begin() + 1, params.end(), [](const auto & f) { return f.template safeGet<String>() == "strict_once"; });
if (strict_once)
{
AggregateFunctionPtr res(createWithUnsignedIntegerType<AggregateFunctionWindowFunnel, AggregateFunctionWindowFunnelStrictOnceData>(*arguments[0], arguments, params));
WhichDataType which(arguments.front().get());
if (res)
return res;
if (which.isDate())
return std::make_shared<AggregateFunctionWindowFunnel<DataTypeDate::FieldType, Data<DataTypeDate::FieldType>>>(arguments, params);
return std::make_shared<AggregateFunctionWindowFunnel<DataTypeDate::FieldType, AggregateFunctionWindowFunnelStrictOnceData<DataTypeDate::FieldType>>>(arguments, params);
if (which.isDateTime())
return std::make_shared<AggregateFunctionWindowFunnel<DataTypeDateTime::FieldType, Data<DataTypeDateTime::FieldType>>>(
arguments, params);
return std::make_shared<AggregateFunctionWindowFunnel<DataTypeDateTime::FieldType, AggregateFunctionWindowFunnelStrictOnceData<DataTypeDateTime::FieldType>>>(arguments, params);
}
else
{
AggregateFunctionPtr res(createWithUnsignedIntegerType<AggregateFunctionWindowFunnel, AggregateFunctionWindowFunnelData>(*arguments[0], arguments, params));
WhichDataType which(arguments.front().get());
if (res)
return res;
if (which.isDate())
return std::make_shared<AggregateFunctionWindowFunnel<DataTypeDate::FieldType, AggregateFunctionWindowFunnelData<DataTypeDate::FieldType>>>(arguments, params);
if (which.isDateTime())
return std::make_shared<AggregateFunctionWindowFunnel<DataTypeDateTime::FieldType, AggregateFunctionWindowFunnelData<DataTypeDateTime::FieldType>>>(arguments, params);
}
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of first argument of aggregate function {}, must "
"be Unsigned Number, Date, DateTime", arguments.front().get()->getName(), name);
@ -340,7 +604,7 @@ createAggregateFunctionWindowFunnel(const std::string & name, const DataTypes &
void registerAggregateFunctionWindowFunnel(AggregateFunctionFactory & factory)
{
factory.registerFunction("windowFunnel", createAggregateFunctionWindowFunnel<AggregateFunctionWindowFunnelData>);
factory.registerFunction("windowFunnel", createAggregateFunctionWindowFunnel);
}
}

View File

@ -103,6 +103,7 @@ namespace Setting
extern const SettingsBool single_join_prefer_left_table;
extern const SettingsBool transform_null_in;
extern const SettingsUInt64 use_structure_from_insertion_table_in_table_functions;
extern const SettingsBool use_concurrency_control;
}
@ -588,6 +589,7 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden
PullingAsyncPipelineExecutor executor(io.pipeline);
io.pipeline.setProgressCallback(context->getProgressCallback());
io.pipeline.setProcessListElement(context->getProcessListElement());
io.pipeline.setConcurrencyControl(context->getSettingsRef()[Setting::use_concurrency_control]);
Block block;

View File

@ -20,7 +20,7 @@ BackupReaderDisk::~BackupReaderDisk() = default;
bool BackupReaderDisk::fileExists(const String & file_name)
{
return disk->exists(root_path / file_name);
return disk->existsFile(root_path / file_name);
}
UInt64 BackupReaderDisk::getFileSize(const String & file_name)
@ -68,7 +68,7 @@ BackupWriterDisk::~BackupWriterDisk() = default;
bool BackupWriterDisk::fileExists(const String & file_name)
{
return disk->exists(root_path / file_name);
return disk->existsFile(root_path / file_name);
}
UInt64 BackupWriterDisk::getFileSize(const String & file_name)
@ -91,7 +91,7 @@ std::unique_ptr<WriteBuffer> BackupWriterDisk::writeFile(const String & file_nam
void BackupWriterDisk::removeFile(const String & file_name)
{
disk->removeFileIfExists(root_path / file_name);
if (disk->isDirectory(root_path) && disk->isDirectoryEmpty(root_path))
if (disk->existsDirectory(root_path) && disk->isDirectoryEmpty(root_path))
disk->removeDirectory(root_path);
}
@ -99,7 +99,7 @@ void BackupWriterDisk::removeFiles(const Strings & file_names)
{
for (const auto & file_name : file_names)
disk->removeFileIfExists(root_path / file_name);
if (disk->isDirectory(root_path) && disk->isDirectoryEmpty(root_path))
if (disk->existsDirectory(root_path) && disk->isDirectoryEmpty(root_path))
disk->removeDirectory(root_path);
}

View File

@ -11,6 +11,12 @@
namespace DB
{
namespace ServerSetting
{
extern const ServerSettingsString default_replica_name;
extern const ServerSettingsString default_replica_path;
}
namespace
{
void visitStorageSystemTableEngine(ASTStorage &, const DDLAdjustingForBackupVisitor::Data & data)
@ -55,8 +61,8 @@ namespace
zookeeper_path_arg.replace(uuid_pos, table_uuid_str.size(), "{uuid}");
}
const auto & server_settings = data.global_context->getServerSettings();
if ((zookeeper_path_arg == server_settings.default_replica_path.value)
&& (replica_name_arg == server_settings.default_replica_name.value)
if ((zookeeper_path_arg == server_settings[ServerSetting::default_replica_path].value)
&& (replica_name_arg == server_settings[ServerSetting::default_replica_name].value)
&& ((engine_args.size() == 2) || !engine_args[2]->as<ASTLiteral>()))
{
engine_args.erase(engine_args.begin(), engine_args.begin() + 2);

View File

@ -10,6 +10,10 @@ namespace Setting
{
extern const SettingsSeconds http_receive_timeout;
}
namespace ServerSetting
{
extern const ServerSettingsSeconds keep_alive_timeout;
}
LibraryBridgeHelper::LibraryBridgeHelper(ContextPtr context_)
: IBridgeHelper(context_)
@ -18,7 +22,7 @@ LibraryBridgeHelper::LibraryBridgeHelper(ContextPtr context_)
, http_timeout(context_->getGlobalContext()->getSettingsRef()[Setting::http_receive_timeout].value)
, bridge_host(config.getString("library_bridge.host", DEFAULT_HOST))
, bridge_port(config.getUInt("library_bridge.port", DEFAULT_PORT))
, http_timeouts(ConnectionTimeouts::getHTTPTimeouts(context_->getSettingsRef(), context_->getServerSettings().keep_alive_timeout))
, http_timeouts(ConnectionTimeouts::getHTTPTimeouts(context_->getSettingsRef(), context_->getServerSettings()))
{
}

View File

@ -99,7 +99,7 @@ protected:
{
auto buf = BuilderRWBufferFromHTTP(getPingURI())
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withTimeouts(getHTTPTimeouts())
.withTimeouts(ConnectionTimeouts::getHTTPTimeouts(getContext()->getSettingsRef(), getContext()->getServerSettings()))
.withSettings(getContext()->getReadSettings())
.create(credentials);
@ -165,11 +165,6 @@ private:
Poco::Net::HTTPBasicCredentials credentials{};
ConnectionTimeouts getHTTPTimeouts()
{
return ConnectionTimeouts::getHTTPTimeouts(getContext()->getSettingsRef(), getContext()->getServerSettings().keep_alive_timeout);
}
protected:
using URLParams = std::vector<std::pair<std::string, std::string>>;
@ -206,7 +201,7 @@ protected:
auto buf = BuilderRWBufferFromHTTP(uri)
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withMethod(Poco::Net::HTTPRequest::HTTP_POST)
.withTimeouts(getHTTPTimeouts())
.withTimeouts(ConnectionTimeouts::getHTTPTimeouts(getContext()->getSettingsRef(), getContext()->getServerSettings()))
.withSettings(getContext()->getReadSettings())
.create(credentials);
@ -233,7 +228,7 @@ protected:
auto buf = BuilderRWBufferFromHTTP(uri)
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withMethod(Poco::Net::HTTPRequest::HTTP_POST)
.withTimeouts(getHTTPTimeouts())
.withTimeouts(ConnectionTimeouts::getHTTPTimeouts(getContext()->getSettingsRef(), getContext()->getServerSettings()))
.withSettings(getContext()->getReadSettings())
.create(credentials);

View File

@ -244,6 +244,7 @@ add_object_library(clickhouse_storages Storages)
add_object_library(clickhouse_storages_mysql Storages/MySQL)
add_object_library(clickhouse_storages_distributed Storages/Distributed)
add_object_library(clickhouse_storages_mergetree Storages/MergeTree)
add_object_library(clickhouse_storages_mergetree_merge_selectors Storages/MergeTree/MergeSelectors)
add_object_library(clickhouse_storages_statistics Storages/Statistics)
add_object_library(clickhouse_storages_liveview Storages/LiveView)
add_object_library(clickhouse_storages_windowview Storages/WindowView)

View File

@ -107,6 +107,7 @@ namespace Setting
extern const SettingsUInt64 output_format_pretty_max_value_width;
extern const SettingsBool partial_result_on_first_cancel;
extern const SettingsBool throw_if_no_data_to_insert;
extern const SettingsBool implicit_select;
}
namespace ErrorCodes
@ -320,7 +321,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Setting
else if (dialect == Dialect::prql)
parser = std::make_unique<ParserPRQLQuery>(max_length, settings[Setting::max_parser_depth], settings[Setting::max_parser_backtracks]);
else
parser = std::make_unique<ParserQuery>(end, settings[Setting::allow_settings_after_format_in_insert]);
parser = std::make_unique<ParserQuery>(end, settings[Setting::allow_settings_after_format_in_insert], settings[Setting::implicit_select]);
if (is_interactive || ignore_error)
{
@ -1784,6 +1785,9 @@ try
QueryPipeline pipeline(std::move(pipe));
PullingAsyncPipelineExecutor executor(pipeline);
/// Concurrency control in client is not required
pipeline.setConcurrencyControl(false);
if (need_render_progress)
{
pipeline.setProgressCallback([this](const Progress & progress){ onProgress(progress); });

View File

@ -33,6 +33,7 @@ namespace Setting
extern const SettingsUInt64 max_parser_backtracks;
extern const SettingsUInt64 max_parser_depth;
extern const SettingsUInt64 max_query_size;
extern const SettingsBool implicit_select;
}
namespace ErrorCodes
@ -178,7 +179,7 @@ void LocalConnection::sendQuery(
parser
= std::make_unique<ParserPRQLQuery>(settings[Setting::max_query_size], settings[Setting::max_parser_depth], settings[Setting::max_parser_backtracks]);
else
parser = std::make_unique<ParserQuery>(end, settings[Setting::allow_settings_after_format_in_insert]);
parser = std::make_unique<ParserQuery>(end, settings[Setting::allow_settings_after_format_in_insert], settings[Setting::implicit_select]);
ASTPtr parsed_query;
if (dialect == Dialect::kusto)
@ -268,6 +269,7 @@ void LocalConnection::sendQuery(
{
state->block = state->io.pipeline.getHeader();
state->executor = std::make_unique<PullingAsyncPipelineExecutor>(state->io.pipeline);
state->io.pipeline.setConcurrencyControl(false);
}
else if (state->io.pipeline.completed())
{

View File

@ -1,7 +1,23 @@
#include <Common/ISlotControl.h>
#include <Common/ConcurrencyControl.h>
#include <Common/Exception.h>
#include <Common/ProfileEvents.h>
namespace ProfileEvents
{
extern const Event ConcurrencyControlSlotsGranted;
extern const Event ConcurrencyControlSlotsDelayed;
extern const Event ConcurrencyControlSlotsAcquired;
extern const Event ConcurrencyControlQueriesDelayed;
}
namespace CurrentMetrics
{
extern const Metric ConcurrencyControlAcquired;
extern const Metric ConcurrencyControlSoftLimit;
}
namespace DB
{
@ -17,6 +33,7 @@ ConcurrencyControl::Slot::~Slot()
ConcurrencyControl::Slot::Slot(SlotAllocationPtr && allocation_)
: allocation(std::move(allocation_))
, acquired_slot_increment(CurrentMetrics::ConcurrencyControlAcquired)
{
}
@ -34,6 +51,7 @@ ConcurrencyControl::Allocation::~Allocation()
{
if (granted.compare_exchange_strong(value, value - 1))
{
ProfileEvents::increment(ProfileEvents::ConcurrencyControlSlotsAcquired, 1);
std::unique_lock lock{mutex};
return AcquiredSlotPtr(new Slot(shared_from_this())); // can't use std::make_shared due to private ctor
}
@ -84,6 +102,7 @@ void ConcurrencyControl::Allocation::release()
ConcurrencyControl::ConcurrencyControl()
: cur_waiter(waiters.end())
, max_concurrency_metric(CurrentMetrics::ConcurrencyControlSoftLimit, 0)
{
}
@ -103,11 +122,17 @@ ConcurrencyControl::~ConcurrencyControl()
// Acquire as many slots as we can, but not lower than `min`
SlotCount granted = std::max(min, std::min(max, available(lock)));
cur_concurrency += granted;
ProfileEvents::increment(ProfileEvents::ConcurrencyControlSlotsGranted, min);
// Create allocation and start waiting if more slots are required
if (granted < max)
{
ProfileEvents::increment(ProfileEvents::ConcurrencyControlSlotsDelayed, max - granted);
ProfileEvents::increment(ProfileEvents::ConcurrencyControlQueriesDelayed);
return SlotAllocationPtr(new Allocation(*this, max, granted,
waiters.insert(cur_waiter, nullptr /* pointer is set by Allocation ctor */)));
}
else
return SlotAllocationPtr(new Allocation(*this, max, granted));
}
@ -115,6 +140,7 @@ void ConcurrencyControl::setMaxConcurrency(SlotCount value)
{
std::unique_lock lock{mutex};
max_concurrency = std::max<SlotCount>(1, value); // never allow max_concurrency to be zero
max_concurrency_metric.changeTo(max_concurrency == UnlimitedSlots ? 0 : max_concurrency);
schedule(lock);
}

View File

@ -8,6 +8,7 @@
#include <base/types.h>
#include <boost/core/noncopyable.hpp>
#include <Common/CurrentMetrics.h>
#include <Common/ISlotControl.h>
namespace DB
@ -53,6 +54,7 @@ public:
explicit Slot(SlotAllocationPtr && allocation_);
SlotAllocationPtr allocation;
CurrentMetrics::Increment acquired_slot_increment;
};
// Manages group of slots for a single query, see ConcurrencyControl::allocate(min, max)
@ -131,6 +133,7 @@ private:
Waiters::iterator cur_waiter; // round-robin pointer
SlotCount max_concurrency = UnlimitedSlots;
SlotCount cur_concurrency = 0;
CurrentMetrics::Increment max_concurrency_metric;
};
}

View File

@ -216,6 +216,9 @@
M(ParquetDecoderThreads, "Number of threads in the ParquetBlockInputFormat thread pool.") \
M(ParquetDecoderThreadsActive, "Number of threads in the ParquetBlockInputFormat thread pool running a task.") \
M(ParquetDecoderThreadsScheduled, "Number of queued or active jobs in the ParquetBlockInputFormat thread pool.") \
M(ParquetDecoderIOThreads, "Number of threads in the ParquetBlockInputFormat io thread pool.") \
M(ParquetDecoderIOThreadsActive, "Number of threads in the ParquetBlockInputFormat io thread pool running a task.") \
M(ParquetDecoderIOThreadsScheduled, "Number of queued or active jobs in the ParquetBlockInputFormat io thread pool.") \
M(ParquetEncoderThreads, "Number of threads in ParquetBlockOutputFormat thread pool.") \
M(ParquetEncoderThreadsActive, "Number of threads in ParquetBlockOutputFormat thread pool running a task.") \
M(ParquetEncoderThreadsScheduled, "Number of queued or active jobs in ParquetBlockOutputFormat thread pool.") \
@ -318,6 +321,9 @@
M(FilteringMarksWithPrimaryKey, "Number of threads currently doing filtering of mark ranges by the primary key") \
M(FilteringMarksWithSecondaryKeys, "Number of threads currently doing filtering of mark ranges by secondary keys") \
\
M(ConcurrencyControlAcquired, "Total number of acquired CPU slots") \
M(ConcurrencyControlSoftLimit, "Value of soft limit on number of CPU slots") \
\
M(DiskS3NoSuchKeyErrors, "The number of `NoSuchKey` errors that occur when reading data from S3 cloud storage through ClickHouse disks.") \
\
M(SharedCatalogStateApplicationThreads, "Number of threads in the threadpool for state application in Shared Catalog.") \

View File

@ -196,7 +196,7 @@ void FileChecker::load()
bool FileChecker::fileReallyExists(const String & path_) const
{
return disk ? disk->exists(path_) : fs::exists(path_);
return disk ? disk->existsFile(path_) : fs::exists(path_);
}
size_t FileChecker::getRealFileSize(const String & path_) const

View File

@ -73,4 +73,44 @@ public:
[[nodiscard]] virtual SlotAllocationPtr allocate(SlotCount min, SlotCount max) = 0;
};
/// Slot allocation whose slots are all granted at construction time.
/// tryAcquire() hands them out one by one until the counter of granted-but-not-acquired
/// slots drops to zero.
class GrantedAllocation : public ISlotAllocation
{
public:
    /// `granted_` is both the total size of the allocation and the initial number of
    /// slots available for acquisition.
    explicit GrantedAllocation(SlotCount granted_)
        : granted(granted_)
        , allocated(granted_)
    {}

    /// Takes one granted slot if any is left; returns an empty pointer otherwise.
    [[nodiscard]] AcquiredSlotPtr tryAcquire() override
    {
        for (SlotCount remaining = granted.load(); remaining != 0;)
        {
            /// On failure compare_exchange_strong stores the current counter value into
            /// `remaining`, so the next iteration retries with fresh data.
            if (granted.compare_exchange_strong(remaining, remaining - 1))
                return std::make_shared<IAcquiredSlot>();
        }
        return AcquiredSlotPtr{};
    }

    /// Number of slots that are granted but not yet acquired.
    SlotCount grantedCount() const override
    {
        return granted.load();
    }

    /// Total number of slots this allocation was created with.
    SlotCount allocatedCount() const override
    {
        return allocated;
    }

private:
    std::atomic<SlotCount> granted; // allocated, but not yet acquired
    const SlotCount allocated;
};
/// Creates a GrantedAllocation holding `count` slots, all of them granted immediately.
/// The constructor is public, so std::make_shared can be used (one allocation for the
/// object and its control block) instead of wrapping a raw `new`.
[[nodiscard]] inline SlotAllocationPtr grantSlots(SlotCount count)
{
    return std::make_shared<GrantedAllocation>(count);
}
}

View File

@ -9,11 +9,11 @@
throw std::runtime_error(error);
}
std::unordered_map<UInt64, std::pair<time_t, size_t>> LogFrequencyLimiterIml::logged_messages;
time_t LogFrequencyLimiterIml::last_cleanup = 0;
std::mutex LogFrequencyLimiterIml::mutex;
std::unordered_map<UInt64, std::pair<time_t, size_t>> LogFrequencyLimiterImpl::logged_messages;
time_t LogFrequencyLimiterImpl::last_cleanup = 0;
std::mutex LogFrequencyLimiterImpl::mutex;
void LogFrequencyLimiterIml::log(Poco::Message & message)
void LogFrequencyLimiterImpl::log(Poco::Message & message)
{
std::string_view pattern = message.getFormatString();
if (pattern.empty())
@ -68,7 +68,7 @@ void LogFrequencyLimiterIml::log(Poco::Message & message)
channel->log(message);
}
void LogFrequencyLimiterIml::cleanup(time_t too_old_threshold_s)
void LogFrequencyLimiterImpl::cleanup(time_t too_old_threshold_s)
{
time_t now = time(nullptr);
time_t old = now - too_old_threshold_s;

View File

@ -230,7 +230,7 @@ template<> struct FormatStringTypeInfo<PreformattedMessage> { static constexpr b
/// This wrapper helps to avoid too frequent and noisy log messages.
/// For each pair (logger_name, format_string) it remembers when such a message was logged the last time.
/// The message will not be logged again if less than min_interval_s seconds passed since the previously logged message.
class LogFrequencyLimiterIml
class LogFrequencyLimiterImpl
{
/// Hash(logger_name, format_string) -> (last_logged_time_s, skipped_messages_count)
static std::unordered_map<UInt64, std::pair<time_t, size_t>> logged_messages;
@ -240,11 +240,11 @@ class LogFrequencyLimiterIml
LoggerPtr logger;
time_t min_interval_s;
public:
LogFrequencyLimiterIml(LoggerPtr logger_, time_t min_interval_s_) : logger(std::move(logger_)), min_interval_s(min_interval_s_) {}
LogFrequencyLimiterImpl(LoggerPtr logger_, time_t min_interval_s_) : logger(std::move(logger_)), min_interval_s(min_interval_s_) {}
LogFrequencyLimiterIml & operator -> () { return *this; }
LogFrequencyLimiterImpl & operator -> () { return *this; }
bool is(Poco::Message::Priority priority) { return logger->is(priority); }
LogFrequencyLimiterIml * getChannel() {return this; }
LogFrequencyLimiterImpl * getChannel() {return this; }
const String & name() const { return logger->name(); }
void log(Poco::Message & message);
@ -257,9 +257,9 @@ public:
/// This wrapper helps to avoid too noisy log messages from similar objects.
/// Once an instance of LogSeriesLimiter type is created the decision is done
/// All followed message which use this instance is either printed or muted all together.
/// LogSeriesLimiter differs from LogFrequencyLimiterIml in a way that
/// LogSeriesLimiter is useful for accept or mute series of logs when LogFrequencyLimiterIml works for each line independently.
/// All followed messages which use this instance are either printed or muted altogether.
/// LogSeriesLimiter differs from LogFrequencyLimiterImpl in a way that
/// LogSeriesLimiter is useful for accept or mute series of logs when LogFrequencyLimiterImpl works for each line independently.
class LogSeriesLimiter
{
static std::mutex mutex;
@ -295,11 +295,11 @@ class LogToStrImpl
{
String & out_str;
LoggerPtr logger;
std::unique_ptr<LogFrequencyLimiterIml> maybe_nested;
std::unique_ptr<LogFrequencyLimiterImpl> maybe_nested;
bool propagate_to_actual_log = true;
public:
LogToStrImpl(String & out_str_, LoggerPtr logger_) : out_str(out_str_), logger(std::move(logger_)) {}
LogToStrImpl(String & out_str_, std::unique_ptr<LogFrequencyLimiterIml> && maybe_nested_)
LogToStrImpl(String & out_str_, std::unique_ptr<LogFrequencyLimiterImpl> && maybe_nested_)
: out_str(out_str_), logger(maybe_nested_->getLogger()), maybe_nested(std::move(maybe_nested_)) {}
LogToStrImpl & operator -> () { return *this; }
bool is(Poco::Message::Priority priority) { propagate_to_actual_log &= logger->is(priority); return true; }

View File

@ -222,6 +222,8 @@
M(SelectedBytes, "Number of bytes (uncompressed; for columns as they stored in memory) SELECTed from all tables.", ValueType::Bytes) \
M(RowsReadByMainReader, "Number of rows read from MergeTree tables by the main reader (after PREWHERE step).", ValueType::Number) \
M(RowsReadByPrewhereReaders, "Number of rows read from MergeTree tables (in total) by prewhere readers.", ValueType::Number) \
M(LoadedDataParts, "Number of data parts loaded by MergeTree tables during initialization.", ValueType::Number) \
M(LoadedDataPartsMicroseconds, "Microseconds spent by MergeTree tables for loading data parts during initialization.", ValueType::Microseconds) \
\
M(WaitMarksLoadMicroseconds, "Time spent loading marks", ValueType::Microseconds) \
M(BackgroundLoadingMarksTasks, "Number of background tasks for loading marks", ValueType::Number) \
@ -877,6 +879,11 @@ The server successfully detected this situation and will download merged part fr
M(ReadWriteBufferFromHTTPRequestsSent, "Number of HTTP requests sent by ReadWriteBufferFromHTTP", ValueType::Number) \
M(ReadWriteBufferFromHTTPBytes, "Total size of payload bytes received and sent by ReadWriteBufferFromHTTP. Doesn't include HTTP headers.", ValueType::Bytes) \
\
M(ConcurrencyControlSlotsGranted, "Number of CPU slot granted according to guarantee of 1 thread per query and for queries with setting 'use_concurrency_control' = 0", ValueType::Number) \
M(ConcurrencyControlSlotsDelayed, "Number of CPU slot not granted initially and required to wait for a free CPU slot", ValueType::Number) \
M(ConcurrencyControlSlotsAcquired, "Total number of CPU slot acquired", ValueType::Number) \
M(ConcurrencyControlQueriesDelayed, "Total number of CPU slot allocations (queries) that were required to wait for slots to upscale", ValueType::Number) \
\
M(SharedDatabaseCatalogFailedToApplyState, "Number of failures to apply new state in SharedDatabaseCatalog", ValueType::Number) \
M(SharedDatabaseCatalogStateApplicationMicroseconds, "Total time spend on application of new state in SharedDatabaseCatalog", ValueType::Microseconds) \
\
@ -886,6 +893,8 @@ The server successfully detected this situation and will download merged part fr
\
M(MemoryWorkerRun, "Number of runs done by MemoryWorker in background", ValueType::Number) \
M(MemoryWorkerRunElapsedMicroseconds, "Total time spent by MemoryWorker for background work", ValueType::Microseconds) \
\
M(ParquetFetchWaitTimeMicroseconds, "Time of waiting fetching parquet data", ValueType::Microseconds) \
#ifdef APPLY_FOR_EXTERNAL_EVENTS

View File

@ -31,7 +31,7 @@ std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const
endpoint.toString(),
response.getStatus(),
response.getReason(),
"");
/* body_length = */ 0);
std::string proxy_host;
Poco::StreamCopier::copyToString(response_body_stream, proxy_host);

View File

@ -10,11 +10,11 @@ namespace ProfileEvents
namespace DB
{
TaskTracker::TaskTracker(ThreadPoolCallbackRunnerUnsafe<void> scheduler_, size_t max_tasks_inflight_, LogSeriesLimiterPtr limitedLog_)
TaskTracker::TaskTracker(ThreadPoolCallbackRunnerUnsafe<void> scheduler_, size_t max_tasks_inflight_, LogSeriesLimiterPtr limited_log_)
: is_async(bool(scheduler_))
, scheduler(scheduler_ ? std::move(scheduler_) : syncRunner())
, max_tasks_inflight(max_tasks_inflight_)
, limitedLog(limitedLog_)
, limited_log(limited_log_)
{}
TaskTracker::~TaskTracker()
@ -142,7 +142,7 @@ void TaskTracker::waitTilInflightShrink()
return;
if (futures.size() >= max_tasks_inflight)
LOG_TEST(limitedLog, "have to wait some tasks finish, in queue {}, limit {}", futures.size(), max_tasks_inflight);
LOG_TEST(limited_log, "have to wait some tasks finish, in queue {}, limit {}", futures.size(), max_tasks_inflight);
Stopwatch watch;

View File

@ -23,7 +23,7 @@ class TaskTracker
public:
using Callback = std::function<void()>;
TaskTracker(ThreadPoolCallbackRunnerUnsafe<void> scheduler_, size_t max_tasks_inflight_, LogSeriesLimiterPtr limitedLog_);
TaskTracker(ThreadPoolCallbackRunnerUnsafe<void> scheduler_, size_t max_tasks_inflight_, LogSeriesLimiterPtr limited_log_);
~TaskTracker();
static ThreadPoolCallbackRunnerUnsafe<void> syncRunner();
@ -55,7 +55,7 @@ private:
using FutureList = std::list<std::future<void>>;
FutureList futures;
LogSeriesLimiterPtr limitedLog;
LogSeriesLimiterPtr limited_log;
std::mutex mutex;
std::condition_variable has_finished TSA_GUARDED_BY(mutex);

View File

@ -81,7 +81,7 @@ void ZooKeeperLock::unlock()
zookeeper->remove(lock_path, -1);
LOG_TRACE(log, "Lock on path {} for session {} is unlocked", lock_path, zookeeper->getClientID());
}
else if (result)
else if (result && throw_if_lost) /// NOTE: What if session expired exactly here?
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Lock is lost, it has another owner. Path: {}, message: {}, owner: {}, our id: {}",
lock_path, lock_message, stat.ephemeralOwner, zookeeper->getClientID());
else if (throw_if_lost)

View File

@ -14,7 +14,7 @@
#define LogToStr(x, y) std::make_unique<LogToStrImpl>(x, y)
#define LogFrequencyLimiter(x, y) std::make_unique<LogFrequencyLimiterIml>(x, y)
#define LogFrequencyLimiter(x, y) std::make_unique<LogFrequencyLimiterImpl>(x, y)
using LogSeriesLimiterPtr = std::shared_ptr<LogSeriesLimiter>;
@ -24,7 +24,7 @@ namespace impl
[[maybe_unused]] inline LoggerPtr getLoggerHelper(const DB::AtomicLogger & logger) { return logger.load(); }
[[maybe_unused]] inline const ::Poco::Logger * getLoggerHelper(const ::Poco::Logger * logger) { return logger; }
[[maybe_unused]] inline std::unique_ptr<LogToStrImpl> getLoggerHelper(std::unique_ptr<LogToStrImpl> && logger) { return logger; }
[[maybe_unused]] inline std::unique_ptr<LogFrequencyLimiterIml> getLoggerHelper(std::unique_ptr<LogFrequencyLimiterIml> && logger) { return logger; }
[[maybe_unused]] inline std::unique_ptr<LogFrequencyLimiterImpl> getLoggerHelper(std::unique_ptr<LogFrequencyLimiterImpl> && logger) { return logger; }
[[maybe_unused]] inline LogSeriesLimiterPtr getLoggerHelper(LogSeriesLimiterPtr & logger) { return logger; }
}

View File

@ -25,15 +25,11 @@ namespace
* `curl` strips leading dot and accepts url gitlab.com as a match for no_proxy .gitlab.com,
* while `wget` does an exact match.
* */
std::string buildPocoRegexpEntryWithoutLeadingDot(const std::string & host)
std::string buildPocoRegexpEntryWithoutLeadingDot(std::string_view host)
{
std::string_view view_without_leading_dot = host;
if (host[0] == '.')
{
view_without_leading_dot = std::string_view {host.begin() + 1u, host.end()};
}
return RE2::QuoteMeta(view_without_leading_dot);
if (host.starts_with('.'))
host.remove_prefix(1);
return RE2::QuoteMeta(host);
}
}

View File

@ -1890,7 +1890,7 @@ void Changelog::removeExistingLogs(ChangelogIter begin, ChangelogIter end)
{
auto & changelog_description = itr->second;
if (!disk->exists(timestamp_folder))
if (!disk->existsDirectory(timestamp_folder))
{
LOG_WARNING(log, "Moving broken logs to {}", timestamp_folder);
disk->createDirectories(timestamp_folder);

View File

@ -1,10 +1,10 @@
#include <Coordination/CoordinationSettings.h>
#include <Common/logger_useful.h>
#include <Coordination/Defines.h>
#include <Core/BaseSettings.h>
#include <IO/WriteHelpers.h>
#include <IO/WriteIntText.h>
#include <Common/ZooKeeper/ZooKeeperConstants.h>
#include "config.h"
#include <Poco/Util/AbstractConfiguration.h>
namespace DB
{
@ -13,9 +13,66 @@ namespace ErrorCodes
extern const int UNKNOWN_SETTING;
}
/** Settings that fine-tune internal details of the Coordination storages.
  * They should not be changed by the user without a reason.
  *
  * This is an X-macro: every entry has the form M(type, name, default, description, flags).
  * In this file the list is expanded by DECLARE_SETTINGS_TRAITS / IMPLEMENT_SETTINGS_TRAITS and by
  * INITIALIZE_SETTING_EXTERN. The type token is pasted onto `CoordinationSettings` by the latter,
  * so it has to be one of the types named in COORDINATION_SETTINGS_SUPPORTED_TYPES.
  * No entry currently uses the ALIAS parameter.
  */
#define LIST_OF_COORDINATION_SETTINGS(M, ALIAS) \
M(Milliseconds, min_session_timeout_ms, Coordination::DEFAULT_MIN_SESSION_TIMEOUT_MS, "Min client session timeout", 0) \
M(Milliseconds, session_timeout_ms, Coordination::DEFAULT_MAX_SESSION_TIMEOUT_MS, "Max client session timeout", 0) \
M(Milliseconds, operation_timeout_ms, Coordination::DEFAULT_OPERATION_TIMEOUT_MS, "Default client operation timeout", 0) \
M(Milliseconds, dead_session_check_period_ms, 500, "How often leader will check sessions to consider them dead and remove", 0) \
M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \
M(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too often leader elections)", 0) \
M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Upper bound of election timer (avoid too often leader elections)", 0) \
M(Milliseconds, leadership_expiry_ms, 0, "Duration after which a leader will expire if it fails to receive responses from peers. Set it lower or equal to election_timeout_lower_bound_ms to avoid multiple leaders.", 0) \
M(UInt64, reserved_log_items, 100000, "How many log items to store (don't remove during compaction)", 0) \
M(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \
M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \
M(Milliseconds, shutdown_timeout, 5000, "How much time we will wait until RAFT shutdown", 0) \
M(Milliseconds, session_shutdown_timeout, 10000, "How much time we will wait until sessions are closed during shutdown", 0) \
M(Milliseconds, startup_timeout, 180000, "How much time we will wait until RAFT to start.", 0) \
M(Milliseconds, sleep_before_leader_change_ms, 8000, "How much time we will wait before removing leader (so as leader could commit accepted but non-committed commands and they won't be lost -- leader removal is not synchronized with committing)", 0) \
M(LogsLevel, raft_logs_level, LogsLevel::information, "Log internal RAFT logs into main server log level. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \
M(UInt64, rotate_log_storage_interval, 100000, "How many records will be stored in one log storage file", 0) \
M(UInt64, snapshots_to_keep, 3, "How many compressed snapshots to keep on disk", 0) \
M(UInt64, stale_log_gap, 10000, "When node became stale and should receive snapshots from leader", 0) \
M(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \
M(UInt64, max_request_queue_size, 100000, "Maximum number of request that can be in queue for processing", 0) \
M(UInt64, max_requests_batch_size, 100, "Max size of batch of requests that can be sent to RAFT", 0) \
M(UInt64, max_requests_batch_bytes_size, 100*1024, "Max size in bytes of batch of requests that can be sent to RAFT", 0) \
M(UInt64, max_requests_append_size, 100, "Max size of batch of requests that can be sent to replica in append request", 0) \
M(UInt64, max_flush_batch_size, 1000, "Max size of batch of requests that can be flushed together", 0) \
M(UInt64, max_requests_quick_batch_size, 100, "Max size of batch of requests to try to get before proceeding with RAFT. Keeper will not wait for requests but take only requests that are already in queue" , 0) \
M(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consesus with similar speed", 0) \
M(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0) \
M(Bool, compress_logs, false, "Write compressed coordination logs in ZSTD format", 0) \
M(Bool, compress_snapshots_with_zstd_format, true, "Write compressed snapshots in ZSTD format (instead of custom LZ4)", 0) \
M(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0) \
M(UInt64, max_log_file_size, 50 * 1024 * 1024, "Max size of the Raft log file. If possible, each created log file will preallocate this amount of bytes on disk. Set to 0 to disable the limit", 0) \
M(UInt64, log_file_overallocate_size, 50 * 1024 * 1024, "If max_log_file_size is not set to 0, this value will be added to it for preallocating bytes on disk. If a log record is larger than this value, it could lead to uncaught out-of-space issues so a larger value is preferred", 0) \
M(UInt64, min_request_size_for_cache, 50 * 1024, "Minimal size of the request to cache the deserialization result. Caching can have negative effect on latency for smaller requests, set to 0 to disable", 0) \
M(UInt64, raft_limits_reconnect_limit, 50, "If connection to a peer is silent longer than this limit * (multiplied by heartbeat interval), we re-establish the connection.", 0) \
M(UInt64, raft_limits_response_limit, 20, "Total wait time for a response is calculated by multiplying response_limit with heart_beat_interval_ms", 0) \
M(Bool, async_replication, false, "Enable async replication. All write and read guarantees are preserved while better performance is achieved. Settings is disabled by default to not break backwards compatibility.", 0) \
M(Bool, experimental_use_rocksdb, false, "Use rocksdb as backend storage", 0) \
M(UInt64, latest_logs_cache_size_threshold, 1 * 1024 * 1024 * 1024, "Maximum total size of in-memory cache of latest log entries.", 0) \
M(UInt64, commit_logs_cache_size_threshold, 500 * 1024 * 1024, "Maximum total size of in-memory cache of log entries needed next for commit.", 0) \
M(UInt64, disk_move_retries_wait_ms, 1000, "How long to wait between retries after a failure which happened while a file was being moved between disks.", 0) \
M(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) \
M(UInt64, log_slow_total_threshold_ms, 5000, "Requests for which the total latency is larger than this settings will be logged", 0) \
M(UInt64, log_slow_cpu_threshold_ms, 100, "Requests for which the CPU (preprocessing and processing) latency is larger than this settings will be logged", 0) \
M(UInt64, log_slow_connection_operation_threshold_ms, 1000, "Log message if a certain operation took too long inside a single connection", 0)
/// Declares the traits type used to instantiate BaseSettings below, driven by the settings list above.
/// NOTE(review): the macro itself is defined outside this file — presumably in Core/BaseSettings.h.
DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)
/// Implementation object that CoordinationSettings owns through a std::unique_ptr.
/// The header only forward-declares this struct, so the settings list stays local to this file.
struct CoordinationSettingsImpl : public BaseSettings<CoordinationSettingsTraits>
{
/// Loads settings from the `config_elem` subtree of `config`; does nothing if `config` has no such key.
void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config);
};
/// Counterpart of DECLARE_SETTINGS_TRAITS: emits the out-of-line definitions for the traits type.
IMPLEMENT_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)
void CoordinationSettings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config)
void CoordinationSettingsImpl::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config)
{
if (!config.has(config_elem))
return;
@ -41,6 +98,44 @@ void CoordinationSettings::loadFromConfig(const String & config_elem, const Poco
max_requests_append_size = max_requests_batch_size;
}
/// For every entry of LIST_OF_COORDINATION_SETTINGS, defines an object CoordinationSetting::<NAME>
/// initialised with the address of the member of the same name in CoordinationSettingsImpl.
/// Other translation units reach these objects through `extern` declarations and pass them to
/// CoordinationSettings::operator[] to access the corresponding field.
/// `CoordinationSettings##Impl` is token-pasted into `CoordinationSettingsImpl`.
/// NOTE(review): the CoordinationSettings<TYPE> handle types are presumably produced by DECLARE_SETTING_TRAIT in the header — confirm.
#define INITIALIZE_SETTING_EXTERN(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) \
CoordinationSettings##TYPE NAME = &CoordinationSettings##Impl ::NAME;
namespace CoordinationSetting
{
LIST_OF_COORDINATION_SETTINGS(INITIALIZE_SETTING_EXTERN, SKIP_ALIAS)
}
#undef INITIALIZE_SETTING_EXTERN
/// Allocates a default-constructed implementation object.
CoordinationSettings::CoordinationSettings() : impl(std::make_unique<CoordinationSettingsImpl>())
{
}
/// Deep copy: allocates a new implementation object copy-constructed from the one owned by `settings`,
/// so the two CoordinationSettings objects do not share state afterwards.
CoordinationSettings::CoordinationSettings(const CoordinationSettings & settings)
: impl(std::make_unique<CoordinationSettingsImpl>(*settings.impl))
{
}
/// Defaulted here rather than in the header: destroying the std::unique_ptr member needs the complete
/// CoordinationSettingsImpl type, which the header only forward-declares.
CoordinationSettings::~CoordinationSettings() = default;
/// Emits the const and the non-const CoordinationSettings::operator[] for one setting field type.
/// The argument of operator[] is a pointer to a member of CoordinationSettingsImpl; it is applied to
/// the implementation object owned by `impl`, and a reference to that field is returned.
#define IMPLEMENT_SETTING_SUBSCRIPT_OPERATOR(HANDLE_PREFIX, FIELD_TYPE) \
    const SettingField##FIELD_TYPE & CoordinationSettings::operator[](HANDLE_PREFIX##FIELD_TYPE member) const \
    { \
        return (*impl).*member; \
    } \
    SettingField##FIELD_TYPE & CoordinationSettings::operator[](HANDLE_PREFIX##FIELD_TYPE member) \
    { \
        return (*impl).*member; \
    }

/// One pair of operators per type listed in COORDINATION_SETTINGS_SUPPORTED_TYPES.
COORDINATION_SETTINGS_SUPPORTED_TYPES(CoordinationSettings, IMPLEMENT_SETTING_SUBSCRIPT_OPERATOR)
#undef IMPLEMENT_SETTING_SUBSCRIPT_OPERATOR
/// Public entry point for loading settings from configuration.
/// Forwards both arguments unchanged to CoordinationSettingsImpl::loadFromConfig on the owned implementation object.
void CoordinationSettings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config)
{
impl->loadFromConfig(config_elem, config);
}
const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD =
#if USE_JEMALLOC
@ -54,7 +149,7 @@ KeeperConfigurationAndSettings::KeeperConfigurationAndSettings()
, tcp_port(NOT_EXIST)
, tcp_port_secure(NOT_EXIST)
, standalone_keeper(false)
, coordination_settings(std::make_shared<CoordinationSettings>())
, coordination_settings()
{
}
@ -98,94 +193,94 @@ void KeeperConfigurationAndSettings::dump(WriteBufferFromOwnString & buf) const
/// coordination_settings
writeText("max_requests_batch_size=", buf);
write_int(coordination_settings->max_requests_batch_size);
write_int(coordination_settings[CoordinationSetting::max_requests_batch_size]);
writeText("min_session_timeout_ms=", buf);
write_int(static_cast<uint64_t>(coordination_settings->min_session_timeout_ms));
write_int(static_cast<uint64_t>(coordination_settings[CoordinationSetting::min_session_timeout_ms]));
writeText("session_timeout_ms=", buf);
write_int(static_cast<uint64_t>(coordination_settings->session_timeout_ms));
write_int(static_cast<uint64_t>(coordination_settings[CoordinationSetting::session_timeout_ms]));
writeText("operation_timeout_ms=", buf);
write_int(static_cast<uint64_t>(coordination_settings->operation_timeout_ms));
write_int(static_cast<uint64_t>(coordination_settings[CoordinationSetting::operation_timeout_ms]));
writeText("dead_session_check_period_ms=", buf);
write_int(static_cast<uint64_t>(coordination_settings->dead_session_check_period_ms));
write_int(static_cast<uint64_t>(coordination_settings[CoordinationSetting::dead_session_check_period_ms]));
writeText("heart_beat_interval_ms=", buf);
write_int(static_cast<uint64_t>(coordination_settings->heart_beat_interval_ms));
write_int(static_cast<uint64_t>(coordination_settings[CoordinationSetting::heart_beat_interval_ms]));
writeText("election_timeout_lower_bound_ms=", buf);
write_int(static_cast<uint64_t>(coordination_settings->election_timeout_lower_bound_ms));
write_int(static_cast<uint64_t>(coordination_settings[CoordinationSetting::election_timeout_lower_bound_ms]));
writeText("election_timeout_upper_bound_ms=", buf);
write_int(static_cast<uint64_t>(coordination_settings->election_timeout_upper_bound_ms));
write_int(static_cast<uint64_t>(coordination_settings[CoordinationSetting::election_timeout_upper_bound_ms]));
writeText("leadership_expiry_ms=", buf);
write_int(static_cast<uint64_t>(coordination_settings->leadership_expiry_ms));
write_int(static_cast<uint64_t>(coordination_settings[CoordinationSetting::leadership_expiry_ms]));
writeText("reserved_log_items=", buf);
write_int(coordination_settings->reserved_log_items);
write_int(coordination_settings[CoordinationSetting::reserved_log_items]);
writeText("snapshot_distance=", buf);
write_int(coordination_settings->snapshot_distance);
write_int(coordination_settings[CoordinationSetting::snapshot_distance]);
writeText("auto_forwarding=", buf);
write_bool(coordination_settings->auto_forwarding);
write_bool(coordination_settings[CoordinationSetting::auto_forwarding]);
writeText("shutdown_timeout=", buf);
write_int(static_cast<uint64_t>(coordination_settings->shutdown_timeout));
write_int(static_cast<uint64_t>(coordination_settings[CoordinationSetting::shutdown_timeout]));
writeText("startup_timeout=", buf);
write_int(static_cast<uint64_t>(coordination_settings->startup_timeout));
write_int(static_cast<uint64_t>(coordination_settings[CoordinationSetting::startup_timeout]));
writeText("raft_logs_level=", buf);
writeText(coordination_settings->raft_logs_level.toString(), buf);
writeText(coordination_settings[CoordinationSetting::raft_logs_level].toString(), buf);
buf.write('\n');
writeText("snapshots_to_keep=", buf);
write_int(coordination_settings->snapshots_to_keep);
write_int(coordination_settings[CoordinationSetting::snapshots_to_keep]);
writeText("rotate_log_storage_interval=", buf);
write_int(coordination_settings->rotate_log_storage_interval);
write_int(coordination_settings[CoordinationSetting::rotate_log_storage_interval]);
writeText("stale_log_gap=", buf);
write_int(coordination_settings->stale_log_gap);
write_int(coordination_settings[CoordinationSetting::stale_log_gap]);
writeText("fresh_log_gap=", buf);
write_int(coordination_settings->fresh_log_gap);
write_int(coordination_settings[CoordinationSetting::fresh_log_gap]);
writeText("max_requests_batch_size=", buf);
write_int(coordination_settings->max_requests_batch_size);
write_int(coordination_settings[CoordinationSetting::max_requests_batch_size]);
writeText("max_requests_batch_bytes_size=", buf);
write_int(coordination_settings->max_requests_batch_bytes_size);
write_int(coordination_settings[CoordinationSetting::max_requests_batch_bytes_size]);
writeText("max_flush_batch_size=", buf);
write_int(coordination_settings->max_flush_batch_size);
write_int(coordination_settings[CoordinationSetting::max_flush_batch_size]);
writeText("max_request_queue_size=", buf);
write_int(coordination_settings->max_request_queue_size);
write_int(coordination_settings[CoordinationSetting::max_request_queue_size]);
writeText("max_requests_quick_batch_size=", buf);
write_int(coordination_settings->max_requests_quick_batch_size);
write_int(coordination_settings[CoordinationSetting::max_requests_quick_batch_size]);
writeText("quorum_reads=", buf);
write_bool(coordination_settings->quorum_reads);
write_bool(coordination_settings[CoordinationSetting::quorum_reads]);
writeText("force_sync=", buf);
write_bool(coordination_settings->force_sync);
write_bool(coordination_settings[CoordinationSetting::force_sync]);
writeText("compress_logs=", buf);
write_bool(coordination_settings->compress_logs);
write_bool(coordination_settings[CoordinationSetting::compress_logs]);
writeText("compress_snapshots_with_zstd_format=", buf);
write_bool(coordination_settings->compress_snapshots_with_zstd_format);
write_bool(coordination_settings[CoordinationSetting::compress_snapshots_with_zstd_format]);
writeText("configuration_change_tries_count=", buf);
write_int(coordination_settings->configuration_change_tries_count);
write_int(coordination_settings[CoordinationSetting::configuration_change_tries_count]);
writeText("raft_limits_reconnect_limit=", buf);
write_int(static_cast<uint64_t>(coordination_settings->raft_limits_reconnect_limit));
write_int(static_cast<uint64_t>(coordination_settings[CoordinationSetting::raft_limits_reconnect_limit]));
writeText("async_replication=", buf);
write_bool(coordination_settings->async_replication);
write_bool(coordination_settings[CoordinationSetting::async_replication]);
writeText("latest_logs_cache_size_threshold=", buf);
write_int(coordination_settings->latest_logs_cache_size_threshold);
write_int(coordination_settings[CoordinationSetting::latest_logs_cache_size_threshold]);
writeText("commit_logs_cache_size_threshold=", buf);
write_int(coordination_settings->commit_logs_cache_size_threshold);
write_int(coordination_settings[CoordinationSetting::commit_logs_cache_size_threshold]);
writeText("disk_move_retries_wait_ms=", buf);
write_int(coordination_settings->disk_move_retries_wait_ms);
write_int(coordination_settings[CoordinationSetting::disk_move_retries_wait_ms]);
writeText("disk_move_retries_during_init=", buf);
write_int(coordination_settings->disk_move_retries_during_init);
write_int(coordination_settings[CoordinationSetting::disk_move_retries_during_init]);
writeText("log_slow_total_threshold_ms=", buf);
write_int(coordination_settings->log_slow_total_threshold_ms);
write_int(coordination_settings[CoordinationSetting::log_slow_total_threshold_ms]);
writeText("log_slow_cpu_threshold_ms=", buf);
write_int(coordination_settings->log_slow_cpu_threshold_ms);
write_int(coordination_settings[CoordinationSetting::log_slow_cpu_threshold_ms]);
writeText("log_slow_connection_operation_threshold_ms=", buf);
write_int(coordination_settings->log_slow_connection_operation_threshold_ms);
write_int(coordination_settings[CoordinationSetting::log_slow_connection_operation_threshold_ms]);
}
KeeperConfigurationAndSettingsPtr
@ -217,7 +312,7 @@ KeeperConfigurationAndSettings::loadFromConfig(const Poco::Util::AbstractConfigu
DEFAULT_FOUR_LETTER_WORD_CMD));
ret->coordination_settings->loadFromConfig("keeper_server.coordination_settings", config);
ret->coordination_settings.loadFromConfig("keeper_server.coordination_settings", config);
return ret;
}

View File

@ -1,76 +1,43 @@
#pragma once
#include <Core/Defines.h>
#include <Core/BaseSettings.h>
#include <Core/BaseSettingsFwdMacros.h>
#include <Core/SettingsEnums.h>
#include <Common/ZooKeeper/ZooKeeperConstants.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <IO/WriteBufferFromString.h>
#include <Core/SettingsFields.h>
namespace Poco
{
namespace Util
{
class AbstractConfiguration;
}
}
namespace DB
{
struct CoordinationSettingsImpl;
class WriteBufferFromOwnString;
struct Settings;
/// Field types that settings in a CoordinationSettings object may have.
/// Expands to one M(CLASS_NAME, <Type>) invocation per type. It is used with DECLARE_SETTING_TRAIT right
/// below, with DECLARE_SETTING_SUBSCRIPT_OPERATOR inside the struct and with
/// IMPLEMENT_SETTING_SUBSCRIPT_OPERATOR in the implementation file.
/// A setting of any other type would have no matching operator[] overload.
#define COORDINATION_SETTINGS_SUPPORTED_TYPES(CLASS_NAME, M) \
M(CLASS_NAME, Bool) \
M(CLASS_NAME, LogsLevel) \
M(CLASS_NAME, Milliseconds) \
M(CLASS_NAME, UInt64)
/// NOTE(review): presumably declares the CoordinationSettings<Type> handle types (e.g. CoordinationSettingsUInt64)
/// that the `extern` declarations in other files use — the macro comes from Core/BaseSettingsFwdMacros.h, confirm there.
COORDINATION_SETTINGS_SUPPORTED_TYPES(CoordinationSettings, DECLARE_SETTING_TRAIT)
/** These settings represent fine tunes for internal details of Coordination storages
* and should not be changed by the user without a reason.
*/
#define LIST_OF_COORDINATION_SETTINGS(M, ALIAS) \
M(Milliseconds, min_session_timeout_ms, Coordination::DEFAULT_MIN_SESSION_TIMEOUT_MS, "Min client session timeout", 0) \
M(Milliseconds, session_timeout_ms, Coordination::DEFAULT_MAX_SESSION_TIMEOUT_MS, "Max client session timeout", 0) \
M(Milliseconds, operation_timeout_ms, Coordination::DEFAULT_OPERATION_TIMEOUT_MS, "Default client operation timeout", 0) \
M(Milliseconds, dead_session_check_period_ms, 500, "How often leader will check sessions to consider them dead and remove", 0) \
M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \
M(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too often leader elections)", 0) \
M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Upper bound of election timer (avoid too often leader elections)", 0) \
M(Milliseconds, leadership_expiry_ms, 0, "Duration after which a leader will expire if it fails to receive responses from peers. Set it lower or equal to election_timeout_lower_bound_ms to avoid multiple leaders.", 0) \
M(UInt64, reserved_log_items, 100000, "How many log items to store (don't remove during compaction)", 0) \
M(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \
M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \
M(Milliseconds, shutdown_timeout, 5000, "How much time we will wait until RAFT shutdown", 0) \
M(Milliseconds, session_shutdown_timeout, 10000, "How much time we will wait until sessions are closed during shutdown", 0) \
M(Milliseconds, startup_timeout, 180000, "How much time we will wait until RAFT to start.", 0) \
M(Milliseconds, sleep_before_leader_change_ms, 8000, "How much time we will wait before removing leader (so as leader could commit accepted but non-committed commands and they won't be lost -- leader removal is not synchronized with committing)", 0) \
M(LogsLevel, raft_logs_level, LogsLevel::information, "Log internal RAFT logs into main server log level. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \
M(UInt64, rotate_log_storage_interval, 100000, "How many records will be stored in one log storage file", 0) \
M(UInt64, snapshots_to_keep, 3, "How many compressed snapshots to keep on disk", 0) \
M(UInt64, stale_log_gap, 10000, "When node became stale and should receive snapshots from leader", 0) \
M(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \
M(UInt64, max_request_queue_size, 100000, "Maximum number of request that can be in queue for processing", 0) \
M(UInt64, max_requests_batch_size, 100, "Max size of batch of requests that can be sent to RAFT", 0) \
M(UInt64, max_requests_batch_bytes_size, 100*1024, "Max size in bytes of batch of requests that can be sent to RAFT", 0) \
M(UInt64, max_requests_append_size, 100, "Max size of batch of requests that can be sent to replica in append request", 0) \
M(UInt64, max_flush_batch_size, 1000, "Max size of batch of requests that can be flushed together", 0) \
M(UInt64, max_requests_quick_batch_size, 100, "Max size of batch of requests to try to get before proceeding with RAFT. Keeper will not wait for requests but take only requests that are already in queue" , 0) \
M(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consesus with similar speed", 0) \
M(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0) \
M(Bool, compress_logs, false, "Write compressed coordination logs in ZSTD format", 0) \
M(Bool, compress_snapshots_with_zstd_format, true, "Write compressed snapshots in ZSTD format (instead of custom LZ4)", 0) \
M(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0) \
M(UInt64, max_log_file_size, 50 * 1024 * 1024, "Max size of the Raft log file. If possible, each created log file will preallocate this amount of bytes on disk. Set to 0 to disable the limit", 0) \
M(UInt64, log_file_overallocate_size, 50 * 1024 * 1024, "If max_log_file_size is not set to 0, this value will be added to it for preallocating bytes on disk. If a log record is larger than this value, it could lead to uncaught out-of-space issues so a larger value is preferred", 0) \
M(UInt64, min_request_size_for_cache, 50 * 1024, "Minimal size of the request to cache the deserialization result. Caching can have negative effect on latency for smaller requests, set to 0 to disable", 0) \
M(UInt64, raft_limits_reconnect_limit, 50, "If connection to a peer is silent longer than this limit * (multiplied by heartbeat interval), we re-establish the connection.", 0) \
M(UInt64, raft_limits_response_limit, 20, "Total wait time for a response is calculated by multiplying response_limit with heart_beat_interval_ms", 0) \
M(Bool, async_replication, false, "Enable async replication. All write and read guarantees are preserved while better performance is achieved. Settings is disabled by default to not break backwards compatibility.", 0) \
M(Bool, experimental_use_rocksdb, false, "Use rocksdb as backend storage", 0) \
M(UInt64, latest_logs_cache_size_threshold, 1 * 1024 * 1024 * 1024, "Maximum total size of in-memory cache of latest log entries.", 0) \
M(UInt64, commit_logs_cache_size_threshold, 500 * 1024 * 1024, "Maximum total size of in-memory cache of log entries needed next for commit.", 0) \
M(UInt64, disk_move_retries_wait_ms, 1000, "How long to wait between retries after a failure which happened while a file was being moved between disks.", 0) \
M(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) \
M(UInt64, log_slow_total_threshold_ms, 5000, "Requests for which the total latency is larger than this settings will be logged", 0) \
M(UInt64, log_slow_cpu_threshold_ms, 100, "Requests for which the CPU (preprocessing and processing) latency is larger than this settings will be logged", 0) \
M(UInt64, log_slow_connection_operation_threshold_ms, 1000, "Log message if a certain operation took too long inside a single connection", 0)
DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)
/// Facade over the coordination settings. The actual fields live in CoordinationSettingsImpl, which is only
/// forward-declared in this header; individual settings are read and written through operator[] using the
/// handles declared in namespace CoordinationSetting.
struct CoordinationSettings : public BaseSettings<CoordinationSettingsTraits>
struct CoordinationSettings
{
CoordinationSettings();
/// Copying allocates a separate implementation object (see the definition in the implementation file).
CoordinationSettings(const CoordinationSettings & settings);
~CoordinationSettings();
/// Loads settings from the `config_elem` subtree of `config`.
void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config);
/// Declares the operator[] overloads, one set per supported field type.
COORDINATION_SETTINGS_SUPPORTED_TYPES(CoordinationSettings, DECLARE_SETTING_SUBSCRIPT_OPERATOR)
private:
/// Owning pointer to the implementation object.
std::unique_ptr<CoordinationSettingsImpl> impl;
};
using CoordinationSettingsPtr = std::shared_ptr<CoordinationSettings>;
@ -95,7 +62,7 @@ struct KeeperConfigurationAndSettings
String super_digest;
bool standalone_keeper;
CoordinationSettingsPtr coordination_settings;
CoordinationSettings coordination_settings;
void dump(WriteBufferFromOwnString & buf) const;
static std::shared_ptr<KeeperConfigurationAndSettings> loadFromConfig(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper_);

View File

@ -11,6 +11,12 @@
namespace DB
{
/// Handles for the coordination settings read in this file. The objects are defined where
/// LIST_OF_COORDINATION_SETTINGS is expanded with INITIALIZE_SETTING_EXTERN, and are passed to
/// CoordinationSettings::operator[] to access the corresponding field.
namespace CoordinationSetting
{
extern const CoordinationSettingsUInt64 disk_move_retries_during_init;
extern const CoordinationSettingsUInt64 disk_move_retries_wait_ms;
}
static size_t findLastSlash(StringRef path)
{
if (path.size == 0)
@ -60,8 +66,8 @@ void moveFileBetweenDisks(
auto tmp_file_name = from_path.parent_path() / (std::string{tmp_keeper_file_prefix} + from_path.filename().string());
const auto & coordination_settings = keeper_context->getCoordinationSettings();
auto max_retries_on_init = coordination_settings->disk_move_retries_during_init.value;
auto retries_sleep = std::chrono::milliseconds(coordination_settings->disk_move_retries_wait_ms);
auto max_retries_on_init = coordination_settings[CoordinationSetting::disk_move_retries_during_init].value;
auto retries_sleep = std::chrono::milliseconds(coordination_settings[CoordinationSetting::disk_move_retries_wait_ms]);
auto run_with_retries = [&](const auto & op, std::string_view operation_description)
{
size_t retry_num = 0;

View File

@ -167,6 +167,12 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config,
digest_enabled = false; /// TODO: support digest
}
#endif
if (config.has("keeper_server.precommit_sleep_ms_for_testing"))
precommit_sleep_ms_for_testing = config.getInt64("keeper_server.precommit_sleep_ms_for_testing");
if (config.has("keeper_server.precommit_sleep_probability_for_testing"))
precommit_sleep_probability_for_testing = config.getDouble("keeper_server.precommit_sleep_probability_for_testing");
}
namespace
@ -558,9 +564,9 @@ void KeeperContext::waitLocalLogsPreprocessedOrShutdown()
local_logs_preprocessed_cv.wait(lock, [this]{ return shutdown_called || local_logs_preprocessed; });
}
const CoordinationSettingsPtr & KeeperContext::getCoordinationSettings() const
const CoordinationSettings & KeeperContext::getCoordinationSettings() const
{
return coordination_settings;
return *coordination_settings;
}
uint64_t KeeperContext::lastCommittedIndex() const

View File

@ -92,7 +92,18 @@ public:
/// returns true if the log is committed, false if timeout happened
bool waitCommittedUpto(uint64_t log_idx, uint64_t wait_timeout_ms);
const CoordinationSettingsPtr & getCoordinationSettings() const;
const CoordinationSettings & getCoordinationSettings() const;
/// Returns the value read from `keeper_server.precommit_sleep_ms_for_testing` during initialize(),
/// or 0 (the member's default) if that key is not present in the config.
int64_t getPrecommitSleepMillisecondsForTesting() const
{
return precommit_sleep_ms_for_testing;
}
/// Returns the value read from `keeper_server.precommit_sleep_probability_for_testing` during initialize(),
/// or 0.0 (the member's default) if that key is not present in the config.
double getPrecommitSleepProbabilityForTesting() const
{
/// The configured value is not validated when it is loaded, so check here that it is a valid probability.
chassert(precommit_sleep_probability_for_testing >= 0 && precommit_sleep_probability_for_testing <= 1);
return precommit_sleep_probability_for_testing;
}
private:
/// local disk defined using path or disk name
@ -151,6 +162,9 @@ private:
std::mutex last_committed_log_idx_cv_mutex;
std::condition_variable last_committed_log_idx_cv;
int64_t precommit_sleep_ms_for_testing = 0;
double precommit_sleep_probability_for_testing = 0.0;
CoordinationSettingsPtr coordination_settings;
};

View File

@ -45,6 +45,18 @@ using namespace std::chrono_literals;
namespace DB
{
/// Handles for the coordination settings read in this file. The objects are defined where
/// LIST_OF_COORDINATION_SETTINGS is expanded with INITIALIZE_SETTING_EXTERN, and are passed to
/// CoordinationSettings::operator[] to access the corresponding field.
namespace CoordinationSetting
{
extern const CoordinationSettingsMilliseconds dead_session_check_period_ms;
extern const CoordinationSettingsUInt64 max_request_queue_size;
extern const CoordinationSettingsUInt64 max_requests_batch_bytes_size;
extern const CoordinationSettingsUInt64 max_requests_batch_size;
extern const CoordinationSettingsMilliseconds operation_timeout_ms;
extern const CoordinationSettingsBool quorum_reads;
extern const CoordinationSettingsMilliseconds session_shutdown_timeout;
extern const CoordinationSettingsMilliseconds sleep_before_leader_change_ms;
}
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
@ -130,10 +142,10 @@ void KeeperDispatcher::requestThread()
{
KeeperStorageBase::RequestForSession request;
auto coordination_settings = configuration_and_settings->coordination_settings;
uint64_t max_wait = coordination_settings->operation_timeout_ms.totalMilliseconds();
uint64_t max_batch_bytes_size = coordination_settings->max_requests_batch_bytes_size;
size_t max_batch_size = coordination_settings->max_requests_batch_size;
const auto & coordination_settings = configuration_and_settings->coordination_settings;
uint64_t max_wait = coordination_settings[CoordinationSetting::operation_timeout_ms].totalMilliseconds();
uint64_t max_batch_bytes_size = coordination_settings[CoordinationSetting::max_requests_batch_bytes_size];
size_t max_batch_size = coordination_settings[CoordinationSetting::max_requests_batch_size];
/// The code below does a very simple thing: it batches all write (quorum) requests into a vector until
/// the previous write batch is finished or the max_batch size is reached. The main complexity comes from
@ -175,7 +187,7 @@ void KeeperDispatcher::requestThread()
/// Otherwise we will process it locally.
if (request.request->getOpNum() == Coordination::OpNum::Reconfig)
has_reconfig_request = true;
else if (coordination_settings->quorum_reads || !request.request->isReadRequest())
else if (coordination_settings[CoordinationSetting::quorum_reads] || !request.request->isReadRequest())
{
current_batch_bytes_size += request.request->bytesSize();
current_batch.emplace_back(request);
@ -187,7 +199,7 @@ void KeeperDispatcher::requestThread()
{
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequests);
/// Don't append read request into batch, we have to process them separately
if (!coordination_settings->quorum_reads && request.request->isReadRequest())
if (!coordination_settings[CoordinationSetting::quorum_reads] && request.request->isReadRequest())
{
const auto & last_request = current_batch.back();
std::lock_guard lock(read_request_queue_mutex);
@ -287,7 +299,7 @@ void KeeperDispatcher::requestThread()
auto log_idx = bs.get_u64();
/// if timeout happened set error responses for the requests
if (!keeper_context->waitCommittedUpto(log_idx, coordination_settings->operation_timeout_ms.totalMilliseconds()))
if (!keeper_context->waitCommittedUpto(log_idx, coordination_settings[CoordinationSetting::operation_timeout_ms].totalMilliseconds()))
addErrorResponses(prev_batch, Coordination::Error::ZOPERATIONTIMEOUT);
if (shutdown_called)
@ -325,7 +337,7 @@ void KeeperDispatcher::responseThread()
{
KeeperStorageBase::ResponseForSession response_for_session;
uint64_t max_wait = configuration_and_settings->coordination_settings->operation_timeout_ms.totalMilliseconds();
uint64_t max_wait = configuration_and_settings->coordination_settings[CoordinationSetting::operation_timeout_ms].totalMilliseconds();
if (responses_queue.tryPop(response_for_session, max_wait))
{
@ -429,7 +441,7 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ
if (!requests_queue->push(std::move(request_info)))
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push request to queue");
}
else if (!requests_queue->tryPush(std::move(request_info), configuration_and_settings->coordination_settings->operation_timeout_ms.totalMilliseconds()))
else if (!requests_queue->tryPush(std::move(request_info), configuration_and_settings->coordination_settings[CoordinationSetting::operation_timeout_ms].totalMilliseconds()))
{
throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot push request to queue within operation timeout");
}
@ -442,11 +454,11 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf
LOG_DEBUG(log, "Initializing storage dispatcher");
configuration_and_settings = KeeperConfigurationAndSettings::loadFromConfig(config, standalone_keeper);
keeper_context = std::make_shared<KeeperContext>(standalone_keeper, configuration_and_settings->coordination_settings);
keeper_context = std::make_shared<KeeperContext>(standalone_keeper, std::make_shared<CoordinationSettings>(configuration_and_settings->coordination_settings));
keeper_context->initialize(config, this);
requests_queue = std::make_unique<RequestsQueue>(configuration_and_settings->coordination_settings->max_request_queue_size);
requests_queue = std::make_unique<RequestsQueue>(configuration_and_settings->coordination_settings[CoordinationSetting::max_request_queue_size]);
request_thread = ThreadFromGlobalPool([this] { requestThread(); });
responses_thread = ThreadFromGlobalPool([this] { responseThread(); });
snapshot_thread = ThreadFromGlobalPool([this] { snapshotThread(); });
@ -605,7 +617,7 @@ void KeeperDispatcher::shutdown()
nuraft::cmd_result<nuraft::ptr<nuraft::buffer>> & /*result*/,
nuraft::ptr<std::exception> & /*exception*/) { my_sessions_closing_done_promise->set_value(); });
auto session_shutdown_timeout = configuration_and_settings->coordination_settings->session_shutdown_timeout.totalMilliseconds();
auto session_shutdown_timeout = configuration_and_settings->coordination_settings[CoordinationSetting::session_shutdown_timeout].totalMilliseconds();
if (sessions_closing_done.wait_for(std::chrono::milliseconds(session_shutdown_timeout)) != std::future_status::ready)
LOG_WARNING(
log,
@ -697,7 +709,7 @@ void KeeperDispatcher::sessionCleanerTask()
tryLogCurrentException(__PRETTY_FUNCTION__);
}
auto time_to_sleep = configuration_and_settings->coordination_settings->dead_session_check_period_ms.totalMilliseconds();
auto time_to_sleep = configuration_and_settings->coordination_settings[CoordinationSetting::dead_session_check_period_ms].totalMilliseconds();
std::this_thread::sleep_for(std::chrono::milliseconds(time_to_sleep));
}
}
@ -898,7 +910,7 @@ void KeeperDispatcher::clusterUpdateThread()
LOG_DEBUG(log, "Processing config update {}: declined, backoff", action);
std::this_thread::sleep_for(last_command_was_leader_change
? configuration_and_settings->coordination_settings->sleep_before_leader_change_ms
? configuration_and_settings->coordination_settings[CoordinationSetting::sleep_before_leader_change_ms]
: 50ms);
}
}
@ -964,7 +976,7 @@ static uint64_t getTotalSize(const DiskPtr & disk, const std::string & path = ""
uint64_t size = 0;
for (auto it = disk->iterateDirectory(path); it->isValid(); it->next())
{
if (disk->isFile(it->path()))
if (disk->existsFile(it->path()))
size += disk->getFileSize(it->path());
else
size += getTotalSize(disk, it->path());

View File

@ -51,6 +51,30 @@
namespace DB
{
/// Coordination settings referenced in this file.
/// Each entry is only declared here (`extern const`, no initializer), so its definition
/// has to be provided elsewhere. The names are used as keys when indexing a settings
/// object, e.g. `coordination_settings[CoordinationSetting::quorum_reads]`.
/// The list is kept in alphabetical order.
namespace CoordinationSetting
{
extern const CoordinationSettingsBool async_replication;
extern const CoordinationSettingsBool auto_forwarding;
extern const CoordinationSettingsUInt64 configuration_change_tries_count;
extern const CoordinationSettingsMilliseconds election_timeout_lower_bound_ms;
extern const CoordinationSettingsMilliseconds election_timeout_upper_bound_ms;
extern const CoordinationSettingsBool experimental_use_rocksdb;
extern const CoordinationSettingsUInt64 fresh_log_gap;
extern const CoordinationSettingsMilliseconds heart_beat_interval_ms;
extern const CoordinationSettingsMilliseconds leadership_expiry_ms;
extern const CoordinationSettingsUInt64 max_requests_append_size;
extern const CoordinationSettingsMilliseconds operation_timeout_ms;
extern const CoordinationSettingsBool quorum_reads;
extern const CoordinationSettingsUInt64 raft_limits_reconnect_limit;
extern const CoordinationSettingsUInt64 raft_limits_response_limit;
extern const CoordinationSettingsLogsLevel raft_logs_level;
extern const CoordinationSettingsUInt64 reserved_log_items;
extern const CoordinationSettingsMilliseconds shutdown_timeout;
extern const CoordinationSettingsUInt64 snapshot_distance;
extern const CoordinationSettingsUInt64 stale_log_gap;
extern const CoordinationSettingsMilliseconds startup_timeout;
}
namespace ErrorCodes
{
extern const int RAFT_ERROR;
@ -189,12 +213,12 @@ KeeperServer::KeeperServer(
, create_snapshot_on_exit(config.getBool("keeper_server.create_snapshot_on_exit", true))
, enable_reconfiguration(config.getBool("keeper_server.enable_reconfiguration", false))
{
if (keeper_context->getCoordinationSettings()->quorum_reads)
if (keeper_context->getCoordinationSettings()[CoordinationSetting::quorum_reads])
LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower.");
#if USE_ROCKSDB
const auto & coordination_settings = keeper_context->getCoordinationSettings();
if (coordination_settings->experimental_use_rocksdb)
if (coordination_settings[CoordinationSetting::experimental_use_rocksdb])
{
state_machine = nuraft::cs_new<KeeperStateMachine<KeeperRocksStorage>>(
responses_queue_,
@ -302,7 +326,7 @@ void KeeperServer::loadLatestConfig()
{
auto latest_snapshot_config = state_machine->getClusterConfig();
auto latest_log_store_config = state_manager->getLatestConfigFromLogStore();
auto async_replication = keeper_context->getCoordinationSettings()->async_replication;
auto async_replication = keeper_context->getCoordinationSettings()[CoordinationSetting::async_replication];
if (latest_snapshot_config && latest_log_store_config)
{
@ -374,11 +398,11 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co
nuraft::raft_params params;
params.parallel_log_appending_ = true;
params.heart_beat_interval_
= getValueOrMaxInt32AndLogWarning(coordination_settings->heart_beat_interval_ms.totalMilliseconds(), "heart_beat_interval_ms", log);
= getValueOrMaxInt32AndLogWarning(coordination_settings[CoordinationSetting::heart_beat_interval_ms].totalMilliseconds(), "heart_beat_interval_ms", log);
params.election_timeout_lower_bound_ = getValueOrMaxInt32AndLogWarning(
coordination_settings->election_timeout_lower_bound_ms.totalMilliseconds(), "election_timeout_lower_bound_ms", log);
coordination_settings[CoordinationSetting::election_timeout_lower_bound_ms].totalMilliseconds(), "election_timeout_lower_bound_ms", log);
params.election_timeout_upper_bound_ = getValueOrMaxInt32AndLogWarning(
coordination_settings->election_timeout_upper_bound_ms.totalMilliseconds(), "election_timeout_upper_bound_ms", log);
coordination_settings[CoordinationSetting::election_timeout_upper_bound_ms].totalMilliseconds(), "election_timeout_upper_bound_ms", log);
if (params.election_timeout_lower_bound_ || params.election_timeout_upper_bound_)
{
@ -393,7 +417,7 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co
}
params.leadership_expiry_ = getValueOrMaxInt32AndLogWarning(
coordination_settings->leadership_expiry_ms.totalMilliseconds(), "leadership_expiry_ms", log);
coordination_settings[CoordinationSetting::leadership_expiry_ms].totalMilliseconds(), "leadership_expiry_ms", log);
if (params.leadership_expiry_ > 0 && params.leadership_expiry_ <= params.election_timeout_lower_bound_)
{
@ -404,25 +428,25 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co
);
}
params.reserved_log_items_ = getValueOrMaxInt32AndLogWarning(coordination_settings->reserved_log_items, "reserved_log_items", log);
params.snapshot_distance_ = getValueOrMaxInt32AndLogWarning(coordination_settings->snapshot_distance, "snapshot_distance", log);
params.reserved_log_items_ = getValueOrMaxInt32AndLogWarning(coordination_settings[CoordinationSetting::reserved_log_items], "reserved_log_items", log);
params.snapshot_distance_ = getValueOrMaxInt32AndLogWarning(coordination_settings[CoordinationSetting::snapshot_distance], "snapshot_distance", log);
if (params.snapshot_distance_ < 10000)
LOG_WARNING(log, "Very small snapshot_distance {} specified in coordination settings. "
"It doesn't make sense to specify such small value, because it can lead to degraded performance and another issues.", params.snapshot_distance_);
params.stale_log_gap_ = getValueOrMaxInt32AndLogWarning(coordination_settings->stale_log_gap, "stale_log_gap", log);
params.fresh_log_gap_ = getValueOrMaxInt32AndLogWarning(coordination_settings->fresh_log_gap, "fresh_log_gap", log);
params.stale_log_gap_ = getValueOrMaxInt32AndLogWarning(coordination_settings[CoordinationSetting::stale_log_gap], "stale_log_gap", log);
params.fresh_log_gap_ = getValueOrMaxInt32AndLogWarning(coordination_settings[CoordinationSetting::fresh_log_gap], "fresh_log_gap", log);
params.client_req_timeout_
= getValueOrMaxInt32AndLogWarning(coordination_settings->operation_timeout_ms.totalMilliseconds(), "operation_timeout_ms", log);
params.auto_forwarding_ = coordination_settings->auto_forwarding;
= getValueOrMaxInt32AndLogWarning(coordination_settings[CoordinationSetting::operation_timeout_ms].totalMilliseconds(), "operation_timeout_ms", log);
params.auto_forwarding_ = coordination_settings[CoordinationSetting::auto_forwarding];
params.auto_forwarding_req_timeout_ = std::max<int32_t>(
static_cast<int32_t>(coordination_settings->operation_timeout_ms.totalMilliseconds() * 2),
static_cast<int32_t>(coordination_settings[CoordinationSetting::operation_timeout_ms].totalMilliseconds() * 2),
std::numeric_limits<int32_t>::max());
params.auto_forwarding_req_timeout_
= getValueOrMaxInt32AndLogWarning(coordination_settings->operation_timeout_ms.totalMilliseconds() * 2, "operation_timeout_ms", log);
= getValueOrMaxInt32AndLogWarning(coordination_settings[CoordinationSetting::operation_timeout_ms].totalMilliseconds() * 2, "operation_timeout_ms", log);
params.max_append_size_
= getValueOrMaxInt32AndLogWarning(coordination_settings->max_requests_append_size, "max_requests_append_size", log);
= getValueOrMaxInt32AndLogWarning(coordination_settings[CoordinationSetting::max_requests_append_size], "max_requests_append_size", log);
params.return_method_ = nuraft::raft_params::async_handler;
@ -464,7 +488,7 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co
init_options.start_server_in_constructor_ = false;
init_options.raft_callback_ = [this](nuraft::cb_func::Type type, nuraft::cb_func::Param * param) { return callbackFunc(type, param); };
nuraft::ptr<nuraft::logger> logger = nuraft::cs_new<LoggerWrapper>("RaftInstance", coordination_settings->raft_logs_level);
nuraft::ptr<nuraft::logger> logger = nuraft::cs_new<LoggerWrapper>("RaftInstance", coordination_settings[CoordinationSetting::raft_logs_level]);
asio_service = nuraft::cs_new<nuraft::asio_service>(asio_opts, logger);
// we use the same config as for the CH replicas because it is for internal communication between Keeper instances
@ -505,8 +529,8 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co
state_manager->getLogStore()->setRaftServer(raft_instance);
nuraft::raft_server::limits raft_limits;
raft_limits.reconnect_limit_ = getValueOrMaxInt32AndLogWarning(coordination_settings->raft_limits_reconnect_limit, "raft_limits_reconnect_limit", log);
raft_limits.response_limit_ = getValueOrMaxInt32AndLogWarning(coordination_settings->raft_limits_response_limit, "response_limit", log);
raft_limits.reconnect_limit_ = getValueOrMaxInt32AndLogWarning(coordination_settings[CoordinationSetting::raft_limits_reconnect_limit], "raft_limits_reconnect_limit", log);
raft_limits.response_limit_ = getValueOrMaxInt32AndLogWarning(coordination_settings[CoordinationSetting::raft_limits_response_limit], "response_limit", log);
raft_instance->set_raft_limits(raft_limits);
raft_instance->start_server(init_options.skip_initial_election_timeout_);
@ -527,7 +551,7 @@ void KeeperServer::startup(const Poco::Util::AbstractConfiguration & config, boo
const auto & coordination_settings = keeper_context->getCoordinationSettings();
state_manager->loadLogStore(state_machine->last_commit_index() + 1, coordination_settings->reserved_log_items);
state_manager->loadLogStore(state_machine->last_commit_index() + 1, coordination_settings[CoordinationSetting::reserved_log_items]);
auto log_store = state_manager->load_log_store();
last_log_idx_on_disk = log_store->next_slot() - 1;
@ -537,7 +561,7 @@ void KeeperServer::startup(const Poco::Util::AbstractConfiguration & config, boo
loadLatestConfig();
last_local_config = state_manager->parseServersConfiguration(config, true, coordination_settings->async_replication).cluster_config;
last_local_config = state_manager->parseServersConfiguration(config, true, coordination_settings[CoordinationSetting::async_replication]).cluster_config;
launchRaftServer(config, enable_ipv6);
@ -546,7 +570,7 @@ void KeeperServer::startup(const Poco::Util::AbstractConfiguration & config, boo
void KeeperServer::shutdownRaftServer()
{
size_t timeout = keeper_context->getCoordinationSettings()->shutdown_timeout.totalSeconds();
size_t timeout = keeper_context->getCoordinationSettings()[CoordinationSetting::shutdown_timeout].totalSeconds();
if (!raft_instance)
{
@ -959,7 +983,7 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ
/// Node first became leader, and after that some other node became leader.
/// BecameFresh for this node will not be called because it was already fresh
/// when it was leader.
if (leader_index < our_index + keeper_context->getCoordinationSettings()->fresh_log_gap)
if (leader_index < our_index + keeper_context->getCoordinationSettings()[CoordinationSetting::fresh_log_gap])
set_initialized();
}
return nuraft::cb_func::ReturnCode::Ok;
@ -994,7 +1018,7 @@ void KeeperServer::waitInit()
{
std::unique_lock lock(initialized_mutex);
int64_t timeout = keeper_context->getCoordinationSettings()->startup_timeout.totalMilliseconds();
int64_t timeout = keeper_context->getCoordinationSettings()[CoordinationSetting::startup_timeout].totalMilliseconds();
if (!initialized_cv.wait_for(lock, std::chrono::milliseconds(timeout), [&] { return initialized_flag.load(); }))
LOG_WARNING(log, "Failed to wait for RAFT initialization in {}ms, will continue in background", timeout);
}
@ -1072,7 +1096,7 @@ ClusterUpdateActions KeeperServer::getRaftConfigurationDiff(const Poco::Util::Ab
if (!diff.empty())
{
std::lock_guard lock{server_write_mutex};
last_local_config = state_manager->parseServersConfiguration(config, true, coordination_settings->async_replication).cluster_config;
last_local_config = state_manager->parseServersConfiguration(config, true, coordination_settings[CoordinationSetting::async_replication]).cluster_config;
}
return diff;
@ -1099,7 +1123,7 @@ void KeeperServer::applyConfigUpdateWithReconfigDisabled(const ClusterUpdateActi
const auto & coordination_settings = keeper_context->getCoordinationSettings();
if (const auto * add = std::get_if<AddRaftServer>(&action))
{
for (size_t i = 0; i < coordination_settings->configuration_change_tries_count && !is_recovering; ++i)
for (size_t i = 0; i < coordination_settings[CoordinationSetting::configuration_change_tries_count] && !is_recovering; ++i)
{
if (raft_instance->get_srv_config(add->id) != nullptr)
return applied(); // NOLINT
@ -1121,7 +1145,7 @@ void KeeperServer::applyConfigUpdateWithReconfigDisabled(const ClusterUpdateActi
return;
}
for (size_t i = 0; i < coordination_settings->configuration_change_tries_count && !is_recovering; ++i)
for (size_t i = 0; i < coordination_settings[CoordinationSetting::configuration_change_tries_count] && !is_recovering; ++i)
{
if (raft_instance->get_srv_config(remove->id) == nullptr)
return applied(); // NOLINT
@ -1139,7 +1163,7 @@ void KeeperServer::applyConfigUpdateWithReconfigDisabled(const ClusterUpdateActi
throw Exception(ErrorCodes::RAFT_ERROR,
"Configuration change {} was not accepted by Raft after {} retries",
action, coordination_settings->configuration_change_tries_count);
action, coordination_settings[CoordinationSetting::configuration_change_tries_count]);
}
bool KeeperServer::waitForConfigUpdateWithReconfigDisabled(const ClusterUpdateAction& action)
@ -1156,7 +1180,7 @@ bool KeeperServer::waitForConfigUpdateWithReconfigDisabled(const ClusterUpdateAc
const auto & coordination_settings = keeper_context->getCoordinationSettings();
if (const auto* add = std::get_if<AddRaftServer>(&action))
{
for (size_t i = 0; i < coordination_settings->configuration_change_tries_count && !is_recovering; ++i)
for (size_t i = 0; i < coordination_settings[CoordinationSetting::configuration_change_tries_count] && !is_recovering; ++i)
{
if (raft_instance->get_srv_config(add->id) != nullptr)
return applied();
@ -1167,7 +1191,7 @@ bool KeeperServer::waitForConfigUpdateWithReconfigDisabled(const ClusterUpdateAc
}
else if (const auto* remove = std::get_if<RemoveRaftServer>(&action))
{
for (size_t i = 0; i < coordination_settings->configuration_change_tries_count && !is_recovering; ++i)
for (size_t i = 0; i < coordination_settings[CoordinationSetting::configuration_change_tries_count] && !is_recovering; ++i)
{
if (raft_instance->get_srv_config(remove->id) == nullptr)
return applied();

View File

@ -913,7 +913,7 @@ SnapshotFileInfoPtr KeeperSnapshotManager<Storage>::getLatestSnapshotInfo() cons
try
{
if (disk->exists(path))
if (disk->existsFile(path))
return std::make_shared<SnapshotFileInfo>(path, disk);
}
catch (...)

View File

@ -4,6 +4,7 @@
#include <Coordination/CoordinationSettings.h>
#include <Coordination/KeeperDispatcher.h>
#include <Coordination/KeeperReconfiguration.h>
#include <Common/thread_local_rng.h>
#include <Coordination/KeeperSnapshotManager.h>
#include <Coordination/KeeperStateMachine.h>
#include <Coordination/KeeperStorage.h>
@ -44,6 +45,14 @@ namespace CurrentMetrics
namespace DB
{
/// Coordination settings referenced in this file.
/// Declared `extern const` without initializers, so the definitions live elsewhere.
/// They are used as keys into the settings object returned by
/// `keeper_context->getCoordinationSettings()`, e.g.
/// `getCoordinationSettings()[CoordinationSetting::snapshots_to_keep]`.
namespace CoordinationSetting
{
extern const CoordinationSettingsBool compress_snapshots_with_zstd_format;
extern const CoordinationSettingsMilliseconds dead_session_check_period_ms;
extern const CoordinationSettingsUInt64 min_request_size_for_cache;
extern const CoordinationSettingsUInt64 snapshots_to_keep;
}
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
@ -59,7 +68,7 @@ IKeeperStateMachine::IKeeperStateMachine(
: commit_callback(commit_callback_)
, responses_queue(responses_queue_)
, snapshots_queue(snapshots_queue_)
, min_request_size_to_cache(keeper_context_->getCoordinationSettings()->min_request_size_for_cache)
, min_request_size_to_cache(keeper_context_->getCoordinationSettings()[CoordinationSetting::min_request_size_for_cache])
, log(getLogger("KeeperStateMachine"))
, read_pool(CurrentMetrics::KeeperAliveConnections, CurrentMetrics::KeeperAliveConnections, CurrentMetrics::KeeperAliveConnections, 100, 10000, 10000)
, superdigest(superdigest_)
@ -86,11 +95,11 @@ KeeperStateMachine<Storage>::KeeperStateMachine(
commit_callback_,
superdigest_),
snapshot_manager(
keeper_context_->getCoordinationSettings()->snapshots_to_keep,
keeper_context_->getCoordinationSettings()[CoordinationSetting::snapshots_to_keep],
keeper_context_,
keeper_context_->getCoordinationSettings()->compress_snapshots_with_zstd_format,
keeper_context_->getCoordinationSettings()[CoordinationSetting::compress_snapshots_with_zstd_format],
superdigest_,
keeper_context_->getCoordinationSettings()->dead_session_check_period_ms.totalMilliseconds())
keeper_context_->getCoordinationSettings()[CoordinationSetting::dead_session_check_period_ms].totalMilliseconds())
{
}
@ -151,7 +160,7 @@ void KeeperStateMachine<Storage>::init()
if (!storage)
storage = std::make_unique<Storage>(
keeper_context->getCoordinationSettings()->dead_session_check_period_ms.totalMilliseconds(), superdigest, keeper_context);
keeper_context->getCoordinationSettings()[CoordinationSetting::dead_session_check_period_ms].totalMilliseconds(), superdigest, keeper_context);
}
namespace
@ -202,6 +211,18 @@ struct LockGuardWithStats final
template<typename Storage>
nuraft::ptr<nuraft::buffer> KeeperStateMachine<Storage>::pre_commit(uint64_t log_idx, nuraft::buffer & data)
{
double sleep_probability = keeper_context->getPrecommitSleepProbabilityForTesting();
int64_t sleep_ms = keeper_context->getPrecommitSleepMillisecondsForTesting();
if (sleep_ms != 0 && sleep_probability != 0)
{
std::uniform_real_distribution<double> distribution{0., 1.};
if (distribution(thread_local_rng) > (1 - sleep_probability))
{
LOG_WARNING(log, "Precommit sleep enabled, will pause for {} ms", sleep_ms);
std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms));
}
}
auto result = nuraft::buffer::alloc(sizeof(log_idx));
nuraft::buffer_serializer ss(result);
ss.put_u64(log_idx);

View File

@ -11,6 +11,8 @@
namespace DB
{
/// Forward declaration: an incomplete type is sufficient for declaring the
/// shared_ptr alias below.
struct CoordinationSettings;
/// Shared-ownership handle to a CoordinationSettings object.
using CoordinationSettingsPtr = std::shared_ptr<CoordinationSettings>;
using ResponsesQueue = ConcurrentBoundedQueue<KeeperStorageBase::ResponseForSession>;
using SnapshotsQueue = ConcurrentBoundedQueue<CreateSnapshotTask>;

View File

@ -15,6 +15,19 @@
namespace DB
{
/// Coordination settings referenced in this file.
/// Declared `extern const` without initializers, so the definitions live elsewhere.
/// They are used as keys into a settings object, e.g.
/// `keeper_context_->getCoordinationSettings()[CoordinationSetting::force_sync]`.
/// The list is kept in alphabetical order.
namespace CoordinationSetting
{
extern const CoordinationSettingsBool async_replication;
extern const CoordinationSettingsUInt64 commit_logs_cache_size_threshold;
extern const CoordinationSettingsBool compress_logs;
extern const CoordinationSettingsBool force_sync;
extern const CoordinationSettingsUInt64 latest_logs_cache_size_threshold;
extern const CoordinationSettingsUInt64 log_file_overallocate_size;
extern const CoordinationSettingsUInt64 max_flush_batch_size;
extern const CoordinationSettingsUInt64 max_log_file_size;
extern const CoordinationSettingsUInt64 rotate_log_storage_interval;
}
namespace ErrorCodes
{
extern const int RAFT_ERROR;
@ -244,21 +257,21 @@ KeeperStateManager::KeeperStateManager(
: my_server_id(my_server_id_)
, secure(config.getBool(config_prefix_ + ".raft_configuration.secure", false))
, config_prefix(config_prefix_)
, configuration_wrapper(parseServersConfiguration(config, false, keeper_context_->getCoordinationSettings()->async_replication))
, configuration_wrapper(parseServersConfiguration(config, false, keeper_context_->getCoordinationSettings()[CoordinationSetting::async_replication]))
, log_store(nuraft::cs_new<KeeperLogStore>(
LogFileSettings
{
.force_sync = keeper_context_->getCoordinationSettings()->force_sync,
.compress_logs = keeper_context_->getCoordinationSettings()->compress_logs,
.rotate_interval = keeper_context_->getCoordinationSettings()->rotate_log_storage_interval,
.max_size = keeper_context_->getCoordinationSettings()->max_log_file_size,
.overallocate_size = keeper_context_->getCoordinationSettings()->log_file_overallocate_size,
.latest_logs_cache_size_threshold = keeper_context_->getCoordinationSettings()->latest_logs_cache_size_threshold,
.commit_logs_cache_size_threshold = keeper_context_->getCoordinationSettings()->commit_logs_cache_size_threshold
.force_sync = keeper_context_->getCoordinationSettings()[CoordinationSetting::force_sync],
.compress_logs = keeper_context_->getCoordinationSettings()[CoordinationSetting::compress_logs],
.rotate_interval = keeper_context_->getCoordinationSettings()[CoordinationSetting::rotate_log_storage_interval],
.max_size = keeper_context_->getCoordinationSettings()[CoordinationSetting::max_log_file_size],
.overallocate_size = keeper_context_->getCoordinationSettings()[CoordinationSetting::log_file_overallocate_size],
.latest_logs_cache_size_threshold = keeper_context_->getCoordinationSettings()[CoordinationSetting::latest_logs_cache_size_threshold],
.commit_logs_cache_size_threshold = keeper_context_->getCoordinationSettings()[CoordinationSetting::commit_logs_cache_size_threshold]
},
FlushSettings
{
.max_flush_batch_size = keeper_context_->getCoordinationSettings()->max_flush_batch_size,
.max_flush_batch_size = keeper_context_->getCoordinationSettings()[CoordinationSetting::max_flush_batch_size],
},
keeper_context_))
, server_state_file_name(server_state_file_name_)
@ -334,7 +347,7 @@ void KeeperStateManager::save_state(const nuraft::srv_state & state)
auto disk = getStateFileDisk();
if (disk->exists(server_state_file_name))
if (disk->existsFile(server_state_file_name))
{
auto buf = disk->writeFile(copy_lock_file);
buf->finalize();
@ -422,7 +435,7 @@ nuraft::ptr<nuraft::srv_state> KeeperStateManager::read_state()
}
};
if (disk->exists(server_state_file_name))
if (disk->existsFile(server_state_file_name))
{
auto state = try_read_file(server_state_file_name);
@ -435,9 +448,9 @@ nuraft::ptr<nuraft::srv_state> KeeperStateManager::read_state()
disk->removeFile(server_state_file_name);
}
if (disk->exists(old_path))
if (disk->existsFile(old_path))
{
if (disk->exists(copy_lock_file))
if (disk->existsFile(copy_lock_file))
{
disk->removeFile(old_path);
disk->removeFile(copy_lock_file);
@ -453,7 +466,7 @@ nuraft::ptr<nuraft::srv_state> KeeperStateManager::read_state()
disk->removeFile(old_path);
}
}
else if (disk->exists(copy_lock_file))
else if (disk->existsFile(copy_lock_file))
{
disk->removeFile(copy_lock_file);
}
@ -468,9 +481,9 @@ nuraft::ptr<nuraft::srv_state> KeeperStateManager::read_state()
}
ClusterUpdateActions KeeperStateManager::getRaftConfigurationDiff(
const Poco::Util::AbstractConfiguration & config, const CoordinationSettingsPtr & coordination_settings) const
const Poco::Util::AbstractConfiguration & config, const CoordinationSettings & coordination_settings) const
{
auto new_configuration_wrapper = parseServersConfiguration(config, true, coordination_settings->async_replication);
auto new_configuration_wrapper = parseServersConfiguration(config, true, coordination_settings[CoordinationSetting::async_replication]);
std::unordered_map<int, KeeperServerConfigPtr> new_ids, old_ids;
for (const auto & new_server : new_configuration_wrapper.cluster_config->get_servers())

View File

@ -91,7 +91,7 @@ public:
ClusterConfigPtr getLatestConfigFromLogStore() const;
// TODO (myrrc) This should be removed once "reconfig" is stabilized
ClusterUpdateActions getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config, const CoordinationSettingsPtr & coordination_settings) const;
ClusterUpdateActions getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config, const CoordinationSettings & coordination_settings) const;
private:
const String & getOldServerStatePath();

View File

@ -50,6 +50,11 @@ namespace ProfileEvents
namespace DB
{
/// Coordination settings referenced in this file.
/// Declared `extern const` without an initializer, so the definition lives elsewhere.
/// Used as a key into the settings object, e.g.
/// `keeper_context->getCoordinationSettings()[CoordinationSetting::log_slow_cpu_threshold_ms]`.
namespace CoordinationSetting
{
extern const CoordinationSettingsUInt64 log_slow_cpu_threshold_ms;
}
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
@ -2926,7 +2931,7 @@ void KeeperStorage<Container>::preprocessRequest(
Stopwatch watch;
SCOPE_EXIT({
auto elapsed = watch.elapsedMicroseconds();
if (auto elapsed_ms = elapsed / 1000; elapsed_ms > keeper_context->getCoordinationSettings()->log_slow_cpu_threshold_ms)
if (auto elapsed_ms = elapsed / 1000; elapsed_ms > keeper_context->getCoordinationSettings()[CoordinationSetting::log_slow_cpu_threshold_ms])
{
LOG_INFO(
getLogger("KeeperStorage"),
@ -3116,7 +3121,7 @@ KeeperStorage<Container>::ResponsesForSessions KeeperStorage<Container>::process
Stopwatch watch;
SCOPE_EXIT({
auto elapsed = watch.elapsedMicroseconds();
if (auto elapsed_ms = elapsed / 1000; elapsed_ms > keeper_context->getCoordinationSettings()->log_slow_cpu_threshold_ms)
if (auto elapsed_ms = elapsed / 1000; elapsed_ms > keeper_context->getCoordinationSettings()[CoordinationSetting::log_slow_cpu_threshold_ms])
{
LOG_INFO(
getLogger("KeeperStorage"),

View File

@ -1,6 +1,5 @@
#pragma once
#include <base/StringRef.h>
#include <Coordination/CoordinationSettings.h>
#include <Coordination/KeeperContext.h>
#include <Common/SipHash.h>
#include <Disks/DiskLocal.h>

View File

@ -38,6 +38,14 @@
#include <Coordination/SnapshotableHashTable.h>
/// Coordination settings the tests in this file read or override through
/// `(*settings)[DB::CoordinationSetting::<name>]`.
/// Only declared here (`extern const`); the definitions live elsewhere.
/// Listed alphabetically.
namespace DB::CoordinationSetting
{
extern const CoordinationSettingsBool experimental_use_rocksdb;
extern const CoordinationSettingsUInt64 reserved_log_items;
extern const CoordinationSettingsUInt64 rotate_log_storage_interval;
extern const CoordinationSettingsUInt64 snapshot_distance;
}
namespace fs = std::filesystem;
struct ChangelogDirTest
{
@ -87,7 +95,7 @@ public:
Poco::Logger::root().setLevel("trace");
auto settings = std::make_shared<DB::CoordinationSettings>();
settings->experimental_use_rocksdb = true;
(*settings)[DB::CoordinationSetting::experimental_use_rocksdb] = true;
keeper_context = std::make_shared<DB::KeeperContext>(true, settings);
keeper_context->setLocalLogsPreprocessed();
keeper_context->setRocksDBOptions();
@ -1864,10 +1872,10 @@ void testLogAndStateMachine(
state_machine->init();
DB::KeeperLogStore changelog(
DB::LogFileSettings{
.force_sync = true, .compress_logs = enable_compression, .rotate_interval = settings->rotate_log_storage_interval},
.force_sync = true, .compress_logs = enable_compression, .rotate_interval = (*settings)[DB::CoordinationSetting::rotate_log_storage_interval]},
DB::FlushSettings(),
keeper_context);
changelog.init(state_machine->last_commit_index() + 1, settings->reserved_log_items);
changelog.init(state_machine->last_commit_index() + 1, (*settings)[DB::CoordinationSetting::reserved_log_items]);
for (size_t i = 1; i < total_logs + 1; ++i)
{
@ -1882,7 +1890,7 @@ void testLogAndStateMachine(
state_machine->pre_commit(i, changelog.entry_at(i)->get_buf());
state_machine->commit(i, changelog.entry_at(i)->get_buf());
bool snapshot_created = false;
if (i % settings->snapshot_distance == 0)
if (i % (*settings)[DB::CoordinationSetting::snapshot_distance] == 0)
{
nuraft::snapshot s(i, 0, std::make_shared<nuraft::cluster_config>());
nuraft::async_result<bool>::handler_type when_done
@ -1900,28 +1908,28 @@ void testLogAndStateMachine(
snapshot_task.create_snapshot(std::move(snapshot_task.snapshot), /*execute_only_cleanup=*/false);
}
if (snapshot_created && changelog.size() > settings->reserved_log_items)
changelog.compact(i - settings->reserved_log_items);
if (snapshot_created && changelog.size() > (*settings)[DB::CoordinationSetting::reserved_log_items])
changelog.compact(i - (*settings)[DB::CoordinationSetting::reserved_log_items]);
}
SnapshotsQueue snapshots_queue1{1};
keeper_context = get_keeper_context();
auto restore_machine = std::make_shared<KeeperStateMachine<Storage>>(queue, snapshots_queue1, keeper_context, nullptr);
restore_machine->init();
EXPECT_EQ(restore_machine->last_commit_index(), total_logs - total_logs % settings->snapshot_distance);
EXPECT_EQ(restore_machine->last_commit_index(), total_logs - total_logs % (*settings)[DB::CoordinationSetting::snapshot_distance]);
DB::KeeperLogStore restore_changelog(
DB::LogFileSettings{
.force_sync = true, .compress_logs = enable_compression, .rotate_interval = settings->rotate_log_storage_interval},
.force_sync = true, .compress_logs = enable_compression, .rotate_interval = (*settings)[DB::CoordinationSetting::rotate_log_storage_interval]},
DB::FlushSettings(),
keeper_context);
restore_changelog.init(restore_machine->last_commit_index() + 1, settings->reserved_log_items);
restore_changelog.init(restore_machine->last_commit_index() + 1, (*settings)[DB::CoordinationSetting::reserved_log_items]);
EXPECT_EQ(restore_changelog.size(), std::min(settings->reserved_log_items + total_logs % settings->snapshot_distance, total_logs));
EXPECT_EQ(restore_changelog.size(), std::min((*settings)[DB::CoordinationSetting::reserved_log_items] + total_logs % (*settings)[DB::CoordinationSetting::snapshot_distance], total_logs));
EXPECT_EQ(restore_changelog.next_slot(), total_logs + 1);
if (total_logs > settings->reserved_log_items + 1)
if (total_logs > (*settings)[DB::CoordinationSetting::reserved_log_items] + 1)
EXPECT_EQ(
restore_changelog.start_index(), total_logs - total_logs % settings->snapshot_distance - settings->reserved_log_items + 1);
restore_changelog.start_index(), total_logs - total_logs % (*settings)[DB::CoordinationSetting::snapshot_distance] - (*settings)[DB::CoordinationSetting::reserved_log_items] + 1);
else
EXPECT_EQ(restore_changelog.start_index(), 1);
@ -1951,66 +1959,66 @@ TYPED_TEST(CoordinationTest, TestStateMachineAndLogStore)
{
CoordinationSettingsPtr settings = std::make_shared<CoordinationSettings>();
settings->snapshot_distance = 10;
settings->reserved_log_items = 10;
settings->rotate_log_storage_interval = 10;
(*settings)[DB::CoordinationSetting::snapshot_distance] = 10;
(*settings)[DB::CoordinationSetting::reserved_log_items] = 10;
(*settings)[DB::CoordinationSetting::rotate_log_storage_interval] = 10;
testLogAndStateMachine<Storage>(settings, 37, this->enable_compression);
}
{
CoordinationSettingsPtr settings = std::make_shared<CoordinationSettings>();
settings->snapshot_distance = 10;
settings->reserved_log_items = 10;
settings->rotate_log_storage_interval = 10;
(*settings)[DB::CoordinationSetting::snapshot_distance] = 10;
(*settings)[DB::CoordinationSetting::reserved_log_items] = 10;
(*settings)[DB::CoordinationSetting::rotate_log_storage_interval] = 10;
testLogAndStateMachine<Storage>(settings, 11, this->enable_compression);
}
{
CoordinationSettingsPtr settings = std::make_shared<CoordinationSettings>();
settings->snapshot_distance = 10;
settings->reserved_log_items = 10;
settings->rotate_log_storage_interval = 10;
(*settings)[DB::CoordinationSetting::snapshot_distance] = 10;
(*settings)[DB::CoordinationSetting::reserved_log_items] = 10;
(*settings)[DB::CoordinationSetting::rotate_log_storage_interval] = 10;
testLogAndStateMachine<Storage>(settings, 40, this->enable_compression);
}
{
CoordinationSettingsPtr settings = std::make_shared<CoordinationSettings>();
settings->snapshot_distance = 10;
settings->reserved_log_items = 20;
settings->rotate_log_storage_interval = 30;
(*settings)[DB::CoordinationSetting::snapshot_distance] = 10;
(*settings)[DB::CoordinationSetting::reserved_log_items] = 20;
(*settings)[DB::CoordinationSetting::rotate_log_storage_interval] = 30;
testLogAndStateMachine<Storage>(settings, 40, this->enable_compression);
}
{
CoordinationSettingsPtr settings = std::make_shared<CoordinationSettings>();
settings->snapshot_distance = 10;
settings->reserved_log_items = 0;
settings->rotate_log_storage_interval = 10;
(*settings)[DB::CoordinationSetting::snapshot_distance] = 10;
(*settings)[DB::CoordinationSetting::reserved_log_items] = 0;
(*settings)[DB::CoordinationSetting::rotate_log_storage_interval] = 10;
testLogAndStateMachine<Storage>(settings, 40, this->enable_compression);
}
{
CoordinationSettingsPtr settings = std::make_shared<CoordinationSettings>();
settings->snapshot_distance = 1;
settings->reserved_log_items = 1;
settings->rotate_log_storage_interval = 32;
(*settings)[DB::CoordinationSetting::snapshot_distance] = 1;
(*settings)[DB::CoordinationSetting::reserved_log_items] = 1;
(*settings)[DB::CoordinationSetting::rotate_log_storage_interval] = 32;
testLogAndStateMachine<Storage>(settings, 32, this->enable_compression);
}
{
CoordinationSettingsPtr settings = std::make_shared<CoordinationSettings>();
settings->snapshot_distance = 10;
settings->reserved_log_items = 7;
settings->rotate_log_storage_interval = 1;
(*settings)[DB::CoordinationSetting::snapshot_distance] = 10;
(*settings)[DB::CoordinationSetting::reserved_log_items] = 7;
(*settings)[DB::CoordinationSetting::rotate_log_storage_interval] = 1;
testLogAndStateMachine<Storage>(settings, 33, this->enable_compression);
}
{
CoordinationSettingsPtr settings = std::make_shared<CoordinationSettings>();
settings->snapshot_distance = 37;
settings->reserved_log_items = 1000;
settings->rotate_log_storage_interval = 5000;
(*settings)[DB::CoordinationSetting::snapshot_distance] = 37;
(*settings)[DB::CoordinationSetting::reserved_log_items] = 1000;
(*settings)[DB::CoordinationSetting::rotate_log_storage_interval] = 5000;
testLogAndStateMachine<Storage>(settings, 33, this->enable_compression);
}
{
CoordinationSettingsPtr settings = std::make_shared<CoordinationSettings>();
settings->snapshot_distance = 37;
settings->reserved_log_items = 1000;
settings->rotate_log_storage_interval = 5000;
(*settings)[DB::CoordinationSetting::snapshot_distance] = 37;
(*settings)[DB::CoordinationSetting::reserved_log_items] = 1000;
(*settings)[DB::CoordinationSetting::rotate_log_storage_interval] = 5000;
testLogAndStateMachine<Storage>(settings, 45, this->enable_compression);
}
}

View File

@ -106,7 +106,7 @@ Field getBinaryValue(UInt8 type, ReadBuffer & buf)
case Field::Types::Array:
{
Array value;
readBinary(value, buf);
readBinaryArray(value, buf);
return value;
}
case Field::Types::Tuple:
@ -150,28 +150,22 @@ Field getBinaryValue(UInt8 type, ReadBuffer & buf)
throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown field type {}", std::to_string(type));
}
void readBinary(Array & x, ReadBuffer & buf)
void readBinaryArray(Array & x, ReadBuffer & buf)
{
size_t size;
UInt8 type;
readBinary(type, buf);
readBinary(size, buf);
for (size_t index = 0; index < size; ++index)
x.push_back(getBinaryValue(type, buf));
x.push_back(readFieldBinary(buf));
}
void writeBinary(const Array & x, WriteBuffer & buf)
void writeBinaryArray(const Array & x, WriteBuffer & buf)
{
UInt8 type = Field::Types::Null;
size_t size = x.size();
if (size)
type = x.front().getType();
writeBinary(type, buf);
writeBinary(size, buf);
for (const auto & elem : x)
Field::dispatch([&buf] (const auto & value) { FieldVisitorWriteBinary()(value, buf); }, elem);
writeFieldBinary(elem, buf);
}
void writeText(const Array & x, WriteBuffer & buf)
@ -186,11 +180,7 @@ void readBinary(Tuple & x, ReadBuffer & buf)
readBinary(size, buf);
for (size_t index = 0; index < size; ++index)
{
UInt8 type;
readBinary(type, buf);
x.push_back(getBinaryValue(type, buf));
}
x.push_back(readFieldBinary(buf));
}
void writeBinary(const Tuple & x, WriteBuffer & buf)
@ -199,11 +189,7 @@ void writeBinary(const Tuple & x, WriteBuffer & buf)
writeBinary(size, buf);
for (const auto & elem : x)
{
const UInt8 type = elem.getType();
writeBinary(type, buf);
Field::dispatch([&buf] (const auto & value) { FieldVisitorWriteBinary()(value, buf); }, elem);
}
writeFieldBinary(elem, buf);
}
void writeText(const Tuple & x, WriteBuffer & buf)
@ -217,11 +203,7 @@ void readBinary(Map & x, ReadBuffer & buf)
readBinary(size, buf);
for (size_t index = 0; index < size; ++index)
{
UInt8 type;
readBinary(type, buf);
x.push_back(getBinaryValue(type, buf));
}
x.push_back(readFieldBinary(buf));
}
void writeBinary(const Map & x, WriteBuffer & buf)
@ -230,11 +212,7 @@ void writeBinary(const Map & x, WriteBuffer & buf)
writeBinary(size, buf);
for (const auto & elem : x)
{
const UInt8 type = elem.getType();
writeBinary(type, buf);
Field::dispatch([&buf] (const auto & value) { FieldVisitorWriteBinary()(value, buf); }, elem);
}
writeFieldBinary(elem, buf);
}
void writeText(const Map & x, WriteBuffer & buf)
@ -319,6 +297,19 @@ void writeFieldText(const Field & x, WriteBuffer & buf)
buf.write(res.data(), res.size());
}
void writeFieldBinary(const Field & x, WriteBuffer & buf)
{
const UInt8 type = x.getType();
writeBinary(type, buf);
Field::dispatch([&buf] (const auto & value) { FieldVisitorWriteBinary()(value, buf); }, x);
}
/// Reads one Field stored in tagged binary form: a one-byte type tag,
/// then the value, which getBinaryValue decodes according to that tag.
Field readFieldBinary(ReadBuffer & buf)
{
    UInt8 type_tag;
    readBinary(type_tag, buf);
    return getBinaryValue(type_tag, buf);
}
String Field::dump() const
{

View File

@ -942,14 +942,15 @@ inline Field & Field::operator=(String && str)
class ReadBuffer;
class WriteBuffer;
/// It is assumed that all elements of the array have the same type.
void readBinary(Array & x, ReadBuffer & buf);
/// Binary serialization of generic field.
void writeFieldBinary(const Field & x, WriteBuffer & buf);
Field readFieldBinary(ReadBuffer & buf);
void readBinaryArray(Array & x, ReadBuffer & buf);
[[noreturn]] inline void readText(Array &, ReadBuffer &) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot read Array."); }
[[noreturn]] inline void readQuoted(Array &, ReadBuffer &) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot read Array."); }
/// It is assumed that all elements of the array have the same type.
/// Also write size and type into buf. UInt64 and Int64 is written in variadic size form
void writeBinary(const Array & x, WriteBuffer & buf);
void writeBinaryArray(const Array & x, WriteBuffer & buf);
void writeText(const Array & x, WriteBuffer & buf);
[[noreturn]] inline void writeQuoted(const Array &, WriteBuffer &) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot write Array quoted."); }

View File

@ -170,6 +170,9 @@ Avoid reordering rows when reading from Parquet files. Usually makes it much slo
)", 0) \
M(Bool, input_format_parquet_filter_push_down, true, R"(
When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and min/max statistics in the Parquet metadata.
)", 0) \
M(Bool, input_format_parquet_bloom_filter_push_down, false, R"(
When reading Parquet files, skip whole row groups based on the WHERE expressions and bloom filter in the Parquet metadata.
)", 0) \
M(Bool, input_format_parquet_use_native_reader, false, R"(
When reading Parquet files, to use native reader instead of arrow reader.
@ -190,12 +193,18 @@ When reading ORC files, skip whole stripes or row groups based on the WHERE/PREW
)", 0) \
M(String, input_format_orc_reader_time_zone_name, "GMT", R"(
The time zone name for ORC row reader, the default ORC row reader's time zone is GMT.
)", 0) \
M(Bool, input_format_orc_dictionary_as_low_cardinality, true, R"(
Treat ORC dictionary encoded columns as LowCardinality columns while reading ORC files.
)", 0) \
M(Bool, input_format_parquet_allow_missing_columns, true, R"(
Allow missing columns while reading Parquet input formats
)", 0) \
M(UInt64, input_format_parquet_local_file_min_bytes_for_seek, 8192, R"(
Min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format
)", 0) \
M(Bool, input_format_parquet_enable_row_group_prefetch, true, R"(
Enable row group prefetching during parquet parsing. Currently, only single-threaded parsing can prefetch.
)", 0) \
M(Bool, input_format_arrow_allow_missing_columns, true, R"(
Allow missing columns while reading Arrow input formats
@ -604,6 +613,9 @@ See also:
- [Interval](../../sql-reference/data-types/special-data-types/interval.md)
)", 0) \
\
M(Bool, date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands, false, R"(
Dynamically trim the trailing zeros of datetime64 values to adjust the output scale to [0, 3, 6],
corresponding to 'seconds', 'milliseconds', and 'microseconds')", 0) \
M(Bool, input_format_ipv4_default_on_conversion_error, false, R"(
Deserialization of IPv4 will use default values instead of throwing exception on conversion error.

View File

@ -0,0 +1,13 @@
#pragma once
#include <cstdint>
namespace DB
{
/// Names a merge selector algorithm. The underlying type is uint8_t, so a value occupies one byte.
/// NOTE(review): the meaning of each enumerator is not visible in this header — presumably
/// SIMPLE is the plain selector and STOCHASTIC_SIMPLE a randomized variant of it; confirm
/// against the selector implementations before relying on this.
enum class MergeSelectorAlgorithm : uint8_t
{
SIMPLE,
STOCHASTIC_SIMPLE,
};
}

View File

@ -1,12 +1,207 @@
#include <Core/BackgroundSchedulePool.h>
#include <Core/BaseSettings.h>
#include <Core/ServerSettings.h>
#include <IO/MMappedFileCache.h>
#include <IO/UncompressedCache.h>
#include <Interpreters/Context.h>
#include <Interpreters/ProcessList.h>
#include <Storages/MarkCache.h>
#include <Storages/MergeTree/MergeTreeBackgroundExecutor.h>
#include <Storages/System/ServerSettingColumnsParams.h>
#include <Common/Config/ConfigReloader.h>
#include <Common/MemoryTracker.h>
#include <Poco/Util/AbstractConfiguration.h>
/// Metric constants that are defined in another translation unit and only declared here.
/// NOTE(review): judging by the names, these presumably track the sizes of the corresponding
/// background schedule pools — confirm at the use sites.
namespace CurrentMetrics
{
extern const Metric BackgroundSchedulePoolSize;
extern const Metric BackgroundBufferFlushSchedulePoolSize;
extern const Metric BackgroundDistributedSchedulePoolSize;
extern const Metric BackgroundMessageBrokerSchedulePoolSize;
}
namespace DB
{
IMPLEMENT_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS)
#define LIST_OF_SERVER_SETTINGS(M, ALIAS) \
M(Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0) \
M(Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0) \
M(UInt64, shutdown_wait_unfinished, 5, "Delay in seconds to wait for unfinished queries", 0) \
M(UInt64, max_thread_pool_size, 10000, "The maximum number of threads that could be allocated from the OS and used for query execution and background operations.", 0) \
M(UInt64, max_thread_pool_free_size, 1000, "The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks.", 0) \
M(UInt64, thread_pool_queue_size, 10000, "The maximum number of tasks that will be placed in a queue and wait for execution.", 0) \
M(UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0) \
M(UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0) \
M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \
M(UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0) \
M(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0) \
M(UInt64, max_unexpected_parts_loading_thread_pool_size, 8, "The number of threads to load inactive set of data parts (Unexpected ones) at startup.", 0) \
M(UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0) \
M(UInt64, max_mutations_bandwidth_for_server, 0, "The maximum read speed of all mutations on server in bytes per second. Zero means unlimited.", 0) \
M(UInt64, max_merges_bandwidth_for_server, 0, "The maximum read speed of all merges on server in bytes per second. Zero means unlimited.", 0) \
M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \
M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \
M(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0) \
M(UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited.", 0) \
M(UInt64, max_local_read_bandwidth_for_server, 0, "The maximum speed of local reads in bytes per second. Zero means unlimited.", 0) \
M(UInt64, max_local_write_bandwidth_for_server, 0, "The maximum speed of local writes in bytes per second. Zero means unlimited.", 0) \
M(UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0) \
M(UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0) \
M(UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0) \
M(UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0) \
M(UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0) \
M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \
M(Bool, shutdown_wait_backups_and_restores, true, "If set to true ClickHouse will wait for running backups and restores to finish before shutdown.", 0) \
M(Double, cannot_allocate_thread_fault_injection_probability, 0, "For testing purposes.", 0) \
M(Int32, max_connections, 1024, "Max server connections.", 0) \
M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \
M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \
M(String, default_database, "default", "Default database name.", 0) \
M(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \
M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting.", 0) \
M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \
M(UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0) \
M(GroupArrayActionWhenLimitReached, aggregate_function_group_array_action_when_limit_is_reached, GroupArrayActionWhenLimitReached::THROW, "Action to execute when max array element size is exceeded in groupArray: `throw` exception, or `discard` extra values", 0) \
M(UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0) \
M(Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. Allows to lower max memory on low-memory systems.", 0) \
M(UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0) \
M(Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to RAM ratio. Allows to lower memory limit on low-memory systems.", 0) \
M(Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0) \
M(UInt64, cgroups_memory_usage_observer_wait_time, 15, "Polling interval in seconds to read the current memory usage from cgroups. Zero means disabled.", 0) \
M(Double, cgroup_memory_watcher_hard_limit_ratio, 0.95, "Hard memory limit ratio for cgroup memory usage observer", 0) \
M(Double, cgroup_memory_watcher_soft_limit_ratio, 0.9, "Soft memory limit ratio limit for cgroup memory usage observer", 0) \
M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
M(Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0) \
M(Bool, ignore_empty_sql_security_in_create_view_query, true, "If true, ClickHouse doesn't write defaults for empty SQL security statement in CREATE VIEW queries. This setting is only necessary for the migration period and will become obsolete in 24.4", 0) \
M(UInt64, max_build_vector_similarity_index_thread_pool_size, 16, "The maximum number of threads to use to build vector similarity indexes. 0 means all cores.", 0) \
\
/* Database Catalog */ \
M(UInt64, database_atomic_delay_before_drop_table_sec, 8 * 60, "The delay during which a dropped table can be restored using the UNDROP statement. If DROP TABLE ran with a SYNC modifier, the setting is ignored.", 0) \
M(UInt64, database_catalog_unused_dir_hide_timeout_sec, 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. If some subdirectory is not used by clickhouse-server and this directory was not modified for last database_catalog_unused_dir_hide_timeout_sec seconds, the task will 'hide' this directory by removing all access rights. It also works for directories that clickhouse-server does not expect to see inside store/. Zero means 'immediately'.", 0) \
M(UInt64, database_catalog_unused_dir_rm_timeout_sec, 30 * 24 * 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. If some subdirectory is not used by clickhouse-server and it was previously 'hidden' (see database_catalog_unused_dir_hide_timeout_sec) and this directory was not modified for last database_catalog_unused_dir_rm_timeout_sec seconds, the task will remove this directory. It also works for directories that clickhouse-server does not expect to see inside store/. Zero means 'never'.", 0) \
M(UInt64, database_catalog_unused_dir_cleanup_period_sec, 24 * 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. Sets scheduling period of the task. Zero means 'never'.", 0) \
M(UInt64, database_catalog_drop_error_cooldown_sec, 5, "In case if drop table failed, ClickHouse will wait for this timeout before retrying the operation.", 0) \
M(UInt64, database_catalog_drop_table_concurrency, 16, "The size of the threadpool used for dropping tables.", 0) \
\
\
M(UInt64, max_concurrent_queries, 0, "Maximum number of concurrently executed queries. Zero means unlimited.", 0) \
M(UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0) \
M(UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0) \
M(UInt64, max_waiting_queries, 0, "Maximum number of concurrently waiting queries blocked due to `async_load_databases`. Note that waiting queries are not considered by `max_concurrent_*queries*` limits. Zero means unlimited.", 0) \
\
M(Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size to RAM max ratio. Allows to lower cache size on low-memory systems.", 0) \
M(String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0) \
M(UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. Zero means disabled.", 0) \
M(Double, uncompressed_cache_size_ratio, DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the uncompressed cache relative to the cache's total size.", 0) \
M(String, mark_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Mark cache policy name.", 0) \
M(UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0) \
M(Double, mark_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the mark cache relative to the cache's total size.", 0) \
M(String, index_uncompressed_cache_policy, DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY, "Secondary index uncompressed cache policy name.", 0) \
M(UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of secondary indices. Zero means disabled.", 0) \
M(Double, index_uncompressed_cache_size_ratio, DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index uncompressed cache relative to the cache's total size.", 0) \
M(String, index_mark_cache_policy, DEFAULT_INDEX_MARK_CACHE_POLICY, "Secondary index mark cache policy name.", 0) \
M(UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for secondary index marks. Zero means disabled.", 0) \
M(Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0) \
M(UInt64, page_cache_chunk_size, 2 << 20, "Bytes per chunk in userspace page cache. Rounded up to a multiple of page size (typically 4 KiB) or huge page size (typically 2 MiB, only if page_cache_use_thp is enabled).", 0) \
M(UInt64, page_cache_mmap_size, 1 << 30, "Bytes per memory mapping in userspace page cache. Not important.", 0) \
M(UInt64, page_cache_size, 0, "Amount of virtual memory to map for userspace page cache. If page_cache_use_madv_free is enabled, it's recommended to set this higher than the machine's RAM size. Use 0 to disable userspace page cache.", 0) \
M(Bool, page_cache_use_madv_free, DBMS_DEFAULT_PAGE_CACHE_USE_MADV_FREE, "If true, the userspace page cache will allow the OS to automatically reclaim memory from the cache on memory pressure (using MADV_FREE).", 0) \
M(Bool, page_cache_use_transparent_huge_pages, true, "Userspace will attempt to use transparent huge pages on Linux. This is best-effort.", 0) \
M(UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0) \
M(UInt64, compiled_expression_cache_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE, "Byte size of compiled expressions cache.", 0) \
M(UInt64, compiled_expression_cache_elements_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES, "Maximum entries in compiled expressions cache.", 0) \
\
M(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \
M(UInt64, dns_cache_max_entries, 10000, "Internal DNS cache max entries.", 0) \
M(Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0) \
M(UInt32, dns_max_consecutive_failures, 10, "Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0) \
M(Bool, dns_allow_resolve_names_to_ipv4, true, "Allows resolve names to ipv4 addresses.", 0) \
M(Bool, dns_allow_resolve_names_to_ipv6, true, "Allows resolve names to ipv6 addresses.", 0) \
\
M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \
M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \
M(UInt64, max_table_num_to_warn, 5000lu, "If the number of tables is greater than this value, the server will create a warning that will displayed to user.", 0) \
M(UInt64, max_view_num_to_warn, 10000lu, "If the number of views is greater than this value, the server will create a warning that will displayed to user.", 0) \
M(UInt64, max_dictionary_num_to_warn, 1000lu, "If the number of dictionaries is greater than this value, the server will create a warning that will displayed to user.", 0) \
M(UInt64, max_database_num_to_warn, 1000lu, "If the number of databases is greater than this value, the server will create a warning that will displayed to user.", 0) \
M(UInt64, max_part_num_to_warn, 100000lu, "If the number of parts is greater than this value, the server will create a warning that will displayed to user.", 0) \
M(UInt64, max_table_num_to_throw, 0lu, "If number of tables is greater than this value, server will throw an exception. 0 means no limitation. View, remote tables, dictionary, system tables are not counted. Only count table in Atomic/Ordinary/Replicated/Lazy database engine.", 0) \
M(UInt64, max_database_num_to_throw, 0lu, "If number of databases is greater than this value, server will throw an exception. 0 means no limitation.", 0) \
M(UInt64, max_authentication_methods_per_user, 100, "The maximum number of authentication methods a user can be created with or altered. Changing this setting does not affect existing users. Zero means unlimited", 0) \
M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \
M(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \
\
M(UInt64, background_pool_size, 16, "The maximum number of threads what will be used for merging or mutating data parts for *MergeTree-engine tables in a background.", 0) \
M(Float, background_merges_mutations_concurrency_ratio, 2, "The number of part mutation tasks that can be executed concurrently by each thread in background pool.", 0) \
M(String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. ", 0) \
M(UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0) \
M(UInt64, background_fetches_pool_size, 16, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0) \
M(UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0) \
M(UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0) \
M(UInt64, background_schedule_pool_size, 512, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \
M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \
M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \
M(UInt64, tables_loader_foreground_pool_size, 0, "The maximum number of threads that will be used for foreground (that is being waited for by a query) loading of tables. Also used for synchronous loading of tables before the server start. Zero means use all CPUs.", 0) \
M(UInt64, tables_loader_background_pool_size, 0, "The maximum number of threads that will be used for background async loading of tables. Zero means use all CPUs.", 0) \
M(Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0) \
M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \
M(Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0) \
M(UInt64, max_keep_alive_requests, 10000, "The maximum number of requests handled via a single http keepalive connection before the server closes this connection.", 0) \
M(Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0) \
M(Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0) \
M(Seconds, replicated_fetches_http_receive_timeout, 0, "HTTP receive timeout for fetch part requests. Inherited from default profile `http_receive_timeout` if not set explicitly.", 0) \
M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \
M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
M(Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0) \
M(Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0) \
M(UInt64, max_materialized_views_count_for_table, 0, "A limit on the number of materialized views attached to a table.", 0) \
M(UInt32, max_database_replicated_create_table_thread_pool_size, 1, "The number of threads to create tables during replica recovery in DatabaseReplicated. Zero means number of threads equal number of cores.", 0) \
M(Bool, database_replicated_allow_detach_permanently, true, "Allow detaching tables permanently in Replicated databases", 0) \
M(Bool, format_alter_operations_with_parentheses, false, "If enabled, each operation in alter queries will be surrounded with parentheses in formatted queries to make them less ambiguous.", 0) \
M(String, default_replica_path, "/clickhouse/tables/{uuid}/{shard}", "The path to the table in ZooKeeper", 0) \
M(String, default_replica_name, "{replica}", "The replica name in ZooKeeper", 0) \
M(UInt64, disk_connections_soft_limit, 5000, "Connections above this limit have significantly shorter time to live. The limit applies to the disks connections.", 0) \
M(UInt64, disk_connections_warn_limit, 10000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the disks connections.", 0) \
M(UInt64, disk_connections_store_limit, 30000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the disks connections.", 0) \
M(UInt64, storage_connections_soft_limit, 100, "Connections above this limit have significantly shorter time to live. The limit applies to the storages connections.", 0) \
M(UInt64, storage_connections_warn_limit, 1000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the storages connections.", 0) \
M(UInt64, storage_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the storages connections.", 0) \
M(UInt64, http_connections_soft_limit, 100, "Connections above this limit have significantly shorter time to live. The limit applies to the http connections which do not belong to any disk or storage.", 0) \
M(UInt64, http_connections_warn_limit, 1000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the http connections which do not belong to any disk or storage.", 0) \
M(UInt64, http_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the http connections which do not belong to any disk or storage.", 0) \
M(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). Set 0 value to turn off the real clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
M(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \
M(UInt64, max_entries_for_hash_table_stats, 10'000, "How many entries hash table statistics collected during aggregation is allowed to have", 0) \
M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \
M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \
M(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `system.*_log` tables before the startup. It can be helpful if some startup scripts depend on these tables.", 0) \
M(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \
M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \
M(UInt64, memory_worker_period_ms, 0, "Tick period of background memory worker which corrects memory tracker memory usages and cleans up unused pages during higher memory usage. If set to 0, default value will be used depending on the memory usage source", 0) \
M(Bool, disable_insertion_and_mutation, false, "Disable all insert/alter/delete queries. This setting will be enabled if someone needs read-only nodes to prevent insertion and mutation affect reading performance.", 0) \
M(UInt64, parts_kill_delay_period, 30, "Period to completely remove parts for SharedMergeTree. Only available in ClickHouse Cloud", 0) \
M(UInt64, parts_kill_delay_period_random_add, 10, "Add uniformly distributed value from 0 to x seconds to kill_delay_period to avoid thundering herd effect and subsequent DoS of ZooKeeper in case of very large number of tables. Only available in ClickHouse Cloud", 0) \
M(UInt64, parts_killer_pool_size, 128, "Threads for cleanup of shared merge tree outdated threads. Only available in ClickHouse Cloud", 0) \
M(UInt64, keeper_multiread_batch_size, 10'000, "Maximum size of batch for MultiRead request to [Zoo]Keeper that support batching. If set to 0, batching is disabled. Available only in ClickHouse Cloud.", 0) \
M(Bool, use_legacy_mongodb_integration, true, "Use the legacy MongoDB integration implementation. Note: it's highly recommended to set this option to false, since legacy implementation will be removed in the future. Please submit any issues you encounter with the new implementation.", 0) \
void ServerSettings::loadSettingsFromConfig(const Poco::Util::AbstractConfiguration & config)
/// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in dumpToSystemServerSettingsColumns below
DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, LIST_OF_SERVER_SETTINGS)
/// Concrete storage for the server settings: BaseSettings instantiated with ServerSettingsTraits.
/// ServerSettings keeps an instance of this struct behind its `impl` pointer and forwards its calls to it.
struct ServerSettingsImpl : public BaseSettings<ServerSettingsTraits>
{
/// Fills the settings from `config`; the definition follows later in this file.
void loadSettingsFromConfig(const Poco::Util::AbstractConfiguration & config);
};
IMPLEMENT_SETTINGS_TRAITS(ServerSettingsTraits, LIST_OF_SERVER_SETTINGS)
void ServerSettingsImpl::loadSettingsFromConfig(const Poco::Util::AbstractConfiguration & config)
{
// settings which can be loaded from the default profile, see also MAKE_DEPRECATED_BY_SERVER_CONFIG in src/Core/Settings.h
std::unordered_set<std::string> settings_from_profile_allowlist = {
@ -35,4 +230,125 @@ void ServerSettings::loadSettingsFromConfig(const Poco::Util::AbstractConfigurat
}
}
/// For every entry of LIST_OF_SERVER_SETTINGS, defines a variable in namespace ServerSetting that is named after the
/// setting, has type ServerSettings<TYPE> and is initialized with a pointer to the member of the same name in
/// ServerSettingsImpl. DEFAULT, DESCRIPTION and FLAGS are not used by this expansion.
/// NOTE(review): these variables are presumably the keys passed to ServerSettings::operator[] - confirm against the header.
#define INITIALIZE_SETTING_EXTERN(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) ServerSettings##TYPE NAME = &ServerSettings##Impl ::NAME;
namespace ServerSetting
{
LIST_OF_SERVER_SETTINGS(INITIALIZE_SETTING_EXTERN, SKIP_ALIAS)
}
/// The helper macro is needed only for the expansion above.
#undef INITIALIZE_SETTING_EXTERN
/// Creates the settings object with a freshly default-constructed ServerSettingsImpl.
ServerSettings::ServerSettings() : impl(std::make_unique<ServerSettingsImpl>())
{
}
/// Copy constructor: allocates a new ServerSettingsImpl copy-constructed from the one owned by `settings`,
/// so the two objects do not share the implementation.
ServerSettings::ServerSettings(const ServerSettings & settings) : impl(std::make_unique<ServerSettingsImpl>(*settings.impl))
{
}
/// Defaulted here, in the file that defines ServerSettingsImpl.
ServerSettings::~ServerSettings() = default;
/// Generates the const and non-const ServerSettings::operator[] overloads for one setting type.
/// The argument `t` is applied to the owned ServerSettingsImpl with `->*`, i.e. it is used as a pointer to member,
/// and the selected SettingField<TYPE> is returned by reference.
/// (Comments are kept outside of the macro body because of the line continuations.)
#define IMPLEMENT_SETTING_SUBSCRIPT_OPERATOR(CLASS_NAME, TYPE) \
const SettingField##TYPE & ServerSettings::operator[](CLASS_NAME##TYPE t) const \
{ \
return impl.get()->*t; \
} \
SettingField##TYPE & ServerSettings::operator[](CLASS_NAME##TYPE t) \
{ \
return impl.get()->*t; \
}
/// Instantiate the operators through SERVER_SETTINGS_SUPPORTED_TYPES (declared elsewhere; presumably one expansion per supported type).
SERVER_SETTINGS_SUPPORTED_TYPES(ServerSettings, IMPLEMENT_SETTING_SUBSCRIPT_OPERATOR)
#undef IMPLEMENT_SETTING_SUBSCRIPT_OPERATOR
/// Sets the setting called `name` to `value` by forwarding to the implementation's set().
void ServerSettings::set(std::string_view name, const Field & value)
{
impl->set(name, value);
}
/// Loads the settings from `config`; all work is delegated to ServerSettingsImpl::loadSettingsFromConfig.
void ServerSettings::loadSettingsFromConfig(const Poco::Util::AbstractConfiguration & config)
{
impl->loadSettingsFromConfig(config);
}
/// Appends one row per server setting to `params.res_columns`.
/// Columns, in order: name, value, default value, "changed" flag, description, type name,
/// "changeable without restart" kind, "obsolete" flag.
/// For settings listed in `changeable_settings` the value is taken from the live server component
/// (via `params.context`, the memory tracker or CurrentMetrics) instead of from the stored setting.
void ServerSettings::dumpToSystemServerSettingsColumns(ServerSettingColumnsParams & params) const
{
    MutableColumns & res_columns = params.res_columns;
    ContextPtr context = params.context;

    /// The server configuration file is periodically re-loaded from disk. On every re-load the server components
    /// (e.g. memory tracking) are updated with the new setting values, but the settings themselves are not stored
    /// between re-loads. Hence the current values have to be requested from the components directly.
    std::unordered_map<String, std::pair<String, ChangeableWithoutRestart>> changeable_settings{
        {"max_server_memory_usage",
         {std::to_string(total_memory_tracker.getHardLimit()), ChangeableWithoutRestart::Yes}},
        {"max_table_size_to_drop",
         {std::to_string(context->getMaxTableSizeToDrop()), ChangeableWithoutRestart::Yes}},
        {"max_partition_size_to_drop",
         {std::to_string(context->getMaxPartitionSizeToDrop()), ChangeableWithoutRestart::Yes}},

        {"max_concurrent_queries",
         {std::to_string(context->getProcessList().getMaxSize()), ChangeableWithoutRestart::Yes}},
        {"max_concurrent_insert_queries",
         {std::to_string(context->getProcessList().getMaxInsertQueriesAmount()), ChangeableWithoutRestart::Yes}},
        {"max_concurrent_select_queries",
         {std::to_string(context->getProcessList().getMaxSelectQueriesAmount()), ChangeableWithoutRestart::Yes}},
        {"max_waiting_queries",
         {std::to_string(context->getProcessList().getMaxWaitingQueriesAmount()), ChangeableWithoutRestart::Yes}},

        {"background_buffer_flush_schedule_pool_size",
         {std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundBufferFlushSchedulePoolSize)),
          ChangeableWithoutRestart::IncreaseOnly}},
        {"background_schedule_pool_size",
         {std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundSchedulePoolSize)),
          ChangeableWithoutRestart::IncreaseOnly}},
        {"background_message_broker_schedule_pool_size",
         {std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundMessageBrokerSchedulePoolSize)),
          ChangeableWithoutRestart::IncreaseOnly}},
        {"background_distributed_schedule_pool_size",
         {std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundDistributedSchedulePoolSize)),
          ChangeableWithoutRestart::IncreaseOnly}},

        {"mark_cache_size",
         {std::to_string(context->getMarkCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}},
        {"uncompressed_cache_size",
         {std::to_string(context->getUncompressedCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}},
        {"index_mark_cache_size",
         {std::to_string(context->getIndexMarkCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}},
        {"index_uncompressed_cache_size",
         {std::to_string(context->getIndexUncompressedCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}},
        {"mmap_cache_size",
         {std::to_string(context->getMMappedFileCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}},

        {"merge_workload", {context->getMergeWorkload(), ChangeableWithoutRestart::Yes}},
        {"mutation_workload", {context->getMutationWorkload(), ChangeableWithoutRestart::Yes}},
        {"config_reload_interval_ms",
         {std::to_string(context->getConfigReloaderInterval()), ChangeableWithoutRestart::Yes}},
    };

    /// The executors are queried only after the context reports them as initialized.
    if (context->areBackgroundExecutorsInitialized())
    {
        /// Registers a pool-size setting whose current value is the executor's thread limit.
        auto add_executor_pool_size = [&changeable_settings](const char * pool_setting, const auto & executor)
        {
            changeable_settings.insert(
                {pool_setting, {std::to_string(executor->getMaxThreads()), ChangeableWithoutRestart::IncreaseOnly}});
        };

        add_executor_pool_size("background_pool_size", context->getMergeMutateExecutor());
        add_executor_pool_size("background_move_pool_size", context->getMovesExecutor());
        add_executor_pool_size("background_fetches_pool_size", context->getFetchesExecutor());
        add_executor_pool_size("background_common_pool_size", context->getCommonExecutor());
    }

    for (const auto & setting : impl->all())
    {
        const auto & setting_name = setting.getName();

        /// A hit means the live value and the changeability kind come from the map above.
        const auto live_entry = changeable_settings.find(setting_name);
        const bool is_changeable = live_entry != changeable_settings.end();

        res_columns[0]->insert(setting_name);
        res_columns[1]->insert(is_changeable ? live_entry->second.first : setting.getValueString());
        res_columns[2]->insert(setting.getDefaultValueString());
        res_columns[3]->insert(setting.isValueChanged());
        res_columns[4]->insert(setting.getDescription());
        res_columns[5]->insert(setting.getTypeName());
        res_columns[6]->insert(is_changeable ? live_entry->second.second : ChangeableWithoutRestart::No);
        res_columns[7]->insert(setting.isObsolete());
    }
}
}

View File

@ -1,10 +1,8 @@
#pragma once
#include <Core/BaseSettings.h>
#include <Core/Defines.h>
#include <Core/BaseSettingsFwdMacros.h>
#include <Core/SettingsEnums.h>
#include <Core/SettingsFields.h>
namespace Poco::Util
{
@ -13,177 +11,47 @@ class AbstractConfiguration;
namespace DB
{
class Field;
struct ServerSettingColumnsParams;
struct ServerSettingsImpl;
// clang-format off
#define SERVER_SETTINGS(M, ALIAS) \
M(Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0) \
M(Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0) \
M(UInt64, shutdown_wait_unfinished, 5, "Delay in seconds to wait for unfinished queries", 0) \
M(UInt64, max_thread_pool_size, 10000, "The maximum number of threads that could be allocated from the OS and used for query execution and background operations.", 0) \
M(UInt64, max_thread_pool_free_size, 1000, "The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks.", 0) \
M(UInt64, thread_pool_queue_size, 10000, "The maximum number of tasks that will be placed in a queue and wait for execution.", 0) \
M(UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0) \
M(UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0) \
M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \
M(UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0) \
M(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0) \
M(UInt64, max_unexpected_parts_loading_thread_pool_size, 8, "The number of threads to load inactive set of data parts (Unexpected ones) at startup.", 0) \
M(UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0) \
M(UInt64, max_mutations_bandwidth_for_server, 0, "The maximum read speed of all mutations on server in bytes per second. Zero means unlimited.", 0) \
M(UInt64, max_merges_bandwidth_for_server, 0, "The maximum read speed of all merges on server in bytes per second. Zero means unlimited.", 0) \
M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \
M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \
M(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0) \
M(UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited.", 0) \
M(UInt64, max_local_read_bandwidth_for_server, 0, "The maximum speed of local reads in bytes per second. Zero means unlimited.", 0) \
M(UInt64, max_local_write_bandwidth_for_server, 0, "The maximum speed of local writes in bytes per second. Zero means unlimited.", 0) \
M(UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0) \
M(UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0) \
M(UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0) \
M(UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0) \
M(UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0) \
M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \
M(Bool, shutdown_wait_backups_and_restores, true, "If set to true ClickHouse will wait for running backups and restores to finish before shutdown.", 0) \
M(Double, cannot_allocate_thread_fault_injection_probability, 0, "For testing purposes.", 0) \
M(Int32, max_connections, 1024, "Max server connections.", 0) \
M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \
M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \
M(String, default_database, "default", "Default database name.", 0) \
M(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \
M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting.", 0) \
M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \
M(UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0) \
M(GroupArrayActionWhenLimitReached, aggregate_function_group_array_action_when_limit_is_reached, GroupArrayActionWhenLimitReached::THROW, "Action to execute when max array element size is exceeded in groupArray: `throw` exception, or `discard` extra values", 0) \
M(UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0) \
M(Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. Allows to lower max memory on low-memory systems.", 0) \
M(UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0) \
M(Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to RAM ratio. Allows to lower memory limit on low-memory systems.", 0) \
M(Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0) \
M(UInt64, cgroups_memory_usage_observer_wait_time, 15, "Polling interval in seconds to read the current memory usage from cgroups. Zero means disabled.", 0) \
M(Double, cgroup_memory_watcher_hard_limit_ratio, 0.95, "Hard memory limit ratio for cgroup memory usage observer", 0) \
M(Double, cgroup_memory_watcher_soft_limit_ratio, 0.9, "Soft memory limit ratio limit for cgroup memory usage observer", 0) \
M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
M(Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0) \
M(Bool, ignore_empty_sql_security_in_create_view_query, true, "If true, ClickHouse doesn't write defaults for empty SQL security statement in CREATE VIEW queries. This setting is only necessary for the migration period and will become obsolete in 24.4", 0) \
M(UInt64, max_build_vector_similarity_index_thread_pool_size, 16, "The maximum number of threads to use to build vector similarity indexes. 0 means all cores.", 0) \
\
/* Database Catalog */ \
M(UInt64, database_atomic_delay_before_drop_table_sec, 8 * 60, "The delay during which a dropped table can be restored using the UNDROP statement. If DROP TABLE ran with a SYNC modifier, the setting is ignored.", 0) \
M(UInt64, database_catalog_unused_dir_hide_timeout_sec, 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. If some subdirectory is not used by clickhouse-server and this directory was not modified for last database_catalog_unused_dir_hide_timeout_sec seconds, the task will 'hide' this directory by removing all access rights. It also works for directories that clickhouse-server does not expect to see inside store/. Zero means 'immediately'.", 0) \
M(UInt64, database_catalog_unused_dir_rm_timeout_sec, 30 * 24 * 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. If some subdirectory is not used by clickhouse-server and it was previously 'hidden' (see database_catalog_unused_dir_hide_timeout_sec) and this directory was not modified for last database_catalog_unused_dir_rm_timeout_sec seconds, the task will remove this directory. It also works for directories that clickhouse-server does not expect to see inside store/. Zero means 'never'.", 0) \
M(UInt64, database_catalog_unused_dir_cleanup_period_sec, 24 * 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. Sets scheduling period of the task. Zero means 'never'.", 0) \
M(UInt64, database_catalog_drop_error_cooldown_sec, 5, "In case if drop table failed, ClickHouse will wait for this timeout before retrying the operation.", 0) \
M(UInt64, database_catalog_drop_table_concurrency, 16, "The size of the threadpool used for dropping tables.", 0) \
\
\
M(UInt64, max_concurrent_queries, 0, "Maximum number of concurrently executed queries. Zero means unlimited.", 0) \
M(UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0) \
M(UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0) \
M(UInt64, max_waiting_queries, 0, "Maximum number of concurrently waiting queries blocked due to `async_load_databases`. Note that waiting queries are not considered by `max_concurrent_*queries*` limits. Zero means unlimited.", 0) \
\
M(Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size to RAM max ratio. Allows to lower cache size on low-memory systems.", 0) \
M(String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0) \
M(UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. Zero means disabled.", 0) \
M(Double, uncompressed_cache_size_ratio, DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the uncompressed cache relative to the cache's total size.", 0) \
M(String, mark_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Mark cache policy name.", 0) \
M(UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0) \
M(Double, mark_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the mark cache relative to the cache's total size.", 0) \
M(String, index_uncompressed_cache_policy, DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY, "Secondary index uncompressed cache policy name.", 0) \
M(UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of secondary indices. Zero means disabled.", 0) \
M(Double, index_uncompressed_cache_size_ratio, DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index uncompressed cache relative to the cache's total size.", 0) \
M(String, index_mark_cache_policy, DEFAULT_INDEX_MARK_CACHE_POLICY, "Secondary index mark cache policy name.", 0) \
M(UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for secondary index marks. Zero means disabled.", 0) \
M(Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0) \
M(UInt64, page_cache_chunk_size, 2 << 20, "Bytes per chunk in userspace page cache. Rounded up to a multiple of page size (typically 4 KiB) or huge page size (typically 2 MiB, only if page_cache_use_thp is enabled).", 0) \
M(UInt64, page_cache_mmap_size, 1 << 30, "Bytes per memory mapping in userspace page cache. Not important.", 0) \
M(UInt64, page_cache_size, 0, "Amount of virtual memory to map for userspace page cache. If page_cache_use_madv_free is enabled, it's recommended to set this higher than the machine's RAM size. Use 0 to disable userspace page cache.", 0) \
M(Bool, page_cache_use_madv_free, DBMS_DEFAULT_PAGE_CACHE_USE_MADV_FREE, "If true, the userspace page cache will allow the OS to automatically reclaim memory from the cache on memory pressure (using MADV_FREE).", 0) \
M(Bool, page_cache_use_transparent_huge_pages, true, "Userspace will attempt to use transparent huge pages on Linux. This is best-effort.", 0) \
M(UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0) \
M(UInt64, compiled_expression_cache_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE, "Byte size of compiled expressions cache.", 0) \
M(UInt64, compiled_expression_cache_elements_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES, "Maximum entries in compiled expressions cache.", 0) \
\
M(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \
M(UInt64, dns_cache_max_entries, 10000, "Internal DNS cache max entries.", 0) \
M(Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0) \
M(UInt32, dns_max_consecutive_failures, 10, "Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0) \
M(Bool, dns_allow_resolve_names_to_ipv4, true, "Allows resolve names to ipv4 addresses.", 0) \
M(Bool, dns_allow_resolve_names_to_ipv6, true, "Allows resolve names to ipv6 addresses.", 0) \
\
M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \
M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \
M(UInt64, max_table_num_to_warn, 5000lu, "If the number of tables is greater than this value, the server will create a warning that will displayed to user.", 0) \
M(UInt64, max_view_num_to_warn, 10000lu, "If the number of views is greater than this value, the server will create a warning that will displayed to user.", 0) \
M(UInt64, max_dictionary_num_to_warn, 1000lu, "If the number of dictionaries is greater than this value, the server will create a warning that will displayed to user.", 0) \
M(UInt64, max_database_num_to_warn, 1000lu, "If the number of databases is greater than this value, the server will create a warning that will displayed to user.", 0) \
M(UInt64, max_part_num_to_warn, 100000lu, "If the number of parts is greater than this value, the server will create a warning that will displayed to user.", 0) \
M(UInt64, max_table_num_to_throw, 0lu, "If number of tables is greater than this value, server will throw an exception. 0 means no limitation. View, remote tables, dictionary, system tables are not counted. Only count table in Atomic/Ordinary/Replicated/Lazy database engine.", 0) \
M(UInt64, max_database_num_to_throw, 0lu, "If number of databases is greater than this value, server will throw an exception. 0 means no limitation.", 0) \
M(UInt64, max_authentication_methods_per_user, 100, "The maximum number of authentication methods a user can be created with or altered. Changing this setting does not affect existing users. Zero means unlimited", 0) \
M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \
M(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \
\
M(UInt64, background_pool_size, 16, "The maximum number of threads what will be used for merging or mutating data parts for *MergeTree-engine tables in a background.", 0) \
M(Float, background_merges_mutations_concurrency_ratio, 2, "The number of part mutation tasks that can be executed concurrently by each thread in background pool.", 0) \
M(String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. ", 0) \
M(UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0) \
M(UInt64, background_fetches_pool_size, 16, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0) \
M(UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0) \
M(UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0) \
M(UInt64, background_schedule_pool_size, 512, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \
M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \
M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \
M(UInt64, tables_loader_foreground_pool_size, 0, "The maximum number of threads that will be used for foreground (that is being waited for by a query) loading of tables. Also used for synchronous loading of tables before the server start. Zero means use all CPUs.", 0) \
M(UInt64, tables_loader_background_pool_size, 0, "The maximum number of threads that will be used for background async loading of tables. Zero means use all CPUs.", 0) \
M(Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0) \
M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \
M(Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0) \
M(UInt64, max_keep_alive_requests, 10000, "The maximum number of requests handled via a single http keepalive connection before the server closes this connection.", 0) \
M(Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0) \
M(Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0) \
M(Seconds, replicated_fetches_http_receive_timeout, 0, "HTTP receive timeout for fetch part requests. Inherited from default profile `http_receive_timeout` if not set explicitly.", 0) \
M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \
M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
M(Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0) \
M(Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0) \
M(UInt64, max_materialized_views_count_for_table, 0, "A limit on the number of materialized views attached to a table.", 0) \
M(UInt32, max_database_replicated_create_table_thread_pool_size, 1, "The number of threads to create tables during replica recovery in DatabaseReplicated. Zero means number of threads equal number of cores.", 0) \
M(Bool, database_replicated_allow_detach_permanently, true, "Allow detaching tables permanently in Replicated databases", 0) \
M(Bool, format_alter_operations_with_parentheses, false, "If enabled, each operation in alter queries will be surrounded with parentheses in formatted queries to make them less ambiguous.", 0) \
M(String, default_replica_path, "/clickhouse/tables/{uuid}/{shard}", "The path to the table in ZooKeeper", 0) \
M(String, default_replica_name, "{replica}", "The replica name in ZooKeeper", 0) \
M(UInt64, disk_connections_soft_limit, 5000, "Connections above this limit have significantly shorter time to live. The limit applies to the disks connections.", 0) \
M(UInt64, disk_connections_warn_limit, 10000, "Warning messages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the disks connections.", 0) \
M(UInt64, disk_connections_store_limit, 30000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the disks connections.", 0) \
M(UInt64, storage_connections_soft_limit, 100, "Connections above this limit have significantly shorter time to live. The limit applies to the storages connections.", 0) \
M(UInt64, storage_connections_warn_limit, 1000, "Warning messages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the storages connections.", 0) \
M(UInt64, storage_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the storages connections.", 0) \
M(UInt64, http_connections_soft_limit, 100, "Connections above this limit have significantly shorter time to live. The limit applies to the http connections which do not belong to any disk or storage.", 0) \
M(UInt64, http_connections_warn_limit, 1000, "Warning messages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the http connections which do not belong to any disk or storage.", 0) \
M(UInt64, http_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the http connections which do not belong to any disk or storage.", 0) \
M(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). Set 0 value to turn off the real clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
M(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \
M(UInt64, max_entries_for_hash_table_stats, 10'000, "How many entries hash table statistics collected during aggregation is allowed to have", 0) \
M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \
M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \
M(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `system.*_log` tables before the startup. It can be helpful if some startup scripts depend on these tables.", 0) \
M(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \
M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \
M(UInt64, memory_worker_period_ms, 0, "Tick period of background memory worker which corrects memory tracker memory usages and cleans up unused pages during higher memory usage. If set to 0, default value will be used depending on the memory usage source", 0) \
M(Bool, disable_insertion_and_mutation, false, "Disable all insert/alter/delete queries. This setting will be enabled if someone needs read-only nodes to prevent insertion and mutation affect reading performance.", 0) \
M(UInt64, keeper_multiread_batch_size, 10'000, "Maximum size of batch for MultiRead request to [Zoo]Keeper that support batching. If set to 0, batching is disabled. Available only in ClickHouse Cloud.", 0) \
M(Bool, use_legacy_mongodb_integration, true, "Use the legacy MongoDB integration implementation. Note: it's highly recommended to set this option to false, since legacy implementation will be removed in the future. Please submit any issues you encounter with the new implementation.", 0) \
/// List of available types supported in ServerSettings object.
/// X-macro: expands to one `M(CLASS_NAME, <Type>)` invocation per listed setting type, in the order written below.
/// In this file it is instantiated with DECLARE_SETTING_TRAIT and DECLARE_SETTING_SUBSCRIPT_OPERATOR.
/// NOTE(review): presumably every type used by an entry of the server settings list must also appear here — confirm.
#define SERVER_SETTINGS_SUPPORTED_TYPES(CLASS_NAME, M) \
M(CLASS_NAME, Bool) \
M(CLASS_NAME, Double) \
M(CLASS_NAME, GroupArrayActionWhenLimitReached) \
M(CLASS_NAME, Float) \
M(CLASS_NAME, Int32) \
M(CLASS_NAME, Seconds) \
M(CLASS_NAME, String) \
M(CLASS_NAME, UInt32) \
M(CLASS_NAME, UInt64)
/// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp
SERVER_SETTINGS_SUPPORTED_TYPES(ServerSettings, DECLARE_SETTING_TRAIT)
DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS)
struct ServerSettings : public BaseSettings<ServerSettingsTraits>
struct ServerSettings
{
void loadSettingsFromConfig(const Poco::Util::AbstractConfiguration & config);
};
/// Classifies whether a server setting can be changed while the server is running.
/// NOTE(review): the per-value meanings below are inferred from the enumerator names only — confirm against the code that consumes this enum.
enum class ChangeableWithoutRestart : uint8_t
{
No, /// presumably: changing the setting requires a restart
IncreaseOnly, /// presumably: only a larger value can be applied at runtime
DecreaseOnly, /// presumably: only a smaller value can be applied at runtime
Yes /// presumably: any change can be applied at runtime
};
ServerSettings();
ServerSettings(const ServerSettings & settings);
~ServerSettings();
void set(std::string_view name, const Field & value);
void loadSettingsFromConfig(const Poco::Util::AbstractConfiguration & config);
SERVER_SETTINGS_SUPPORTED_TYPES(ServerSettings, DECLARE_SETTING_SUBSCRIPT_OPERATOR)
void dumpToSystemServerSettingsColumns(ServerSettingColumnsParams & params) const;
private:
std::unique_ptr<ServerSettingsImpl> impl;
};
}

View File

@ -700,6 +700,9 @@ Move more conditions from WHERE to PREWHERE and do reads from disk and filtering
)", 0) \
M(Bool, move_primary_key_columns_to_end_of_prewhere, true, R"(
Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.
)", 0) \
M(Bool, allow_reorder_prewhere_conditions, true, R"(
When moving conditions from WHERE to PREWHERE, allow reordering them to optimize filtering
)", 0) \
\
M(UInt64, alter_sync, 1, R"(
@ -2700,7 +2703,7 @@ The maximum read speed in bytes per second for particular backup on server. Zero
Log query performance statistics into the query_log, query_thread_log and query_views_log.
)", 0) \
M(Bool, log_query_settings, true, R"(
Log query settings into the query_log.
Log query settings into the query_log and OpenTelemetry span log.
)", 0) \
M(Bool, log_query_threads, false, R"(
Setting up query threads logging.
@ -5154,7 +5157,7 @@ SELECT * FROM test_table
Rewrite count distinct to subquery of group by
)", 0) \
M(Bool, throw_if_no_data_to_insert, true, R"(
Allows or forbids empty INSERTs, enabled by default (throws an error on an empty insert)
Allows or forbids empty INSERTs, enabled by default (throws an error on an empty insert). Only applies to INSERTs using [`clickhouse-client`](/docs/en/interfaces/cli) or using the [gRPC interface](/docs/en/interfaces/grpc).
)", 0) \
M(Bool, compatibility_ignore_auto_increment_in_create_table, false, R"(
Ignore AUTO_INCREMENT keyword in column declaration if true, otherwise return error. It simplifies migration from MySQL
@ -5379,7 +5382,7 @@ Result:
If enabled, server will ignore all DROP table queries with specified probability (for Memory and JOIN engines it will replcase DROP to TRUNCATE). Used for testing purposes
)", 0) \
M(Bool, traverse_shadow_remote_data_paths, false, R"(
Traverse shadow directory when query system.remote_data_paths
Traverse frozen data (shadow directory) in addition to actual table data when query system.remote_data_paths
)", 0) \
M(Bool, geo_distance_returns_float64_on_float64_arguments, true, R"(
If all four arguments to `geoDistance`, `greatCircleDistance`, `greatCircleAngle` functions are Float64, return Float64 and use double precision for internal calculations. In previous ClickHouse versions, the functions always returned Float32.
@ -5803,8 +5806,10 @@ Allow to create database with Engine=MaterializedPostgreSQL(...).
M(Bool, allow_experimental_query_deduplication, false, R"(
Experimental data deduplication for SELECT queries based on part UUIDs
)", 0) \
M(Bool, implicit_select, false, R"(
Allow writing simple SELECT queries without the leading SELECT keyword, which makes it simple for calculator-style usage, e.g. `1 + 2` becomes a valid query.
)", 0)
/** End of experimental features */
// End of COMMON_SETTINGS
// Please add settings related to formats in FormatFactorySettingsDeclaration.h, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS.

View File

@ -78,6 +78,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"show_create_query_identifier_quoting_rule", "when_necessary", "when_necessary", "New setting."},
{"show_create_query_identifier_quoting_style", "Backticks", "Backticks", "New setting."},
{"parallel_replicas_local_plan", false, true, "Use local plan for local replica in a query with parallel replicas"},
{"implicit_select", false, false, "A new setting."},
{"output_format_native_write_json_as_string", false, false, "Add new setting to allow write JSON column as single String column in Native format"},
{"output_format_binary_write_json_as_string", false, false, "Add new setting to write values of JSON type as JSON string in RowBinary output format"},
{"input_format_binary_read_json_as_string", false, false, "Add new setting to read values of JSON type as JSON string in RowBinary input format"},
@ -99,8 +100,13 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"distributed_cache_read_alignment", 0, 0, "A setting for ClickHouse Cloud"},
{"distributed_cache_max_unacked_inflight_packets", 10, 10, "A setting for ClickHouse Cloud"},
{"distributed_cache_data_packet_ack_window", 5, 5, "A setting for ClickHouse Cloud"},
{"input_format_parquet_enable_row_group_prefetch", false, true, "Enable row group prefetching during parquet parsing. Currently, only single-threaded parsing can prefetch."},
{"input_format_orc_dictionary_as_low_cardinality", false, true, "Treat ORC dictionary encoded columns as LowCardinality columns while reading ORC files"},
{"allow_experimental_refreshable_materialized_view", false, true, "Not experimental anymore"},
{"max_parts_to_move", 1000, 1000, "New setting"},
{"allow_reorder_prewhere_conditions", false, true, "New setting"},
{"input_format_parquet_bloom_filter_push_down", false, true, "When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and bloom filter in the Parquet metadata."},
{"date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands", false, false, "Dynamically trim the trailing zeros of datetime64 values to adjust the output scale to (0, 3, 6), corresponding to 'seconds', 'milliseconds', and 'microseconds'."}
}
},
{"24.9",

View File

@ -1,7 +1,6 @@
#include <Core/SettingsEnums.h>
#include <magic_enum.hpp>
#include <Access/Common/SQLSecurityDefs.h>
#include <boost/range/adaptor/map.hpp>
@ -273,4 +272,11 @@ IMPLEMENT_SETTING_ENUM(
{{"user_display", IdentifierQuotingRule::UserDisplay},
{"when_necessary", IdentifierQuotingRule::WhenNecessary},
{"always", IdentifierQuotingRule::Always}})
/// Setting-enum implementation for MergeSelectorAlgorithm: associates the textual values
/// "Simple" and "StochasticSimple" with the corresponding enumerators.
/// NOTE(review): ErrorCodes::BAD_ARGUMENTS is presumably the error code raised for an unrecognized name —
/// confirm against the IMPLEMENT_SETTING_ENUM macro definition.
IMPLEMENT_SETTING_ENUM(
MergeSelectorAlgorithm,
ErrorCodes::BAD_ARGUMENTS,
{{"Simple", MergeSelectorAlgorithm::SIMPLE},
{"StochasticSimple", MergeSelectorAlgorithm::STOCHASTIC_SIMPLE}})
}

View File

@ -14,6 +14,7 @@
#include <Parsers/IdentifierQuotingStyle.h>
#include <QueryPipeline/SizeLimits.h>
#include <Common/ShellCommandSettings.h>
#include <Core/MergeSelectorAlgorithm.h>
namespace DB
@ -363,4 +364,6 @@ enum class GroupArrayActionWhenLimitReached : uint8_t
};
DECLARE_SETTING_ENUM(GroupArrayActionWhenLimitReached)
DECLARE_SETTING_ENUM(MergeSelectorAlgorithm)
}

View File

@ -36,8 +36,8 @@ public:
auto findByValue(const T & value) const
{
const auto it = value_to_name_map.find(value);
if (it == std::end(value_to_name_map))
auto it = value_to_name_map.find(value);
if (it == value_to_name_map.end())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value {} in enum", toString(value));
return it;
@ -58,7 +58,7 @@ public:
bool getNameForValue(const T & value, StringRef & result) const
{
const auto it = value_to_name_map.find(value);
if (it == std::end(value_to_name_map))
if (it == value_to_name_map.end())
return false;
result = it->second;

View File

@ -321,6 +321,8 @@ bool isUInt8(TYPE data_type) { return WhichDataType(data_type).isUInt8(); } \
bool isUInt16(TYPE data_type) { return WhichDataType(data_type).isUInt16(); } \
bool isUInt32(TYPE data_type) { return WhichDataType(data_type).isUInt32(); } \
bool isUInt64(TYPE data_type) { return WhichDataType(data_type).isUInt64(); } \
bool isUInt128(TYPE data_type) { return WhichDataType(data_type).isUInt128(); } \
bool isUInt256(TYPE data_type) { return WhichDataType(data_type).isUInt256(); } \
bool isNativeUInt(TYPE data_type) { return WhichDataType(data_type).isNativeUInt(); } \
bool isUInt(TYPE data_type) { return WhichDataType(data_type).isUInt(); } \
\
@ -328,6 +330,8 @@ bool isInt8(TYPE data_type) { return WhichDataType(data_type).isInt8(); } \
bool isInt16(TYPE data_type) { return WhichDataType(data_type).isInt16(); } \
bool isInt32(TYPE data_type) { return WhichDataType(data_type).isInt32(); } \
bool isInt64(TYPE data_type) { return WhichDataType(data_type).isInt64(); } \
bool isInt128(TYPE data_type) { return WhichDataType(data_type).isInt128(); } \
bool isInt256(TYPE data_type) { return WhichDataType(data_type).isInt256(); } \
bool isNativeInt(TYPE data_type) { return WhichDataType(data_type).isNativeInt(); } \
bool isInt(TYPE data_type) { return WhichDataType(data_type).isInt(); } \
\

Some files were not shown because too many files have changed in this diff Show More