diff --git a/.gitignore b/.gitignore
index a04c60d5ca3..5341f23a94f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -69,6 +69,7 @@ cmake-build-*
*.pyc
__pycache__
*.pytest_cache
+.mypy_cache
test.cpp
CPackConfig.cmake
@@ -161,8 +162,10 @@ tests/queries/0_stateless/test_*
tests/queries/0_stateless/*.binary
tests/queries/0_stateless/*.generated-expect
tests/queries/0_stateless/*.expect.history
+tests/integration/**/_gen
# rust
/rust/**/target
# It is autogenerated from *.in
/rust/**/.cargo/config.toml
+/rust/**/vendor
diff --git a/.gitmodules b/.gitmodules
index 151dc28c55b..30085fb8dd4 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -258,9 +258,6 @@
[submodule "contrib/wyhash"]
path = contrib/wyhash
url = https://github.com/wangyi-fudan/wyhash
-[submodule "contrib/hashidsxx"]
- path = contrib/hashidsxx
- url = https://github.com/schoentoon/hashidsxx
[submodule "contrib/nats-io"]
path = contrib/nats-io
url = https://github.com/ClickHouse/nats.c
@@ -343,3 +340,6 @@
[submodule "contrib/c-ares"]
path = contrib/c-ares
url = https://github.com/c-ares/c-ares.git
+[submodule "contrib/incbin"]
+ path = contrib/incbin
+ url = https://github.com/graphitemaster/incbin.git
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bf6b309ef2c..f401b346726 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,5 @@
### Table of Contents
+**[ClickHouse release v23.7, 2023-07-27](#237)**
**[ClickHouse release v23.6, 2023-06-29](#236)**
**[ClickHouse release v23.5, 2023-06-08](#235)**
**[ClickHouse release v23.4, 2023-04-26](#234)**
@@ -9,6 +10,181 @@
# 2023 Changelog
+### ClickHouse release 23.7, 2023-07-27
+
+#### Backward Incompatible Change
+* Add `NAMED COLLECTION` access type (aliases `USE NAMED COLLECTION`, `NAMED COLLECTION USAGE`). This PR is backward incompatible because this access type is disabled by default (because a parent access type `NAMED COLLECTION ADMIN` is disabled by default as well). Proposed in [#50277](https://github.com/ClickHouse/ClickHouse/issues/50277). To grant access, use `GRANT NAMED COLLECTION ON collection_name TO user` or `GRANT NAMED COLLECTION ON * TO user`; to be able to give these grants, `named_collection_admin` must be enabled in the config (it was previously named `named_collection_control`, which remains as an alias). [#50625](https://github.com/ClickHouse/ClickHouse/pull/50625) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fixed a typo in the `system.parts` column name `last_removal_attemp_time`: it is now named `last_removal_attempt_time`. [#52104](https://github.com/ClickHouse/ClickHouse/pull/52104) ([filimonov](https://github.com/filimonov)).
+* Bump `distributed_ddl_entry_format_version` to 5 by default (enables OpenTelemetry and `initial_query_id` pass-through). After a *downgrade*, existing distributed DDL entries can no longer be processed (note that usually there should be no such unprocessed entries). [#52128](https://github.com/ClickHouse/ClickHouse/pull/52128) ([Azat Khuzhin](https://github.com/azat)).
+* Check projection metadata the same way we check ordinary metadata. This change may prevent the server from starting in case there was a table with an invalid projection. An example is a projection that created positional columns in PK (e.g. `projection p (select * order by 1, 4)` which is not allowed in table PK and can cause a crash during insert/merge). Drop such projections before the update. Fixes [#52353](https://github.com/ClickHouse/ClickHouse/issues/52353). [#52361](https://github.com/ClickHouse/ClickHouse/pull/52361) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* The experimental feature `hashid` is removed due to a bug. The quality of the implementation was questionable from the start, and it never left experimental status. This closes [#52406](https://github.com/ClickHouse/ClickHouse/issues/52406). [#52449](https://github.com/ClickHouse/ClickHouse/pull/52449) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### New Feature
+* Added `Overlay` database engine to combine multiple databases into one. Added `Filesystem` database engine to represent a directory in the filesystem as a set of implicitly available tables with auto-detected formats and structures. A new `S3` database engine allows read-only interaction with S3 storage by representing a prefix as a set of tables. A new `HDFS` database engine allows interacting with HDFS storage in the same way. [#48821](https://github.com/ClickHouse/ClickHouse/pull/48821) ([alekseygolub](https://github.com/alekseygolub)).
+* Add support for external disks in Keeper for storing snapshots and logs. [#50098](https://github.com/ClickHouse/ClickHouse/pull/50098) ([Antonio Andelic](https://github.com/antonio2368)).
+* Add support for multi-directory selection (`{}`) globs. [#50559](https://github.com/ClickHouse/ClickHouse/pull/50559) ([Andrey Zvonov](https://github.com/zvonand)).
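+
+  A minimal sketch of such a glob (the paths are hypothetical; assumes CSV files exist under both directories):
+
+  ```sql
+  -- Reads matching files from data/2023/ and data/2024/ in one query.
+  SELECT count() FROM file('data/{2023,2024}/*.csv', 'CSVWithNames');
+  ```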
+* Support ZooKeeper `reconfig` command for ClickHouse Keeper with incremental reconfiguration which can be enabled via `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)).
+* The Kafka connector can fetch the Avro schema from the schema registry with basic authentication using URL-encoded credentials. [#49664](https://github.com/ClickHouse/ClickHouse/pull/49664) ([Ilya Golshtein](https://github.com/ilejn)).
+* Add function `arrayJaccardIndex` which computes the Jaccard similarity between two arrays. [#50076](https://github.com/ClickHouse/ClickHouse/pull/50076) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)).
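+
+  For example, the result below is the size of the intersection `{2, 3}` divided by the size of the union `{1, 2, 3, 4}`:
+
+  ```sql
+  SELECT arrayJaccardIndex([1, 2, 3], [2, 3, 4]); -- returns 0.5
+  ```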
+* Add a column `is_obsolete` to `system.settings` and similar tables. Closes [#50819](https://github.com/ClickHouse/ClickHouse/issues/50819). [#50826](https://github.com/ClickHouse/ClickHouse/pull/50826) ([flynn](https://github.com/ucasfl)).
+* Implement support for encrypted elements in the configuration file. Added the possibility to use encrypted text in leaf elements of the configuration file. The text is encrypted using encryption codecs from the `<encryption_codecs>` section. [#50986](https://github.com/ClickHouse/ClickHouse/pull/50986) ([Roman Vasin](https://github.com/rvasin)).
+* Grace Hash Join algorithm is now applicable to FULL and RIGHT JOINs. [#49483](https://github.com/ClickHouse/ClickHouse/issues/49483). [#51013](https://github.com/ClickHouse/ClickHouse/pull/51013) ([lgbo](https://github.com/lgbo-ustc)).
+* Add `SYSTEM STOP LISTEN` query for more graceful termination. Closes [#47972](https://github.com/ClickHouse/ClickHouse/issues/47972). [#51016](https://github.com/ClickHouse/ClickHouse/pull/51016) ([Nikolay Degterinsky](https://github.com/evillique)).
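+
+  A usage sketch (the exact set of supported protocol keywords may vary by version):
+
+  ```sql
+  SYSTEM STOP LISTEN HTTP;  -- stop accepting new HTTP connections
+  SYSTEM START LISTEN HTTP; -- resume accepting them
+  ```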
+* Add `input_format_csv_allow_variable_number_of_columns` option. [#51273](https://github.com/ClickHouse/ClickHouse/pull/51273) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Another boring feature: add function `substring_index`, as in Spark or MySQL. [#51472](https://github.com/ClickHouse/ClickHouse/pull/51472) ([李扬](https://github.com/taiyang-li)).
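+
+  A quick illustration, following MySQL's `SUBSTRING_INDEX` semantics (count from the left when the count is positive):
+
+  ```sql
+  SELECT substring_index('www.clickhouse.com', '.', 2); -- 'www.clickhouse'
+  ```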
+* Added a system table `jemalloc_bins` to show stats for jemalloc bins. Example: `SELECT *, size * (nmalloc - ndalloc) AS allocated_bytes FROM system.jemalloc_bins WHERE allocated_bytes > 0 ORDER BY allocated_bytes DESC LIMIT 10`. Enjoy. [#51674](https://github.com/ClickHouse/ClickHouse/pull/51674) ([Alexander Gololobov](https://github.com/davenger)).
+* Add `RowBinaryWithDefaults` format with extra byte before each column as a flag for using the column's default value. Closes [#50854](https://github.com/ClickHouse/ClickHouse/issues/50854). [#51695](https://github.com/ClickHouse/ClickHouse/pull/51695) ([Kruglov Pavel](https://github.com/Avogar)).
+* Added `default_temporary_table_engine` setting. Same as `default_table_engine` but for temporary tables. [#51292](https://github.com/ClickHouse/ClickHouse/issues/51292). [#51708](https://github.com/ClickHouse/ClickHouse/pull/51708) ([velavokr](https://github.com/velavokr)).
+* Added new `initcap` / `initcapUTF8` functions which convert the first letter of each word to upper case and the rest to lower case. [#51735](https://github.com/ClickHouse/ClickHouse/pull/51735) ([Dmitry Kardymon](https://github.com/kardymonds)).
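+
+  For example:
+
+  ```sql
+  SELECT initcap('hELLO wORLD'); -- 'Hello World'
+  ```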
+* Create table now supports `PRIMARY KEY` syntax in column definition. Columns are added to primary index in the same order columns are defined. [#51881](https://github.com/ClickHouse/ClickHouse/pull/51881) ([Ilya Yatsishin](https://github.com/qoega)).
+* Added the possibility to use date and time format specifiers in log and error log file names, either in config files (`log` and `errorlog` tags) or command line arguments (`--log-file` and `--errorlog-file`). [#51945](https://github.com/ClickHouse/ClickHouse/pull/51945) ([Victor Krasnov](https://github.com/sirvickr)).
+* Added Peak Memory Usage statistic to HTTP headers. [#51946](https://github.com/ClickHouse/ClickHouse/pull/51946) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Added new `hasSubsequence` (+`CaseInsensitive` and `UTF8` versions) functions to match subsequences in strings. [#52050](https://github.com/ClickHouse/ClickHouse/pull/52050) ([Dmitry Kardymon](https://github.com/kardymonds)).
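+
+  For example, 'arg' occurs in 'garbage' as a subsequence (g-A-R-ba-G-e), so:
+
+  ```sql
+  SELECT hasSubsequence('garbage', 'arg'); -- 1
+  SELECT hasSubsequence('garbage', 'zzz'); -- 0
+  ```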
+* Add `array_agg` as alias of `groupArray` for PostgreSQL compatibility. Closes [#52100](https://github.com/ClickHouse/ClickHouse/issues/52100). [#52135](https://github.com/ClickHouse/ClickHouse/pull/52135) ([flynn](https://github.com/ucasfl)).
+* Add `any_value` as a compatibility alias for `any` aggregate function. Closes [#52140](https://github.com/ClickHouse/ClickHouse/issues/52140). [#52147](https://github.com/ClickHouse/ClickHouse/pull/52147) ([flynn](https://github.com/ucasfl)).
+* Add aggregate function `array_concat_agg` for compatibility with BigQuery, it's alias of `groupArrayArray`. Closes [#52139](https://github.com/ClickHouse/ClickHouse/issues/52139). [#52149](https://github.com/ClickHouse/ClickHouse/pull/52149) ([flynn](https://github.com/ucasfl)).
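+
+  A minimal sketch (the order of concatenation follows row processing order):
+
+  ```sql
+  SELECT array_concat_agg(arr) AS res
+  FROM (SELECT [1, 2] AS arr UNION ALL SELECT [3]); -- res = [1, 2, 3]
+  ```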
+* Add `OCTET_LENGTH` as an alias to `length`. Closes [#52153](https://github.com/ClickHouse/ClickHouse/issues/52153). [#52176](https://github.com/ClickHouse/ClickHouse/pull/52176) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)).
+* Added `firstLine` function to extract the first line from the multi-line string. This closes [#51172](https://github.com/ClickHouse/ClickHouse/issues/51172). [#52209](https://github.com/ClickHouse/ClickHouse/pull/52209) ([Mikhail Koviazin](https://github.com/mkmkme)).
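+
+  For example:
+
+  ```sql
+  SELECT firstLine('foo\nbar\nbaz'); -- 'foo'
+  ```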
+* Implement KQL-style formatting for the `Interval` data type. This is only needed for compatibility with the `Kusto` query language. [#45671](https://github.com/ClickHouse/ClickHouse/pull/45671) ([ltrk2](https://github.com/ltrk2)).
+* Added query `SYSTEM FLUSH ASYNC INSERT QUEUE` which flushes all pending asynchronous inserts to the destination tables. Added a server-side setting `async_insert_queue_flush_on_shutdown` (`true` by default) which determines whether to flush queue of asynchronous inserts on graceful shutdown. Setting `async_insert_threads` is now a server-side setting. [#49160](https://github.com/ClickHouse/ClickHouse/pull/49160) ([Anton Popov](https://github.com/CurtizJ)).
+* Added the alias `current_database` and a new function `current_schemas` for compatibility with PostgreSQL. [#51076](https://github.com/ClickHouse/ClickHouse/pull/51076) ([Pedro Riera](https://github.com/priera)).
+* Add alias for functions `today` (now available under the `curdate`/`current_date` names) and `now` (`current_timestamp`). [#52106](https://github.com/ClickHouse/ClickHouse/pull/52106) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)).
+* Support `async_deduplication_token` for async insert. [#52136](https://github.com/ClickHouse/ClickHouse/pull/52136) ([Han Fei](https://github.com/hanfei1991)).
+* Add new setting `disable_url_encoding` that allows disabling decoding/encoding of the path in the URI in the URL engine. [#52337](https://github.com/ClickHouse/ClickHouse/pull/52337) ([Kruglov Pavel](https://github.com/Avogar)).
+
+#### Performance Improvement
+* Writing Parquet files is 10x faster; it is multi-threaded now and almost as fast as reading. [#49367](https://github.com/ClickHouse/ClickHouse/pull/49367) ([Michael Kolupaev](https://github.com/al13n321)).
+* Enable automatic selection of the sparse serialization format by default. It improves performance. The format is supported since version 22.1. After this change, downgrading to versions older than 22.1 might not be possible. You can turn off the usage of the sparse serialization format by providing the `ratio_of_defaults_for_sparse_serialization = 1` setting for your MergeTree tables. [#49631](https://github.com/ClickHouse/ClickHouse/pull/49631) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Enable `move_all_conditions_to_prewhere` and `enable_multiple_prewhere_read_steps` settings by default. [#46365](https://github.com/ClickHouse/ClickHouse/pull/46365) ([Alexander Gololobov](https://github.com/davenger)).
+* Improve performance of some queries by tuning the allocator. [#46416](https://github.com/ClickHouse/ClickHouse/pull/46416) ([Azat Khuzhin](https://github.com/azat)).
+* Now we use fixed-size tasks in `MergeTreePrefetchedReadPool`, as in `MergeTreeReadPool`. Also, a connection pool is now used for S3 requests. [#49732](https://github.com/ClickHouse/ClickHouse/pull/49732) ([Nikita Taranov](https://github.com/nickitat)).
+* More pushdown to the right side of join. [#50532](https://github.com/ClickHouse/ClickHouse/pull/50532) ([Nikita Taranov](https://github.com/nickitat)).
+* Improve grace_hash join by reserving hash table's size (resubmit). [#50875](https://github.com/ClickHouse/ClickHouse/pull/50875) ([lgbo](https://github.com/lgbo-ustc)).
+* Waiting on lock in `OpenedFileCache` could be noticeable sometimes. We sharded it into multiple sub-maps (each with its own lock) to avoid contention. [#51341](https://github.com/ClickHouse/ClickHouse/pull/51341) ([Nikita Taranov](https://github.com/nickitat)).
+* Move conditions with primary key columns to the end of PREWHERE chain. The idea is that conditions with PK columns are likely to be used in PK analysis and will not contribute much more to PREWHERE filtering. [#51958](https://github.com/ClickHouse/ClickHouse/pull/51958) ([Alexander Gololobov](https://github.com/davenger)).
+* Speed up `COUNT(DISTINCT)` for String types by inlining SipHash. The performance experiments of *OnTime* on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of *11.6%* to the QPS of the query *Q8* while having no impact on others. [#52036](https://github.com/ClickHouse/ClickHouse/pull/52036) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
+* Enable `allow_vertical_merges_from_compact_to_wide_parts` by default. It will save memory usage during merges. [#52295](https://github.com/ClickHouse/ClickHouse/pull/52295) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1`. This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823). This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173). [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)).
+* Reduce the number of syscalls in `FileCache::loadMetadata` - this speeds up server startup if the filesystem cache is configured. [#52435](https://github.com/ClickHouse/ClickHouse/pull/52435) ([Raúl Marín](https://github.com/Algunenano)).
+* Allow a strict lower boundary for file segment size by downloading remaining data in the background. The minimum size of a file segment (if the actual file size is bigger) is configured by the cache setting `boundary_alignment` (`4Mi` by default). The number of background threads is configured by the cache setting `background_download_threads` (`2` by default). Also, `max_file_segment_size` was increased from `8Mi` to `32Mi` in this PR. [#51000](https://github.com/ClickHouse/ClickHouse/pull/51000) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Decreased default timeouts for S3 from 30 seconds to 3 seconds, and for other HTTP connections from 180 seconds to 30 seconds. [#51171](https://github.com/ClickHouse/ClickHouse/pull/51171) ([Michael Kolupaev](https://github.com/al13n321)).
+* New setting `merge_tree_determine_task_size_by_prewhere_columns` added. If set to `true`, only the sizes of columns from the `PREWHERE` section are considered when determining the reading task size; otherwise, all columns of the query are considered. [#52606](https://github.com/ClickHouse/ClickHouse/pull/52606) ([Nikita Taranov](https://github.com/nickitat)).
+
+#### Improvement
+* Use read_bytes/total_bytes_to_read for progress bar in s3/file/url/... table functions for better progress indication. [#51286](https://github.com/ClickHouse/ClickHouse/pull/51286) ([Kruglov Pavel](https://github.com/Avogar)).
+* Introduce a table setting `wait_for_unique_parts_send_before_shutdown_ms` which specifies the amount of time a replica will wait before closing the interserver handler for replicated sends. Also fixed an inconsistency between the shutdown of tables and interserver handlers: now the server shuts down tables first, and only after that shuts down the interserver handlers. [#51851](https://github.com/ClickHouse/ClickHouse/pull/51851) ([alesapin](https://github.com/alesapin)).
+* Allow SQL standard `FETCH` without `OFFSET`. See https://antonz.org/sql-fetch/. [#51293](https://github.com/ClickHouse/ClickHouse/pull/51293) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
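+
+  For example, the standard form now works without a preceding `OFFSET` clause:
+
+  ```sql
+  SELECT number FROM numbers(10) ORDER BY number FETCH FIRST 3 ROWS ONLY; -- 0, 1, 2
+  ```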
+* Allow filtering HTTP headers for the URL/S3 table functions with the new `http_forbid_headers` section in config. Both exact matching and regexp filters are available. [#51038](https://github.com/ClickHouse/ClickHouse/pull/51038) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Don't show messages about `16 EiB` free space in logs, as they don't make sense. This closes [#49320](https://github.com/ClickHouse/ClickHouse/issues/49320). [#49342](https://github.com/ClickHouse/ClickHouse/pull/49342) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Properly check the limit for the `sleepEachRow` function. Add a setting `function_sleep_max_microseconds_per_block`. This is needed for generic query fuzzer. [#49343](https://github.com/ClickHouse/ClickHouse/pull/49343) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix two issues in `geoHash` functions. [#50066](https://github.com/ClickHouse/ClickHouse/pull/50066) ([李扬](https://github.com/taiyang-li)).
+* Log async insert flush queries into `system.query_log`. [#51160](https://github.com/ClickHouse/ClickHouse/pull/51160) ([Raúl Marín](https://github.com/Algunenano)).
+* Functions `date_diff` and `age` now support millisecond/microsecond unit and work with microsecond precision. [#51291](https://github.com/ClickHouse/ClickHouse/pull/51291) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Improve parsing of path in clickhouse-keeper-client. [#51359](https://github.com/ClickHouse/ClickHouse/pull/51359) ([Azat Khuzhin](https://github.com/azat)).
+* A third-party product depending on ClickHouse (Gluten: a Plugin to Double SparkSQL's Performance) had a bug. This fix avoids heap overflow in that third-party product while reading from HDFS. [#51386](https://github.com/ClickHouse/ClickHouse/pull/51386) ([李扬](https://github.com/taiyang-li)).
+* Add ability to disable native copy for S3 (setting for BACKUP/RESTORE `allow_s3_native_copy`, and `s3_allow_native_copy` for `s3`/`s3_plain` disks). [#51448](https://github.com/ClickHouse/ClickHouse/pull/51448) ([Azat Khuzhin](https://github.com/azat)).
+* Add column `primary_key_size` to `system.parts` table to show compressed primary key size on disk. Closes [#51400](https://github.com/ClickHouse/ClickHouse/issues/51400). [#51496](https://github.com/ClickHouse/ClickHouse/pull/51496) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Allow running `clickhouse-local` without procfs, without home directory existing, and without name resolution plugins from glibc. [#51518](https://github.com/ClickHouse/ClickHouse/pull/51518) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add placeholder `%a` for the full filename in the `rename_files_after_processing` setting. [#51603](https://github.com/ClickHouse/ClickHouse/pull/51603) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add column `modification_time` into `system.parts_columns`. [#51685](https://github.com/ClickHouse/ClickHouse/pull/51685) ([Azat Khuzhin](https://github.com/azat)).
+* Add new setting `input_format_csv_use_default_on_bad_values` to the CSV format that allows inserting a default value when parsing of a single field fails. [#51716](https://github.com/ClickHouse/ClickHouse/pull/51716) ([KevinyhZou](https://github.com/KevinyhZou)).
+* The crash log is now flushed to disk after an unexpected crash. [#51720](https://github.com/ClickHouse/ClickHouse/pull/51720) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
+* Fixed behavior on the dashboard page where errors unrelated to authentication were not shown. Also fixed 'overlapping' chart behavior. [#51744](https://github.com/ClickHouse/ClickHouse/pull/51744) ([Zach Naimon](https://github.com/ArctypeZach)).
+* Allow UUID to UInt128 conversion. [#51765](https://github.com/ClickHouse/ClickHouse/pull/51765) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Added support for Nullable arguments in function `range`. [#51767](https://github.com/ClickHouse/ClickHouse/pull/51767) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Convert conditions like `toYear(x) = c` to the range condition `c1 <= x < c2`. [#51795](https://github.com/ClickHouse/ClickHouse/pull/51795) ([Han Fei](https://github.com/hanfei1991)).
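+
+  An illustration of the rewrite (hypothetical table `t` with a Date column `d` in the primary key):
+
+  ```sql
+  -- Written as:
+  SELECT count() FROM t WHERE toYear(d) = 2023;
+  -- Conceptually optimized into a range that can use the index on d:
+  -- d >= '2023-01-01' AND d <= '2023-12-31'
+  ```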
+* Improve MySQL compatibility of the statement `SHOW INDEX`. [#51796](https://github.com/ClickHouse/ClickHouse/pull/51796) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fixed `use_structure_from_insertion_table_in_table_functions` not working with `MATERIALIZED` and `ALIAS` columns. Closes [#51817](https://github.com/ClickHouse/ClickHouse/issues/51817). Closes [#51019](https://github.com/ClickHouse/ClickHouse/issues/51019). [#51825](https://github.com/ClickHouse/ClickHouse/pull/51825) ([flynn](https://github.com/ucasfl)).
+* Cache dictionary now requests only unique keys from source. Closes [#51762](https://github.com/ClickHouse/ClickHouse/issues/51762). [#51853](https://github.com/ClickHouse/ClickHouse/pull/51853) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fixed the case when settings were not applied for EXPLAIN query when FORMAT was provided. [#51859](https://github.com/ClickHouse/ClickHouse/pull/51859) ([Nikita Taranov](https://github.com/nickitat)).
+* Allow SETTINGS before FORMAT in DESCRIBE TABLE query for compatibility with SELECT query. Closes [#51544](https://github.com/ClickHouse/ClickHouse/issues/51544). [#51899](https://github.com/ClickHouse/ClickHouse/pull/51899) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Var-Int encoded integers (e.g. used by the native protocol) can now use the full 64-bit range. 3rd party clients are advised to update their var-int code accordingly. [#51905](https://github.com/ClickHouse/ClickHouse/pull/51905) ([Robert Schulze](https://github.com/rschu1ze)).
+* Update certificates when they change without the need to manually run SYSTEM RELOAD CONFIG. [#52030](https://github.com/ClickHouse/ClickHouse/pull/52030) ([Mike Kot](https://github.com/myrrc)).
+* Added `allow_create_index_without_type` setting that allows ignoring `ADD INDEX` queries without a specified `TYPE`. Standard SQL queries will just succeed without changing the table schema. [#52056](https://github.com/ClickHouse/ClickHouse/pull/52056) ([Ilya Yatsishin](https://github.com/qoega)).
+* Log messages are written to `system.text_log` from server startup. [#52113](https://github.com/ClickHouse/ClickHouse/pull/52113) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* In cases where the HTTP endpoint has multiple IP addresses and the first of them is unreachable, a timeout exception was thrown. Session creation now handles all resolved endpoints. [#52116](https://github.com/ClickHouse/ClickHouse/pull/52116) ([Aleksei Filatov](https://github.com/aalexfvk)).
+* Avro input format now supports Union even if it contains only a single type. Closes [#52131](https://github.com/ClickHouse/ClickHouse/issues/52131). [#52137](https://github.com/ClickHouse/ClickHouse/pull/52137) ([flynn](https://github.com/ucasfl)).
+* Add setting `optimize_use_implicit_projections` to disable implicit projections (currently only `min_max_count` projection). [#52152](https://github.com/ClickHouse/ClickHouse/pull/52152) ([Amos Bird](https://github.com/amosbird)).
+* It was possible to make the function `hasToken` loop infinitely; this possibility has been removed. This closes [#52156](https://github.com/ClickHouse/ClickHouse/issues/52156). [#52160](https://github.com/ClickHouse/ClickHouse/pull/52160) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Create ZK ancestors optimistically. [#52195](https://github.com/ClickHouse/ClickHouse/pull/52195) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix [#50582](https://github.com/ClickHouse/ClickHouse/issues/50582). Avoid the `Not found column ... in block` error in some cases of reading in-order and constants. [#52259](https://github.com/ClickHouse/ClickHouse/pull/52259) ([Chen768959](https://github.com/Chen768959)).
+* Check whether S2 geo primitives are invalid as early as possible on ClickHouse side. This closes: [#27090](https://github.com/ClickHouse/ClickHouse/issues/27090). [#52260](https://github.com/ClickHouse/ClickHouse/pull/52260) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Add back missing projection QueryAccessInfo when `query_plan_optimize_projection = 1`. This fixes [#50183](https://github.com/ClickHouse/ClickHouse/issues/50183) . This fixes [#50093](https://github.com/ClickHouse/ClickHouse/issues/50093). [#52327](https://github.com/ClickHouse/ClickHouse/pull/52327) ([Amos Bird](https://github.com/amosbird)).
+* When `ZooKeeperRetriesControl` rethrows an error, it's more useful to see its original stack trace, not the one from `ZooKeeperRetriesControl` itself. [#52347](https://github.com/ClickHouse/ClickHouse/pull/52347) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Wait for zero copy replication lock even if some disks don't support it. [#52376](https://github.com/ClickHouse/ClickHouse/pull/52376) ([Raúl Marín](https://github.com/Algunenano)).
+* Now interserver port will be closed only after tables are shut down. [#52498](https://github.com/ClickHouse/ClickHouse/pull/52498) ([alesapin](https://github.com/alesapin)).
+
+#### Experimental Feature
+* Added support for [PRQL](https://prql-lang.org/) as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Allow adding a disk name for custom disks. Previously custom disks would use an internally generated disk name. Now it is possible with `disk = disk_<name>(...)` (e.g. the disk will have the name `name`). [#51552](https://github.com/ClickHouse/ClickHouse/pull/51552) ([Kseniia Sumarokova](https://github.com/kssenii)). As the feature is experimental, this syntax may still change.
+* (experimental MaterializedMySQL) Fixed crash when `mysqlxx::Pool::Entry` is used after it was disconnected. [#52063](https://github.com/ClickHouse/ClickHouse/pull/52063) ([Val Doroshchuk](https://github.com/valbok)).
+* (experimental MaterializedMySQL) `CREATE TABLE ... AS SELECT` .. is now supported in MaterializedMySQL. [#52067](https://github.com/ClickHouse/ClickHouse/pull/52067) ([Val Doroshchuk](https://github.com/valbok)).
+* (experimental MaterializedMySQL) Introduced automatic conversion of text types to utf8 for MaterializedMySQL. [#52084](https://github.com/ClickHouse/ClickHouse/pull/52084) ([Val Doroshchuk](https://github.com/valbok)).
+* (experimental MaterializedMySQL) Now unquoted UTF-8 strings are supported in DDL for MaterializedMySQL. [#52318](https://github.com/ClickHouse/ClickHouse/pull/52318) ([Val Doroshchuk](https://github.com/valbok)).
+* (experimental MaterializedMySQL) Now double quoted comments are supported in MaterializedMySQL. [#52355](https://github.com/ClickHouse/ClickHouse/pull/52355) ([Val Doroshchuk](https://github.com/valbok)).
+* Upgraded Intel QPL from v1.1.0 to v1.2.0. Upgraded Intel accel-config from v3.5 to v4.0. Fixed an issue where a Device IOTLB miss had a big performance impact for IAA accelerators. [#52180](https://github.com/ClickHouse/ClickHouse/pull/52180) ([jasperzhu](https://github.com/jinjunzh)).
+* The `session_timezone` setting (new in version 23.6) is demoted to experimental. [#52445](https://github.com/ClickHouse/ClickHouse/pull/52445) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### Build/Testing/Packaging Improvement
+* Add experimental ClickHouse builds for Linux RISC-V 64 to CI. [#31398](https://github.com/ClickHouse/ClickHouse/pull/31398) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add integration test check with the enabled Analyzer. [#50926](https://github.com/ClickHouse/ClickHouse/pull/50926) [#52210](https://github.com/ClickHouse/ClickHouse/pull/52210) ([Dmitry Novik](https://github.com/novikd)).
+* Reproducible builds for Rust. [#52395](https://github.com/ClickHouse/ClickHouse/pull/52395) ([Azat Khuzhin](https://github.com/azat)).
+* Update Cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)).
+* Make the function `CHColumnToArrowColumn::fillArrowArrayWithArrayColumnData` work with nullable arrays, which are not possible in ClickHouse, but needed for Gluten. [#52112](https://github.com/ClickHouse/ClickHouse/pull/52112) ([李扬](https://github.com/taiyang-li)).
+* We've updated the CCTZ library to master, but there are no user-visible changes. [#52124](https://github.com/ClickHouse/ClickHouse/pull/52124) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The `system.licenses` table now includes the hard-forked library Poco. This closes [#52066](https://github.com/ClickHouse/ClickHouse/issues/52066). [#52127](https://github.com/ClickHouse/ClickHouse/pull/52127) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Check that there are no cases of bad punctuation: whitespace before a comma like `Hello ,world` instead of `Hello, world`. [#52549](https://github.com/ClickHouse/ClickHouse/pull/52549) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Fix MaterializedPostgreSQL syncTables [#49698](https://github.com/ClickHouse/ClickHouse/pull/49698) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix projection with optimize_aggregators_of_group_by_keys [#49709](https://github.com/ClickHouse/ClickHouse/pull/49709) ([Amos Bird](https://github.com/amosbird)).
+* Fix optimize_skip_unused_shards with JOINs [#51037](https://github.com/ClickHouse/ClickHouse/pull/51037) ([Azat Khuzhin](https://github.com/azat)).
+* Fix formatDateTime() with fractional negative datetime64 [#51290](https://github.com/ClickHouse/ClickHouse/pull/51290) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Functions `hasToken*` were totally wrong. Add a test for [#43358](https://github.com/ClickHouse/ClickHouse/issues/43358) [#51378](https://github.com/ClickHouse/ClickHouse/pull/51378) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix SIGSEGV for clusters with zero weight across all shards (fixes INSERT INTO FUNCTION clusterAllReplicas()) [#51545](https://github.com/ClickHouse/ClickHouse/pull/51545) ([Azat Khuzhin](https://github.com/azat)).
+* Fix timeout for hedged requests [#51582](https://github.com/ClickHouse/ClickHouse/pull/51582) ([Azat Khuzhin](https://github.com/azat)).
+* Fix logical error in ANTI join with NULL [#51601](https://github.com/ClickHouse/ClickHouse/pull/51601) ([vdimir](https://github.com/vdimir)).
+* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)).
+* Do not apply PredicateExpressionsOptimizer for ASOF/ANTI join [#51633](https://github.com/ClickHouse/ClickHouse/pull/51633) ([vdimir](https://github.com/vdimir)).
+* Fix async insert with deduplication for ReplicatedMergeTree using merging algorithms [#51676](https://github.com/ClickHouse/ClickHouse/pull/51676) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix segfault when creating an invalid EmbeddedRocksDB table [#51847](https://github.com/ClickHouse/ClickHouse/pull/51847) ([Duc Canh Le](https://github.com/canhld94)).
+* Fix inserts into MongoDB tables [#51876](https://github.com/ClickHouse/ClickHouse/pull/51876) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix deadlock on DatabaseCatalog shutdown [#51908](https://github.com/ClickHouse/ClickHouse/pull/51908) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix error in subquery operators [#51922](https://github.com/ClickHouse/ClickHouse/pull/51922) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix async connect to hosts with multiple ips [#51934](https://github.com/ClickHouse/ClickHouse/pull/51934) ([Kruglov Pavel](https://github.com/Avogar)).
+* Do not remove inputs after ActionsDAG::merge [#51947](https://github.com/ClickHouse/ClickHouse/pull/51947) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)).
+* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Small fix for toDateTime64() for dates after 2283-12-31 [#52130](https://github.com/ClickHouse/ClickHouse/pull/52130) ([Andrey Zvonov](https://github.com/zvonand)).
+* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix incorrect projection analysis when aggregation expression contains monotonic functions [#52151](https://github.com/ClickHouse/ClickHouse/pull/52151) ([Amos Bird](https://github.com/amosbird)).
+* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Disable direct join for range dictionary [#52187](https://github.com/ClickHouse/ClickHouse/pull/52187) ([Duc Canh Le](https://github.com/canhld94)).
+* Fix sticky mutations test (and extremely rare race condition) [#52197](https://github.com/ClickHouse/ClickHouse/pull/52197) ([alesapin](https://github.com/alesapin)).
+* Fix race in Web disk [#52211](https://github.com/ClickHouse/ClickHouse/pull/52211) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix data race in Connection::setAsyncCallback on unknown packet from server [#52219](https://github.com/ClickHouse/ClickHouse/pull/52219) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix temp data deletion on startup, add test [#52275](https://github.com/ClickHouse/ClickHouse/pull/52275) ([vdimir](https://github.com/vdimir)).
+* Don't use minmax_count projections when counting nullable columns [#52297](https://github.com/ClickHouse/ClickHouse/pull/52297) ([Amos Bird](https://github.com/amosbird)).
+* MergeTree/ReplicatedMergeTree should use server timezone for log entries [#52325](https://github.com/ClickHouse/ClickHouse/pull/52325) ([Azat Khuzhin](https://github.com/azat)).
+* Fix parameterized view with cte and multiple usage [#52328](https://github.com/ClickHouse/ClickHouse/pull/52328) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix `apply_snapshot` in Keeper [#52358](https://github.com/ClickHouse/ClickHouse/pull/52358) ([Antonio Andelic](https://github.com/antonio2368)).
+* Update build-osx.md [#52377](https://github.com/ClickHouse/ClickHouse/pull/52377) ([AlexBykovski](https://github.com/AlexBykovski)).
+* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)).
+* Fix normal projection with merge table [#52432](https://github.com/ClickHouse/ClickHouse/pull/52432) ([Amos Bird](https://github.com/amosbird)).
+* Fix possible double-free in Aggregator [#52439](https://github.com/ClickHouse/ClickHouse/pull/52439) ([Nikita Taranov](https://github.com/nickitat)).
+* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)).
+* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Check recursion depth in OptimizedRegularExpression [#52451](https://github.com/ClickHouse/ClickHouse/pull/52451) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix data-race DatabaseReplicated::startupTables()/canExecuteReplicatedMetadataAlter() [#52490](https://github.com/ClickHouse/ClickHouse/pull/52490) ([Azat Khuzhin](https://github.com/azat)).
+* Fix abort in function `transform` [#52513](https://github.com/ClickHouse/ClickHouse/pull/52513) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix lightweight delete after drop of projection [#52517](https://github.com/ClickHouse/ClickHouse/pull/52517) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)).
+
+
### ClickHouse release 23.6, 2023-06-29
#### Backward Incompatible Change
diff --git a/SECURITY.md b/SECURITY.md
index 4ba5f13d09c..d61533b44b9 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with security updates:
| Version | Supported |
|:-|:-|
+| 23.7 | ✔️ |
| 23.6 | ✔️ |
| 23.5 | ✔️ |
-| 23.4 | ✔️ |
+| 23.4 | ❌ |
| 23.3 | ✔️ |
| 23.2 | ❌ |
| 23.1 | ❌ |
diff --git a/base/base/find_symbols.h b/base/base/find_symbols.h
index 83232669c04..fda94edaa88 100644
--- a/base/base/find_symbols.h
+++ b/base/base/find_symbols.h
@@ -448,7 +448,7 @@ inline char * find_last_not_symbols_or_null(char * begin, char * end)
/// See https://github.com/boostorg/algorithm/issues/63
/// And https://bugs.llvm.org/show_bug.cgi?id=41141
template <char... symbols, typename To>
-inline void splitInto(To & to, const std::string & what, bool token_compress = false)
+inline To & splitInto(To & to, std::string_view what, bool token_compress = false)
{
const char * pos = what.data();
const char * end = pos + what.size();
@@ -464,4 +464,6 @@ inline void splitInto(To & to, const std::string & what, bool token_compress = false)
else
pos = delimiter_or_end;
}
+
+ return to;
}
diff --git a/base/base/move_extend.h b/base/base/move_extend.h
new file mode 100644
index 00000000000..6e5b16e037c
--- /dev/null
+++ b/base/base/move_extend.h
@@ -0,0 +1,9 @@
+#pragma once
+
+/// Extend @p to by moving elements from @p from to the end of @p to.
+/// @return iterator in @p to pointing to the first of the moved elements.
+template <typename To, typename From>
+typename To::iterator moveExtend(To & to, From && from)
+{
+ return to.insert(to.end(), std::make_move_iterator(from.begin()), std::make_move_iterator(from.end()));
+}
diff --git a/base/poco/Foundation/include/Poco/Message.h b/base/poco/Foundation/include/Poco/Message.h
index e8f04888ab4..282c7fb5fd1 100644
--- a/base/poco/Foundation/include/Poco/Message.h
+++ b/base/poco/Foundation/include/Poco/Message.h
@@ -67,6 +67,8 @@ public:
Message(
const std::string & source, const std::string & text, Priority prio, const char * file, int line, std::string_view fmt_str = {});
+ Message(
+ std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str);
/// Creates a Message with the given source, text, priority,
/// source file path and line.
///
diff --git a/base/poco/Foundation/include/Poco/URI.h b/base/poco/Foundation/include/Poco/URI.h
index 1880af4ccd2..eba8109253d 100644
--- a/base/poco/Foundation/include/Poco/URI.h
+++ b/base/poco/Foundation/include/Poco/URI.h
@@ -57,7 +57,7 @@ public:
URI();
/// Creates an empty URI.
- explicit URI(const std::string & uri);
+ explicit URI(const std::string & uri, bool disable_url_encoding = false);
/// Parses an URI from the given string. Throws a
/// SyntaxException if the uri is not valid.
@@ -350,6 +350,10 @@ protected:
static const std::string ILLEGAL;
private:
+ void encodePath(std::string & encodedStr) const;
+ void decodePath(const std::string & encodedStr);
+
+
std::string _scheme;
std::string _userInfo;
std::string _host;
@@ -357,6 +361,8 @@ private:
std::string _path;
std::string _query;
std::string _fragment;
+
+ bool _disable_url_encoding = false;
};
diff --git a/base/poco/Foundation/src/Message.cpp b/base/poco/Foundation/src/Message.cpp
index 663c96e47a2..54118cc0fc5 100644
--- a/base/poco/Foundation/src/Message.cpp
+++ b/base/poco/Foundation/src/Message.cpp
@@ -60,6 +60,19 @@ Message::Message(const std::string& source, const std::string& text, Priority prio, const char * file, int line, std::string_view fmt_str):
}
+Message::Message(std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str):
+ _source(std::move(source)),
+ _text(std::move(text)),
+ _prio(prio),
+ _tid(0),
+ _file(file),
+ _line(line),
+ _pMap(0),
+ _fmt_str(fmt_str)
+{
+ init();
+}
+
Message::Message(const Message& msg):
_source(msg._source),
_text(msg._text),
diff --git a/base/poco/Foundation/src/URI.cpp b/base/poco/Foundation/src/URI.cpp
index 5543e02b279..3354c69d188 100644
--- a/base/poco/Foundation/src/URI.cpp
+++ b/base/poco/Foundation/src/URI.cpp
@@ -36,8 +36,8 @@ URI::URI():
}
-URI::URI(const std::string& uri):
- _port(0)
+URI::URI(const std::string& uri, bool disable_url_encoding):
+    _port(0), _disable_url_encoding(disable_url_encoding)
{
parse(uri);
}
@@ -107,7 +107,8 @@ URI::URI(const URI& uri):
_port(uri._port),
_path(uri._path),
_query(uri._query),
- _fragment(uri._fragment)
+ _fragment(uri._fragment),
+ _disable_url_encoding(uri._disable_url_encoding)
{
}
@@ -119,7 +120,8 @@ URI::URI(const URI& baseURI, const std::string& relativeURI):
_port(baseURI._port),
_path(baseURI._path),
_query(baseURI._query),
- _fragment(baseURI._fragment)
+ _fragment(baseURI._fragment),
+ _disable_url_encoding(baseURI._disable_url_encoding)
{
resolve(relativeURI);
}
@@ -151,6 +153,7 @@ URI& URI::operator = (const URI& uri)
_path = uri._path;
_query = uri._query;
_fragment = uri._fragment;
+ _disable_url_encoding = uri._disable_url_encoding;
}
return *this;
}
@@ -181,6 +184,7 @@ void URI::swap(URI& uri)
std::swap(_path, uri._path);
std::swap(_query, uri._query);
std::swap(_fragment, uri._fragment);
+ std::swap(_disable_url_encoding, uri._disable_url_encoding);
}
@@ -201,7 +205,7 @@ std::string URI::toString() const
std::string uri;
if (isRelative())
{
- encode(_path, RESERVED_PATH, uri);
+ encodePath(uri);
}
else
{
@@ -217,7 +221,7 @@ std::string URI::toString() const
{
if (!auth.empty() && _path[0] != '/')
uri += '/';
- encode(_path, RESERVED_PATH, uri);
+ encodePath(uri);
}
else if (!_query.empty() || !_fragment.empty())
{
@@ -313,7 +317,7 @@ void URI::setAuthority(const std::string& authority)
void URI::setPath(const std::string& path)
{
_path.clear();
- decode(path, _path);
+ decodePath(path);
}
@@ -418,7 +422,7 @@ void URI::setPathEtc(const std::string& pathEtc)
std::string URI::getPathEtc() const
{
std::string pathEtc;
- encode(_path, RESERVED_PATH, pathEtc);
+ encodePath(pathEtc);
if (!_query.empty())
{
pathEtc += '?';
@@ -436,7 +440,7 @@ std::string URI::getPathEtc() const
std::string URI::getPathAndQuery() const
{
std::string pathAndQuery;
- encode(_path, RESERVED_PATH, pathAndQuery);
+ encodePath(pathAndQuery);
if (!_query.empty())
{
pathAndQuery += '?';
@@ -681,6 +685,21 @@ void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpa
}
}
+void URI::encodePath(std::string & encodedStr) const
+{
+ if (_disable_url_encoding)
+ encodedStr = _path;
+ else
+ encode(_path, RESERVED_PATH, encodedStr);
+}
+
+void URI::decodePath(const std::string & encodedStr)
+{
+ if (_disable_url_encoding)
+ _path = encodedStr;
+ else
+ decode(encodedStr, _path);
+}
bool URI::isWellKnownPort() const
{
@@ -820,7 +839,7 @@ void URI::parsePath(std::string::const_iterator& it, const std::string::const_iterator& end)
{
std::string path;
while (it != end && *it != '?' && *it != '#') path += *it++;
- decode(path, _path);
+ decodePath(path);
}
diff --git a/base/poco/Net/include/Poco/Net/HTTPClientSession.h b/base/poco/Net/include/Poco/Net/HTTPClientSession.h
index d495d662f75..167a06eb7ff 100644
--- a/base/poco/Net/include/Poco/Net/HTTPClientSession.h
+++ b/base/poco/Net/include/Poco/Net/HTTPClientSession.h
@@ -306,7 +306,7 @@ namespace Net
DEFAULT_KEEP_ALIVE_TIMEOUT = 8
};
- void reconnect();
+ virtual void reconnect();
/// Connects the underlying socket to the HTTP server.
int write(const char * buffer, std::streamsize length);
diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt
index 821b7b46855..9919d018046 100644
--- a/cmake/autogenerated_versions.txt
+++ b/cmake/autogenerated_versions.txt
@@ -2,11 +2,11 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
-SET(VERSION_REVISION 54476)
+SET(VERSION_REVISION 54477)
SET(VERSION_MAJOR 23)
-SET(VERSION_MINOR 7)
+SET(VERSION_MINOR 8)
SET(VERSION_PATCH 1)
-SET(VERSION_GITHASH d1c7e13d08868cb04d3562dcced704dd577cb1df)
-SET(VERSION_DESCRIBE v23.7.1.1-testing)
-SET(VERSION_STRING 23.7.1.1)
+SET(VERSION_GITHASH a70127baecc451f1f7073bad7b6198f6703441d8)
+SET(VERSION_DESCRIBE v23.8.1.1-testing)
+SET(VERSION_STRING 23.8.1.1)
# end of autochange
diff --git a/cmake/embed_binary.cmake b/cmake/embed_binary.cmake
deleted file mode 100644
index e5428c24939..00000000000
--- a/cmake/embed_binary.cmake
+++ /dev/null
@@ -1,58 +0,0 @@
-# Embed a set of resource files into a resulting object file.
-#
-# Signature: `clickhouse_embed_binaries(TARGET <target> RESOURCE_DIR <dir> RESOURCES <resource> ...)`
-#
-# This will generate a static library target named `<target>`, which contains the contents of
-# each `<resource>` file. The files should be located in `<dir>`. `<dir>` defaults to
-# ${CMAKE_CURRENT_SOURCE_DIR}, and the resources may not be empty.
-#
-# Each resource will result in three symbols in the final archive, based on the name `<name>`.
-# These are:
-#   1. `_binary_<name>_start`: Points to the start of the binary data from `<resource>`.
-#   2. `_binary_<name>_end`: Points to the end of the binary data from `<resource>`.
-#   3. `_binary_<name>_size`: Points to the size of the binary data from `<resource>`.
-#
-# `<name>` is a normalized name derived from `<resource>`, by replacing the characters "./-" with
-macro(clickhouse_embed_binaries)
- set(one_value_args TARGET RESOURCE_DIR)
- set(resources RESOURCES)
- cmake_parse_arguments(EMBED "" "${one_value_args}" ${resources} ${ARGN})
-
- if (NOT DEFINED EMBED_TARGET)
- message(FATAL_ERROR "A target name must be provided for embedding binary resources into")
- endif()
-
- if (NOT DEFINED EMBED_RESOURCE_DIR)
- set(EMBED_RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
- endif()
-
- list(LENGTH EMBED_RESOURCES N_RESOURCES)
- if (N_RESOURCES LESS 1)
- message(FATAL_ERROR "The list of binary resources to embed may not be empty")
- endif()
-
- add_library("${EMBED_TARGET}" STATIC)
- set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C)
-
- set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in")
-
- foreach(RESOURCE_FILE ${EMBED_RESOURCES})
- set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S")
- set(BINARY_FILE_NAME "${RESOURCE_FILE}")
-
- # Normalize the name of the resource.
- string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex
- string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}")
-
- # Generate the configured assembly file in the output directory.
- configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY)
-
- # Set the include directory for relative paths specified for `.incbin` directive.
- set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY INCLUDE_DIRECTORIES "${EMBED_RESOURCE_DIR}")
-
- target_sources("${EMBED_TARGET}" PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}")
- set_target_properties("${EMBED_TARGET}" PROPERTIES OBJECT_DEPENDS "${RESOURCE_FILE}")
- endforeach()
-endmacro()
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 2af468970f1..fdf6e60e58f 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -164,14 +164,13 @@ add_contrib (libpq-cmake libpq)
add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float)
add_contrib (datasketches-cpp-cmake datasketches-cpp)
-add_contrib (hashidsxx-cmake hashidsxx)
+add_contrib (incbin-cmake incbin)
option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
if (ENABLE_NLP)
add_contrib (libstemmer-c-cmake libstemmer_c)
add_contrib (wordnet-blast-cmake wordnet-blast)
add_contrib (lemmagen-c-cmake lemmagen-c)
- add_contrib (nlp-data-cmake nlp-data)
add_contrib (cld2-cmake cld2)
endif()
diff --git a/contrib/NuRaft b/contrib/NuRaft
index 491eaf592d9..eb1572129c7 160000
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@@ -1 +1 @@
-Subproject commit 491eaf592d950e0e37accbe8b3f217e068c9fecf
+Subproject commit eb1572129c71beb2156dcdaadc3fb136954aed96
diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt
index 46b86cb4ddb..e3ea0381595 100644
--- a/contrib/arrow-cmake/CMakeLists.txt
+++ b/contrib/arrow-cmake/CMakeLists.txt
@@ -502,9 +502,10 @@ target_include_directories(_parquet SYSTEM BEFORE
"${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src"
"${CMAKE_CURRENT_SOURCE_DIR}/cpp/src")
target_link_libraries(_parquet
- PUBLIC _arrow
- PRIVATE
+ PUBLIC
+ _arrow
ch_contrib::thrift
+ PRIVATE
boost::headers_only
boost::regex
OpenSSL::Crypto OpenSSL::SSL)
diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt
index 10070fbd949..7161f743de1 100644
--- a/contrib/cctz-cmake/CMakeLists.txt
+++ b/contrib/cctz-cmake/CMakeLists.txt
@@ -1,4 +1,3 @@
-include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz")
set (SRCS
@@ -23,12 +22,10 @@ if (OS_FREEBSD)
endif ()
# Related to time_zones table:
-# StorageSystemTimeZones.generated.cpp is autogenerated each time during a build
-# data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX
-# as the library that's built using embedded tzdata is also specific to OS_LINUX
-set(SYSTEM_STORAGE_TZ_FILE "${PROJECT_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
+# TimeZones.generated.cpp is autogenerated each time during a build
+set(TIMEZONES_FILE "${CMAKE_CURRENT_BINARY_DIR}/TimeZones.generated.cpp")
# remove existing copies so that it's generated fresh on each build.
-file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})
+file(REMOVE ${TIMEZONES_FILE})
# get the list of timezones from tzdata shipped with cctz
set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo")
@@ -36,28 +33,44 @@ file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION)
set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}")
message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}")
-set(TIMEZONE_RESOURCE_FILES)
-
# each file in that dir (except *.tab and localtime) stores the info about a timezone
execute_process(COMMAND
bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | LC_ALL=C sort | paste -sd ';' -"
OUTPUT_STRIP_TRAILING_WHITESPACE
OUTPUT_VARIABLE TIMEZONES)
-file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
-file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" )
+file(APPEND ${TIMEZONES_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
+file(APPEND ${TIMEZONES_FILE} "#include <incbin.h>\n")
+
+set (COUNTER 1)
+foreach(TIMEZONE ${TIMEZONES})
+ file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TZDIR}/${TIMEZONE}\");\n")
+ MATH(EXPR COUNTER "${COUNTER}+1")
+endforeach(TIMEZONE)
+
+file(APPEND ${TIMEZONES_FILE} "const char * auto_time_zones[] {\n" )
foreach(TIMEZONE ${TIMEZONES})
- file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " \"${TIMEZONE}\",\n")
- list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}")
+ file(APPEND ${TIMEZONES_FILE} " \"${TIMEZONE}\",\n")
+ MATH(EXPR COUNTER "${COUNTER}+1")
endforeach(TIMEZONE)
-file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " nullptr};\n")
-clickhouse_embed_binaries(
- TARGET tzdata
- RESOURCE_DIR "${TZDIR}"
- RESOURCES ${TIMEZONE_RESOURCE_FILES}
-)
-add_dependencies(_cctz tzdata)
-target_link_libraries(_cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:tzdata> -Wl,${NO_WHOLE_ARCHIVE}")
+
+file(APPEND ${TIMEZONES_FILE} " nullptr\n};\n\n")
+
+file(APPEND ${TIMEZONES_FILE} "#include <string_view>\n\n")
+file(APPEND ${TIMEZONES_FILE} "std::string_view getTimeZone(const char * name)\n{\n" )
+
+set (COUNTER 1)
+foreach(TIMEZONE ${TIMEZONES})
+    file(APPEND ${TIMEZONES_FILE} "    if (std::string_view(\"${TIMEZONE}\") == name) return { reinterpret_cast<const char *>(gresource_timezone${COUNTER}Data), gresource_timezone${COUNTER}Size };\n")
+ MATH(EXPR COUNTER "${COUNTER}+1")
+endforeach(TIMEZONE)
+
+file(APPEND ${TIMEZONES_FILE} " return {};\n")
+file(APPEND ${TIMEZONES_FILE} "}\n")
+
+add_library (tzdata ${TIMEZONES_FILE})
+target_link_libraries(tzdata ch_contrib::incbin)
+target_link_libraries(_cctz tzdata)
add_library(ch_contrib::cctz ALIAS _cctz)
diff --git a/contrib/hashidsxx b/contrib/hashidsxx
deleted file mode 160000
index 783f6911ccf..00000000000
--- a/contrib/hashidsxx
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 783f6911ccfdaca83e3cfac084c4aad888a80cee
diff --git a/contrib/hashidsxx-cmake/CMakeLists.txt b/contrib/hashidsxx-cmake/CMakeLists.txt
deleted file mode 100644
index 17f3888bd94..00000000000
--- a/contrib/hashidsxx-cmake/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hashidsxx")
-
-set (SRCS
- "${LIBRARY_DIR}/hashids.cpp"
-)
-
-set (HDRS
- "${LIBRARY_DIR}/hashids.h"
-)
-
-add_library(_hashidsxx ${SRCS} ${HDRS})
-target_include_directories(_hashidsxx SYSTEM PUBLIC "${LIBRARY_DIR}")
-
-add_library(ch_contrib::hashidsxx ALIAS _hashidsxx)
diff --git a/contrib/idxd-config b/contrib/idxd-config
index f6605c41a73..a836ce0e420 160000
--- a/contrib/idxd-config
+++ b/contrib/idxd-config
@@ -1 +1 @@
-Subproject commit f6605c41a735e3fdfef2d2d18655a33af6490b99
+Subproject commit a836ce0e42052a69bffbbc14239ab4097f3b77f1
diff --git a/contrib/incbin b/contrib/incbin
new file mode 160000
index 00000000000..6e576cae5ab
--- /dev/null
+++ b/contrib/incbin
@@ -0,0 +1 @@
+Subproject commit 6e576cae5ab5810f25e2631f2e0b80cbe7dc8cbf
diff --git a/contrib/incbin-cmake/CMakeLists.txt b/contrib/incbin-cmake/CMakeLists.txt
new file mode 100644
index 00000000000..5778cf83c22
--- /dev/null
+++ b/contrib/incbin-cmake/CMakeLists.txt
@@ -0,0 +1,8 @@
+set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin")
+add_library(_incbin INTERFACE)
+target_include_directories(_incbin SYSTEM INTERFACE ${LIBRARY_DIR})
+add_library(ch_contrib::incbin ALIAS _incbin)
+
+# Warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled.
+# Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning."
+target_compile_definitions(_incbin INTERFACE INCBIN_SILENCE_BITCODE_WARNING)
diff --git a/contrib/nlp-data-cmake/CMakeLists.txt b/contrib/nlp-data-cmake/CMakeLists.txt
deleted file mode 100644
index 5380269c479..00000000000
--- a/contrib/nlp-data-cmake/CMakeLists.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
-
-set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nlp-data")
-
-add_library (_nlp_data INTERFACE)
-
-clickhouse_embed_binaries(
- TARGET nlp_dictionaries
- RESOURCE_DIR "${LIBRARY_DIR}"
- RESOURCES charset.zst tonality_ru.zst programming.zst
-)
-
-add_dependencies(_nlp_data nlp_dictionaries)
-target_link_libraries(_nlp_data INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:nlp_dictionaries> -Wl,${NO_WHOLE_ARCHIVE}")
-add_library(ch_contrib::nlp_data ALIAS _nlp_data)
diff --git a/contrib/qpl b/contrib/qpl
index 3f8f5cea277..faaf1935045 160000
--- a/contrib/qpl
+++ b/contrib/qpl
@@ -1 +1 @@
-Subproject commit 3f8f5cea27739f5261e8fd577dc233ffe88bf679
+Subproject commit faaf19350459c076e66bb5df11743c3fade59b73
diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile
index 8a6324aef88..c9800e4e66d 100644
--- a/docker/keeper/Dockerfile
+++ b/docker/keeper/Dockerfile
@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
esac
ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
-ARG VERSION="23.6.2.18"
+ARG VERSION="23.7.1.2470"
ARG PACKAGES="clickhouse-keeper"
# user/group precreated explicitly with fixed uid/gid on purpose.
diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile
index 897bcd24d04..99e748c41d4 100644
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@@ -58,6 +58,33 @@ RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \
rustup target add aarch64-apple-darwin && \
rustup target add powerpc64le-unknown-linux-gnu
+# Create vendor cache for cargo.
+#
+# Note that the root config.toml is used, so you will not be able to
+# install any crates other than those that have been vendored (if there is
+# a "replace-with" for some source, cargo will not look at any other
+# remotes).
+#
+# Notes for the command itself:
+# - --chown is required to preserve the ownership
+# - unstable-options is needed for -C
+# - chmod is required to fix the permissions, since builds run as a different user
+# - a copy of the Cargo.lock is required for proper dependency versions
+# - cargo vendor --sync is required to work around the bug [1].
+#
+# [1]: https://github.com/rust-lang/wg-cargo-std-aware/issues/23
+COPY --chown=root:root /rust /rust/packages
+RUN cargo -Z unstable-options -C /rust/packages vendor > $CARGO_HOME/config.toml && \
+ cp "$(rustc --print=sysroot)"/lib/rustlib/src/rust/Cargo.lock "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/ && \
+ cargo -Z unstable-options -C /rust/packages vendor --sync "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/Cargo.toml && \
+ rm "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/Cargo.lock && \
+ sed -i "s#\"vendor\"#\"/rust/vendor\"#" $CARGO_HOME/config.toml && \
+ cat $CARGO_HOME/config.toml && \
+ mv /rust/packages/vendor /rust/vendor && \
+ chmod -R o=r+X /rust/vendor && \
+ ls -R -l /rust/packages && \
+ rm -r /rust/packages
+
# NOTE: Seems like gcc-11 is too new for ubuntu20 repository
# A cross-linker for RISC-V 64 (we need it, because LLVM's LLD does not work):
RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
diff --git a/docker/packager/binary/rust b/docker/packager/binary/rust
new file mode 120000
index 00000000000..742dc49e9ac
--- /dev/null
+++ b/docker/packager/binary/rust
@@ -0,0 +1 @@
+../../../rust
\ No newline at end of file
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine
index 7f453627601..f558338b23c 100644
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.6.2.18"
+ARG VERSION="23.7.1.2470"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu
index 1fa7b83ae16..156de034a7f 100644
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="23.6.2.18"
+ARG VERSION="23.7.1.2470"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image
diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index 828c73e6781..60e6199aaa4 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -141,13 +141,13 @@ function clone_submodules
contrib/jemalloc
contrib/replxx
contrib/wyhash
- contrib/hashidsxx
contrib/c-ares
contrib/morton-nd
contrib/xxHash
contrib/simdjson
contrib/liburing
contrib/libfiu
+ contrib/incbin
)
git submodule sync
diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile
index 0d1fa00b214..8e95d94b6dc 100644
--- a/docker/test/integration/runner/Dockerfile
+++ b/docker/test/integration/runner/Dockerfile
@@ -135,4 +135,5 @@ ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'
EXPOSE 2375
ENTRYPOINT ["dockerd-entrypoint.sh"]
-CMD ["sh", "-c", "pytest $PYTEST_OPTS"]
+# To pass additional arguments (e.g. a list of tests) use PYTEST_ADDOPTS
+CMD ["sh", "-c", "pytest"]
diff --git a/docker/test/integration/runner/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml
index 7e34f4c114d..30d1b0bed3f 100644
--- a/docker/test/integration/runner/compose/docker_compose_kafka.yml
+++ b/docker/test/integration/runner/compose/docker_compose_kafka.yml
@@ -4,6 +4,8 @@ services:
kafka_zookeeper:
image: zookeeper:3.4.9
hostname: kafka_zookeeper
+ ports:
+ - 2181:2181
environment:
ZOO_MY_ID: 1
ZOO_PORT: 2181
@@ -15,15 +17,14 @@ services:
image: confluentinc/cp-kafka:5.2.0
hostname: kafka1
ports:
- - ${KAFKA_EXTERNAL_PORT:-8081}:${KAFKA_EXTERNAL_PORT:-8081}
+ - ${KAFKA_EXTERNAL_PORT}:${KAFKA_EXTERNAL_PORT}
environment:
KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:${KAFKA_EXTERNAL_PORT},OUTSIDE://kafka1:19092
KAFKA_ADVERTISED_HOST_NAME: kafka1
- KAFKA_LISTENERS: INSIDE://0.0.0.0:${KAFKA_EXTERNAL_PORT},OUTSIDE://0.0.0.0:19092
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT
KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE
KAFKA_BROKER_ID: 1
- KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181"
+ KAFKA_ZOOKEEPER_CONNECT: kafka_zookeeper:2181
KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
depends_on:
@@ -35,13 +36,38 @@ services:
image: confluentinc/cp-schema-registry:5.2.0
hostname: schema-registry
ports:
- - ${SCHEMA_REGISTRY_EXTERNAL_PORT:-12313}:${SCHEMA_REGISTRY_INTERNAL_PORT:-12313}
+ - ${SCHEMA_REGISTRY_EXTERNAL_PORT}:${SCHEMA_REGISTRY_EXTERNAL_PORT}
environment:
SCHEMA_REGISTRY_HOST_NAME: schema-registry
- SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT
SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
+ SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_EXTERNAL_PORT}
+ SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: noauth
depends_on:
- kafka_zookeeper
- kafka1
+ restart: always
+ security_opt:
+ - label:disable
+
+ schema-registry-auth:
+ image: confluentinc/cp-schema-registry:5.2.0
+ hostname: schema-registry-auth
+ ports:
+ - ${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}
+ environment:
+ SCHEMA_REGISTRY_HOST_NAME: schema-registry-auth
+ SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}
+ SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
+ SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC
+ SCHEMA_REGISTRY_AUTHENTICATION_ROLES: user
+ SCHEMA_REGISTRY_AUTHENTICATION_REALM: RealmFooBar
+ SCHEMA_REGISTRY_OPTS: "-Djava.security.auth.login.config=/etc/schema-registry/secrets/schema_registry_jaas.conf"
+ SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: auth
+ volumes:
+ - ${SCHEMA_REGISTRY_DIR:-}/secrets:/etc/schema-registry/secrets
+ depends_on:
+ - kafka_zookeeper
+ - kafka1
+ restart: always
security_opt:
- label:disable
diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh
index fe53925ecc8..3694fb7c2f6 100755
--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@@ -4,6 +4,9 @@
set -e -x -a
# Choose random timezone for this test run.
+#
+# NOTE: that clickhouse-test will randomize session_timezone by itself as well
+# (it will choose between default server timezone and something specific).
TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)"
echo "Choosen random timezone $TZ"
ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh
index 4926967d2d2..9217fcfddd9 100644
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@@ -14,6 +14,7 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
# Stress tests and upgrade check uses similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
+source /usr/share/clickhouse-test/ci/attach_gdb.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib
install_packages package_folder
@@ -52,7 +53,7 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
start
-shellcheck disable=SC2086 # No quotes because I want to split it into words.
+# shellcheck disable=SC2086 # No quotes because I want to split it into words.
/s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS
chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary"
diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh
index b8061309342..13c352d5d41 100644
--- a/docker/test/upgrade/run.sh
+++ b/docker/test/upgrade/run.sh
@@ -16,6 +16,7 @@ ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_pre
# Stress tests and upgrade check uses similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
+source /usr/share/clickhouse-test/ci/attach_gdb.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
@@ -61,6 +62,7 @@ configure
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
+rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
start
@@ -90,6 +92,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
+rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
start
diff --git a/docs/changelogs/v23.7.1.2470-stable.md b/docs/changelogs/v23.7.1.2470-stable.md
new file mode 100644
index 00000000000..a77078cb653
--- /dev/null
+++ b/docs/changelogs/v23.7.1.2470-stable.md
@@ -0,0 +1,452 @@
+---
+sidebar_position: 1
+sidebar_label: 2023
+---
+
+# 2023 Changelog
+
+### ClickHouse release v23.7.1.2470-stable (a70127baecc) FIXME as compared to v23.6.1.1524-stable (d1c7e13d088)
+
+#### Backward Incompatible Change
+* Add ` NAMED COLLECTION` access type (aliases `USE NAMED COLLECTION`, `NAMED COLLECTION USAGE`). This PR is backward incompatible because this access type is disabled by default (because a parent access type `NAMED COLLECTION ADMIN` is disabled by default as well). Proposed in [#50277](https://github.com/ClickHouse/ClickHouse/issues/50277). To grant use `GRANT NAMED COLLECTION ON collection_name TO user` or `GRANT NAMED COLLECTION ON * TO user`, to be able to give these grants `named_collection_admin` is required in config (previously it was named `named_collection_control`, so will remain as an alias). [#50625](https://github.com/ClickHouse/ClickHouse/pull/50625) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fixing a typo in the `system.parts` column name `last_removal_attemp_time`. Now it is named `last_removal_attempt_time`. [#52104](https://github.com/ClickHouse/ClickHouse/pull/52104) ([filimonov](https://github.com/filimonov)).
+* Bump version of the distributed_ddl_entry_format_version to 5 by default (enables opentelemetry and initial_query_idd pass through). This will not allow to process existing entries for distributed DDL after **downgrade** (but note, that usually there should be no such unprocessed entries). [#52128](https://github.com/ClickHouse/ClickHouse/pull/52128) ([Azat Khuzhin](https://github.com/azat)).
+* Check projection metadata the same way we check ordinary metadata. This change may prevent the server from starting in case there was a table with an invalid projection. An example is a projection that created positional columns in PK (e.g. `projection p (select * order by 1, 4)` which is not allowed in table PK and can cause a crash during insert/merge). Drop such projections before the update. Fixes [#52353](https://github.com/ClickHouse/ClickHouse/issues/52353). [#52361](https://github.com/ClickHouse/ClickHouse/pull/52361) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* The experimental feature `hashid` is removed due to a bug. The quality of implementation was questionable at the start, and it didn't get through the experimental status. This closes [#52406](https://github.com/ClickHouse/ClickHouse/issues/52406). [#52449](https://github.com/ClickHouse/ClickHouse/pull/52449) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The function `toDecimalString` is removed due to subpar implementation quality. This closes [#52407](https://github.com/ClickHouse/ClickHouse/issues/52407). [#52450](https://github.com/ClickHouse/ClickHouse/pull/52450) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### New Feature
+* Implement KQL-style formatting for Interval. [#45671](https://github.com/ClickHouse/ClickHouse/pull/45671) ([ltrk2](https://github.com/ltrk2)).
+* Support ZooKeeper `reconfig` command for CH Keeper with incremental reconfiguration which can be enabled via `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)).
+* Kafka connector can fetch avro schema from schema registry with basic authentication using url-encoded credentials. [#49664](https://github.com/ClickHouse/ClickHouse/pull/49664) ([Ilya Golshtein](https://github.com/ilejn)).
+* Add function `arrayJaccardIndex` which computes the Jaccard similarity between two arrays (see the examples after this list). [#50076](https://github.com/ClickHouse/ClickHouse/pull/50076) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)).
+* Added support for prql as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Add a column is_obsolete to system.settings and similar tables. Closes [#50819](https://github.com/ClickHouse/ClickHouse/issues/50819). [#50826](https://github.com/ClickHouse/ClickHouse/pull/50826) ([flynn](https://github.com/ucasfl)).
+* Implement support for encrypted elements in the configuration file: it is now possible to use encrypted text in leaf elements of the configuration file. The text is encrypted using encryption codecs from the `<encryption_codecs>` section. [#50986](https://github.com/ClickHouse/ClickHouse/pull/50986) ([Roman Vasin](https://github.com/rvasin)).
+* Just a new request of [#49483](https://github.com/ClickHouse/ClickHouse/issues/49483). [#51013](https://github.com/ClickHouse/ClickHouse/pull/51013) ([lgbo](https://github.com/lgbo-ustc)).
+* Add SYSTEM STOP LISTEN query. Closes [#47972](https://github.com/ClickHouse/ClickHouse/issues/47972). [#51016](https://github.com/ClickHouse/ClickHouse/pull/51016) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Add input_format_csv_allow_variable_number_of_columns options. [#51273](https://github.com/ClickHouse/ClickHouse/pull/51273) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Another boring feature: add function `substring_index`, as in Spark or MySQL. [#51472](https://github.com/ClickHouse/ClickHouse/pull/51472) ([李扬](https://github.com/taiyang-li)).
+* Show stats for jemalloc bins. Example: `SELECT *, size * (nmalloc - ndalloc) AS allocated_bytes FROM system.jemalloc_bins WHERE allocated_bytes > 0 ORDER BY allocated_bytes DESC LIMIT 10`. [#51674](https://github.com/ClickHouse/ClickHouse/pull/51674) ([Alexander Gololobov](https://github.com/davenger)).
+* Add RowBinaryWithDefaults format with extra byte before each column for using column default value. Closes [#50854](https://github.com/ClickHouse/ClickHouse/issues/50854). [#51695](https://github.com/ClickHouse/ClickHouse/pull/51695) ([Kruglov Pavel](https://github.com/Avogar)).
+* Added `default_temporary_table_engine` setting. Same as `default_table_engine` but for temporary tables. [#51292](https://github.com/ClickHouse/ClickHouse/issues/51292). [#51708](https://github.com/ClickHouse/ClickHouse/pull/51708) ([velavokr](https://github.com/velavokr)).
+* Added new `initcap`/`initcapUTF8` functions which convert the first letter of each word to upper case and the rest to lower case. [#51735](https://github.com/ClickHouse/ClickHouse/pull/51735) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Create table now supports `PRIMARY KEY` syntax in column definition. Columns are added to primary index in the same order columns are defined. [#51881](https://github.com/ClickHouse/ClickHouse/pull/51881) ([Ilya Yatsishin](https://github.com/qoega)).
+* Added the possibility to use date and time format specifiers in log and error log file names, either in config files (`log` and `errorlog` tags) or command line arguments (`--log-file` and `--errorlog-file`). [#51945](https://github.com/ClickHouse/ClickHouse/pull/51945) ([Victor Krasnov](https://github.com/sirvickr)).
+* Added Peak Memory Usage (for query) to client final statistics, and to http header. [#51946](https://github.com/ClickHouse/ClickHouse/pull/51946) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Added new `hasSubsequence()` functions (plus `CaseInsensitive` and `UTF8` versions). [#52050](https://github.com/ClickHouse/ClickHouse/pull/52050) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Add `array_agg` as alias of `groupArray` for PostgreSQL compatibility. Closes [#52100](https://github.com/ClickHouse/ClickHouse/issues/52100). [#52135](https://github.com/ClickHouse/ClickHouse/pull/52135) ([flynn](https://github.com/ucasfl)).
+* Add `any_value` as a compatibility alias for `any` aggregate function. Closes [#52140](https://github.com/ClickHouse/ClickHouse/issues/52140). [#52147](https://github.com/ClickHouse/ClickHouse/pull/52147) ([flynn](https://github.com/ucasfl)).
+* Add aggregate function `array_concat_agg` for compatibility with BigQuery, it's alias of `groupArrayArray`. Closes [#52139](https://github.com/ClickHouse/ClickHouse/issues/52139). [#52149](https://github.com/ClickHouse/ClickHouse/pull/52149) ([flynn](https://github.com/ucasfl)).
+* Add `OCTET_LENGTH` as an alias to `length`. Closes [#52153](https://github.com/ClickHouse/ClickHouse/issues/52153). [#52176](https://github.com/ClickHouse/ClickHouse/pull/52176) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)).
+* Re-add SipHash keyed functions. [#52206](https://github.com/ClickHouse/ClickHouse/pull/52206) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
+* Added `firstLine` function to extract the first line from the multi-line string. This closes [#51172](https://github.com/ClickHouse/ClickHouse/issues/51172). [#52209](https://github.com/ClickHouse/ClickHouse/pull/52209) ([Mikhail Koviazin](https://github.com/mkmkme)).
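+
+A few illustrative queries for some of the new functions and syntax above. These are hedged sketches based on the entry descriptions, not authoritative output; the literal values and the `pk_demo` table name are made up for illustration:
+
+```sql
+-- Jaccard similarity = |intersection| / |union|; here 1/3.
+SELECT arrayJaccardIndex([1, 2], [2, 3]);
+
+-- Everything before the second occurrence of the delimiter, as in MySQL.
+SELECT substring_index('www.clickhouse.com', '.', 2);  -- 'www.clickhouse'
+
+-- First letter of each word upper-cased, the rest lower-cased.
+SELECT initcap('hELLO wORLD');  -- 'Hello World'
+
+-- 1 if the characters of the needle occur in order in the haystack.
+SELECT hasSubsequence('garbage', 'arg');  -- 1
+
+-- First line of a multi-line string.
+SELECT firstLine('foo\nbar\nbaz');  -- 'foo'
+
+-- Compatibility aliases.
+SELECT array_agg(number) FROM numbers(3);  -- same as groupArray: [0,1,2]
+SELECT OCTET_LENGTH('abc');  -- same as length: 3
+
+-- PRIMARY KEY in a column definition; columns join the primary index in definition order.
+CREATE TABLE pk_demo (id UInt64 PRIMARY KEY, s String) ENGINE = MergeTree;
+```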
+
+#### Performance Improvement
+* Enable `move_all_conditions_to_prewhere` and `enable_multiple_prewhere_read_steps` settings by default. [#46365](https://github.com/ClickHouse/ClickHouse/pull/46365) ([Alexander Gololobov](https://github.com/davenger)).
+* Improves performance of some queries by tuning allocator. [#46416](https://github.com/ClickHouse/ClickHouse/pull/46416) ([Azat Khuzhin](https://github.com/azat)).
+* Writing parquet files is 10x faster, it's multi-threaded now. Almost the same speed as reading. [#49367](https://github.com/ClickHouse/ClickHouse/pull/49367) ([Michael Kolupaev](https://github.com/al13n321)).
+* Enable automatic selection of the sparse serialization format by default. It improves performance. The format is supported since version 22.1. After this change, downgrading to versions older than 22.1 might not be possible. You can turn off the usage of the sparse serialization format by providing the `ratio_of_defaults_for_sparse_serialization = 1` setting for your MergeTree tables (see the sketch after this list). [#49631](https://github.com/ClickHouse/ClickHouse/pull/49631) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Now we use fixed-size tasks in `MergeTreePrefetchedReadPool` as in `MergeTreeReadPool`. Also from now we use connection pool for S3 requests. [#49732](https://github.com/ClickHouse/ClickHouse/pull/49732) ([Nikita Taranov](https://github.com/nickitat)).
+* More pushdown to the right side of join. [#50532](https://github.com/ClickHouse/ClickHouse/pull/50532) ([Nikita Taranov](https://github.com/nickitat)).
+* Improve grace_hash join by reserving hash table's size (resubmit). [#50875](https://github.com/ClickHouse/ClickHouse/pull/50875) ([lgbo](https://github.com/lgbo-ustc)).
+* Waiting on lock in `OpenedFileCache` could be noticeable sometimes. We sharded it into multiple sub-maps (each with its own lock) to avoid contention. [#51341](https://github.com/ClickHouse/ClickHouse/pull/51341) ([Nikita Taranov](https://github.com/nickitat)).
+* Remove a duplicate condition in `FunctionUnixTimestamp64.h`. [#51857](https://github.com/ClickHouse/ClickHouse/pull/51857) ([lcjh](https://github.com/ljhcage)).
+* When choosing conditions to move to PREWHERE, deprioritize conditions on primary-key columns: they are likely to be used in PK analysis anyway and would not contribute much more to PREWHERE filtering. [#51958](https://github.com/ClickHouse/ClickHouse/pull/51958) ([Alexander Gololobov](https://github.com/davenger)).
+* Add setting `optimize_uniq_to_count` (default: 0) with a rewriter for both the old and the new analyzer. [#52004](https://github.com/ClickHouse/ClickHouse/pull/52004) ([JackyWoo](https://github.com/JackyWoo)).
+* The performance experiments of **OnTime** on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of **11.6%** to the QPS of the query **Q8** while having no impact on others. [#52036](https://github.com/ClickHouse/ClickHouse/pull/52036) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
+* Enable `allow_vertical_merges_from_compact_to_wide_parts` by default. It will save memory usage during merges. [#52295](https://github.com/ClickHouse/ClickHouse/pull/52295) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1` . This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823) . This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173) . [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)).
+* Reduce the number of syscalls in FileCache::loadMetadata. [#52435](https://github.com/ClickHouse/ClickHouse/pull/52435) ([Raúl Marín](https://github.com/Algunenano)).
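+
+A minimal sketch of opting a table out of the new sparse-serialization default described above (the table name and columns are made up for illustration; the setting name comes from the entry itself):
+
+```sql
+-- Keep the pre-23.7 behavior for this table by disabling sparse serialization.
+CREATE TABLE events (d Date, v UInt64)
+ENGINE = MergeTree ORDER BY d
+SETTINGS ratio_of_defaults_for_sparse_serialization = 1;
+```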
+
+#### Improvement
+* Added query `SYSTEM FLUSH ASYNC INSERT QUEUE` which flushes all pending asynchronous inserts to the destination tables. Added a server-side setting `async_insert_queue_flush_on_shutdown` (`true` by default) which determines whether to flush queue of asynchronous inserts on graceful shutdown. Setting `async_insert_threads` is now a server-side setting. [#49160](https://github.com/ClickHouse/ClickHouse/pull/49160) ([Anton Popov](https://github.com/CurtizJ)).
+* Don't show messages about `16 EiB` free space in logs, as they don't make sense. This closes [#49320](https://github.com/ClickHouse/ClickHouse/issues/49320). [#49342](https://github.com/ClickHouse/ClickHouse/pull/49342) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Properly check the limit for the `sleepEachRow` function. Add a setting `function_sleep_max_microseconds_per_block`. This is needed for generic query fuzzer. [#49343](https://github.com/ClickHouse/ClickHouse/pull/49343) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix two issues in `geohashEncode` with queries like `select geohashEncode(120.2, number::Float64) from numbers(10)`. [#50066](https://github.com/ClickHouse/ClickHouse/pull/50066) ([李扬](https://github.com/taiyang-li)).
+* Add support for external disks in Keeper for storing snapshots and logs. [#50098](https://github.com/ClickHouse/ClickHouse/pull/50098) ([Antonio Andelic](https://github.com/antonio2368)).
+* Add support for multi-directory selection (`{}`) globs. [#50559](https://github.com/ClickHouse/ClickHouse/pull/50559) ([Andrey Zvonov](https://github.com/zvonand)).
+* Allow a strict lower boundary for file segment size by downloading remaining data in the background. The minimum size of a file segment (if the actual file size is bigger) is configured by the cache setting `boundary_alignment`, by default `4Mi`. The number of background threads is configured by the cache setting `background_download_threads`, by default `2`. Also, `max_file_segment_size` was increased from `8Mi` to `32Mi` in this PR. [#51000](https://github.com/ClickHouse/ClickHouse/pull/51000) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Allow filtering HTTP headers with `http_forbid_headers` section in config. Both exact matching and regexp filters are available. [#51038](https://github.com/ClickHouse/ClickHouse/pull/51038) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Add a new alias for the function `current_database` and add the new function `current_schemas`. Closes [#50727](https://github.com/ClickHouse/ClickHouse/issues/50727). [#51076](https://github.com/ClickHouse/ClickHouse/pull/51076) ([Pedro Riera](https://github.com/priera)).
+* Log async insert flush queries into `system.query_log`. [#51160](https://github.com/ClickHouse/ClickHouse/pull/51160) ([Raúl Marín](https://github.com/Algunenano)).
+* Decreased default timeouts for S3 from 30 seconds to 3 seconds, and for other HTTP from 180 seconds to 30 seconds. [#51171](https://github.com/ClickHouse/ClickHouse/pull/51171) ([Michael Kolupaev](https://github.com/al13n321)).
+* Use read_bytes/total_bytes_to_read for progress bar in s3/file/url/... table functions for better progress indication. [#51286](https://github.com/ClickHouse/ClickHouse/pull/51286) ([Kruglov Pavel](https://github.com/Avogar)).
+* Functions "date_diff() and age()" now support millisecond/microsecond unit and work with microsecond precision. [#51291](https://github.com/ClickHouse/ClickHouse/pull/51291) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Allow SQL standard `FETCH` without `OFFSET`. See https://antonz.org/sql-fetch/. [#51293](https://github.com/ClickHouse/ClickHouse/pull/51293) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Improve parsing of path in clickhouse-keeper-client. [#51359](https://github.com/ClickHouse/ClickHouse/pull/51359) ([Azat Khuzhin](https://github.com/azat)).
+* A third-party product depending on ClickHouse (Gluten: Plugin to Double SparkSQL's Performance) had a bug. This fix avoids heap overflow in that third-party product while reading from HDFS. [#51386](https://github.com/ClickHouse/ClickHouse/pull/51386) ([李扬](https://github.com/taiyang-li)).
+* Fix checking error caused by uninitialized class members. [#51418](https://github.com/ClickHouse/ClickHouse/pull/51418) ([李扬](https://github.com/taiyang-li)).
+* Add ability to disable native copy for S3 (setting for BACKUP/RESTORE `allow_s3_native_copy`, and `s3_allow_native_copy` for `s3`/`s3_plain` disks). [#51448](https://github.com/ClickHouse/ClickHouse/pull/51448) ([Azat Khuzhin](https://github.com/azat)).
+* Add column `primary_key_size` to `system.parts` table to show compressed primary key size on disk. Closes [#51400](https://github.com/ClickHouse/ClickHouse/issues/51400). [#51496](https://github.com/ClickHouse/ClickHouse/pull/51496) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Allow running `clickhouse-local` without procfs, without home directory existing, and without name resolution plugins from glibc. [#51518](https://github.com/ClickHouse/ClickHouse/pull/51518) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Correct the message shown when modifying a storage policy. Closes [#51516](https://github.com/ClickHouse/ClickHouse/issues/51516). [#51519](https://github.com/ClickHouse/ClickHouse/pull/51519) ([xiaolei565](https://github.com/xiaolei565)).
+* Support `DROP FILESYSTEM CACHE <cache_name> KEY <key> [ OFFSET <offset>]`. [#51547](https://github.com/ClickHouse/ClickHouse/pull/51547) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Allow to add a disk name for custom disks. Previously custom disks would use an internally generated disk name. Now it is possible with `disk = disk_<name>(...)` (e.g. the disk will have the name `name`). [#51552](https://github.com/ClickHouse/ClickHouse/pull/51552) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add placeholder `%a` for the full filename in the `rename_files_after_processing` setting. [#51603](https://github.com/ClickHouse/ClickHouse/pull/51603) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add column modification time into system.parts_columns. [#51685](https://github.com/ClickHouse/ClickHouse/pull/51685) ([Azat Khuzhin](https://github.com/azat)).
+* Add new setting `input_format_csv_use_default_on_bad_values` to CSV format that allows to insert default value when parsing of a single field failed. [#51716](https://github.com/ClickHouse/ClickHouse/pull/51716) ([KevinyhZou](https://github.com/KevinyhZou)).
+* The crash log is now flushed to disk after an unexpected crash. [#51720](https://github.com/ClickHouse/ClickHouse/pull/51720) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
+* Fix behavior in dashboard page where errors unrelated to authentication are not shown. Also fix 'overlapping' chart behavior. [#51744](https://github.com/ClickHouse/ClickHouse/pull/51744) ([Zach Naimon](https://github.com/ArctypeZach)).
+* Allow UUID to UInt128 conversion. [#51765](https://github.com/ClickHouse/ClickHouse/pull/51765) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Added support for function range of Nullable arguments. [#51767](https://github.com/ClickHouse/ClickHouse/pull/51767) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Convert conditions like `toYear(x) = c` to `c1 <= x < c2`. [#51795](https://github.com/ClickHouse/ClickHouse/pull/51795) ([Han Fei](https://github.com/hanfei1991)).
+* Improve MySQL compatibility of statement SHOW INDEX. [#51796](https://github.com/ClickHouse/ClickHouse/pull/51796) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix `use_structure_from_insertion_table_in_table_functions` does not work with `MATERIALIZED` and `ALIAS` columns. Closes [#51817](https://github.com/ClickHouse/ClickHouse/issues/51817). Closes [#51019](https://github.com/ClickHouse/ClickHouse/issues/51019). [#51825](https://github.com/ClickHouse/ClickHouse/pull/51825) ([flynn](https://github.com/ucasfl)).
+* Introduce a table setting `wait_for_unique_parts_send_before_shutdown_ms` which specifies how long a replica will wait before closing the interserver handler for replicated sends. Also fix an inconsistency between the shutdown of tables and interserver handlers: now the server shuts down tables first and only then shuts down interserver handlers. [#51851](https://github.com/ClickHouse/ClickHouse/pull/51851) ([alesapin](https://github.com/alesapin)).
+* `CacheDictionary` now requests only unique keys from the source. Closes [#51762](https://github.com/ClickHouse/ClickHouse/issues/51762). [#51853](https://github.com/ClickHouse/ClickHouse/pull/51853) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fixed settings not being applied for EXPLAIN queries when a format is provided. [#51859](https://github.com/ClickHouse/ClickHouse/pull/51859) ([Nikita Taranov](https://github.com/nickitat)).
+* Allow SETTINGS before FORMAT in DESCRIBE TABLE query for compatibility with SELECT query. Closes [#51544](https://github.com/ClickHouse/ClickHouse/issues/51544). [#51899](https://github.com/ClickHouse/ClickHouse/pull/51899) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Var-int encoded integers (e.g. used by the native protocol) can now use the full 64-bit range. 3rd party clients are advised to update their var-int code accordingly. [#51905](https://github.com/ClickHouse/ClickHouse/pull/51905) ([Robert Schulze](https://github.com/rschu1ze)).
+* Update certificates when they change without the need to manually SYSTEM RELOAD CONFIG. [#52030](https://github.com/ClickHouse/ClickHouse/pull/52030) ([Mike Kot](https://github.com/myrrc)).
+* Added the `allow_create_index_without_type` setting that allows ignoring `ADD INDEX` queries without a specified `TYPE`. Standard SQL queries will just succeed without changing the table schema. [#52056](https://github.com/ClickHouse/ClickHouse/pull/52056) ([Ilya Yatsishin](https://github.com/qoega)).
+* Fixed crash when mysqlxx::Pool::Entry is used after it was disconnected. [#52063](https://github.com/ClickHouse/ClickHouse/pull/52063) ([Val Doroshchuk](https://github.com/valbok)).
+* CREATE TABLE ... AS SELECT .. is now supported in MaterializedMySQL. [#52067](https://github.com/ClickHouse/ClickHouse/pull/52067) ([Val Doroshchuk](https://github.com/valbok)).
+* Introduced automatic conversion of text types to utf8 for MaterializedMySQL. [#52084](https://github.com/ClickHouse/ClickHouse/pull/52084) ([Val Doroshchuk](https://github.com/valbok)).
+* Add alias for functions `today` (now available under the `curdate`/`current_date` names) and `now` (`current_timestamp`). [#52106](https://github.com/ClickHouse/ClickHouse/pull/52106) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)).
+* Log messages are written to text_log from the beginning. [#52113](https://github.com/ClickHouse/ClickHouse/pull/52113) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Previously, in cases where the HTTP endpoint had multiple IP addresses and the first of them was unreachable, a timeout exception was thrown. Session creation now handles all resolved endpoints. [#52116](https://github.com/ClickHouse/ClickHouse/pull/52116) ([Aleksei Filatov](https://github.com/aalexfvk)).
+* Support async_deduplication_token for async insert. [#52136](https://github.com/ClickHouse/ClickHouse/pull/52136) ([Han Fei](https://github.com/hanfei1991)).
+* The Avro input format now supports Union with a single type. Closes [#52131](https://github.com/ClickHouse/ClickHouse/issues/52131). [#52137](https://github.com/ClickHouse/ClickHouse/pull/52137) ([flynn](https://github.com/ucasfl)).
+* Add setting `optimize_use_implicit_projections` to disable implicit projections (currently only `min_max_count` projection). This is defaulted to false until [#52075](https://github.com/ClickHouse/ClickHouse/issues/52075) is fixed. [#52152](https://github.com/ClickHouse/ClickHouse/pull/52152) ([Amos Bird](https://github.com/amosbird)).
+* It was possible to trigger an infinite loop with the function `hasToken`. This possibility has been removed. This closes [#52156](https://github.com/ClickHouse/ClickHouse/issues/52156). [#52160](https://github.com/ClickHouse/ClickHouse/pull/52160) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Upgraded Intel QPL from v1.1.0 to v1.2.0 and Intel accel-config from v3.5 to v4.0, and fixed an issue where Device IOTLB misses had a big performance impact for IAA accelerators. [#52180](https://github.com/ClickHouse/ClickHouse/pull/52180) ([jasperzhu](https://github.com/jinjunzh)).
+* Functions "date_diff() and age()" now support millisecond/microsecond unit and work with microsecond precision. [#52181](https://github.com/ClickHouse/ClickHouse/pull/52181) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Create ZK ancestors optimistically. [#52195](https://github.com/ClickHouse/ClickHouse/pull/52195) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix [#50582](https://github.com/ClickHouse/ClickHouse/issues/50582). Avoid the `Not found column ... in block` error in some cases of reading in-order and constants. [#52259](https://github.com/ClickHouse/ClickHouse/pull/52259) ([Chen768959](https://github.com/Chen768959)).
+* Check whether S2 geo primitives are invalid as early as possible on ClickHouse side. This closes: [#27090](https://github.com/ClickHouse/ClickHouse/issues/27090). [#52260](https://github.com/ClickHouse/ClickHouse/pull/52260) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Now unquoted utf-8 strings are supported in DDL for MaterializedMySQL. [#52318](https://github.com/ClickHouse/ClickHouse/pull/52318) ([Val Doroshchuk](https://github.com/valbok)).
+* Add back missing projection QueryAccessInfo when `query_plan_optimize_projection = 1`. This fixes [#50183](https://github.com/ClickHouse/ClickHouse/issues/50183) . This fixes [#50093](https://github.com/ClickHouse/ClickHouse/issues/50093) . [#52327](https://github.com/ClickHouse/ClickHouse/pull/52327) ([Amos Bird](https://github.com/amosbird)).
+* Add new setting `disable_url_encoding` that allows disabling decoding/encoding of the path in the URI for the URL engine. [#52337](https://github.com/ClickHouse/ClickHouse/pull/52337) ([Kruglov Pavel](https://github.com/Avogar)).
+* When `ZooKeeperRetriesControl` rethrows an error, it's more useful to see its original stack trace, not the one from `ZooKeeperRetriesControl` itself. [#52347](https://github.com/ClickHouse/ClickHouse/pull/52347) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Now double quoted comments are supported in MaterializedMySQL. [#52355](https://github.com/ClickHouse/ClickHouse/pull/52355) ([Val Doroshchuk](https://github.com/valbok)).
+* Wait for zero copy replication lock even if some disks don't support it. [#52376](https://github.com/ClickHouse/ClickHouse/pull/52376) ([Raúl Marín](https://github.com/Algunenano)).
+* Now it's possible to specify min (`memory_profiler_sample_min_allocation_size`) and max (`memory_profiler_sample_max_allocation_size`) size for allocations to be tracked with sampling memory profiler. [#52419](https://github.com/ClickHouse/ClickHouse/pull/52419) ([alesapin](https://github.com/alesapin)).
+* The `session_timezone` setting is demoted to experimental. [#52445](https://github.com/ClickHouse/ClickHouse/pull/52445) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Now interserver port will be closed only after tables are shut down. [#52498](https://github.com/ClickHouse/ClickHouse/pull/52498) ([alesapin](https://github.com/alesapin)).
+* Added field `refcount` to `system.remote_data_paths` table. [#52518](https://github.com/ClickHouse/ClickHouse/pull/52518) ([Anton Popov](https://github.com/CurtizJ)).
+* New setting `merge_tree_determine_task_size_by_prewhere_columns` added. If set to `true` only sizes of the columns from `PREWHERE` section will be considered to determine reading task size. Otherwise all the columns from query are considered. [#52606](https://github.com/ClickHouse/ClickHouse/pull/52606) ([Nikita Taranov](https://github.com/nickitat)).
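+
+Hedged examples for a few of the improvements above. The query-level details, such as the `describe_compact_output` setting used purely for illustration, are assumptions rather than part of the entries:
+
+```sql
+-- Millisecond-precision dateDiff, per the date_diff()/age() entries.
+SELECT dateDiff('millisecond',
+                toDateTime64('2023-01-01 00:00:00.000', 3),
+                toDateTime64('2023-01-01 00:00:00.250', 3));  -- 250
+
+-- SETTINGS may now precede FORMAT in DESCRIBE TABLE, as in SELECT.
+DESCRIBE TABLE system.one SETTINGS describe_compact_output = 1 FORMAT TSV;
+
+-- New compatibility aliases for today() and now().
+SELECT curdate(), current_timestamp();
+```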
+
+#### Build/Testing/Packaging Improvement
+* Add experimental ClickHouse builds for Linux RISC-V 64 to CI. [#31398](https://github.com/ClickHouse/ClickHouse/pull/31398) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fixed CRC32(WeakHash32) issue for s390x. [#50365](https://github.com/ClickHouse/ClickHouse/pull/50365) ([Harry Lee](https://github.com/HarryLeeIBM)).
+* Add integration test check with the enabled analyzer. [#50926](https://github.com/ClickHouse/ClickHouse/pull/50926) ([Dmitry Novik](https://github.com/novikd)).
+* Update cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)).
+* Fixed several issues found by OSS-Fuzz. [#51736](https://github.com/ClickHouse/ClickHouse/pull/51736) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* There were a couple of failures, apparently because of S3 availability; sccache has a feature of failing over to local compilation. [#51893](https://github.com/ClickHouse/ClickHouse/pull/51893) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* 02242_delete_user_race and 02243_drop_user_grant_race tests have been corrected. [#51923](https://github.com/ClickHouse/ClickHouse/pull/51923) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
+* Make the function `CHColumnToArrowColumn::fillArrowArrayWithArrayColumnData` work with nullable arrays, which are not possible in ClickHouse, but needed for Gluten. [#52112](https://github.com/ClickHouse/ClickHouse/pull/52112) ([李扬](https://github.com/taiyang-li)).
+* We've updated the CCTZ library to master, but there are no user-visible changes. [#52124](https://github.com/ClickHouse/ClickHouse/pull/52124) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The `system.licenses` table now includes the hard-forked library Poco. This closes [#52066](https://github.com/ClickHouse/ClickHouse/issues/52066). [#52127](https://github.com/ClickHouse/ClickHouse/pull/52127) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Follow up [#50926](https://github.com/ClickHouse/ClickHouse/issues/50926). Add integration tests check with enabled analyzer to master. [#52210](https://github.com/ClickHouse/ClickHouse/pull/52210) ([Dmitry Novik](https://github.com/novikd)).
+* Reproducible builds for Rust. [#52395](https://github.com/ClickHouse/ClickHouse/pull/52395) ([Azat Khuzhin](https://github.com/azat)).
+* Improve the startup time of `clickhouse-client` and `clickhouse-local` in debug and sanitizer builds. This closes [#52228](https://github.com/ClickHouse/ClickHouse/issues/52228). [#52489](https://github.com/ClickHouse/ClickHouse/pull/52489) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Check that there are no cases of bad punctuation: whitespace before a comma like `Hello ,world` instead of `Hello, world`. [#52549](https://github.com/ClickHouse/ClickHouse/pull/52549) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Fix materialised pg syncTables [#49698](https://github.com/ClickHouse/ClickHouse/pull/49698) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix projection with optimize_aggregators_of_group_by_keys [#49709](https://github.com/ClickHouse/ClickHouse/pull/49709) ([Amos Bird](https://github.com/amosbird)).
+* Fix optimize_skip_unused_shards with JOINs [#51037](https://github.com/ClickHouse/ClickHouse/pull/51037) ([Azat Khuzhin](https://github.com/azat)).
+* Fix formatDateTime() with fractional negative datetime64 [#51290](https://github.com/ClickHouse/ClickHouse/pull/51290) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Functions `hasToken*` were totally wrong. Add a test for [#43358](https://github.com/ClickHouse/ClickHouse/issues/43358) [#51378](https://github.com/ClickHouse/ClickHouse/pull/51378) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix SIGSEGV for clusters with zero weight across all shards (fixes INSERT INTO FUNCTION clusterAllReplicas()) [#51545](https://github.com/ClickHouse/ClickHouse/pull/51545) ([Azat Khuzhin](https://github.com/azat)).
+* Fix timeout for hedged requests [#51582](https://github.com/ClickHouse/ClickHouse/pull/51582) ([Azat Khuzhin](https://github.com/azat)).
+* Fix logical error in ANTI join with NULL [#51601](https://github.com/ClickHouse/ClickHouse/pull/51601) ([vdimir](https://github.com/vdimir)).
+* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)).
+* Do not apply PredicateExpressionsOptimizer for ASOF/ANTI join [#51633](https://github.com/ClickHouse/ClickHouse/pull/51633) ([vdimir](https://github.com/vdimir)).
+* Fix async insert with deduplication for ReplicatedMergeTree using merging algorithms [#51676](https://github.com/ClickHouse/ClickHouse/pull/51676) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix segfault when creating an invalid EmbeddedRocksDB table [#51847](https://github.com/ClickHouse/ClickHouse/pull/51847) ([Duc Canh Le](https://github.com/canhld94)).
+* Fix inserts into MongoDB tables [#51876](https://github.com/ClickHouse/ClickHouse/pull/51876) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix deadlock on DatabaseCatalog shutdown [#51908](https://github.com/ClickHouse/ClickHouse/pull/51908) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix error in subquery operators [#51922](https://github.com/ClickHouse/ClickHouse/pull/51922) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix async connect to hosts with multiple ips [#51934](https://github.com/ClickHouse/ClickHouse/pull/51934) ([Kruglov Pavel](https://github.com/Avogar)).
+* Do not remove inputs after ActionsDAG::merge [#51947](https://github.com/ClickHouse/ClickHouse/pull/51947) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)).
+* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Small fix for toDateTime64() for dates after 2283-12-31 [#52130](https://github.com/ClickHouse/ClickHouse/pull/52130) ([Andrey Zvonov](https://github.com/zvonand)).
+* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix incorrect projection analysis when aggregation expression contains monotonic functions [#52151](https://github.com/ClickHouse/ClickHouse/pull/52151) ([Amos Bird](https://github.com/amosbird)).
+* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Disable direct join for range dictionary [#52187](https://github.com/ClickHouse/ClickHouse/pull/52187) ([Duc Canh Le](https://github.com/canhld94)).
+* Fix sticky mutations test (and extremely rare race condition) [#52197](https://github.com/ClickHouse/ClickHouse/pull/52197) ([alesapin](https://github.com/alesapin)).
+* Fix race in Web disk [#52211](https://github.com/ClickHouse/ClickHouse/pull/52211) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix data race in Connection::setAsyncCallback on unknown packet from server [#52219](https://github.com/ClickHouse/ClickHouse/pull/52219) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix temp data deletion on startup, add test [#52275](https://github.com/ClickHouse/ClickHouse/pull/52275) ([vdimir](https://github.com/vdimir)).
+* Don't use minmax_count projections when counting nullable columns [#52297](https://github.com/ClickHouse/ClickHouse/pull/52297) ([Amos Bird](https://github.com/amosbird)).
+* MergeTree/ReplicatedMergeTree should use server timezone for log entries [#52325](https://github.com/ClickHouse/ClickHouse/pull/52325) ([Azat Khuzhin](https://github.com/azat)).
+* Fix parameterized view with cte and multiple usage [#52328](https://github.com/ClickHouse/ClickHouse/pull/52328) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix `apply_snapshot` in Keeper [#52358](https://github.com/ClickHouse/ClickHouse/pull/52358) ([Antonio Andelic](https://github.com/antonio2368)).
+* Update build-osx.md [#52377](https://github.com/ClickHouse/ClickHouse/pull/52377) ([AlexBykovski](https://github.com/AlexBykovski)).
+* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)).
+* Fix normal projection with merge table [#52432](https://github.com/ClickHouse/ClickHouse/pull/52432) ([Amos Bird](https://github.com/amosbird)).
+* Fix possible double-free in Aggregator [#52439](https://github.com/ClickHouse/ClickHouse/pull/52439) ([Nikita Taranov](https://github.com/nickitat)).
+* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)).
+* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Check recursion depth in OptimizedRegularExpression [#52451](https://github.com/ClickHouse/ClickHouse/pull/52451) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix data-race DatabaseReplicated::startupTables()/canExecuteReplicatedMetadataAlter() [#52490](https://github.com/ClickHouse/ClickHouse/pull/52490) ([Azat Khuzhin](https://github.com/azat)).
+* Fix abort in function `transform` [#52513](https://github.com/ClickHouse/ClickHouse/pull/52513) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix lightweight delete after drop of projection [#52517](https://github.com/ClickHouse/ClickHouse/pull/52517) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)).
+
+#### NO CL ENTRY
+
+* NO CL ENTRY: 'Revert "Add documentation for building in docker"'. [#51773](https://github.com/ClickHouse/ClickHouse/pull/51773) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* NO CL ENTRY: 'Revert "Fix build"'. [#51911](https://github.com/ClickHouse/ClickHouse/pull/51911) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* NO CL ENTRY: 'Revert "Millisecond and microsecond support in date_diff / age functions"'. [#52129](https://github.com/ClickHouse/ClickHouse/pull/52129) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* NO CL ENTRY: 'Revert "Re-add SipHash keyed functions"'. [#52466](https://github.com/ClickHouse/ClickHouse/pull/52466) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* NO CL ENTRY: 'Revert "Add an ability to specify allocations size for sampling memory profiler"'. [#52496](https://github.com/ClickHouse/ClickHouse/pull/52496) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* NO CL ENTRY: 'Revert "Rewrite uniq to count"'. [#52576](https://github.com/ClickHouse/ClickHouse/pull/52576) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Remove duplicate_order_by_and_distinct optimization [#47135](https://github.com/ClickHouse/ClickHouse/pull/47135) ([Igor Nikonov](https://github.com/devcrafter)).
+* Update sort desc in ReadFromMergeTree after applying PREWHERE info [#48669](https://github.com/ClickHouse/ClickHouse/pull/48669) ([Igor Nikonov](https://github.com/devcrafter)).
+* Fix `BindException: Address already in use` in HDFS integration tests [#49428](https://github.com/ClickHouse/ClickHouse/pull/49428) ([Nikita Taranov](https://github.com/nickitat)).
+* Force libunwind usage (removes gcc_eh support) [#49438](https://github.com/ClickHouse/ClickHouse/pull/49438) ([Azat Khuzhin](https://github.com/azat)).
+* Cleanup `storage_conf.xml` [#49557](https://github.com/ClickHouse/ClickHouse/pull/49557) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix flaky tests caused by OPTIMIZE FINAL failing memory budget check [#49764](https://github.com/ClickHouse/ClickHouse/pull/49764) ([Michael Kolupaev](https://github.com/al13n321)).
+* Remove unstable queries from performance/join_set_filter [#50235](https://github.com/ClickHouse/ClickHouse/pull/50235) ([vdimir](https://github.com/vdimir)).
+* More accurate DNS resolve for the keeper connection [#50738](https://github.com/ClickHouse/ClickHouse/pull/50738) ([pufit](https://github.com/pufit)).
+* Try to fix some trash in Disks and part moves [#51135](https://github.com/ClickHouse/ClickHouse/pull/51135) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Add jemalloc support for s390x [#51186](https://github.com/ClickHouse/ClickHouse/pull/51186) ([Boris Kuschel](https://github.com/bkuschel)).
+* Resubmit [#48821](https://github.com/ClickHouse/ClickHouse/issues/48821) [#51208](https://github.com/ClickHouse/ClickHouse/pull/51208) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* test for [#36894](https://github.com/ClickHouse/ClickHouse/issues/36894) [#51274](https://github.com/ClickHouse/ClickHouse/pull/51274) ([Denny Crane](https://github.com/den-crane)).
+* external_aggregation_fix for big endian machines [#51280](https://github.com/ClickHouse/ClickHouse/pull/51280) ([Sanjam Panda](https://github.com/saitama951)).
+* Fix: Invalid number of rows in Chunk column Object [#51296](https://github.com/ClickHouse/ClickHouse/pull/51296) ([Igor Nikonov](https://github.com/devcrafter)).
+* Add a test for [#44816](https://github.com/ClickHouse/ClickHouse/issues/44816) [#51305](https://github.com/ClickHouse/ClickHouse/pull/51305) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add a test for `calculate_text_stack_trace` setting [#51311](https://github.com/ClickHouse/ClickHouse/pull/51311) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* decrease log level, make logs shorter [#51320](https://github.com/ClickHouse/ClickHouse/pull/51320) ([Sema Checherinda](https://github.com/CheSema)).
+* Collect stack traces from job's scheduling and print along with exception's stack trace. [#51349](https://github.com/ClickHouse/ClickHouse/pull/51349) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Add a test for [#42691](https://github.com/ClickHouse/ClickHouse/issues/42691) [#51352](https://github.com/ClickHouse/ClickHouse/pull/51352) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add a test for [#32474](https://github.com/ClickHouse/ClickHouse/issues/32474) [#51354](https://github.com/ClickHouse/ClickHouse/pull/51354) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add a test for [#41727](https://github.com/ClickHouse/ClickHouse/issues/41727) [#51355](https://github.com/ClickHouse/ClickHouse/pull/51355) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add a test for [#35801](https://github.com/ClickHouse/ClickHouse/issues/35801) [#51356](https://github.com/ClickHouse/ClickHouse/pull/51356) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add a test for [#34626](https://github.com/ClickHouse/ClickHouse/issues/34626) [#51357](https://github.com/ClickHouse/ClickHouse/pull/51357) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Initialize text_log earlier to capture table startup messages [#51360](https://github.com/ClickHouse/ClickHouse/pull/51360) ([Azat Khuzhin](https://github.com/azat)).
+* Use separate default settings for clickhouse-local [#51363](https://github.com/ClickHouse/ClickHouse/pull/51363) ([Azat Khuzhin](https://github.com/azat)).
+* Attempt to remove wrong code (catch/throw in Functions) [#51367](https://github.com/ClickHouse/ClickHouse/pull/51367) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove suspicious code [#51383](https://github.com/ClickHouse/ClickHouse/pull/51383) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Disable hedged requests under TSan [#51392](https://github.com/ClickHouse/ClickHouse/pull/51392) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* no finalize in d-tor WriteBufferFromOStream [#51404](https://github.com/ClickHouse/ClickHouse/pull/51404) ([Sema Checherinda](https://github.com/CheSema)).
+* Better diagnostics for 01193_metadata_loading [#51414](https://github.com/ClickHouse/ClickHouse/pull/51414) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix attaching gdb in stress tests [#51445](https://github.com/ClickHouse/ClickHouse/pull/51445) ([Kruglov Pavel](https://github.com/Avogar)).
+* Merging [#36384](https://github.com/ClickHouse/ClickHouse/issues/36384) [#51458](https://github.com/ClickHouse/ClickHouse/pull/51458) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix possible race on shutdown wait [#51497](https://github.com/ClickHouse/ClickHouse/pull/51497) ([Sergei Trifonov](https://github.com/serxa)).
+* Fix `test_alter_moving_garbage`: lock between getActiveContainingPart and swapActivePart in parts mover [#51498](https://github.com/ClickHouse/ClickHouse/pull/51498) ([vdimir](https://github.com/vdimir)).
+* Fix a logical error on mutation [#51502](https://github.com/ClickHouse/ClickHouse/pull/51502) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix running integration tests with spaces in their names [#51514](https://github.com/ClickHouse/ClickHouse/pull/51514) ([Azat Khuzhin](https://github.com/azat)).
+* Fix flaky test 00417_kill_query [#51522](https://github.com/ClickHouse/ClickHouse/pull/51522) ([Nikolay Degterinsky](https://github.com/evillique)).
+* fs cache: add some checks [#51536](https://github.com/ClickHouse/ClickHouse/pull/51536) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Don't run 02782_uniq_exact_parallel_merging_bug in parallel with other tests [#51549](https://github.com/ClickHouse/ClickHouse/pull/51549) ([Nikita Taranov](https://github.com/nickitat)).
+* 00900_orc_load: lift kill timeout [#51559](https://github.com/ClickHouse/ClickHouse/pull/51559) ([Robert Schulze](https://github.com/rschu1ze)).
+* Add retries to 00416_pocopatch_progress_in_http_headers [#51575](https://github.com/ClickHouse/ClickHouse/pull/51575) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Remove the usage of Analyzer setting in the client [#51578](https://github.com/ClickHouse/ClickHouse/pull/51578) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix merge_selecting_task scheduling [#51591](https://github.com/ClickHouse/ClickHouse/pull/51591) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Add hex functions for cityhash [#51595](https://github.com/ClickHouse/ClickHouse/pull/51595) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Remove `unset CLICKHOUSE_LOG_COMMENT` from tests [#51623](https://github.com/ClickHouse/ClickHouse/pull/51623) ([Nikita Taranov](https://github.com/nickitat)).
+* Implement endianness-independent serialization [#51637](https://github.com/ClickHouse/ClickHouse/pull/51637) ([ltrk2](https://github.com/ltrk2)).
+* Ignore APPEND and TRUNCATE modifiers if file does not exist. [#51640](https://github.com/ClickHouse/ClickHouse/pull/51640) ([alekar](https://github.com/alekar)).
+* Try to fix flaky 02210_processors_profile_log [#51641](https://github.com/ClickHouse/ClickHouse/pull/51641) ([Igor Nikonov](https://github.com/devcrafter)).
+* Make common macros extendable [#51646](https://github.com/ClickHouse/ClickHouse/pull/51646) ([Amos Bird](https://github.com/amosbird)).
+* Correct an exception message in src/Functions/nested.cpp [#51651](https://github.com/ClickHouse/ClickHouse/pull/51651) ([Alex Cheng](https://github.com/Alex-Cheng)).
+* tests: fix 02050_client_profile_events flakiness [#51653](https://github.com/ClickHouse/ClickHouse/pull/51653) ([Azat Khuzhin](https://github.com/azat)).
+* Minor follow-up to re2 update to 2023-06-02 ([#50949](https://github.com/ClickHouse/ClickHouse/issues/50949)) [#51655](https://github.com/ClickHouse/ClickHouse/pull/51655) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix 02116_tuple_element with Analyzer [#51669](https://github.com/ClickHouse/ClickHouse/pull/51669) ([Robert Schulze](https://github.com/rschu1ze)).
+* Update timeouts in tests for transactions [#51683](https://github.com/ClickHouse/ClickHouse/pull/51683) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Remove unused code [#51684](https://github.com/ClickHouse/ClickHouse/pull/51684) ([Sergei Trifonov](https://github.com/serxa)).
+* Remove `mmap/mremap/munmap` from Allocator.h [#51686](https://github.com/ClickHouse/ClickHouse/pull/51686) ([alesapin](https://github.com/alesapin)).
+* SonarCloud: Add C++23 Experimental Flag [#51687](https://github.com/ClickHouse/ClickHouse/pull/51687) ([Julio Jimenez](https://github.com/juliojimenez)).
+* Wait with retries when attaching GDB in tests [#51688](https://github.com/ClickHouse/ClickHouse/pull/51688) ([Antonio Andelic](https://github.com/antonio2368)).
+* Update version_date.tsv and changelogs after v23.6.1.1524-stable [#51691](https://github.com/ClickHouse/ClickHouse/pull/51691) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* fix write to finalized buffer [#51696](https://github.com/ClickHouse/ClickHouse/pull/51696) ([Sema Checherinda](https://github.com/CheSema)).
+* do not log exception `aborted` for pending mutate/merge entries on shutdown [#51697](https://github.com/ClickHouse/ClickHouse/pull/51697) ([Sema Checherinda](https://github.com/CheSema)).
+* Fix race in ContextAccess [#51704](https://github.com/ClickHouse/ClickHouse/pull/51704) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Make test scripts backwards compatible [#51707](https://github.com/ClickHouse/ClickHouse/pull/51707) ([Antonio Andelic](https://github.com/antonio2368)).
+* test for full join and null predicate [#51709](https://github.com/ClickHouse/ClickHouse/pull/51709) ([Denny Crane](https://github.com/den-crane)).
+* A cmake warning on job limits underutilizing CPU [#51710](https://github.com/ClickHouse/ClickHouse/pull/51710) ([velavokr](https://github.com/velavokr)).
+* Fix SQLLogic docker images [#51719](https://github.com/ClickHouse/ClickHouse/pull/51719) ([Antonio Andelic](https://github.com/antonio2368)).
+* Added ASK_PASSWORD client constant instead of hardcoded '\n' [#51723](https://github.com/ClickHouse/ClickHouse/pull/51723) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
+* Update README.md [#51726](https://github.com/ClickHouse/ClickHouse/pull/51726) ([Tyler Hannan](https://github.com/tylerhannan)).
+* Fix source image for sqllogic [#51728](https://github.com/ClickHouse/ClickHouse/pull/51728) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Remove MemoryPool from Poco because it's useless [#51732](https://github.com/ClickHouse/ClickHouse/pull/51732) ([alesapin](https://github.com/alesapin)).
+* Fix: logical error in grace hash join [#51737](https://github.com/ClickHouse/ClickHouse/pull/51737) ([Igor Nikonov](https://github.com/devcrafter)).
+* Update 01320_create_sync_race_condition_zookeeper.sh [#51742](https://github.com/ClickHouse/ClickHouse/pull/51742) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Pin for docker-ce [#51743](https://github.com/ClickHouse/ClickHouse/pull/51743) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Revert "Fix: Invalid number of rows in Chunk column Object" [#51750](https://github.com/ClickHouse/ClickHouse/pull/51750) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Add SonarCloud to README [#51751](https://github.com/ClickHouse/ClickHouse/pull/51751) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix test `02789_object_type_invalid_num_of_rows` [#51754](https://github.com/ClickHouse/ClickHouse/pull/51754) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix (benign) data race in `transform` [#51755](https://github.com/ClickHouse/ClickHouse/pull/51755) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix flaky KeeperMap test [#51764](https://github.com/ClickHouse/ClickHouse/pull/51764) ([Antonio Andelic](https://github.com/antonio2368)).
+* Version mypy=1.4.1 falsely reports unused ignore comment [#51769](https://github.com/ClickHouse/ClickHouse/pull/51769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Avoid keeping lock Context::getLock() while calculating access rights [#51772](https://github.com/ClickHouse/ClickHouse/pull/51772) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Making stateless tests with timeout less flaky [#51774](https://github.com/ClickHouse/ClickHouse/pull/51774) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix after [#51000](https://github.com/ClickHouse/ClickHouse/issues/51000) [#51790](https://github.com/ClickHouse/ClickHouse/pull/51790) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add assert in ThreadStatus destructor for correct current_thread [#51800](https://github.com/ClickHouse/ClickHouse/pull/51800) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix broken parts handling in `ReplicatedMergeTree` [#51801](https://github.com/ClickHouse/ClickHouse/pull/51801) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix tsan signal-unsafe call [#51802](https://github.com/ClickHouse/ClickHouse/pull/51802) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fix for parallel replicas not completely disabled by granule count threshold [#51805](https://github.com/ClickHouse/ClickHouse/pull/51805) ([Alexander Gololobov](https://github.com/davenger)).
+* Make sure that we don't attempt to serialize/deserialize block with 0 columns and non-zero rows [#51807](https://github.com/ClickHouse/ClickHouse/pull/51807) ([Alexander Gololobov](https://github.com/davenger)).
+* Fix rare bug in `DROP COLUMN` and enabled sparse columns [#51809](https://github.com/ClickHouse/ClickHouse/pull/51809) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix flaky `test_multiple_disks` [#51821](https://github.com/ClickHouse/ClickHouse/pull/51821) ([Antonio Andelic](https://github.com/antonio2368)).
+* Follow up to [#51547](https://github.com/ClickHouse/ClickHouse/issues/51547) [#51822](https://github.com/ClickHouse/ClickHouse/pull/51822) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Correctly grep archives in stress tests [#51824](https://github.com/ClickHouse/ClickHouse/pull/51824) ([Antonio Andelic](https://github.com/antonio2368)).
+* Update analyzer_tech_debt.txt [#51836](https://github.com/ClickHouse/ClickHouse/pull/51836) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* remove unused code [#51837](https://github.com/ClickHouse/ClickHouse/pull/51837) ([flynn](https://github.com/ucasfl)).
+* Fix disk config for upgrade tests [#51839](https://github.com/ClickHouse/ClickHouse/pull/51839) ([Antonio Andelic](https://github.com/antonio2368)).
+* Remove Coverity from workflows, but leave in the code [#51842](https://github.com/ClickHouse/ClickHouse/pull/51842) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Many fixes [3] [#51848](https://github.com/ClickHouse/ClickHouse/pull/51848) ([Ilya Yatsishin](https://github.com/qoega)).
+* Change misleading name in joins: addJoinedBlock -> addBlockToJoin [#51852](https://github.com/ClickHouse/ClickHouse/pull/51852) ([Igor Nikonov](https://github.com/devcrafter)).
+* fix: correct exception messages on policies comparison [#51854](https://github.com/ClickHouse/ClickHouse/pull/51854) ([Feng Kaiyu](https://github.com/fky2015)).
+* Update 02439_merge_selecting_partitions.sql [#51862](https://github.com/ClickHouse/ClickHouse/pull/51862) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Remove useless packages [#51863](https://github.com/ClickHouse/ClickHouse/pull/51863) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove useless logs [#51865](https://github.com/ClickHouse/ClickHouse/pull/51865) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix incorrect log level = warning [#51867](https://github.com/ClickHouse/ClickHouse/pull/51867) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix test_replicated_table_attach [#51868](https://github.com/ClickHouse/ClickHouse/pull/51868) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Better usability of a test [#51869](https://github.com/ClickHouse/ClickHouse/pull/51869) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove useless code [#51873](https://github.com/ClickHouse/ClickHouse/pull/51873) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Another fix upgrade check script [#51878](https://github.com/ClickHouse/ClickHouse/pull/51878) ([Antonio Andelic](https://github.com/antonio2368)).
+* SQLLogic improvements [#51883](https://github.com/ClickHouse/ClickHouse/pull/51883) ([Ilya Yatsishin](https://github.com/qoega)).
+* Disable ThinLTO on non-Linux [#51897](https://github.com/ClickHouse/ClickHouse/pull/51897) ([Robert Schulze](https://github.com/rschu1ze)).
+* Pin rust nightly (to make it stable) [#51903](https://github.com/ClickHouse/ClickHouse/pull/51903) ([Azat Khuzhin](https://github.com/azat)).
+* Fix build [#51909](https://github.com/ClickHouse/ClickHouse/pull/51909) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix build [#51910](https://github.com/ClickHouse/ClickHouse/pull/51910) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix flaky test `00175_partition_by_ignore` and move it to correct location [#51913](https://github.com/ClickHouse/ClickHouse/pull/51913) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix flaky test 02360_send_logs_level_colors: avoid usage of `file` tool [#51914](https://github.com/ClickHouse/ClickHouse/pull/51914) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Maybe better tests [#51916](https://github.com/ClickHouse/ClickHouse/pull/51916) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Revert system drop filesystem cache by key [#51917](https://github.com/ClickHouse/ClickHouse/pull/51917) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix flaky test `detach_attach_partition_race` [#51920](https://github.com/ClickHouse/ClickHouse/pull/51920) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Another fix for `02481_async_insert_race_long` [#51925](https://github.com/ClickHouse/ClickHouse/pull/51925) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix segfault caused by `ThreadStatus` [#51931](https://github.com/ClickHouse/ClickHouse/pull/51931) ([Antonio Andelic](https://github.com/antonio2368)).
+* Print short fault info only from safe fields [#51932](https://github.com/ClickHouse/ClickHouse/pull/51932) ([Alexander Gololobov](https://github.com/davenger)).
+* Fix typo in integration tests [#51944](https://github.com/ClickHouse/ClickHouse/pull/51944) ([Ilya Yatsishin](https://github.com/qoega)).
+* Better logs on shutdown [#51951](https://github.com/ClickHouse/ClickHouse/pull/51951) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Filter databases list before querying potentially slow fields [#51955](https://github.com/ClickHouse/ClickHouse/pull/51955) ([Alexander Gololobov](https://github.com/davenger)).
+* Fix some issues with transactions [#51959](https://github.com/ClickHouse/ClickHouse/pull/51959) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix unrelated messages from LSan in clickhouse-client [#51966](https://github.com/ClickHouse/ClickHouse/pull/51966) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Allow OOM in AST Fuzzer with Sanitizers [#51967](https://github.com/ClickHouse/ClickHouse/pull/51967) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Disable one test under Analyzer [#51968](https://github.com/ClickHouse/ClickHouse/pull/51968) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix Docker [#51969](https://github.com/ClickHouse/ClickHouse/pull/51969) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix test `01825_type_json_from_map` [#51970](https://github.com/ClickHouse/ClickHouse/pull/51970) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix test `02354_distributed_with_external_aggregation_memory_usage` [#51971](https://github.com/ClickHouse/ClickHouse/pull/51971) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix disaster in integration tests, part 2 [#51973](https://github.com/ClickHouse/ClickHouse/pull/51973) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* [RFC] Cleanup remote_servers in dist config.xml [#51985](https://github.com/ClickHouse/ClickHouse/pull/51985) ([Azat Khuzhin](https://github.com/azat)).
+* Update version_date.tsv and changelogs after v23.6.2.18-stable [#51986](https://github.com/ClickHouse/ClickHouse/pull/51986) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Update version_date.tsv and changelogs after v22.8.20.11-lts [#51987](https://github.com/ClickHouse/ClickHouse/pull/51987) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Fix performance test for regexp cache [#51988](https://github.com/ClickHouse/ClickHouse/pull/51988) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Move a test to the right place [#51989](https://github.com/ClickHouse/ClickHouse/pull/51989) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add a check to validate that the stateful tests are stateful [#51990](https://github.com/ClickHouse/ClickHouse/pull/51990) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Check that functional tests cleanup their tables [#51991](https://github.com/ClickHouse/ClickHouse/pull/51991) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix test_extreme_deduplication [#51992](https://github.com/ClickHouse/ClickHouse/pull/51992) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Cleanup SymbolIndex after reload got removed [#51993](https://github.com/ClickHouse/ClickHouse/pull/51993) ([Azat Khuzhin](https://github.com/azat)).
+* Update CompletedPipelineExecutor exception log name [#52028](https://github.com/ClickHouse/ClickHouse/pull/52028) ([xiao](https://github.com/nicelulu)).
+* Fix `00502_custom_partitioning_replicated_zookeeper_long` [#52032](https://github.com/ClickHouse/ClickHouse/pull/52032) ([Antonio Andelic](https://github.com/antonio2368)).
+* Prohibit send_metadata for s3_plain disks [#52038](https://github.com/ClickHouse/ClickHouse/pull/52038) ([Azat Khuzhin](https://github.com/azat)).
+* Update version_date.tsv and changelogs after v23.4.6.25-stable [#52061](https://github.com/ClickHouse/ClickHouse/pull/52061) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Preparations for Trivial Support For Resharding (part1) [#52068](https://github.com/ClickHouse/ClickHouse/pull/52068) ([Azat Khuzhin](https://github.com/azat)).
+* Update version_date.tsv and changelogs after v23.3.8.21-lts [#52077](https://github.com/ClickHouse/ClickHouse/pull/52077) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Fix flakiness of test_keeper_s3_snapshot [#52083](https://github.com/ClickHouse/ClickHouse/pull/52083) ([Azat Khuzhin](https://github.com/azat)).
+* Fix test_extreme_deduplication flakiness [#52085](https://github.com/ClickHouse/ClickHouse/pull/52085) ([Azat Khuzhin](https://github.com/azat)).
+* Small docs update for toYearWeek() function [#52090](https://github.com/ClickHouse/ClickHouse/pull/52090) ([Andrey Zvonov](https://github.com/zvonand)).
+* Small docs update for DateTime, DateTime64 [#52094](https://github.com/ClickHouse/ClickHouse/pull/52094) ([Andrey Zvonov](https://github.com/zvonand)).
+* Add missing --force for docker network prune (otherwise it is noop on CI) [#52095](https://github.com/ClickHouse/ClickHouse/pull/52095) ([Azat Khuzhin](https://github.com/azat)).
+* tests: drop existing view in test_materialized_mysql_database [#52103](https://github.com/ClickHouse/ClickHouse/pull/52103) ([Azat Khuzhin](https://github.com/azat)).
+* Update README.md [#52115](https://github.com/ClickHouse/ClickHouse/pull/52115) ([Tyler Hannan](https://github.com/tylerhannan)).
+* Print Zxid in keeper stat command in hex (as ZooKeeper does) [#52122](https://github.com/ClickHouse/ClickHouse/pull/52122) ([Azat Khuzhin](https://github.com/azat)).
+* Skip protection from double decompression if inode from maps cannot be obtained [#52138](https://github.com/ClickHouse/ClickHouse/pull/52138) ([Azat Khuzhin](https://github.com/azat)).
+* There is no point in detecting flaky tests [#52142](https://github.com/ClickHouse/ClickHouse/pull/52142) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove default argument value [#52143](https://github.com/ClickHouse/ClickHouse/pull/52143) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix the "kill_mutation" test [#52144](https://github.com/ClickHouse/ClickHouse/pull/52144) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix ORDER BY tuple of WINDOW functions (and slightly more changes) [#52146](https://github.com/ClickHouse/ClickHouse/pull/52146) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix possible EADDRINUSE ("Address already in use") in integration tests [#52148](https://github.com/ClickHouse/ClickHouse/pull/52148) ([Azat Khuzhin](https://github.com/azat)).
+* Fix test 02497_storage_file_reader_selection [#52154](https://github.com/ClickHouse/ClickHouse/pull/52154) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix unexpected AST Set [#52158](https://github.com/ClickHouse/ClickHouse/pull/52158) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix slow test `02317_distinct_in_order_optimization` [#52173](https://github.com/ClickHouse/ClickHouse/pull/52173) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add comments for https://github.com/ClickHouse/ClickHouse/pull/52112 [#52175](https://github.com/ClickHouse/ClickHouse/pull/52175) ([李扬](https://github.com/taiyang-li)).
+* Randomize timezone in tests across non-deterministic around 1970 and default [#52184](https://github.com/ClickHouse/ClickHouse/pull/52184) ([Azat Khuzhin](https://github.com/azat)).
+* Fix `test_multiple_disks/test.py::test_start_stop_moves` [#52189](https://github.com/ClickHouse/ClickHouse/pull/52189) ([Antonio Andelic](https://github.com/antonio2368)).
+* CMake: Simplify job limiting [#52196](https://github.com/ClickHouse/ClickHouse/pull/52196) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix self extracting binaries under qemu linux-user (qemu-$ARCH-static) [#52198](https://github.com/ClickHouse/ClickHouse/pull/52198) ([Azat Khuzhin](https://github.com/azat)).
+* Fix `Integration tests flaky check (asan)` [#52201](https://github.com/ClickHouse/ClickHouse/pull/52201) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix flaky test test_lost_part [#52202](https://github.com/ClickHouse/ClickHouse/pull/52202) ([alesapin](https://github.com/alesapin)).
+* MaterializedMySQL: Replace to_string by magic_enum::enum_name [#52204](https://github.com/ClickHouse/ClickHouse/pull/52204) ([Val Doroshchuk](https://github.com/valbok)).
+* MaterializedMySQL: Add tests to parse db and table names from DDL [#52208](https://github.com/ClickHouse/ClickHouse/pull/52208) ([Val Doroshchuk](https://github.com/valbok)).
+* Revert "Fixed several issues found by OSS-Fuzz" [#52216](https://github.com/ClickHouse/ClickHouse/pull/52216) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Use one copy replication more aggressively [#52218](https://github.com/ClickHouse/ClickHouse/pull/52218) ([alesapin](https://github.com/alesapin)).
+* Fix flaky test `01076_parallel_alter_replicated_zookeeper` [#52221](https://github.com/ClickHouse/ClickHouse/pull/52221) ([alesapin](https://github.com/alesapin)).
+* Fix 01889_key_condition_function_chains for analyzer. [#52223](https://github.com/ClickHouse/ClickHouse/pull/52223) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Inhibit settings randomization in the test `json_ghdata` [#52226](https://github.com/ClickHouse/ClickHouse/pull/52226) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Slightly better diagnostics in a test [#52227](https://github.com/ClickHouse/ClickHouse/pull/52227) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Enable no-upgrade-check for 02273_full_sort_join [#52235](https://github.com/ClickHouse/ClickHouse/pull/52235) ([vdimir](https://github.com/vdimir)).
+* Fix network manager for integration tests [#52237](https://github.com/ClickHouse/ClickHouse/pull/52237) ([Azat Khuzhin](https://github.com/azat)).
+* List replication queue only for current test database [#52238](https://github.com/ClickHouse/ClickHouse/pull/52238) ([Alexander Gololobov](https://github.com/davenger)).
+* Attempt to fix assert in tsan with fibers [#52241](https://github.com/ClickHouse/ClickHouse/pull/52241) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix undefined behaviour in fuzzer [#52256](https://github.com/ClickHouse/ClickHouse/pull/52256) ([Antonio Andelic](https://github.com/antonio2368)).
+* Follow-up to [#51959](https://github.com/ClickHouse/ClickHouse/issues/51959) [#52261](https://github.com/ClickHouse/ClickHouse/pull/52261) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* More fair queue for `drop table sync` [#52276](https://github.com/ClickHouse/ClickHouse/pull/52276) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix `02497_trace_events_stress_long` [#52279](https://github.com/ClickHouse/ClickHouse/pull/52279) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix test `01111_create_drop_replicated_db_stress` [#52283](https://github.com/ClickHouse/ClickHouse/pull/52283) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix ugly code [#52284](https://github.com/ClickHouse/ClickHouse/pull/52284) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add missing replica syncs in test_backup_restore_on_cluster [#52306](https://github.com/ClickHouse/ClickHouse/pull/52306) ([Michael Kolupaev](https://github.com/al13n321)).
+* Fix test_replicated_database 'node doesn't exist' flakiness [#52307](https://github.com/ClickHouse/ClickHouse/pull/52307) ([Michael Kolupaev](https://github.com/al13n321)).
+* Minor: Update description of events "QueryCacheHits/Misses" [#52309](https://github.com/ClickHouse/ClickHouse/pull/52309) ([Robert Schulze](https://github.com/rschu1ze)).
+* Beautify pretty-printing of the query string in SYSTEM.QUERY_CACHE [#52312](https://github.com/ClickHouse/ClickHouse/pull/52312) ([Robert Schulze](https://github.com/rschu1ze)).
+* Reduce dependencies for skim by avoid using default features [#52316](https://github.com/ClickHouse/ClickHouse/pull/52316) ([Azat Khuzhin](https://github.com/azat)).
+* Fix 02725_memory-for-merges [#52317](https://github.com/ClickHouse/ClickHouse/pull/52317) ([alesapin](https://github.com/alesapin)).
+* Skip unsupported disks in Keeper [#52321](https://github.com/ClickHouse/ClickHouse/pull/52321) ([Antonio Andelic](https://github.com/antonio2368)).
+* Revert "Improve CSVInputFormat to check and set default value to column if deserialize failed" [#52322](https://github.com/ClickHouse/ClickHouse/pull/52322) ([Kruglov Pavel](https://github.com/Avogar)).
+* Resubmit [#51716](https://github.com/ClickHouse/ClickHouse/issues/51716) [#52323](https://github.com/ClickHouse/ClickHouse/pull/52323) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add logging about all found workflows for merge_pr.py [#52324](https://github.com/ClickHouse/ClickHouse/pull/52324) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Minor: Less awkward IAST::FormatSettings [#52332](https://github.com/ClickHouse/ClickHouse/pull/52332) ([Robert Schulze](https://github.com/rschu1ze)).
+* Mark test 02125_many_mutations_2 as no-parallel to avoid flakiness [#52338](https://github.com/ClickHouse/ClickHouse/pull/52338) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix capabilities installed via systemd service (fixes netlink/IO priorities) [#52357](https://github.com/ClickHouse/ClickHouse/pull/52357) ([Azat Khuzhin](https://github.com/azat)).
+* Update 01606_git_import.sh [#52360](https://github.com/ClickHouse/ClickHouse/pull/52360) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Update ci-slack-bot.py [#52372](https://github.com/ClickHouse/ClickHouse/pull/52372) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix `test_keeper_session` [#52373](https://github.com/ClickHouse/ClickHouse/pull/52373) ([Antonio Andelic](https://github.com/antonio2368)).
+* Update ci-slack-bot.py [#52374](https://github.com/ClickHouse/ClickHouse/pull/52374) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Disable analyzer setting in backward_compatibility integration tests. [#52375](https://github.com/ClickHouse/ClickHouse/pull/52375) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* New metric - Filesystem cache size limit [#52378](https://github.com/ClickHouse/ClickHouse/pull/52378) ([Krzysztof Góralski](https://github.com/kgoralski)).
+* Fix `test_replicated_merge_tree_encrypted_disk` [#52379](https://github.com/ClickHouse/ClickHouse/pull/52379) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fix `02122_parallel_formatting_XML` [#52380](https://github.com/ClickHouse/ClickHouse/pull/52380) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Follow up to [#49698](https://github.com/ClickHouse/ClickHouse/issues/49698) [#52381](https://github.com/ClickHouse/ClickHouse/pull/52381) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Less replication errors [#52382](https://github.com/ClickHouse/ClickHouse/pull/52382) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Rename TaskStatsInfoGetter into NetlinkMetricsProvider [#52392](https://github.com/ClickHouse/ClickHouse/pull/52392) ([Azat Khuzhin](https://github.com/azat)).
+* Fix `test_keeper_force_recovery` [#52408](https://github.com/ClickHouse/ClickHouse/pull/52408) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix flaky gtest_lru_file_cache.cpp [#52418](https://github.com/ClickHouse/ClickHouse/pull/52418) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix: remove redundant distinct with views [#52438](https://github.com/ClickHouse/ClickHouse/pull/52438) ([Igor Nikonov](https://github.com/devcrafter)).
+* Add 02815_range_dict_no_direct_join to analyzer_tech_debt.txt [#52464](https://github.com/ClickHouse/ClickHouse/pull/52464) ([vdimir](https://github.com/vdimir)).
+* do not throw exception in OptimizedRegularExpressionImpl::analyze [#52467](https://github.com/ClickHouse/ClickHouse/pull/52467) ([Han Fei](https://github.com/hanfei1991)).
+* Remove skip_startup_tables from IDatabase::loadStoredObjects() [#52491](https://github.com/ClickHouse/ClickHouse/pull/52491) ([Azat Khuzhin](https://github.com/azat)).
+* Fix test_insert_same_partition_and_merge by increasing wait time [#52497](https://github.com/ClickHouse/ClickHouse/pull/52497) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Try to fix asan warning in HashJoin [#52499](https://github.com/ClickHouse/ClickHouse/pull/52499) ([Igor Nikonov](https://github.com/devcrafter)).
+* Replace with three way comparison [#52509](https://github.com/ClickHouse/ClickHouse/pull/52509) ([flynn](https://github.com/ucasfl)).
+* Fix flakiness of test_version_update_after_mutation by enabling force_remove_data_recursively_on_drop [#52514](https://github.com/ClickHouse/ClickHouse/pull/52514) ([Azat Khuzhin](https://github.com/azat)).
+* Fix `test_throttling` [#52515](https://github.com/ClickHouse/ClickHouse/pull/52515) ([Antonio Andelic](https://github.com/antonio2368)).
+* Improve logging macros [#52519](https://github.com/ClickHouse/ClickHouse/pull/52519) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix `toDecimalString` function [#52520](https://github.com/ClickHouse/ClickHouse/pull/52520) ([Andrey Zvonov](https://github.com/zvonand)).
+* Remove unused code [#52527](https://github.com/ClickHouse/ClickHouse/pull/52527) ([Raúl Marín](https://github.com/Algunenano)).
+* Cancel execution in PipelineExecutor in case of exception in graph->updateNode [#52533](https://github.com/ClickHouse/ClickHouse/pull/52533) ([Kruglov Pavel](https://github.com/Avogar)).
+* Make 01951_distributed_push_down_limit analyzer agnostic [#52534](https://github.com/ClickHouse/ClickHouse/pull/52534) ([Igor Nikonov](https://github.com/devcrafter)).
+* Fix disallow_concurrency test for backup and restore [#52536](https://github.com/ClickHouse/ClickHouse/pull/52536) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Update 02136_scalar_subquery_metrics.sql [#52537](https://github.com/ClickHouse/ClickHouse/pull/52537) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* tests: fix 01035_avg_weighted_long flakiness [#52556](https://github.com/ClickHouse/ClickHouse/pull/52556) ([Azat Khuzhin](https://github.com/azat)).
+* tests: increase throttling for 01923_network_receive_time_metric_insert [#52557](https://github.com/ClickHouse/ClickHouse/pull/52557) ([Azat Khuzhin](https://github.com/azat)).
+* tests: fix 00719_parallel_ddl_table flakiness in debug builds [#52558](https://github.com/ClickHouse/ClickHouse/pull/52558) ([Azat Khuzhin](https://github.com/azat)).
+* tests: fix 01821_join_table_race_long flakiness [#52559](https://github.com/ClickHouse/ClickHouse/pull/52559) ([Azat Khuzhin](https://github.com/azat)).
+* Fix flaky `00995_exception_while_insert` [#52568](https://github.com/ClickHouse/ClickHouse/pull/52568) ([Antonio Andelic](https://github.com/antonio2368)).
+* MaterializedMySQL: Fix typos in tests [#52575](https://github.com/ClickHouse/ClickHouse/pull/52575) ([Val Doroshchuk](https://github.com/valbok)).
+* Fix `02497_trace_events_stress_long` again [#52587](https://github.com/ClickHouse/ClickHouse/pull/52587) ([Antonio Andelic](https://github.com/antonio2368)).
+* Revert "Remove `mmap/mremap/munmap` from Allocator.h" [#52589](https://github.com/ClickHouse/ClickHouse/pull/52589) ([Nikita Taranov](https://github.com/nickitat)).
+* Remove peak memory usage from the final message in the client [#52598](https://github.com/ClickHouse/ClickHouse/pull/52598) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* GinIndexStore: fix a bug where files are finalized after the first write [#52602](https://github.com/ClickHouse/ClickHouse/pull/52602) ([Sema Checherinda](https://github.com/CheSema)).
+* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix build with clang-15 [#52627](https://github.com/ClickHouse/ClickHouse/pull/52627) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix style [#52647](https://github.com/ClickHouse/ClickHouse/pull/52647) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix logging level of a noisy message [#52648](https://github.com/ClickHouse/ClickHouse/pull/52648) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Revert "Added field `refcount` to `system.remote_data_paths` table" [#52657](https://github.com/ClickHouse/ClickHouse/pull/52657) ([Alexander Tokmakov](https://github.com/tavplubix)).
+
diff --git a/docs/en/development/building_and_benchmarking_deflate_qpl.md b/docs/en/development/building_and_benchmarking_deflate_qpl.md
index 0501c1cbdcb..4e01b41ab3c 100644
--- a/docs/en/development/building_and_benchmarking_deflate_qpl.md
+++ b/docs/en/development/building_and_benchmarking_deflate_qpl.md
@@ -7,12 +7,8 @@ description: How to build Clickhouse and run benchmark with DEFLATE_QPL Codec
# Build ClickHouse with DEFLATE_QPL
-- Make sure your target machine meet the QPL required [prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites)
-- Pass the following flag to CMake when building ClickHouse:
-
-``` bash
-cmake -DENABLE_QPL=1 ..
-```
+- Make sure your host machine meets the QPL required [prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites)
+- DEFLATE_QPL is enabled by default during the cmake build. In case you accidentally changed it, please double-check the build flag `ENABLE_QPL=1` (see the sketch after this list)
- For generic requirements, please refer to ClickHouse generic [build instructions](/docs/en/development/build.md)
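+
+A minimal sketch of re-enabling the flag explicitly, in case it was turned off (run from a fresh build directory):
+
+``` bash
+# Re-enable the DEFLATE_QPL codec support explicitly
+cmake -DENABLE_QPL=1 ..
+```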
diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md
index 0e2b48ef6a6..051945538b2 100644
--- a/docs/en/engines/table-engines/integrations/s3.md
+++ b/docs/en/engines/table-engines/integrations/s3.md
@@ -57,7 +57,8 @@ Notice that the S3 endpoint in the `ENGINE` configuration uses the parameter tok
:::note
As shown in the example, querying from S3 tables that are partitioned is
-not directly supported at this time, but can be accomplished by querying the bucket contents with a wildcard.
+not directly supported at this time, but can be accomplished by querying the individual partitions
+using the S3 table function.
The primary use-case for writing
partitioned data in S3 is to enable transferring that data into another
@@ -127,23 +128,7 @@ FROM s3('http://minio:10000/clickhouse//test_45.csv', 'minioadmin', 'minioadminp
└────┴────┴────┘
```
-#### Select from all partitions
-
-```sql
-SELECT *
-FROM s3('http://minio:10000/clickhouse//**', 'minioadmin', 'minioadminpassword', 'CSV')
-```
-```response
-┌─c1─┬─c2─┬─c3─┐
-│ 3 │ 2 │ 1 │
-└────┴────┴────┘
-┌─c1─┬─c2─┬─c3─┐
-│ 1 │ 2 │ 3 │
-└────┴────┴────┘
-┌─c1─┬─c2─┬─c3─┐
-│ 78 │ 43 │ 45 │
-└────┴────┴────┘
-```
+#### Limitation
You may naturally try to `SELECT * FROM p`, but as noted above, this query will fail; use the preceding query.
diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md
index 26d4975954f..f556df0a088 100644
--- a/docs/en/engines/table-engines/special/url.md
+++ b/docs/en/engines/table-engines/special/url.md
@@ -106,3 +106,4 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da
## Storage Settings {#storage-settings}
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
+- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) - allows to disable decoding/encoding of the path in the URI. Disabled by default.
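+
+For illustration, a sketch of a query that passes the path through verbatim instead of decoding it (host and file name are placeholders):
+
+``` bash
+# With disable_url_encoding = 1, "%20" in the path is sent as-is, not decoded to a space
+clickhouse-client --query "SELECT * FROM url('http://127.0.0.1:8000/some%20file.csv', 'CSV') SETTINGS disable_url_encoding = 1"
+```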
diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md
index 012fa23e6d4..15f9d1f47bf 100644
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@@ -473,6 +473,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`.
+- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to a column when CSV field deserialization fails on a bad value. Default value - `false`.
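+
+A minimal sketch of the setting in action (table name and data are illustrative; assume `test_table` has two numeric columns): a malformed CSV field is replaced by the column's default value instead of aborting the insert:
+
+``` bash
+echo '1,not_a_number' | clickhouse-client \
+    --input_format_csv_use_default_on_bad_values=1 \
+    --query "INSERT INTO test_table FORMAT CSV"
+```
+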
## CSVWithNames {#csvwithnames}
diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md
index 3a7f6d4d854..37821f0fee1 100644
--- a/docs/en/interfaces/http.md
+++ b/docs/en/interfaces/http.md
@@ -56,7 +56,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
-X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
1
```
@@ -286,9 +286,9 @@ Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you
You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example of the header sequence:
``` text
-X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"}
+X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
+X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
+X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
```
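+
+For example, a sketch of observing these progress headers with curl against a local server (host and query are illustrative):
+
+``` bash
+# curl -v prints response headers to stderr; redirect so grep can see them
+curl -sS -v 'http://localhost:8123/?send_progress_in_http_headers=1' \
+    --data-binary 'SELECT sum(number) FROM numbers(100000000)' 2>&1 | grep 'X-ClickHouse-Progress'
+```
+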
Possible header fields:
@@ -416,7 +416,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
# HELP "Query" "Number of executing queries"
# TYPE "Query" counter
@@ -581,7 +581,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@@ -621,7 +621,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
%
@@ -673,7 +673,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
Absolute Path File
* Connection #0 to host localhost left intact
@@ -692,7 +692,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
Relative Path File
* Connection #0 to host localhost left intact
diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md
index d3e21cb2364..a19c55673ed 100644
--- a/docs/en/operations/configuration-files.md
+++ b/docs/en/operations/configuration-files.md
@@ -65,6 +65,43 @@ XML substitution example:
Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element.
+## Encrypting Configuration {#encryption}
+
+You can use symmetric encryption to encrypt a configuration element, for example, a password field. To do so, first configure the [encryption codec](../sql-reference/statements/create/table.md#encryption-codecs), then add the attribute `encrypted_by`, with the name of the encryption codec as its value, to the element to encrypt.
+
+Unlike attributes `from_zk`, `from_env` and `incl` (or element `include`), no substitution, i.e. decryption of the encrypted value, is performed in the preprocessed file. Decryption happens only at runtime in the server process.
+
+Example:
+
+```xml
+<clickhouse>
+    <encryption_codecs>
+        <aes_128_gcm_siv>
+            <key_hex>00112233445566778899aabbccddeeff</key_hex>
+        </aes_128_gcm_siv>
+    </encryption_codecs>
+    <interserver_http_credentials>
+        <user_name>admin</user_name>
+        <password encrypted_by="AES_128_GCM_SIV">961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85</password>
+    </interserver_http_credentials>
+</clickhouse>
+```
+
+To encrypt a value, you can use the (example) program `encrypt_decrypt`:
+
+``` bash
+./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd
+```
+
+``` text
+961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85
+```
+
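+To decrypt a value back, a sketch (assuming the tool's decryption switch is `-d`, mirroring the `-e` switch above):
+
+``` bash
+./encrypt_decrypt /etc/clickhouse-server/config.xml -d AES_128_GCM_SIV 961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85
+```
+
+``` text
+abcd
+```
+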
## User Settings {#user-settings}
The `config.xml` file can specify a separate config with user settings, profiles, and quotas. The relative path to this config is set in the `users_config` element. By default, it is `users.xml`. If `users_config` is omitted, the user settings, profiles, and quotas are specified directly in `config.xml`.
@@ -104,12 +141,17 @@ Here you can see default config written in YAML: [config.yaml.example](https://g
There are some differences between YAML and XML formats in terms of ClickHouse configurations. Here are some tips for writing a configuration in YAML format.
-You should use a Scalar node to write a key-value pair:
+An XML tag with a text value is represented by a YAML key-value pair:
``` yaml
key: value
```
-To create a node, containing other nodes you should use a Map:
+Corresponding XML:
+``` xml
+<key>value</key>
+```
+
+A nested XML node is represented by a YAML map:
``` yaml
map_key:
key1: val1
@@ -117,7 +159,16 @@ map_key:
key3: val3
```
-To create a list of values or nodes assigned to one tag you should use a Sequence:
+Corresponding XML:
+``` xml
+<map_key>
+    <key1>val1</key1>
+    <key2>val2</key2>
+    <key3>val3</key3>
+</map_key>
+```
+
+To create the same XML tag multiple times, use a YAML sequence:
``` yaml
seq_key:
- val1
@@ -128,8 +179,22 @@ seq_key:
key3: val5
```
-If you want to write an attribute for a Sequence or Map node, you should use a @ prefix before the attribute key. Note, that @ is reserved by YAML standard, so you should also to wrap it into double quotes:
+Corresponding XML:
+```xml
+<seq_key>val1</seq_key>
+<seq_key>val2</seq_key>
+<seq_key>
+    <key1>val3</key1>
+</seq_key>
+<seq_key>
+    <map>
+        <key2>val4</key2>
+        <key3>val5</key3>
+    </map>
+</seq_key>
+```
+To provide an XML attribute, you can use an attribute key with a `@` prefix. Note that `@` is reserved by the YAML standard, so it must be wrapped in double quotes:
``` yaml
map:
"@attr1": value1
@@ -137,16 +202,14 @@ map:
key: 123
```
-From that Map we will get these XML nodes:
-
+Corresponding XML:
``` xml
```
-You can also set attributes for Sequence:
-
+It is also possible to use attributes in a YAML sequence:
``` yaml
seq:
- "@attr1": value1
@@ -155,13 +218,25 @@ seq:
- abc
```
-So, we can get YAML config equal to this XML one:
-
+Corresponding XML:
``` xml
123
abc
```
+The aforementioned syntax does not allow expressing XML text nodes that also carry XML attributes in YAML. This special case can be achieved using a
+`#text` attribute key:
+```yaml
+map_key:
+ "@attr1": value1
+ "#text": value2
+```
+
+Corresponding XML:
+```xml
+<map_key attr1="value1">value2</map_key>
+```
+
## Implementation Details {#implementation-details}
For each config file, the server also generates `file-preprocessed.xml` files when starting. These files contain all the completed substitutions and overrides, and they are intended for informational use. If ZooKeeper substitutions were used in the config files but ZooKeeper is not available on the server start, the server loads the configuration from the preprocessed file.
diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md
index bfa51650cd8..d0b785d8fda 100644
--- a/docs/en/operations/query-cache.md
+++ b/docs/en/operations/query-cache.md
@@ -61,9 +61,12 @@ use_query_cache = true`) but one should keep in mind that all `SELECT` queries i
may return cached results then.
The query cache can be cleared using statement `SYSTEM DROP QUERY CACHE`. The content of the query cache is displayed in system table
-`system.query_cache`. The number of query cache hits and misses are shown as events "QueryCacheHits" and "QueryCacheMisses" in system table
-`system.events`. Both counters are only updated for `SELECT` queries which run with setting "use_query_cache = true". Other queries do not
-affect the cache miss counter.
+`system.query_cache`. The number of query cache hits and misses since database start are shown as events "QueryCacheHits" and
+"QueryCacheMisses" in system table [system.events](system-tables/events.md). Both counters are only updated for `SELECT` queries which run
+with setting `use_query_cache = true`; other queries do not affect "QueryCacheMisses". Field `query_cache_usage` in system table
+[system.query_log](system-tables/query_log.md) shows for each executed query whether the query result was written into or read from the
+query cache. Asynchronous metrics "QueryCacheEntries" and "QueryCacheBytes" in system table
+[system.asynchronous_metrics](system-tables/asynchronous_metrics.md) show how many entries / bytes the query cache currently contains.
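+
+As an illustration, the hit/miss counters named above can be inspected directly (a sketch):
+
+``` bash
+clickhouse-client --query "SELECT event, value FROM system.events WHERE event IN ('QueryCacheHits', 'QueryCacheMisses')"
+```
+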
The query cache exists once per ClickHouse server process. However, cache results are by default not shared between users. This can be
changed (see below) but doing so is not recommended for security reasons.
diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index a6ae517e401..e9f0f0dae00 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -512,7 +512,7 @@ Both the cache for `local_disk`, and temporary data will be stored in `/tiny_loc
cache
local_disk
/tiny_local_cache/
- 10M
+ 10M
1M
1
0
@@ -1592,6 +1592,10 @@ To manually turn on metrics history collection [`system.metric_log`](../../opera
     <flush_interval_milliseconds>7500</flush_interval_milliseconds>
     <collect_interval_milliseconds>1000</collect_interval_milliseconds>
+    <max_size_rows>1048576</max_size_rows>
+    <reserved_size_rows>8192</reserved_size_rows>
+    <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+    <flush_on_crash>false</flush_on_crash>
```
@@ -1695,6 +1699,14 @@ Use the following parameters to configure logging:
- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined.
- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined.
- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table.
+- `max_size_rows` – Maximal size of the log buffer in rows. When the amount of non-flushed log rows reaches `max_size_rows`, the logs are dumped to the disk.
+Default: 1048576.
+- `reserved_size_rows` – Pre-allocated buffer size for the logs, in rows.
+Default: 8192.
+- `buffer_size_rows_flush_threshold` – Threshold on the number of buffered rows; when it is reached, flushing the logs to the disk starts in the background.
+Default: `max_size_rows / 2`.
+- `flush_on_crash` – Indicates whether the logs should be dumped to the disk in case of a crash.
+Default: false.
- `storage_policy` – Name of storage policy to use for the table (optional)
- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional).
@@ -1706,6 +1718,10 @@ Use the following parameters to configure logging:
     <partition_by>toMonday(event_date)</partition_by>
     <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    <max_size_rows>1048576</max_size_rows>
+    <reserved_size_rows>8192</reserved_size_rows>
+    <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+    <flush_on_crash>false</flush_on_crash>
```
@@ -1773,6 +1789,14 @@ Use the following parameters to configure logging:
- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined.
- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined.
- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table.
+- `max_size_rows` – Maximal size of the log buffer in rows. When the amount of non-flushed log rows reaches `max_size_rows`, the logs are dumped to the disk.
+Default: 1048576.
+- `reserved_size_rows` – Pre-allocated buffer size for the logs, in rows.
+Default: 8192.
+- `buffer_size_rows_flush_threshold` – Threshold on the number of buffered rows; when it is reached, flushing the logs to the disk starts in the background.
+Default: `max_size_rows / 2`.
+- `flush_on_crash` – Indicates whether the logs should be dumped to the disk in case of a crash.
+Default: false.
- `storage_policy` – Name of storage policy to use for the table (optional)
- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional).
@@ -1786,6 +1810,10 @@ If the table does not exist, ClickHouse will create it. If the structure of the
     <engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine>
     <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    <max_size_rows>1048576</max_size_rows>
+    <reserved_size_rows>8192</reserved_size_rows>
+    <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+    <flush_on_crash>false</flush_on_crash>
```
@@ -1831,6 +1859,14 @@ Use the following parameters to configure logging:
- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined.
- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined.
- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table.
+- `max_size_rows` – Maximal size of the log buffer in rows. When the amount of non-flushed log rows reaches `max_size_rows`, the logs are dumped to the disk.
+Default: 1048576.
+- `reserved_size_rows` – Pre-allocated buffer size for the logs, in rows.
+Default: 8192.
+- `buffer_size_rows_flush_threshold` – Threshold on the number of buffered rows; when it is reached, flushing the logs to the disk starts in the background.
+Default: `max_size_rows / 2`.
+- `flush_on_crash` – Indicates whether the logs should be dumped to the disk in case of a crash.
+Default: false.
- `storage_policy` – Name of storage policy to use for the table (optional)
- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional).
@@ -1844,6 +1880,10 @@ If the table does not exist, ClickHouse will create it. If the structure of the
     <partition_by>toMonday(event_date)</partition_by>
     <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    <max_size_rows>1048576</max_size_rows>
+    <reserved_size_rows>8192</reserved_size_rows>
+    <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+    <flush_on_crash>false</flush_on_crash>
```
@@ -1861,6 +1901,14 @@ Use the following parameters to configure logging:
- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined.
- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined.
- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table.
+- `max_size_rows` – Maximal size of the log buffer in rows. When the amount of non-flushed log rows reaches `max_size_rows`, the logs are dumped to the disk.
+Default: 1048576.
+- `reserved_size_rows` – Pre-allocated buffer size for the logs, in rows.
+Default: 8192.
+- `buffer_size_rows_flush_threshold` – Threshold on the number of buffered rows; when it is reached, flushing the logs to the disk starts in the background.
+Default: `max_size_rows / 2`.
+- `flush_on_crash` – Indicates whether the logs should be dumped to the disk in case of a crash.
+Default: false.
- `storage_policy` – Name of storage policy to use for the table (optional)
- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional).
@@ -1874,6 +1922,10 @@ If the table does not exist, ClickHouse will create it. If the structure of the
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
+ <max_size_rows>1048576</max_size_rows>
+ <reserved_size_rows>8192</reserved_size_rows>
+ <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+ <flush_on_crash>false</flush_on_crash>
```
@@ -1890,6 +1942,14 @@ Parameters:
- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined.
- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined.
- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table.
+- `max_size_rows` – Maximum size of the log buffer in rows. When the amount of unflushed log entries reaches `max_size_rows`, the logs are dumped to disk.
+Default: 1048576.
+- `reserved_size_rows` – Pre-allocated memory size in rows for the log buffer.
+Default: 8192.
+- `buffer_size_rows_flush_threshold` – Row count threshold at which flushing the logs to disk starts in the background.
+Default: `max_size_rows / 2`.
+- `flush_on_crash` - Indicates whether the logs should be dumped to disk in case of a crash.
+Default: false.
- `storage_policy` – Name of storage policy to use for the table (optional)
- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional).
@@ -1901,13 +1961,16 @@ Parameters:
<database>system</database>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
+ <max_size_rows>1048576</max_size_rows>
+ <reserved_size_rows>8192</reserved_size_rows>
+ <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+ <flush_on_crash>false</flush_on_crash>
<engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine>
```
-
## trace_log {#server_configuration_parameters-trace_log}
Settings for the [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation.
@@ -1920,6 +1983,12 @@ Parameters:
- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined.
- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` or `order_by` defined.
- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table.
+- `max_size_rows` – Maximum size of the log buffer in rows. When the amount of unflushed log entries reaches `max_size_rows`, the logs are dumped to disk.
+Default: 1048576.
+- `reserved_size_rows` – Pre-allocated memory size in rows for the log buffer.
+Default: 8192.
+- `buffer_size_rows_flush_threshold` – Row count threshold at which flushing the logs to disk starts in the background.
+Default: `max_size_rows / 2`.
- `storage_policy` – Name of storage policy to use for the table (optional)
- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional).
@@ -1931,6 +2000,10 @@ The default server configuration file `config.xml` contains the following settin
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
+ <max_size_rows>1048576</max_size_rows>
+ <reserved_size_rows>8192</reserved_size_rows>
+ <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+ <flush_on_crash>false</flush_on_crash>
```
@@ -1945,9 +2018,18 @@ Parameters:
- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined.
- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined.
- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table.
+- `max_size_rows` – Maximum size of the log buffer in rows. When the amount of unflushed log entries reaches `max_size_rows`, the logs are dumped to disk.
+Default: 1048576.
+- `reserved_size_rows` – Pre-allocated memory size in rows for the log buffer.
+Default: 8192.
+- `buffer_size_rows_flush_threshold` – Row count threshold at which flushing the logs to disk starts in the background.
+Default: `max_size_rows / 2`.
+- `flush_on_crash` - Indicates whether the logs should be dumped to disk in case of a crash.
+Default: false.
- `storage_policy` – Name of storage policy to use for the table (optional)
**Example**
+
```xml
@@ -1955,11 +2037,53 @@ Parameters:
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<partition_by>toYYYYMM(event_date)</partition_by>
+ <max_size_rows>1048576</max_size_rows>
+ <reserved_size_rows>8192</reserved_size_rows>
+ <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+ <flush_on_crash>false</flush_on_crash>
```
+## crash_log {#server_configuration_parameters-crash_log}
+
+Settings for the [crash_log](../../operations/system-tables/crash-log.md) system table operation.
+
+Parameters:
+
+- `database` — Database for storing a table.
+- `table` — Table name.
+- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined.
+- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined.
+- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` or `order_by` defined.
+- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table.
+- `max_size_rows` – Maximum size of the log buffer in rows. When the amount of unflushed log entries reaches `max_size_rows`, the logs are dumped to disk.
+Default: 1048576.
+- `reserved_size_rows` – Pre-allocated memory size in rows for the log buffer.
+Default: 8192.
+- `buffer_size_rows_flush_threshold` – Row count threshold at which flushing the logs to disk starts in the background.
+Default: `max_size_rows / 2`.
+- `flush_on_crash` - Indicates whether the logs should be dumped to disk in case of a crash.
+Default: false.
+- `storage_policy` – Name of storage policy to use for the table (optional)
+- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional).
+
+The default server configuration file `config.xml` contains the following settings section:
+
+``` xml
+<crash_log>
+    <database>system</database>
+    <table>crash_log</table>
+    <partition_by>toYYYYMM(event_date)</partition_by>
+    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    <max_size_rows>1024</max_size_rows>
+    <reserved_size_rows>1024</reserved_size_rows>
+    <buffer_size_rows_flush_threshold>512</buffer_size_rows_flush_threshold>
+    <flush_on_crash>false</flush_on_crash>
+</crash_log>
+```
+
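+Once the server has recorded at least one crash, the table can be queried like any other system table; a minimal check (columns as documented for [crash_log](../../operations/system-tables/crash-log.md)):
+
+```sql
+SELECT event_time, signal, query_id
+FROM system.crash_log
+ORDER BY event_time DESC
+LIMIT 1;
+```
+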
## query_masking_rules {#query-masking-rules}
Regexp-based rules, which will be applied to queries as well as all log messages before storing them in server logs,
diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md
index 0915c51806a..fb10ff7f61b 100644
--- a/docs/en/operations/settings/settings-formats.md
+++ b/docs/en/operations/settings/settings-formats.md
@@ -989,6 +989,28 @@ Result
a b
```
+### input_format_csv_use_default_on_bad_values {#input_format_csv_use_default_on_bad_values}
+
+Allows setting a column's default value when CSV field deserialization fails on a bad value.
+
+Default value: `false`.
+
+**Examples**
+
+Query
+
+```bash
+./clickhouse local -q "create table test_tbl (x String, y UInt32, z Date) engine=MergeTree order by x"
+echo 'a,b,c' | ./clickhouse local -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV"
+./clickhouse local -q "select * from test_tbl"
+```
+
+Result
+
+```text
+a 0 1971-01-01
+```
+
## Values format settings {#values-format-settings}
### input_format_values_interpret_expressions {#input_format_values_interpret_expressions}
@@ -1142,7 +1164,7 @@ Enabled by default.
Compression method used in output Arrow format. Supported codecs: `lz4_frame`, `zstd`, `none` (uncompressed)
-Default value: `none`.
+Default value: `lz4_frame`.
## ORC format settings {#orc-format-settings}
@@ -1325,6 +1347,17 @@ Default value: 0.
Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format.
+Format:
+``` text
+http://[user:password@]machine[:port]"
+```
+
+Examples:
+``` text
+http://registry.example.com:8081
+http://admin:secret@registry.example.com:8081
+```
+
Default value: `Empty`.
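
The URL can be applied per session or per query; for example, before reading `AvroConfluent` data (the registry address below is illustrative):

```sql
SET format_avro_schema_registry_url = 'http://registry.example.com:8081';
```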
### output_format_avro_codec {#output_format_avro_codec}
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 22aeecf4335..8dfb6c0d225 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -537,6 +537,8 @@ Possible values:
The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the assigned bucket is recalculated for each row. Any rows which don’t belong to the current bucket are flushed and reassigned.
+ Supports `INNER/LEFT/RIGHT/FULL ALL/ANY JOIN`.
+
- hash
[Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section.
@@ -3466,6 +3468,12 @@ Possible values:
Default value: `0`.
+## disable_url_encoding {#disable_url_encoding}
+
+Allows disabling the decoding/encoding of the path in the URI for [URL](../../engines/table-engines/special/url.md) engine tables.
+
+Disabled by default.
+
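+A sketch of the intended use; the URL and the column structure are illustrative:
+
+```sql
+-- keep the percent-encoded path exactly as written instead of re-encoding it
+SELECT *
+FROM url('http://127.0.0.1:8123/data%20dir/file.csv', CSV, 'id UInt32, name String')
+SETTINGS disable_url_encoding = 1;
+```
+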
## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}
Adds a modifier `SYNC` to all `DROP` and `DETACH` queries.
diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md
index f357341da67..e46b495239c 100644
--- a/docs/en/operations/system-tables/asynchronous_metrics.md
+++ b/docs/en/operations/system-tables/asynchronous_metrics.md
@@ -32,6 +32,10 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10
└─────────────────────────────────────────┴────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
+
+
## Metric descriptions
@@ -483,6 +487,14 @@ The value is similar to `OSUserTime` but divided to the number of CPU cores to b
Number of threads in the server of the PostgreSQL compatibility protocol.
+### QueryCacheBytes
+
+Total size of the query cache in bytes.
+
+### QueryCacheEntries
+
+Total number of entries in the query cache.
+
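+Both values can be read together with a single query, for example:
+
+```sql
+SELECT metric, value
+FROM system.asynchronous_metrics
+WHERE metric IN ('QueryCacheBytes', 'QueryCacheEntries');
+```
+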
### ReplicasMaxAbsoluteDelay
Maximum difference in seconds between the most fresh replicated part and the most fresh data part still to be replicated, across Replicated tables. A very high value indicates a replica with no data.
diff --git a/docs/en/operations/system-tables/events.md b/docs/en/operations/system-tables/events.md
index ba5602ee292..7846fe4be5d 100644
--- a/docs/en/operations/system-tables/events.md
+++ b/docs/en/operations/system-tables/events.md
@@ -11,6 +11,8 @@ Columns:
- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of events occurred.
- `description` ([String](../../sql-reference/data-types/string.md)) — Event description.
+You can find all supported events in source file [src/Common/ProfileEvents.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/ProfileEvents.cpp).
+
**Example**
``` sql
diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md
index 1b720098fc7..a46f306f677 100644
--- a/docs/en/operations/system-tables/index.md
+++ b/docs/en/operations/system-tables/index.md
@@ -47,6 +47,10 @@ An example:
ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
+ <max_size_rows>1048576</max_size_rows>
+ <reserved_size_rows>8192</reserved_size_rows>
+ <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+ <flush_on_crash>false</flush_on_crash>
```
diff --git a/docs/en/operations/system-tables/merge_tree_settings.md b/docs/en/operations/system-tables/merge_tree_settings.md
index d8539908bf7..557835ce3b6 100644
--- a/docs/en/operations/system-tables/merge_tree_settings.md
+++ b/docs/en/operations/system-tables/merge_tree_settings.md
@@ -7,11 +7,17 @@ Contains information about settings for `MergeTree` tables.
Columns:
-- `name` (String) — Setting name.
-- `value` (String) — Setting value.
-- `description` (String) — Setting description.
-- `type` (String) — Setting type (implementation specific string value).
-- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed.
+- `name` ([String](../../sql-reference/data-types/string.md)) — Setting name.
+- `value` ([String](../../sql-reference/data-types/string.md)) — Setting value.
+- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Whether the setting was explicitly defined in the config or explicitly changed.
+- `description` ([String](../../sql-reference/data-types/string.md)) — Setting description.
+- `min` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Minimum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no minimum value, contains [NULL](../../sql-reference/syntax.md#null-literal).
+- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Maximum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no maximum value, contains [NULL](../../sql-reference/syntax.md#null-literal).
+- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting:
+ - `0` — Current user can change the setting.
+ - `1` — Current user can’t change the setting.
+- `type` ([String](../../sql-reference/data-types/string.md)) — Setting type (implementation specific string value).
+- `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is obsolete.
**Example**
```sql
@@ -21,35 +27,51 @@ SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical;
```response
Row 1:
──────
+name: min_compress_block_size
+value: 0
+changed: 0
+description: When granule is written, compress the data in buffer if the size of pending uncompressed data is larger or equal than the specified threshold. If this setting is not set, the corresponding global setting is used.
+min: ᴺᵁᴸᴸ
+max: ᴺᵁᴸᴸ
+readonly: 0
+type: UInt64
+is_obsolete: 0
+
+Row 2:
+──────
+name: max_compress_block_size
+value: 0
+changed: 0
+description: Compress the pending uncompressed data in buffer if its size is larger or equal than the specified threshold. Block of data will be compressed even if the current granule is not finished. If this setting is not set, the corresponding global setting is used.
+min: ᴺᵁᴸᴸ
+max: ᴺᵁᴸᴸ
+readonly: 0
+type: UInt64
+is_obsolete: 0
+
+Row 3:
+──────
name: index_granularity
value: 8192
changed: 0
description: How many rows correspond to one primary key value.
-type: SettingUInt64
-
-Row 2:
-──────
-name: min_bytes_for_wide_part
-value: 0
-changed: 0
-description: Minimal uncompressed size in bytes to create part in wide format instead of compact
-type: SettingUInt64
-
-Row 3:
-──────
-name: min_rows_for_wide_part
-value: 0
-changed: 0
-description: Minimal number of rows to create part in wide format instead of compact
-type: SettingUInt64
+min: ᴺᵁᴸᴸ
+max: ᴺᵁᴸᴸ
+readonly: 0
+type: UInt64
+is_obsolete: 0
Row 4:
──────
-name: merge_max_block_size
-value: 8192
+name: max_digestion_size_per_segment
+value: 268435456
changed: 0
-description: How many rows in blocks should be formed for merge operations.
-type: SettingUInt64
+description: Max number of bytes to digest per segment to build GIN index.
+min: ᴺᵁᴸᴸ
+max: ᴺᵁᴸᴸ
+readonly: 0
+type: UInt64
+is_obsolete: 0
-4 rows in set. Elapsed: 0.001 sec.
+4 rows in set. Elapsed: 0.009 sec.
```
diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md
index 5a7dfd03eb4..b1dcea5500f 100644
--- a/docs/en/operations/system-tables/metrics.md
+++ b/docs/en/operations/system-tables/metrics.md
@@ -11,7 +11,7 @@ Columns:
- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value.
- `description` ([String](../../sql-reference/data-types/string.md)) — Metric description.
-The list of supported metrics you can find in the [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) source file of ClickHouse.
+You can find all supported metrics in source file [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp).
**Example**
diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md
index b9fdd19c643..c6f565b8748 100644
--- a/docs/en/operations/system-tables/query_log.md
+++ b/docs/en/operations/system-tables/query_log.md
@@ -111,6 +111,11 @@ Columns:
- `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions`, which were used during query execution.
- `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution.
- `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions`, which were used during query execution.
+- `query_cache_usage` ([Enum8](../../sql-reference/data-types/enum.md)) — Usage of the [query cache](../query-cache.md) during query execution. Values:
+ - `'Unknown'` = Status unknown.
+ - `'None'` = The query result was neither written into nor read from the query cache.
+ - `'Write'` = The query result was written into the query cache.
+ - `'Read'` = The query result was read from the query cache.
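+
+For instance, recent queries whose results were served from the cache can be listed like this (the filter values are illustrative):
+
+```sql
+SELECT event_time, query, query_cache_usage
+FROM system.query_log
+WHERE type = 'QueryFinish' AND query_cache_usage = 'Read'
+ORDER BY event_time DESC
+LIMIT 10;
+```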
**Example**
@@ -186,6 +191,7 @@ used_formats: []
used_functions: []
used_storages: []
used_table_functions: []
+query_cache_usage: None
```
**See Also**
diff --git a/docs/en/operations/system-tables/server_settings.md b/docs/en/operations/system-tables/server_settings.md
index 3085b1acaf4..df482261ae8 100644
--- a/docs/en/operations/system-tables/server_settings.md
+++ b/docs/en/operations/system-tables/server_settings.md
@@ -14,6 +14,7 @@ Columns:
- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting was specified in `config.xml`
- `description` ([String](../../sql-reference/data-types/string.md)) — Short server setting description.
- `type` ([String](../../sql-reference/data-types/string.md)) — Server setting value type.
+- `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is obsolete.
**Example**
@@ -26,14 +27,22 @@ WHERE name LIKE '%thread_pool%'
```
``` text
-┌─name─────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┐
-│ max_thread_pool_size │ 5000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │
-│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │
-│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │
-│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │
-│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │
-│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │
-└──────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┘
+┌─name────────────────────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┬─is_obsolete─┐
+│ max_thread_pool_size │ 10000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │ 0 │
+│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │ 0 │
+│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │ 0 │
+│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │ 0 │
+│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │ 0 │
+│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │ 0 │
+│ max_active_parts_loading_thread_pool_size │ 64 │ 64 │ 0 │ The number of threads to load active set of data parts (Active ones) at startup. │ UInt64 │ 0 │
+│ max_outdated_parts_loading_thread_pool_size │ 32 │ 32 │ 0 │ The number of threads to load inactive set of data parts (Outdated ones) at startup. │ UInt64 │ 0 │
+│ max_parts_cleaning_thread_pool_size │ 128 │ 128 │ 0 │ The number of threads for concurrent removal of inactive data parts. │ UInt64 │ 0 │
+│ max_backups_io_thread_pool_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that would be used for IO operations for BACKUP queries │ UInt64 │ 0 │
+│ max_backups_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for backups IO thread pool. │ UInt64 │ 0 │
+│ backups_io_thread_pool_queue_size │ 0 │ 0 │ 0 │ Queue size for backups IO thread pool. │ UInt64 │ 0 │
+└─────────────────────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┴─────────────┘
```
Using of `WHERE changed` can be useful, for example, when you want to check
diff --git a/docs/en/operations/system-tables/settings.md b/docs/en/operations/system-tables/settings.md
index afae45077cc..7dd2345a2d0 100644
--- a/docs/en/operations/system-tables/settings.md
+++ b/docs/en/operations/system-tables/settings.md
@@ -17,6 +17,7 @@ Columns:
- `0` — Current user can change the setting.
- `1` — Current user can’t change the setting.
- `default` ([String](../../sql-reference/data-types/string.md)) — Setting default value.
+- `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is obsolete.
**Example**
@@ -29,11 +30,14 @@ WHERE name LIKE '%min_i%'
```
``` text
-┌─name────────────────────────────────────────┬─value─────┬─changed─┬─description───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─min──┬─max──┬─readonly─┐
-│ min_insert_block_size_rows │ 1048576 │ 0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
-│ min_insert_block_size_bytes │ 268435456 │ 0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
-│ read_backoff_min_interval_between_events_ms │ 1000 │ 0 │ Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
-└─────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴──────┴──────────┘
+┌─name───────────────────────────────────────────────┬─value─────┬─changed─┬─description───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─min──┬─max──┬─readonly─┬─type─────────┬─default───┬─alias_for─┬─is_obsolete─┐
+│ min_insert_block_size_rows │ 1048449 │ 0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ UInt64 │ 1048449 │ │ 0 │
+│ min_insert_block_size_bytes │ 268402944 │ 0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ UInt64 │ 268402944 │ │ 0 │
+│ min_insert_block_size_rows_for_materialized_views │ 0 │ 0 │ Like min_insert_block_size_rows, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_rows) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ UInt64 │ 0 │ │ 0 │
+│ min_insert_block_size_bytes_for_materialized_views │ 0 │ 0 │ Like min_insert_block_size_bytes, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_bytes) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ UInt64 │ 0 │ │ 0 │
+│ read_backoff_min_interval_between_events_ms │ 1000 │ 0 │ Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ Milliseconds │ 1000 │ │ 0 │
+└────────────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴──────┴──────────┴──────────────┴───────────┴───────────┴─────────────┘
```
Using of `WHERE changed` can be useful, for example, when you want to check:
diff --git a/docs/en/sql-reference/aggregate-functions/reference/any.md b/docs/en/sql-reference/aggregate-functions/reference/any.md
index db19f524b31..f79fe66c05d 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/any.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/any.md
@@ -12,3 +12,5 @@ To get a determinate result, you can use the ‘min’ or ‘max’ function ins
In some cases, you can rely on the order of execution. This applies to cases when SELECT comes from a subquery that uses ORDER BY.
When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function.
+
+- Alias: `any_value`
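+
+A quick check of the alias against the `numbers` table function:
+
+```sql
+SELECT any(number), any_value(number) FROM numbers(10); -- both return 0
+```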
diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md
index 64fae0e82f0..69f1816b7df 100644
--- a/docs/en/sql-reference/functions/arithmetic-functions.md
+++ b/docs/en/sql-reference/functions/arithmetic-functions.md
@@ -6,9 +6,20 @@ sidebar_label: Arithmetic
# Arithmetic Functions
-The result type of all arithmetic functions is the smallest type which can represent all possible results. Size promotion happens for integers up to 32 bit, e.g. `UInt8 + UInt16 = UInt32`. If one of the inters has 64 or more bits, the result is of the same type as the bigger of the input integers, e.g. `UInt16 + UInt128 = UInt128`. While this introduces a risk of overflows around the value range boundary, it ensures that calculations are performed quickly using the maximum native integer width of 64 bit.
+Arithmetic functions work for any two operands of type `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`, `Float32`, or `Float64`.
-The result of addition or multiplication of two integers is unsigned unless one of the integers is signed.
+Before performing the operation, both operands are cast to the result type. The result type is determined as follows (unless specified
+differently in the function documentation below):
+- If both operands are up to 32 bits wide, the size of the result type will be the size of the next bigger type following the bigger of the
+ two operands (integer size promotion). For example, `UInt8 + UInt16 = UInt32` or `Float32 * Float32 = Float64`.
+- If one of the operands has 64 or more bits, the size of the result type will be the same size as the bigger of the two operands. For
+ example, `UInt32 + UInt128 = UInt128` or `Float32 * Float64 = Float64`.
+- If one of the operands is signed, the result type will also be signed, otherwise it will be unsigned. For example, `UInt32 * Int32 = Int64`.
+
+These rules make sure that the result type will be the smallest type which can represent all possible results. While this introduces a risk
+of overflows around the value range boundary, it ensures that calculations are performed quickly using the maximum native integer width of
+64 bit. This behavior also guarantees compatibility with many other databases which provide 64 bit integers (BIGINT) as the biggest integer
+type.
Example:
@@ -22,8 +33,6 @@ SELECT toTypeName(0), toTypeName(0 + 0), toTypeName(0 + 0 + 0), toTypeName(0 + 0
└───────────────┴────────────────────────┴─────────────────────────────────┴──────────────────────────────────────────┘
```
-Arithmetic functions work for any pair of `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`, `Float32`, or `Float64` values.
-
Overflows are produced the same way as in C++.
## plus
@@ -68,7 +77,7 @@ Alias: `a \* b` (operator)
## divide
-Calculates the quotient of two values `a` and `b`. The result is always a floating-point value. If you need integer division, you can use the `intDiv` function.
+Calculates the quotient of two values `a` and `b`. The result type is always [Float64](../../sql-reference/data-types/float.md). Integer division is provided by the `intDiv` function.
Division by 0 returns `inf`, `-inf`, or `nan`.
@@ -84,7 +93,7 @@ Alias: `a / b` (operator)
Performs an integer division of two values `a` by `b`, i.e. computes the quotient rounded down to the next smallest integer.
-The result has the same type as the dividend (the first parameter).
+The result has the same width as the dividend (the first parameter).
An exception is thrown when dividing by zero, when the quotient does not fit in the range of the dividend, or when dividing a minimal negative number by minus one.
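
A small illustration of the quotient/remainder pair (see also `modulo` below):

```sql
SELECT intDiv(7, 3) AS quotient, modulo(7, 3) AS remainder; -- 2 and 1
```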
@@ -135,7 +144,7 @@ intDivOrZero(a, b)
Calculates the remainder of the division of two values `a` by `b`.
-The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result is a floating-point number.
+The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result type is [Float64](../../sql-reference/data-types/float.md).
The remainder is computed like in C++. Truncated division is used for negative numbers.
diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index ce1a4f4d283..87d84425029 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -1138,6 +1138,8 @@ Result:
Returns the current date and time at the moment of query analysis. The function is a constant expression.
+Alias: `current_timestamp`.
+
**Syntax**
``` sql
@@ -1268,6 +1270,8 @@ Result:
Accepts zero arguments and returns the current date at one of the moments of query analysis.
The same as ‘toDate(now())’.
+Aliases: `curdate`, `current_date`.
+
## yesterday
Accepts zero arguments and returns yesterday’s date at one of the moments of query analysis.
@@ -1445,7 +1449,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %n | new-line character (‘’) | |
| %p | AM or PM designation | PM |
| %Q | Quarter (1-4) | 1 |
-| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%i %p | 10:30 PM |
+| %r | 12-hour HH:MM AM/PM time, equivalent to %h:%i %p | 10:30 PM |
| %R | 24-hour HH:MM time, equivalent to %H:%i | 22:33 |
| %s | second (00-59) | 44 |
| %S | second (00-59) | 44 |
diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index 9890d257e84..527ce2434c0 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -575,6 +575,42 @@ Alias:
Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
+
+## substringIndex(s, delim, count)
+
+Returns the substring of `s` before `count` occurrences of the delimiter `delim`, as in Spark or MySQL.
+
+**Syntax**
+
+```sql
+substringIndex(s, delim, count)
+```
+Alias: `SUBSTRING_INDEX`
+
+
+**Arguments**
+
+- `s` — The string to extract the substring from. [String](../../sql-reference/data-types/string.md).
+- `delim` — The delimiter character. [String](../../sql-reference/data-types/string.md).
+- `count` — The number of delimiter occurrences to count before extracting the substring. If `count` is positive, everything to the left of the final delimiter (counting from the left) is returned. If `count` is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md)
+
+**Example**
+
+``` sql
+SELECT substringIndex('www.clickhouse.com', '.', 2)
+```
+
+Result:
+```
+┌─substringIndex('www.clickhouse.com', '.', 2)─┐
+│ www.clickhouse │
+└──────────────────────────────────────────────┘
+```
+
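+A negative `count` counts delimiters from the right, following the MySQL/Spark semantics described above:
+
+```sql
+SELECT substringIndex('www.clickhouse.com', '.', -2)
+```
+
+Result:
+```
+┌─substringIndex('www.clickhouse.com', '.', -2)─┐
+│ clickhouse.com                                │
+└───────────────────────────────────────────────┘
+```
+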
+## substringIndexUTF8(s, delim, count)
+
+Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
+
## appendTrailingCharIfAbsent
Appends character `c` to string `s` if `s` is non-empty and does not end with character `c`.
diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md
index 3d8f89f7295..c10a1036677 100644
--- a/docs/en/sql-reference/functions/string-search-functions.md
+++ b/docs/en/sql-reference/functions/string-search-functions.md
@@ -631,3 +631,53 @@ Result:
│ 100 │ 200 │ 100-200 │ 100 │
└──────────────────────────────────────────────┴──────────────────────────────────────────────┴──────────────────────────────────────────────┴───────────────────────────────────────────┘
```
+
+## hasSubsequence
+
+Returns 1 if needle is a subsequence of haystack, or 0 otherwise.
+A subsequence of a string is a sequence that can be derived from the given string by deleting zero or more elements without changing the order of the remaining elements.
+
+
+**Syntax**
+
+``` sql
+hasSubsequence(haystack, needle)
+```
+
+**Arguments**
+
+- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
+
+**Returned values**
+
+- 1, if needle is a subsequence of haystack.
+- 0, otherwise.
+
+Type: `UInt8`.
+
+**Examples**
+
+``` sql
+SELECT hasSubsequence('garbage', 'arg');
+```
+
+Result:
+
+``` text
+┌─hasSubsequence('garbage', 'arg')─┐
+│ 1 │
+└──────────────────────────────────┘
+```
+
+## hasSubsequenceCaseInsensitive
+
+Like [hasSubsequence](#hasSubsequence) but searches case-insensitively.
+
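+For example, the match below succeeds even though the needle's case differs:
+
+```sql
+SELECT hasSubsequenceCaseInsensitive('garbage', 'ARG'); -- returns 1
+```
+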
+## hasSubsequenceUTF8
+
+Like [hasSubsequence](#hasSubsequence) but assumes `haystack` and `needle` are UTF-8 encoded strings.
+
+## hasSubsequenceCaseInsensitiveUTF8
+
+Like [hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively.
\ No newline at end of file
diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md
index dae2c7dd1d3..6ceb9b5849e 100644
--- a/docs/en/sql-reference/statements/alter/column.md
+++ b/docs/en/sql-reference/statements/alter/column.md
@@ -213,7 +213,7 @@ Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC
Syntax:
```sql
-ALTER TABLE table_name MODIFY column_name REMOVE property;
+ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```
**Example**
diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md
index 336b93db9d5..1c399d2072b 100644
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@@ -205,7 +205,7 @@ The optional keyword `EXTENDED` currently has no effect, it only exists for MySQ
The optional keyword `FULL` causes the output to include the collation, comment and privilege columns.
-`SHOW COLUMNS` produces a result table with the following structure:
+The statement produces a result table with the following structure:
- field - The name of the column (String)
- type - The column data type (String)
- null - If the column data type is Nullable (UInt8)
@@ -272,6 +272,10 @@ SHOW DICTIONARIES FROM db LIKE '%reg%' LIMIT 2
Displays a list of primary and data skipping indexes of a table.
+This statement mostly exists for compatibility with MySQL. System tables [system.tables](../../operations/system-tables/tables.md) (for
+primary keys) and [system.data_skipping_indices](../../operations/system-tables/data_skipping_indices.md) (for data skipping indices)
+provide equivalent information but in a fashion more native to ClickHouse.
+
```sql
SHOW [EXTENDED] {INDEX | INDEXES | INDICES | KEYS } {FROM | IN} <table> [{FROM | IN} <db>] [WHERE <expr>] [INTO OUTFILE <filename>] [FORMAT <format>]
```
@@ -281,22 +285,22 @@ equivalent. If no database is specified, the query assumes the current database
The optional keyword `EXTENDED` currently has no effect, it only exists for MySQL compatibility.
-`SHOW INDEX` produces a result table with the following structure:
-- table - The name of the table (String)
-- non_unique - 0 if the index cannot contain duplicates, 1 otherwise (UInt8)
-- key_name - The name of the index, `PRIMARY` if the index is a primary key index (String)
-- seq_in_index - Currently unused
-- column_name - Currently unused
-- collation - The sorting of the column in the index, `A` if ascending, `D` if descending, `NULL` if unsorted (Nullable(String))
-- cardinality - Currently unused
-- sub_part - Currently unused
-- packed - Currently unused
+The statement produces a result table with the following structure:
+- table - The name of the table. (String)
+- non_unique - Always `1` as ClickHouse does not support uniqueness constraints. (UInt8)
+- key_name - The name of the index, `PRIMARY` if the index is a primary key index. (String)
+- seq_in_index - For a primary key index, the position of the column starting from `1`. For a data skipping index: always `1`. (UInt8)
+- column_name - For a primary key index, the name of the column. For a data skipping index: `''` (empty string), see field "expression". (String)
+- collation - The sorting of the column in the index: `A` if ascending, `D` if descending, `NULL` if unsorted. (Nullable(String))
+- cardinality - An estimation of the index cardinality (number of unique values in the index). Currently always 0. (UInt64)
+- sub_part - Always `NULL` because ClickHouse does not support index prefixes like MySQL. (Nullable(String))
+- packed - Always `NULL` because ClickHouse does not support packed indexes (like MySQL). (Nullable(String))
- null - Currently unused
-- index_type - The index type, e.g. `primary`, `minmax`, `bloom_filter` etc. (String)
-- comment - Currently unused
-- index_comment - Currently unused
-- visible - If the index is visible to the optimizer, always `YES` (String)
-- expression - The index expression (String)
+- index_type - The index type, e.g. `PRIMARY`, `MINMAX`, `BLOOM_FILTER` etc. (String)
+- comment - Additional information about the index, currently always `''` (empty string). (String)
+- index_comment - `''` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field (like in MySQL). (String)
+- visible - If the index is visible to the optimizer, always `YES`. (String)
+- expression - For a data skipping index, the index expression. For a primary key index: `''` (empty string). (String)
**Examples**
@@ -310,11 +314,12 @@ Result:
``` text
┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐
-│ tbl │ 0 │ blf_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ bloom_filter │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ d, b │
-│ tbl │ 0 │ mm1_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ a, c, d │
-│ tbl │ 0 │ mm2_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ c, d, e │
-│ tbl │ 0 │ PRIMARY │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ A │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ primary │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ c, a │
-│ tbl │ 0 │ set_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ set │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ e │
+│ tbl │ 1 │ blf_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ BLOOM_FILTER │ │ │ YES │ d, b │
+│ tbl │ 1 │ mm1_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ a, c, d │
+│ tbl │ 1 │ mm2_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ c, d, e │
+│ tbl │ 1 │ PRIMARY │ 1 │ c │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ │
+│ tbl │ 1 │ PRIMARY │ 2 │ a │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ │
+│ tbl │ 1 │ set_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ SET │ │ │ YES │ e │
└───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘
```
diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md
index 65a35f03fbe..fb601cd5d35 100644
--- a/docs/en/sql-reference/statements/system.md
+++ b/docs/en/sql-reference/statements/system.md
@@ -414,3 +414,29 @@ Will do sync syscall.
```sql
SYSTEM SYNC FILE CACHE [ON CLUSTER cluster_name]
```
+
+
+### SYSTEM STOP LISTEN
+
+Closes the socket and gracefully terminates the existing connections to the server on the specified port with the specified protocol.
+
+However, if the corresponding protocol settings were not specified in the clickhouse-server configuration, this command will have no effect.
+
+```sql
+SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol']
+```
+
+- If `CUSTOM 'protocol'` modifier is specified, the custom protocol with the specified name defined in the protocols section of the server configuration will be stopped.
+- If `QUERIES ALL` modifier is specified, all protocols are stopped.
+- If `QUERIES DEFAULT` modifier is specified, all default protocols are stopped.
+- If `QUERIES CUSTOM` modifier is specified, all custom protocols are stopped.
+
+### SYSTEM START LISTEN
+
+Allows new connections to be established on the specified protocols.
+
+However, if the server on the specified port and protocol was not stopped using the SYSTEM STOP LISTEN command, this command will have no effect.
+
+```sql
+SYSTEM START LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol']
+```
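+
+As an illustration, the pair of statements below drains and later re-enables the MySQL compatibility endpoint; they assume `mysql_port` is configured in the server config:
+
+```sql
+SYSTEM STOP LISTEN MYSQL;
+-- perform maintenance, then accept MySQL connections again
+SYSTEM START LISTEN MYSQL;
+```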
diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md
index 2ab43f1b895..677ed011960 100644
--- a/docs/en/sql-reference/table-functions/url.md
+++ b/docs/en/sql-reference/table-functions/url.md
@@ -56,6 +56,7 @@ Character `|` inside patterns is used to specify failover addresses. They are it
## Storage Settings {#storage-settings}
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
+- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) - allows disabling the decoding/encoding of the path in the URI. Disabled by default.
**See Also**
diff --git a/docs/en/sql-reference/transactions.md b/docs/en/sql-reference/transactions.md
index 68fbfe0b22a..cb89a091d68 100644
--- a/docs/en/sql-reference/transactions.md
+++ b/docs/en/sql-reference/transactions.md
@@ -3,23 +3,46 @@ slug: /en/guides/developer/transactional
---
# Transactional (ACID) support
-INSERT into one partition* in one table* of MergeTree* family up to max_insert_block_size rows* is transactional (ACID):
-- Atomic: INSERT is succeeded or rejected as a whole: if confirmation is sent to the client, all rows INSERTed; if error is sent to the client, no rows INSERTed.
+## Case 1: INSERT into one partition, of one table, of the MergeTree* family
+
+This is transactional (ACID) if the inserted rows are packed and inserted as a single block (see Notes):
+- Atomic: an INSERT succeeds or is rejected as a whole: if a confirmation is sent to the client, then all rows were inserted; if an error is sent to the client, then no rows were inserted.
- Consistent: if there are no table constraints violated, then all rows in an INSERT are inserted and the INSERT succeeds; if constraints are violated, then no rows are inserted.
-- Isolated: concurrent clients observe a consistent snapshot of the table–the state of the table either as if before INSERT or after successful INSERT; no partial state is seen;
-- Durable: successful INSERT is written to the filesystem before answering to the client, on single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting).
-* If table has many partitions and INSERT covers many partitions–then insertion into every partition is transactional on its own;
-* INSERT into multiple tables with one statement is possible if materialized views are involved;
-* INSERT into Distributed table is not transactional as a whole, while insertion into every shard is transactional;
-* another example: insert into Buffer tables is neither atomic nor isolated or consistent or durable;
-* atomicity is ensured even if `async_insert` is enabled, but it can be turned off by the wait_for_async_insert setting;
-* max_insert_block_size is 1 000 000 by default and can be adjusted as needed;
-* if client did not receive the answer from the server, the client does not know if transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties;
-* ClickHouse is using MVCC with snapshot isolation internally;
-* all ACID properties are valid even in case of server kill / crash;
-* either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in typical setup;
-* "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency)
-* this explanation does not cover a new transactions feature that allow to have full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc.
+- Isolated: concurrent clients observe a consistent snapshot of the table – the state of the table either as it was before the INSERT attempt, or after the successful INSERT; no partial state is seen
+- Durable: a successful INSERT is written to the filesystem before answering to the client, on a single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting).
+- INSERT into multiple tables with one statement is possible if materialized views are involved (the INSERT from the client is to a table which has associated materialized views).
+
+## Case 2: INSERT into multiple partitions, of one table, of the MergeTree* family
+
+Same as Case 1 above, with this detail:
+- If the table has many partitions and the INSERT covers many of them, then insertion into each partition is transactional on its own
+
+
+## Case 3: INSERT into one distributed table of the MergeTree* family
+
+Same as Case 1 above, with this detail:
+- INSERT into a Distributed table is not transactional as a whole, while insertion into each shard is transactional
+
+## Case 4: Using a Buffer table
+
+- insert into Buffer tables is neither atomic nor isolated nor consistent nor durable
+
+## Case 5: Using async_insert
+
+Same as Case 1 above, with this detail:
+- atomicity is ensured even if `async_insert` is enabled and `wait_for_async_insert` is set to 1 (the default), but if `wait_for_async_insert` is set to 0, then atomicity is not ensured.
+
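+A minimal sketch of the atomic variant (the table name `t` is illustrative):
+
+```sql
+-- acknowledged only after the buffered block is flushed, so the INSERT stays atomic
+INSERT INTO t SETTINGS async_insert = 1, wait_for_async_insert = 1 VALUES (1);
+```
+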
+## Notes
+- rows inserted from the client in some data format are packed into a single block when:
+ - the insert format is row-based (like CSV, TSV, Values, JSONEachRow, etc) and the data contains less than `max_insert_block_size` rows (~1 000 000 by default) or less than `min_chunk_bytes_for_parallel_parsing` bytes (10 MB by default) when parallel parsing is used (enabled by default)
+ - the insert format is column-based (like Native, Parquet, ORC, etc) and the data contains only one block of data
+- the size of the inserted block in general may depend on many settings (for example: `max_block_size`, `max_insert_block_size`, `min_insert_block_size_rows`, `min_insert_block_size_bytes`, `preferred_block_size_bytes`, etc)
+- if the client did not receive an answer from the server, the client does not know if the transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties
+- ClickHouse is using MVCC with snapshot isolation internally
+- all ACID properties are valid even in the case of server kill/crash
+- either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in the typical setup
+- "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency)
+- this explanation does not cover a new transactions feature that allows full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc. (see the next section on Transactions, Commit, and Rollback)
## Transactions, Commit, and Rollback
diff --git a/docs/ru/development/build-osx.md b/docs/ru/development/build-osx.md
index 9a1f9c9347d..6b4e612b13f 100644
--- a/docs/ru/development/build-osx.md
+++ b/docs/ru/development/build-osx.md
@@ -68,7 +68,7 @@ $ /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/
$ rm -rf build
$ mkdir build
$ cd build
- $ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER==$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
+ $ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
$ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
$ cmake --build . --config RelWithDebInfo
$ cd ..
diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md
index b8c5ee77f0c..981f1c7b5a2 100644
--- a/docs/ru/interfaces/http.md
+++ b/docs/ru/interfaces/http.md
@@ -50,7 +50,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
-X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
1
```
@@ -266,9 +266,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812
Прогресс выполнения запроса можно отслеживать с помощью заголовков ответа `X-ClickHouse-Progress`. Для этого включите [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Пример последовательности заголовков:
``` text
-X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"}
+X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
+X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
+X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
```
Возможные поля заголовка:
@@ -529,7 +529,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@@ -569,7 +569,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
%
@@ -621,7 +621,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
Absolute Path File
* Connection #0 to host localhost left intact
@@ -640,7 +640,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
Relative Path File
* Connection #0 to host localhost left intact
diff --git a/docs/ru/operations/configuration-files.md b/docs/ru/operations/configuration-files.md
index 2b824ce91bd..085761d80c7 100644
--- a/docs/ru/operations/configuration-files.md
+++ b/docs/ru/operations/configuration-files.md
@@ -85,6 +85,43 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
Сервер следит за изменениями конфигурационных файлов, а также файлов и ZooKeeper-узлов, которые были использованы при выполнении подстановок и переопределений, и перезагружает настройки пользователей и кластеров на лету. То есть, можно изменять кластера, пользователей и их настройки без перезапуска сервера.
+## Шифрование {#encryption}
+
+Вы можете использовать симметричное шифрование для шифрования элемента конфигурации, например, поля password. Чтобы это сделать, сначала настройте [кодек шифрования](../sql-reference/statements/create/table.md#encryption-codecs), затем добавьте атрибут `encrypted_by` со значением, равным имени кодека шифрования, к элементу, который надо зашифровать.
+
+В отличии от аттрибутов `from_zk`, `from_env` и `incl` (или элемента `include`), подстановка, т.е. расшифровка зашифрованного значения, не выподняется в файле предобработки. Расшифровка происходит только во время исполнения в серверном процессе.
+
+Пример:
+
+```xml
+<clickhouse>
+    <encryption_codecs>
+        <aes_128_gcm_siv>
+            <key_hex>00112233445566778899aabbccddeeff</key_hex>
+        </aes_128_gcm_siv>
+    </encryption_codecs>
+
+    <interserver_http_credentials>
+        <user>admin</user>
+        <password encrypted_by="AES_128_GCM_SIV">961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85</password>
+    </interserver_http_credentials>
+</clickhouse>
+```
+
+To obtain an encrypted value, the example application `encrypt_decrypt` can be used.
+
+Example:
+
+``` bash
+./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd
+```
+
+``` text
+961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85
+```
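+
+If the tool also provides the reverse mode (assumed below as a `-d` flag mirroring `-e`; check the tool's help before relying on it), the stored ciphertext can be verified against the original value:
+
+``` bash
+# Assumption: -d decrypts the hex value with the AES_128_GCM_SIV codec
+# using the key_hex configured in config.xml.
+./encrypt_decrypt /etc/clickhouse-server/config.xml -d AES_128_GCM_SIV 961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85
+```
+
+``` text
+abcd
+```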
+
## Examples of writing the configuration in YAML {#example}
Here you can see an example of a real configuration written in YAML: [config.yaml.example](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.yaml.example).
diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md
index 421df3fe3eb..81a696bcfc1 100644
--- a/docs/ru/operations/server-configuration-parameters/settings.md
+++ b/docs/ru/operations/server-configuration-parameters/settings.md
@@ -1058,6 +1058,10 @@ ClickHouse использует потоки из глобального пул
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    <collect_interval_milliseconds>1000</collect_interval_milliseconds>
+    <max_size_rows>1048576</max_size_rows>
+    <reserved_size_rows>8192</reserved_size_rows>
+    <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+    <flush_on_crash>false</flush_on_crash>
```
@@ -1155,12 +1159,19 @@ ClickHouse использует потоки из глобального пул
The following parameters are used to configure logging:
-- `database` — name of the database;
-- `table` — name of the table;
-- `partition_by` — sets a [custom partitioning key](../../operations/server-configuration-parameters/settings.md). Cannot be used if `engine` is set.
-- `engine` — sets the [MergeTree Engine settings](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for the system table. Cannot be used if `partition_by` is set.
-- `flush_interval_milliseconds` — the interval for flushing data from the in-memory buffer to the table.
-
+- `database` — name of the database;
+- `table` — name of the table;
+- `partition_by` — sets a [custom partitioning key](../../operations/server-configuration-parameters/settings.md). Cannot be used if `engine` is set.
+- `engine` — sets the [MergeTree Engine settings](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for the system table. Cannot be used if `partition_by` is set.
+- `flush_interval_milliseconds` — the interval for flushing data from the in-memory buffer to the table.
+- `max_size_rows` — the maximum size of the log buffer in rows. When the buffer is full, the logs are flushed to disk.
+Default value: 1048576.
+- `reserved_size_rows` — the pre-allocated size of the log buffer in rows.
+Default value: 8192.
+- `buffer_size_rows_flush_threshold` — the number of rows in the buffer at which a background (non-blocking) flush of the logs to disk starts.
+Default value: `max_size_rows / 2`.
+- `flush_on_crash` — whether the logs should be flushed to disk on abnormal server termination.
+Default value: false.
**Example**
``` xml
@@ -1169,6 +1180,10 @@ ClickHouse использует потоки из глобального пул
    <partition_by>toMonday(event_date)</partition_by>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    <max_size_rows>1048576</max_size_rows>
+    <reserved_size_rows>8192</reserved_size_rows>
+    <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+    <flush_on_crash>false</flush_on_crash>
```
@@ -1218,11 +1233,19 @@ ClickHouse использует потоки из глобального пул
The following parameters are used to configure logging:
-- `database` — name of the database;
-- `table` — name of the table where the log will be written;
-- `partition_by` — sets a [custom partitioning key](../../operations/server-configuration-parameters/settings.md). Cannot be used if `engine` is set.
-- `engine` — sets the [MergeTree Engine settings](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for the system table. Cannot be used if `partition_by` is set.
-- `flush_interval_milliseconds` — the interval for flushing data from the in-memory buffer to the table.
+- `database` — name of the database;
+- `table` — name of the table;
+- `partition_by` — sets a [custom partitioning key](../../operations/server-configuration-parameters/settings.md). Cannot be used if `engine` is set.
+- `engine` — sets the [MergeTree Engine settings](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for the system table. Cannot be used if `partition_by` is set.
+- `flush_interval_milliseconds` — the interval for flushing data from the in-memory buffer to the table.
+- `max_size_rows` — the maximum size of the log buffer in rows. When the buffer is full, the logs are flushed to disk.
+Default value: 1048576.
+- `reserved_size_rows` — the pre-allocated size of the log buffer in rows.
+Default value: 8192.
+- `buffer_size_rows_flush_threshold` — the number of rows in the buffer at which a background (non-blocking) flush of the logs to disk starts.
+Default value: `max_size_rows / 2`.
+- `flush_on_crash` — whether the logs should be flushed to disk on abnormal server termination.
+Default value: false.
If the table does not exist, ClickHouse will create it. If the structure of the query log changed when the ClickHouse server was updated, the table with the old structure is renamed and a new table is created automatically.
@@ -1234,6 +1257,10 @@ ClickHouse использует потоки из глобального пул
    <engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    <max_size_rows>1048576</max_size_rows>
+    <reserved_size_rows>8192</reserved_size_rows>
+    <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+    <flush_on_crash>false</flush_on_crash>
```
@@ -1245,11 +1272,19 @@ ClickHouse использует потоки из глобального пул
The following parameters are used to configure logging:
-- `database` — name of the database;
-- `table` — name of the table where the log will be written;
-- `partition_by` — sets a [custom partitioning key](../../operations/server-configuration-parameters/settings.md). Cannot be used if `engine` is set.
-- `engine` — sets the [MergeTree Engine settings](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for the system table. Cannot be used if `partition_by` is set.
-- `flush_interval_milliseconds` — the interval for flushing data from the in-memory buffer to the table.
+- `database` — name of the database;
+- `table` — name of the table;
+- `partition_by` — sets a [custom partitioning key](../../operations/server-configuration-parameters/settings.md). Cannot be used if `engine` is set.
+- `engine` — sets the [MergeTree Engine settings](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for the system table. Cannot be used if `partition_by` is set.
+- `flush_interval_milliseconds` — the interval for flushing data from the in-memory buffer to the table.
+- `max_size_rows` — the maximum size of the log buffer in rows. When the buffer is full, the logs are flushed to disk.
+Default value: 1048576.
+- `reserved_size_rows` — the pre-allocated size of the log buffer in rows.
+Default value: 8192.
+- `buffer_size_rows_flush_threshold` — the number of rows in the buffer at which a background (non-blocking) flush of the logs to disk starts.
+Default value: `max_size_rows / 2`.
+- `flush_on_crash` — whether the logs should be flushed to disk on abnormal server termination.
+Default value: false.
If the table does not exist, ClickHouse will create it. If the structure of the query log changed when the ClickHouse server was updated, the table with the old structure is renamed and a new table is created automatically.
@@ -1261,6 +1296,10 @@ ClickHouse использует потоки из глобального пул
    <partition_by>toMonday(event_date)</partition_by>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    <max_size_rows>1048576</max_size_rows>
+    <reserved_size_rows>8192</reserved_size_rows>
+    <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+    <flush_on_crash>false</flush_on_crash>
```
@@ -1272,11 +1311,19 @@ ClickHouse использует потоки из глобального пул
The following parameters are used to configure logging:
-- `database` – name of the database.
-- `table` – name of the system table where the queries will be logged.
-- `partition_by` — sets a [custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Cannot be used if the `engine` parameter is set.
-- `engine` — sets the [MergeTree Engine settings](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for the system table. Cannot be used if the `partition_by` parameter is set.
-- `flush_interval_milliseconds` — the interval for flushing data from the in-memory buffer to the table.
+- `database` — name of the database;
+- `table` — name of the table;
+- `partition_by` — sets a [custom partitioning key](../../operations/server-configuration-parameters/settings.md). Cannot be used if `engine` is set.
+- `engine` — sets the [MergeTree Engine settings](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for the system table. Cannot be used if `partition_by` is set.
+- `flush_interval_milliseconds` — the interval for flushing data from the in-memory buffer to the table.
+- `max_size_rows` — the maximum size of the log buffer in rows. When the buffer is full, the logs are flushed to disk.
+Default value: 1048576.
+- `reserved_size_rows` — the pre-allocated size of the log buffer in rows.
+Default value: 8192.
+- `buffer_size_rows_flush_threshold` — the number of rows in the buffer at which a background (non-blocking) flush of the logs to disk starts.
+Default value: `max_size_rows / 2`.
+- `flush_on_crash` — whether the logs should be flushed to disk on abnormal server termination.
+Default value: false.
If the table does not exist, ClickHouse will create it. If the structure of the query log changed when the ClickHouse server was updated, the table with the old structure is renamed and a new table is created automatically.
@@ -1288,6 +1335,10 @@ ClickHouse использует потоки из глобального пул
    <partition_by>toYYYYMM(event_date)</partition_by>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    <max_size_rows>1048576</max_size_rows>
+    <reserved_size_rows>8192</reserved_size_rows>
+    <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+    <flush_on_crash>false</flush_on_crash>
```
@@ -1297,12 +1348,20 @@ ClickHouse использует потоки из глобального пул
Parameters:
-- `level` — maximum message level (`Trace` by default) that will be stored in the table.
-- `database` — name of the database for storing the table.
-- `table` — name of the table where the text messages will be written.
-- `partition_by` — sets a [custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Cannot be used if `engine` is set.
-- `engine` — sets the [MergeTree Engine settings](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for the system table. Cannot be used if `partition_by` is set.
-- `flush_interval_milliseconds` — the interval for flushing data from the in-memory buffer to the table.
+- `level` — maximum message level (`Trace` by default) that will be stored in the table.
+- `database` — name of the database;
+- `table` — name of the table;
+- `partition_by` — sets a [custom partitioning key](../../operations/server-configuration-parameters/settings.md). Cannot be used if `engine` is set.
+- `engine` — sets the [MergeTree Engine settings](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for the system table. Cannot be used if `partition_by` is set.
+- `flush_interval_milliseconds` — the interval for flushing data from the in-memory buffer to the table.
+- `max_size_rows` — the maximum size of the log buffer in rows. When the buffer is full, the logs are flushed to disk.
+Default value: 1048576.
+- `reserved_size_rows` — the pre-allocated size of the log buffer in rows.
+Default value: 8192.
+- `buffer_size_rows_flush_threshold` — the number of rows in the buffer at which a background (non-blocking) flush of the logs to disk starts.
+Default value: `max_size_rows / 2`.
+- `flush_on_crash` — whether the logs should be flushed to disk on abnormal server termination.
+Default value: false.
**Example**
```xml
@@ -1312,6 +1371,10 @@ ClickHouse использует потоки из глобального пул
        <database>system</database>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <max_size_rows>1048576</max_size_rows>
+        <reserved_size_rows>8192</reserved_size_rows>
+        <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+        <flush_on_crash>false</flush_on_crash>
        <engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine>
@@ -1323,13 +1386,21 @@ ClickHouse использует потоки из глобального пул
Settings for the [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation.
-Parameters:
+Parameters:
-- `database` — Database for storing a table.
-- `table` — Table name.
-- `partition_by` — sets a [custom partitioning key](../../operations/server-configuration-parameters/settings.md). Cannot be used if `engine` is set.
-- `engine` — sets the [MergeTree Engine settings](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for the system table. Cannot be used if `partition_by` is set.
-- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table.
+- `database` — name of the database;
+- `table` — name of the table;
+- `partition_by` — sets a [custom partitioning key](../../operations/server-configuration-parameters/settings.md). Cannot be used if `engine` is set.
+- `engine` — sets the [MergeTree Engine settings](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for the system table. Cannot be used if `partition_by` is set.
+- `flush_interval_milliseconds` — the interval for flushing data from the in-memory buffer to the table.
+- `max_size_rows` — the maximum size of the log buffer in rows. When the buffer is full, the logs are flushed to disk.
+Default value: 1048576.
+- `reserved_size_rows` — the pre-allocated size of the log buffer in rows.
+Default value: 8192.
+- `buffer_size_rows_flush_threshold` — the number of rows in the buffer at which a background (non-blocking) flush of the logs to disk starts.
+Default value: `max_size_rows / 2`.
+- `flush_on_crash` — whether the logs should be flushed to disk on abnormal server termination.
+Default value: false.
By default, the server configuration file `config.xml` contains the following settings:
@@ -1339,9 +1410,84 @@ Parameters:
    <partition_by>toYYYYMM(event_date)</partition_by>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    <max_size_rows>1048576</max_size_rows>
+    <reserved_size_rows>8192</reserved_size_rows>
+    <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
```
+## asynchronous_insert_log {#server_configuration_parameters-asynchronous_insert_log}
+
+Settings for the asynchronous_insert_log system table, which logs asynchronous inserts.
+
+Parameters:
+
+- `database` — name of the database;
+- `table` — name of the table;
+- `partition_by` — sets a [custom partitioning key](../../operations/server-configuration-parameters/settings.md). Cannot be used if `engine` is set.
+- `engine` — sets the [MergeTree Engine settings](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for the system table. Cannot be used if `partition_by` is set.
+- `flush_interval_milliseconds` — the interval for flushing data from the in-memory buffer to the table.
+- `max_size_rows` — the maximum size of the log buffer in rows. When the buffer is full, the logs are flushed to disk.
+Default value: 1048576.
+- `reserved_size_rows` — the pre-allocated size of the log buffer in rows.
+Default value: 8192.
+- `buffer_size_rows_flush_threshold` — the number of rows in the buffer at which a background (non-blocking) flush of the logs to disk starts.
+Default value: `max_size_rows / 2`.
+- `flush_on_crash` — whether the logs should be flushed to disk on abnormal server termination.
+Default value: false.
+
+**Example**
+
+```xml
+<clickhouse>
+    <asynchronous_insert_log>
+        <database>system</database>
+        <table>asynchronous_insert_log</table>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <partition_by>toYYYYMM(event_date)</partition_by>
+        <max_size_rows>1048576</max_size_rows>
+        <reserved_size_rows>8192</reserved_size_rows>
+        <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+    </asynchronous_insert_log>
+</clickhouse>
+```
+
+## crash_log {#server_configuration_parameters-crash_log}
+
+Settings for the [crash_log](../../operations/system-tables/crash-log.md) system table.
+
+Parameters:
+
+- `database` — name of the database;
+- `table` — name of the table;
+- `partition_by` — sets a [custom partitioning key](../../operations/server-configuration-parameters/settings.md). Cannot be used if `engine` is set.
+- `engine` — sets the [MergeTree Engine settings](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for the system table. Cannot be used if `partition_by` is set.
+- `flush_interval_milliseconds` — the interval for flushing data from the in-memory buffer to the table.
+- `max_size_rows` — the maximum size of the log buffer in rows. When the buffer is full, the logs are flushed to disk.
+Default value: 1024.
+- `reserved_size_rows` — the pre-allocated size of the log buffer in rows.
+Default value: 1024.
+- `buffer_size_rows_flush_threshold` — the number of rows in the buffer at which a background (non-blocking) flush of the logs to disk starts.
+Default value: `max_size_rows / 2`.
+- `flush_on_crash` — whether the logs should be flushed to disk on abnormal server termination.
+Default value: true.
+
+**Example**
+
+``` xml
+<clickhouse>
+    <crash_log>
+        <database>system</database>
+        <table>crash_log</table>
+        <partition_by>toYYYYMM(event_date)</partition_by>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <max_size_rows>1024</max_size_rows>
+        <reserved_size_rows>1024</reserved_size_rows>
+        <buffer_size_rows_flush_threshold>512</buffer_size_rows_flush_threshold>
+        <flush_on_crash>true</flush_on_crash>
+    </crash_log>
+</clickhouse>
+```
+
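+A quick way to inspect the table after an abnormal termination; a sketch that queries the documented system table (the column list here is abridged):
+
+``` bash
+clickhouse-client --query "SELECT event_time, signal, query_id FROM system.crash_log ORDER BY event_time DESC LIMIT 5"
+```
+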
## query_masking_rules {#query-masking-rules}
Regexp-based rules that will be applied to all queries, as well as to all messages, before they are saved to the server log,
diff --git a/docs/ru/operations/system-tables/index.md b/docs/ru/operations/system-tables/index.md
index 7ff368b1910..24f79cae212 100644
--- a/docs/ru/operations/system-tables/index.md
+++ b/docs/ru/operations/system-tables/index.md
@@ -45,6 +45,10 @@ sidebar_label: "Системные таблицы"
ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024
-->
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    <max_size_rows>1048576</max_size_rows>
+    <reserved_size_rows>8192</reserved_size_rows>
+    <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+    <flush_on_crash>false</flush_on_crash>
```
diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md
index ea4f90d4f66..6e3830869cd 100644
--- a/docs/ru/sql-reference/functions/string-search-functions.md
+++ b/docs/ru/sql-reference/functions/string-search-functions.md
@@ -801,3 +801,55 @@ SELECT countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв');
│ 3 │
└────────────────────────────────────────────────────────────┘
```
+
+## hasSubsequence(haystack, needle) {#hasSubsequence}
+
+Returns 1 if needle is a subsequence of haystack, otherwise 0.
+
+
+**Syntax**
+
+``` sql
+hasSubsequence(haystack, needle)
+```
+
+**Arguments**
+
+- `haystack` — the string to search in. [String](../syntax.md#syntax-string-literal).
+- `needle` — the subsequence to search for. [String](../syntax.md#syntax-string-literal).
+
+**Returned values**
+
+- 1, if needle is a subsequence of haystack.
+- 0, otherwise.
+
+Type: `UInt8`.
+
+**Examples**
+
+Query:
+
+``` sql
+SELECT hasSubsequence('garbage', 'arg');
+```
+
+Result:
+
+``` text
+┌─hasSubsequence('garbage', 'arg')─┐
+│ 1 │
+└──────────────────────────────────┘
+```
+
+
+## hasSubsequenceCaseInsensitive
+
+The same as [hasSubsequence](#hasSubsequence), but searches case-insensitively.
+
+## hasSubsequenceUTF8
+
+The same as [hasSubsequence](#hasSubsequence), but assumes that `haystack` and `needle` contain sets of code points representing UTF-8 encoded text.
+
+## hasSubsequenceCaseInsensitiveUTF8
+
+The same as [hasSubsequenceUTF8](#hasSubsequenceUTF8), but searches case-insensitively.
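+
+To try the variants from the shell, a minimal sketch using `clickhouse-client` (the expected results follow directly from the definitions above):
+
+``` bash
+# Case-insensitive search: 'ARG' is a subsequence of 'garbage' when case is ignored.
+clickhouse-client --query "SELECT hasSubsequenceCaseInsensitive('garbage', 'ARG')"  # returns 1
+# The case-sensitive variant does not find the upper-case needle.
+clickhouse-client --query "SELECT hasSubsequence('garbage', 'ARG')"                 # returns 0
+```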
diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md
index a8ace213075..92be30b101a 100644
--- a/docs/ru/sql-reference/statements/alter/column.md
+++ b/docs/ru/sql-reference/statements/alter/column.md
@@ -182,7 +182,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)
Syntax:
```sql
-ALTER TABLE table_name MODIFY column_name REMOVE property;
+ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```
**Example**
diff --git a/docs/zh/interfaces/http.md b/docs/zh/interfaces/http.md
index c7a0f355a92..f84768beccc 100644
--- a/docs/zh/interfaces/http.md
+++ b/docs/zh/interfaces/http.md
@@ -53,7 +53,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
-X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
1
```
@@ -262,9 +262,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812
You can receive information about query progress in the `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example:
``` text
-X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"}
+X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
+X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
+X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
```
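+
+A minimal sketch of reproducing these headers with curl; `send_progress_in_http_headers` and `http_headers_progress_interval_ms` are the settings involved (the exact numbers in the headers will differ per run):
+
+``` bash
+echo 'SELECT number FROM system.numbers LIMIT 10000000 FORMAT Null' | \
+    curl -v 'http://localhost:8123/?send_progress_in_http_headers=1&http_headers_progress_interval_ms=100' --data-binary @-
+```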
Possible header fields:
@@ -363,7 +363,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
# HELP "Query" "Number of executing queries"
# TYPE "Query" counter
@@ -521,7 +521,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@@ -561,7 +561,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
%
@@ -613,7 +613,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
Absolute Path File
* Connection #0 to host localhost left intact
@@ -632,7 +632,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
Relative Path File
* Connection #0 to host localhost left intact
diff --git a/packages/clickhouse-server.service b/packages/clickhouse-server.service
index 7742d8b278a..42dc5bd380d 100644
--- a/packages/clickhouse-server.service
+++ b/packages/clickhouse-server.service
@@ -29,6 +29,7 @@ EnvironmentFile=-/etc/default/clickhouse
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
+AmbientCapabilities=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
[Install]
# ClickHouse should not start from the rescue shell (rescue.target).
diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp
index f791c39bad1..e73f77819ad 100644
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@@ -812,6 +812,11 @@ bool Client::processWithFuzzing(const String & full_query)
}
catch (...)
{
+ if (!ast_to_process)
+ fmt::print(stderr,
+ "Error while forming new query: {}\n",
+ getCurrentExceptionMessage(true));
+
// Some functions (e.g. protocol parsers) don't throw, but
// set last_exception instead, so we'll also do it here for
// uniformity.
@@ -1173,12 +1178,12 @@ void Client::processOptions(const OptionsDescription & options_description,
{
        String traceparent = options["opentelemetry-traceparent"].as<std::string>();
String error;
- if (!global_context->getClientInfo().client_trace_context.parseTraceparentHeader(traceparent, error))
+ if (!global_context->getClientTraceContext().parseTraceparentHeader(traceparent, error))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot parse OpenTelemetry traceparent '{}': {}", traceparent, error);
}
if (options.count("opentelemetry-tracestate"))
-        global_context->getClientInfo().client_trace_context.tracestate = options["opentelemetry-tracestate"].as<std::string>();
+        global_context->getClientTraceContext().tracestate = options["opentelemetry-tracestate"].as<std::string>();
}
@@ -1238,10 +1243,9 @@ void Client::processConfig()
global_context->getSettingsRef().max_insert_block_size);
}
- ClientInfo & client_info = global_context->getClientInfo();
- client_info.setInitialQuery();
- client_info.quota_key = config().getString("quota_key", "");
- client_info.query_kind = query_kind;
+ global_context->setQueryKindInitial();
+ global_context->setQuotaClientKey(config().getString("quota_key", ""));
+ global_context->setQueryKind(query_kind);
}
diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp
index d83e189f7ef..d7086c95beb 100644
--- a/programs/install/Install.cpp
+++ b/programs/install/Install.cpp
@@ -20,10 +20,7 @@
#include
#include
#include
-#include
-#include
#include
-#include
#include
#include
#include
@@ -35,6 +32,14 @@
#include
+#include <incbin.h>
+
+#include "config.h"
+
+/// Embedded configuration files used inside the install program
+INCBIN(resource_config_xml, SOURCE_DIR "/programs/server/config.xml");
+INCBIN(resource_users_xml, SOURCE_DIR "/programs/server/users.xml");
+
/** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary.
* It also allows to avoid dependency on systemd, upstart, SysV init.
@@ -560,7 +565,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (!fs::exists(main_config_file))
{
- std::string_view main_config_content = getResource("config.xml");
+        std::string_view main_config_content(reinterpret_cast<const char *>(gresource_config_xmlData), gresource_config_xmlSize);
if (main_config_content.empty())
{
fmt::print("There is no default config.xml, you have to download it and place to {}.\n", main_config_file.string());
@@ -672,7 +677,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (!fs::exists(users_config_file))
{
- std::string_view users_config_content = getResource("users.xml");
+        std::string_view users_config_content(reinterpret_cast<const char *>(gresource_users_xmlData), gresource_users_xmlSize);
if (users_config_content.empty())
{
fmt::print("There is no default users.xml, you have to download it and place to {}.\n", users_config_file.string());
diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt
index 54c39f5709f..43a8d84b513 100644
--- a/programs/keeper/CMakeLists.txt
+++ b/programs/keeper/CMakeLists.txt
@@ -1,16 +1,3 @@
-include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
-
-if (OS_LINUX)
- set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}")
- # for some reason INTERFACE linkage doesn't work for standalone binary
- set (LINK_RESOURCE_LIB_STANDALONE_KEEPER "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}")
-endif ()
-
-clickhouse_embed_binaries(
- TARGET clickhouse_keeper_configs
- RESOURCES keeper_config.xml keeper_embedded.xml
-)
-
set(CLICKHOUSE_KEEPER_SOURCES
Keeper.cpp
)
@@ -29,11 +16,12 @@ set (CLICKHOUSE_KEEPER_LINK
clickhouse_program_add(keeper)
install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper)
-add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs)
if (BUILD_STANDALONE_KEEPER)
# Straight list of all required sources
set(CLICKHOUSE_KEEPER_STANDALONE_SOURCES
+ ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperReconfiguration.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/RaftServerConfig.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ACLMap.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Changelog.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/CoordinationSettings.cpp
@@ -77,6 +65,7 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusRequestHandler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusMetricsWriter.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ServerType.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp
@@ -92,6 +81,7 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp
@@ -213,7 +203,6 @@ if (BUILD_STANDALONE_KEEPER)
${LINK_RESOURCE_LIB_STANDALONE_KEEPER}
)
- add_dependencies(clickhouse-keeper clickhouse_keeper_configs)
set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)
if (SPLIT_DEBUG_SYMBOLS)
diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp
index 6034d63a016..a38467c3369 100644
--- a/programs/keeper/Keeper.cpp
+++ b/programs/keeper/Keeper.cpp
@@ -457,8 +457,10 @@ try
const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");
    std::vector<std::string> extra_paths = {include_from_path};
- if (!cert_path.empty()) extra_paths.emplace_back(cert_path);
- if (!key_path.empty()) extra_paths.emplace_back(key_path);
+ if (!cert_path.empty())
+ extra_paths.emplace_back(cert_path);
+ if (!key_path.empty())
+ extra_paths.emplace_back(key_path);
    /// ConfigReloader has too strict parameters, which are redundant in our case.
    auto main_config_reloader = std::make_unique<ConfigReloader>(
diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index 96924e3c8d9..587c88a2745 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -266,6 +266,10 @@ void LocalServer::tryInitPath()
global_context->setUserFilesPath(""); // user's files are everywhere
+ std::string user_scripts_path = config().getString("user_scripts_path", fs::path(path) / "user_scripts/");
+ global_context->setUserScriptsPath(user_scripts_path);
+ fs::create_directories(user_scripts_path);
+
/// top_level_domains_lists
const std::string & top_level_domains_path = config().getString("top_level_domains_path", path + "top_level_domains/");
if (!top_level_domains_path.empty())
@@ -490,6 +494,17 @@ try
applyCmdSettings(global_context);
+ /// try to load user defined executable functions, throw on error and die
+ try
+ {
+ global_context->loadOrReloadUserDefinedExecutableFunctions(config());
+ }
+ catch (...)
+ {
+ tryLogCurrentException(&logger(), "Caught exception while loading user defined executable functions.");
+ throw;
+ }
+
if (is_interactive)
{
clearTerminal();
@@ -569,7 +584,9 @@ void LocalServer::processConfig()
}
print_stack_trace = config().getBool("stacktrace", false);
- load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false);
+ const std::string clickhouse_dialect{"clickhouse"};
+ load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false)
+ && config().getString("dialect", clickhouse_dialect) == clickhouse_dialect;
auto logging = (config().has("logger.console")
|| config().has("logger.level")
@@ -737,9 +754,8 @@ void LocalServer::processConfig()
for (const auto & [key, value] : prompt_substitutions)
boost::replace_all(prompt_by_server_display_name, "{" + key + "}", value);
- ClientInfo & client_info = global_context->getClientInfo();
- client_info.setInitialQuery();
- client_info.query_kind = query_kind;
+ global_context->setQueryKindInitial();
+ global_context->setQueryKind(query_kind);
}
diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt
index 855973d10e1..b8241afa1eb 100644
--- a/programs/server/CMakeLists.txt
+++ b/programs/server/CMakeLists.txt
@@ -1,12 +1,8 @@
-include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
-
set(CLICKHOUSE_SERVER_SOURCES
MetricsTransmitter.cpp
Server.cpp
)
-set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}")
-
set (CLICKHOUSE_SERVER_LINK
PRIVATE
clickhouse_aggregate_functions
@@ -34,9 +30,3 @@ endif()
clickhouse_program_add(server)
install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse)
-
-clickhouse_embed_binaries(
- TARGET clickhouse_server_configs
- RESOURCES config.xml users.xml embedded.xml play.html dashboard.html js/uplot.js
-)
-add_dependencies(clickhouse-server-lib clickhouse_server_configs)
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 948824242fb..405ebf7fb2f 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -128,6 +128,10 @@
# include
#endif
+#include <incbin.h>
+/// A minimal file used when the server is run without installation
+INCBIN(resource_embedded_xml, SOURCE_DIR "/programs/server/embedded.xml");
+
namespace CurrentMetrics
{
extern const Metric Revision;
@@ -393,6 +397,7 @@ int Server::run()
void Server::initialize(Poco::Util::Application & self)
{
+    ConfigProcessor::registerEmbeddedConfig("config.xml", std::string_view(reinterpret_cast<const char *>(gresource_embedded_xmlData), gresource_embedded_xmlSize));
BaseDaemon::initialize(self);
logger().information("starting up");
@@ -739,11 +744,13 @@ try
        [&]() -> std::vector<ProtocolServerMetrics>
{
            std::vector<ProtocolServerMetrics> metrics;
- metrics.reserve(servers_to_start_before_tables.size());
+
+ std::lock_guard lock(servers_lock);
+ metrics.reserve(servers_to_start_before_tables.size() + servers.size());
+
for (const auto & server : servers_to_start_before_tables)
metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()});
- std::lock_guard lock(servers_lock);
for (const auto & server : servers)
metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()});
return metrics;
@@ -1028,6 +1035,11 @@ try
/// Initialize merge tree metadata cache
if (config().has("merge_tree_metadata_cache"))
{
+ global_context->addWarningMessage("The setting 'merge_tree_metadata_cache' is enabled."
+ " But the feature of 'metadata cache in RocksDB' is experimental and is not ready for production."
+ " The usage of this feature can lead to data corruption and loss. The setting should be disabled in production."
+ " See the corresponding report at https://github.com/ClickHouse/ClickHouse/issues/51182");
+
fs::create_directories(path / "rocksdb/");
size_t size = config().getUInt64("merge_tree_metadata_cache.lru_cache_size", 256 << 20);
bool continue_if_corrupted = config().getBool("merge_tree_metadata_cache.continue_if_corrupted", false);
@@ -1105,8 +1117,10 @@ try
const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");
    std::vector<std::string> extra_paths = {include_from_path};
- if (!cert_path.empty()) extra_paths.emplace_back(cert_path);
- if (!key_path.empty()) extra_paths.emplace_back(key_path);
+ if (!cert_path.empty())
+ extra_paths.emplace_back(cert_path);
+ if (!key_path.empty())
+ extra_paths.emplace_back(key_path);
    auto main_config_reloader = std::make_unique<ConfigReloader>(
config_path,
@@ -1304,7 +1318,7 @@ try
global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config);
std::lock_guard lock(servers_lock);
- updateServers(*config, server_pool, async_metrics, servers);
+ updateServers(*config, server_pool, async_metrics, servers, servers_to_start_before_tables);
}
global_context->updateStorageConfiguration(*config);
@@ -1406,10 +1420,27 @@ try
}
- for (auto & server : servers_to_start_before_tables)
{
- server.start();
- LOG_INFO(log, "Listening for {}", server.getDescription());
+ std::lock_guard lock(servers_lock);
+        /// We should start interserver communications before (and, more importantly, shut them down after) tables.
+        /// Because the server can wait for long-running queries (for example, in tcp_handler) after the interserver handler was already shut down.
+ /// In this case we will have replicated tables which are unable to send any parts to other replicas, but still can
+ /// communicate with zookeeper, execute merges, etc.
+ createInterserverServers(
+ config(),
+ interserver_listen_hosts,
+ listen_try,
+ server_pool,
+ async_metrics,
+ servers_to_start_before_tables,
+ /* start_servers= */ false);
+
+
+ for (auto & server : servers_to_start_before_tables)
+ {
+ server.start();
+ LOG_INFO(log, "Listening for {}", server.getDescription());
+ }
}
/// Initialize access storages.
@@ -1431,6 +1462,24 @@ try
access_control.reload(AccessControl::ReloadMode::USERS_CONFIG_ONLY);
});
+ global_context->setStopServersCallback([&](const ServerType & server_type)
+ {
+ stopServers(servers, server_type);
+ });
+
+ global_context->setStartServersCallback([&](const ServerType & server_type)
+ {
+ createServers(
+ config(),
+ listen_hosts,
+ listen_try,
+ server_pool,
+ async_metrics,
+ servers,
+ /* start_servers= */ true,
+ server_type);
+ });
+
/// Limit on total number of concurrently executed queries.
global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries);
@@ -1451,16 +1500,18 @@ try
/// Load global settings from default_profile and system_profile.
global_context->setDefaultProfiles(config());
- const Settings & settings = global_context->getSettingsRef();
/// Initialize background executors after we load default_profile config.
/// This is needed to load proper values of background_pool_size etc.
global_context->initializeBackgroundExecutorsIfNeeded();
- if (settings.async_insert_threads)
+ if (server_settings.async_insert_threads)
+ {
        global_context->setAsynchronousInsertQueue(std::make_shared<AsynchronousInsertQueue>(
global_context,
- settings.async_insert_threads));
+ server_settings.async_insert_threads,
+ server_settings.async_insert_queue_flush_on_shutdown));
+ }
size_t mark_cache_size = server_settings.mark_cache_size;
String mark_cache_policy = server_settings.mark_cache_policy;
@@ -1529,10 +1580,13 @@ try
{
LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish.");
size_t current_connections = 0;
- for (auto & server : servers_to_start_before_tables)
{
- server.stop();
- current_connections += server.currentConnections();
+ std::lock_guard lock(servers_lock);
+ for (auto & server : servers_to_start_before_tables)
+ {
+ server.stop();
+ current_connections += server.currentConnections();
+ }
}
if (current_connections)
@@ -1601,13 +1655,7 @@ try
global_context->setSystemZooKeeperLogAfterInitializationIfNeeded();
/// Build loggers before tables startup to make log messages from tables
/// attach available in system.text_log
- {
- String level_str = config().getString("text_log.level", "");
- int level = level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(level_str);
- setTextLog(global_context->getTextLog(), level);
-
- buildLoggers(config(), logger());
- }
+ buildLoggers(config(), logger());
/// After the system database is created, attach virtual system tables (in addition to query_log and part_log)
attachSystemTablesServer(global_context, *database_catalog.getSystemDatabase(), has_zookeeper);
attachInformationSchema(global_context, *database_catalog.getDatabase(DatabaseCatalog::INFORMATION_SCHEMA));
@@ -1711,7 +1759,7 @@ try
{
std::lock_guard lock(servers_lock);
- createServers(config(), listen_hosts, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers);
+ createServers(config(), listen_hosts, listen_try, server_pool, async_metrics, servers);
if (servers.empty())
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG,
"No servers started (add valid listen_host and 'tcp_port' or 'http_port' "
@@ -1969,12 +2017,12 @@ HTTPContextPtr Server::httpContext() const
void Server::createServers(
Poco::Util::AbstractConfiguration & config,
const Strings & listen_hosts,
- const Strings & interserver_listen_hosts,
bool listen_try,
Poco::ThreadPool & server_pool,
AsynchronousMetrics & async_metrics,
    std::vector<ProtocolServerAdapter> & servers,
- bool start_servers)
+ bool start_servers,
+ const ServerType & server_type)
{
const Settings & settings = global_context->getSettingsRef();
@@ -1988,6 +2036,9 @@ void Server::createServers(
for (const auto & protocol : protocols)
{
+ if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol))
+ continue;
+
std::vector hosts;
if (config.has("protocols." + protocol + ".host"))
hosts.push_back(config.getString("protocols." + protocol + ".host"));
@@ -2034,219 +2085,310 @@ void Server::createServers(
for (const auto & listen_host : listen_hosts)
{
- /// HTTP
- const char * port_name = "http_port";
- createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
- {
- Poco::Net::ServerSocket socket;
- auto address = socketBindListen(config, socket, listen_host, port);
- socket.setReceiveTimeout(settings.http_receive_timeout);
- socket.setSendTimeout(settings.http_send_timeout);
+ const char * port_name;
- return ProtocolServerAdapter(
- listen_host,
- port_name,
- "http://" + address.toString(),
-                std::make_unique<HTTPServer>(
- httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params));
- });
-
- /// HTTPS
- port_name = "https_port";
- createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ if (server_type.shouldStart(ServerType::Type::HTTP))
{
+ /// HTTP
+ port_name = "http_port";
+ createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ {
+ Poco::Net::ServerSocket socket;
+ auto address = socketBindListen(config, socket, listen_host, port);
+ socket.setReceiveTimeout(settings.http_receive_timeout);
+ socket.setSendTimeout(settings.http_send_timeout);
+
+ return ProtocolServerAdapter(
+ listen_host,
+ port_name,
+ "http://" + address.toString(),
+                    std::make_unique<HTTPServer>(
+ httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params));
+ });
+ }
+
+ if (server_type.shouldStart(ServerType::Type::HTTPS))
+ {
+ /// HTTPS
+ port_name = "https_port";
+ createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ {
#if USE_SSL
- Poco::Net::SecureServerSocket socket;
- auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
- socket.setReceiveTimeout(settings.http_receive_timeout);
- socket.setSendTimeout(settings.http_send_timeout);
- return ProtocolServerAdapter(
- listen_host,
- port_name,
- "https://" + address.toString(),
-                std::make_unique<HTTPServer>(
- httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params));
+ Poco::Net::SecureServerSocket socket;
+ auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
+ socket.setReceiveTimeout(settings.http_receive_timeout);
+ socket.setSendTimeout(settings.http_send_timeout);
+ return ProtocolServerAdapter(
+ listen_host,
+ port_name,
+ "https://" + address.toString(),
+                    std::make_unique<HTTPServer>(
+ httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params));
#else
- UNUSED(port);
- throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support.");
+ UNUSED(port);
+ throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support.");
#endif
- });
+ });
+ }
- /// TCP
- port_name = "tcp_port";
- createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ if (server_type.shouldStart(ServerType::Type::TCP))
{
- Poco::Net::ServerSocket socket;
- auto address = socketBindListen(config, socket, listen_host, port);
- socket.setReceiveTimeout(settings.receive_timeout);
- socket.setSendTimeout(settings.send_timeout);
- return ProtocolServerAdapter(
- listen_host,
- port_name,
- "native protocol (tcp): " + address.toString(),
-                std::make_unique<TCPServer>(
- new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false),
- server_pool,
- socket,
- new Poco::Net::TCPServerParams));
- });
+ /// TCP
+ port_name = "tcp_port";
+ createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ {
+ Poco::Net::ServerSocket socket;
+ auto address = socketBindListen(config, socket, listen_host, port);
+ socket.setReceiveTimeout(settings.receive_timeout);
+ socket.setSendTimeout(settings.send_timeout);
+ return ProtocolServerAdapter(
+ listen_host,
+ port_name,
+ "native protocol (tcp): " + address.toString(),
+                    std::make_unique<TCPServer>(
+ new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false),
+ server_pool,
+ socket,
+ new Poco::Net::TCPServerParams));
+ });
+ }
- /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt
- port_name = "tcp_with_proxy_port";
- createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ if (server_type.shouldStart(ServerType::Type::TCP_WITH_PROXY))
{
- Poco::Net::ServerSocket socket;
- auto address = socketBindListen(config, socket, listen_host, port);
- socket.setReceiveTimeout(settings.receive_timeout);
- socket.setSendTimeout(settings.send_timeout);
- return ProtocolServerAdapter(
- listen_host,
- port_name,
- "native protocol (tcp) with PROXY: " + address.toString(),
-                std::make_unique<TCPServer>(
- new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true),
- server_pool,
- socket,
- new Poco::Net::TCPServerParams));
- });
+ /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt
+ port_name = "tcp_with_proxy_port";
+ createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ {
+ Poco::Net::ServerSocket socket;
+ auto address = socketBindListen(config, socket, listen_host, port);
+ socket.setReceiveTimeout(settings.receive_timeout);
+ socket.setSendTimeout(settings.send_timeout);
+ return ProtocolServerAdapter(
+ listen_host,
+ port_name,
+ "native protocol (tcp) with PROXY: " + address.toString(),
+                    std::make_unique<TCPServer>(
+ new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true),
+ server_pool,
+ socket,
+ new Poco::Net::TCPServerParams));
+ });
+ }
- /// TCP with SSL
- port_name = "tcp_port_secure";
- createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ if (server_type.shouldStart(ServerType::Type::TCP_SECURE))
{
-#if USE_SSL
- Poco::Net::SecureServerSocket socket;
- auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
- socket.setReceiveTimeout(settings.receive_timeout);
- socket.setSendTimeout(settings.send_timeout);
- return ProtocolServerAdapter(
- listen_host,
- port_name,
- "secure native protocol (tcp_secure): " + address.toString(),
-                std::make_unique<TCPServer>(
- new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false),
- server_pool,
- socket,
- new Poco::Net::TCPServerParams));
-#else
- UNUSED(port);
- throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.");
-#endif
- });
+ /// TCP with SSL
+ port_name = "tcp_port_secure";
+ createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ {
+ #if USE_SSL
+ Poco::Net::SecureServerSocket socket;
+ auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
+ socket.setReceiveTimeout(settings.receive_timeout);
+ socket.setSendTimeout(settings.send_timeout);
+ return ProtocolServerAdapter(
+ listen_host,
+ port_name,
+ "secure native protocol (tcp_secure): " + address.toString(),
+                    std::make_unique<TCPServer>(
+ new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false),
+ server_pool,
+ socket,
+ new Poco::Net::TCPServerParams));
+ #else
+ UNUSED(port);
+ throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.");
+ #endif
+ });
+ }
- port_name = "mysql_port";
- createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ if (server_type.shouldStart(ServerType::Type::MYSQL))
{
- Poco::Net::ServerSocket socket;
- auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
- socket.setReceiveTimeout(Poco::Timespan());
- socket.setSendTimeout(settings.send_timeout);
- return ProtocolServerAdapter(
- listen_host,
- port_name,
- "MySQL compatibility protocol: " + address.toString(),
-                std::make_unique<TCPServer>(new MySQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
- });
+ port_name = "mysql_port";
+ createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ {
+ Poco::Net::ServerSocket socket;
+ auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
+ socket.setReceiveTimeout(Poco::Timespan());
+ socket.setSendTimeout(settings.send_timeout);
+ return ProtocolServerAdapter(
+ listen_host,
+ port_name,
+ "MySQL compatibility protocol: " + address.toString(),
+                    std::make_unique<TCPServer>(new MySQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
+ });
+ }
- port_name = "postgresql_port";
- createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ if (server_type.shouldStart(ServerType::Type::POSTGRESQL))
{
- Poco::Net::ServerSocket socket;
- auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
- socket.setReceiveTimeout(Poco::Timespan());
- socket.setSendTimeout(settings.send_timeout);
- return ProtocolServerAdapter(
- listen_host,
- port_name,
- "PostgreSQL compatibility protocol: " + address.toString(),
-                std::make_unique<TCPServer>(new PostgreSQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
- });
+ port_name = "postgresql_port";
+ createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ {
+ Poco::Net::ServerSocket socket;
+ auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
+ socket.setReceiveTimeout(Poco::Timespan());
+ socket.setSendTimeout(settings.send_timeout);
+ return ProtocolServerAdapter(
+ listen_host,
+ port_name,
+ "PostgreSQL compatibility protocol: " + address.toString(),
+                    std::make_unique<TCPServer>(new PostgreSQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
+ });
+ }
#if USE_GRPC
- port_name = "grpc_port";
- createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ if (server_type.shouldStart(ServerType::Type::GRPC))
{
- Poco::Net::SocketAddress server_address(listen_host, port);
- return ProtocolServerAdapter(
- listen_host,
- port_name,
- "gRPC protocol: " + server_address.toString(),
-                std::make_unique<GRPCServer>(*this, makeSocketAddress(listen_host, port, &logger())));
- });
+ port_name = "grpc_port";
+ createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ {
+ Poco::Net::SocketAddress server_address(listen_host, port);
+ return ProtocolServerAdapter(
+ listen_host,
+ port_name,
+ "gRPC protocol: " + server_address.toString(),
+                    std::make_unique<GRPCServer>(*this, makeSocketAddress(listen_host, port, &logger())));
+ });
+ }
#endif
-
- /// Prometheus (if defined and not setup yet with http_port)
- port_name = "prometheus.port";
- createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ if (server_type.shouldStart(ServerType::Type::PROMETHEUS))
{
- Poco::Net::ServerSocket socket;
- auto address = socketBindListen(config, socket, listen_host, port);
- socket.setReceiveTimeout(settings.http_receive_timeout);
- socket.setSendTimeout(settings.http_send_timeout);
- return ProtocolServerAdapter(
- listen_host,
- port_name,
- "Prometheus: http://" + address.toString(),
-                std::make_unique<HTTPServer>(
- httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params));
- });
+ /// Prometheus (if defined and not setup yet with http_port)
+ port_name = "prometheus.port";
+ createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ {
+ Poco::Net::ServerSocket socket;
+ auto address = socketBindListen(config, socket, listen_host, port);
+ socket.setReceiveTimeout(settings.http_receive_timeout);
+ socket.setSendTimeout(settings.http_send_timeout);
+ return ProtocolServerAdapter(
+ listen_host,
+ port_name,
+ "Prometheus: http://" + address.toString(),
+                    std::make_unique<HTTPServer>(
+ httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params));
+ });
+ }
}
+}
+
+void Server::createInterserverServers(
+ Poco::Util::AbstractConfiguration & config,
+ const Strings & interserver_listen_hosts,
+ bool listen_try,
+ Poco::ThreadPool & server_pool,
+ AsynchronousMetrics & async_metrics,
+    std::vector<ProtocolServerAdapter> & servers,
+ bool start_servers,
+ const ServerType & server_type)
+{
+ const Settings & settings = global_context->getSettingsRef();
+
+ Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0);
+ Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams;
+ http_params->setTimeout(settings.http_receive_timeout);
+ http_params->setKeepAliveTimeout(keep_alive_timeout);
/// Now iterate over interserver_listen_hosts
for (const auto & interserver_listen_host : interserver_listen_hosts)
{
- /// Interserver IO HTTP
- const char * port_name = "interserver_http_port";
- createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
- {
- Poco::Net::ServerSocket socket;
- auto address = socketBindListen(config, socket, interserver_listen_host, port);
- socket.setReceiveTimeout(settings.http_receive_timeout);
- socket.setSendTimeout(settings.http_send_timeout);
- return ProtocolServerAdapter(
- interserver_listen_host,
- port_name,
- "replica communication (interserver): http://" + address.toString(),
- std::make_unique<HTTPServer>(
- httpContext(),
- createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"),
- server_pool,
- socket,
- http_params));
- });
+ const char * port_name;
- port_name = "interserver_https_port";
- createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTP))
{
+ /// Interserver IO HTTP
+ port_name = "interserver_http_port";
+ createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ {
+ Poco::Net::ServerSocket socket;
+ auto address = socketBindListen(config, socket, interserver_listen_host, port);
+ socket.setReceiveTimeout(settings.http_receive_timeout);
+ socket.setSendTimeout(settings.http_send_timeout);
+ return ProtocolServerAdapter(
+ interserver_listen_host,
+ port_name,
+ "replica communication (interserver): http://" + address.toString(),
+ std::make_unique<HTTPServer>(
+ httpContext(),
+ createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"),
+ server_pool,
+ socket,
+ http_params));
+ });
+ }
+
+ if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTPS))
+ {
+ port_name = "interserver_https_port";
+ createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+ {
#if USE_SSL
- Poco::Net::SecureServerSocket socket;
- auto address = socketBindListen(config, socket, interserver_listen_host, port, /* secure = */ true);
- socket.setReceiveTimeout(settings.http_receive_timeout);
- socket.setSendTimeout(settings.http_send_timeout);
- return ProtocolServerAdapter(
- interserver_listen_host,
- port_name,
- "secure replica communication (interserver): https://" + address.toString(),
- std::make_unique<HTTPServer>(
- httpContext(),
- createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"),
- server_pool,
- socket,
- http_params));
+ Poco::Net::SecureServerSocket socket;
+ auto address = socketBindListen(config, socket, interserver_listen_host, port, /* secure = */ true);
+ socket.setReceiveTimeout(settings.http_receive_timeout);
+ socket.setSendTimeout(settings.http_send_timeout);
+ return ProtocolServerAdapter(
+ interserver_listen_host,
+ port_name,
+ "secure replica communication (interserver): https://" + address.toString(),
+ std::make_unique<HTTPServer>(
+ httpContext(),
+ createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"),
+ server_pool,
+ socket,
+ http_params));
#else
- UNUSED(port);
- throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.");
+ UNUSED(port);
+ throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.");
#endif
- });
+ });
+ }
+ }
+}
+
+void Server::stopServers(
+ std::vector<ProtocolServerAdapter> & servers,
+ const ServerType & server_type
+) const
+{
+ Poco::Logger * log = &logger();
+
+ /// Remove servers once all their connections are closed
+ auto check_server = [&log](const char prefix[], auto & server)
+ {
+ if (!server.isStopping())
+ return false;
+ size_t current_connections = server.currentConnections();
+ LOG_DEBUG(log, "Server {}{}: {} ({} connections)",
+ server.getDescription(),
+ prefix,
+ !current_connections ? "finished" : "waiting",
+ current_connections);
+ return !current_connections;
+ };
+
+ std::erase_if(servers, std::bind_front(check_server, " (from one of previous remove)"));
+
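+ /// Stop only the servers whose port name matches the requested server type; they stay in the list until their connections drain.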
+ for (auto & server : servers)
+ {
+ if (!server.isStopping())
+ {
+ const std::string server_port_name = server.getPortName();
+
+ if (server_type.shouldStop(server_port_name))
+ server.stop();
+ }
}
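+ /// Reap the servers that are already free of connections; the rest are removed by a later sweep.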
+ std::erase_if(servers, std::bind_front(check_server, ""));
}
void Server::updateServers(
Poco::Util::AbstractConfiguration & config,
Poco::ThreadPool & server_pool,
AsynchronousMetrics & async_metrics,
- std::vector<ProtocolServerAdapter> & servers)
+ std::vector<ProtocolServerAdapter> & servers,
+ std::vector<ProtocolServerAdapter> & servers_to_start_before_tables)
{
Poco::Logger * log = &logger();
@@ -2272,11 +2414,19 @@ void Server::updateServers(
Poco::Util::AbstractConfiguration & previous_config = latest_config ? *latest_config : this->config();
+ std::vector<ProtocolServerAdapter *> all_servers;
+ all_servers.reserve(servers.size() + servers_to_start_before_tables.size());
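+ /// Consider the ordinary servers and the ones started before tables together, so a config change can restart either kind.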
for (auto & server : servers)
+ all_servers.push_back(&server);
+
+ for (auto & server : servers_to_start_before_tables)
+ all_servers.push_back(&server);
+
+ for (auto * server : all_servers)
{
- if (!server.isStopping())
+ if (!server->isStopping())
{
- std::string port_name = server.getPortName();
+ std::string port_name = server->getPortName();
bool has_host = false;
bool is_http = false;
if (port_name.starts_with("protocols."))
@@ -2314,27 +2464,29 @@ void Server::updateServers(
/// NOTE: better to compare using getPortName() over using
/// dynamic_cast<> since HTTPServer is also used for prometheus and
/// internal replication communications.
- is_http = server.getPortName() == "http_port" || server.getPortName() == "https_port";
+ is_http = server->getPortName() == "http_port" || server->getPortName() == "https_port";
}
if (!has_host)
- has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server.getListenHost()) != listen_hosts.end();
+ has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server->getListenHost()) != listen_hosts.end();
bool has_port = !config.getString(port_name, "").empty();
bool force_restart = is_http && !isSameConfiguration(previous_config, config, "http_handlers");
if (force_restart)
- LOG_TRACE(log, " had been changed, will reload {}", server.getDescription());
+ LOG_TRACE(log, " had been changed, will reload {}", server->getDescription());
- if (!has_host || !has_port || config.getInt(server.getPortName()) != server.portNumber() || force_restart)
+ if (!has_host || !has_port || config.getInt(server->getPortName()) != server->portNumber() || force_restart)
{
- server.stop();
- LOG_INFO(log, "Stopped listening for {}", server.getDescription());
+ server->stop();
+ LOG_INFO(log, "Stopped listening for {}", server->getDescription());
}
}
}
- createServers(config, listen_hosts, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true);
+ createServers(config, listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true);
+ createInterserverServers(config, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers_to_start_before_tables, /* start_servers= */ true);
std::erase_if(servers, std::bind_front(check_server, ""));
+ std::erase_if(servers_to_start_before_tables, std::bind_front(check_server, ""));
}
}
diff --git a/programs/server/Server.h b/programs/server/Server.h
index e9ae6d8d937..3f03dd137ef 100644
--- a/programs/server/Server.h
+++ b/programs/server/Server.h
@@ -3,8 +3,9 @@
#include
#include
-#include "Server/HTTP/HTTPContext.h"
+#include <Server/HTTP/HTTPContext.h>
#include
+#include <Server/ServerType.h>
#include
/** Server provides three interfaces:
@@ -102,18 +103,34 @@ private:
void createServers(
Poco::Util::AbstractConfiguration & config,
const Strings & listen_hosts,
+ bool listen_try,
+ Poco::ThreadPool & server_pool,
+ AsynchronousMetrics & async_metrics,
+ std::vector<ProtocolServerAdapter> & servers,
+ bool start_servers = false,
+ const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL));
+
+ void createInterserverServers(
+ Poco::Util::AbstractConfiguration & config,
const Strings & interserver_listen_hosts,
bool listen_try,
Poco::ThreadPool & server_pool,
AsynchronousMetrics & async_metrics,
std::vector<ProtocolServerAdapter> & servers,
- bool start_servers = false);
+ bool start_servers = false,
+ const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL));
void updateServers(
Poco::Util::AbstractConfiguration & config,
Poco::ThreadPool & server_pool,
AsynchronousMetrics & async_metrics,
- std::vector<ProtocolServerAdapter> & servers);
+ std::vector<ProtocolServerAdapter> & servers,
+ std::vector<ProtocolServerAdapter> & servers_to_start_before_tables);
+
+ void stopServers(
+ std::vector<ProtocolServerAdapter> & servers,
+ const ServerType & server_type
+ ) const;
};
}
diff --git a/programs/server/config.xml b/programs/server/config.xml
index 2a7dc1e576a..14b8954fc39 100644
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@@ -1026,6 +1026,14 @@
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
+ <max_size_rows>1048576</max_size_rows>
+ <reserved_size_rows>8192</reserved_size_rows>
+ <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+ <flush_on_crash>false</flush_on_crash>
@@ -1039,6 +1047,11 @@
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
+ <max_size_rows>1048576</max_size_rows>
+ <reserved_size_rows>8192</reserved_size_rows>
+ <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+ <flush_on_crash>false</flush_on_crash>
@@ -1084,7 +1109,11 @@
<database>system</database>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
+ <max_size_rows>1048576</max_size_rows>
+ <reserved_size_rows>8192</reserved_size_rows>
+ <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
+ <flush_on_crash>false</flush_on_crash>
@@ -1151,6 +1196,10 @@
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
+ <max_size_rows>1048576</max_size_rows>
+ <reserved_size_rows>8192</reserved_size_rows>
+ <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+ <flush_on_crash>false</flush_on_crash>
diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp
--- a/src/Interpreters/Cache/QueryCache.cpp
+++ b/src/Interpreters/Cache/QueryCache.cpp
skip_insert = true; /// Key already contained in cache and did not expire yet --> don't replace it
- LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.queryStringFromAst());
+ LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.query_string);
}
}
@@ -263,14 +263,14 @@ void QueryCache::Writer::finalizeWrite()
if (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - query_start_time) < min_query_runtime)
{
- LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query not expensive enough), query: {}", key.queryStringFromAst());
+ LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query not expensive enough), query: {}", key.query_string);
return;
}
if (auto entry = cache.getWithKey(key); entry.has_value() && !IsStale()(entry->key))
{
/// Same check as in ctor because a parallel Writer could have inserted the current key in the meantime
- LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.queryStringFromAst());
+ LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.query_string);
return;
}
@@ -353,7 +353,7 @@ void QueryCache::Writer::finalizeWrite()
if ((new_entry_size_in_bytes > max_entry_size_in_bytes) || (new_entry_size_in_rows > max_entry_size_in_rows))
{
- LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query result too big), new_entry_size_in_bytes: {} ({}), new_entry_size_in_rows: {} ({}), query: {}", new_entry_size_in_bytes, max_entry_size_in_bytes, new_entry_size_in_rows, max_entry_size_in_rows, key.queryStringFromAst());
+ LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query result too big), new_entry_size_in_bytes: {} ({}), new_entry_size_in_rows: {} ({}), query: {}", new_entry_size_in_bytes, max_entry_size_in_bytes, new_entry_size_in_rows, max_entry_size_in_rows, key.query_string);
return;
}
@@ -388,7 +388,7 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar
if (!entry.has_value())
{
- LOG_TRACE(&Poco::Logger::get("QueryCache"), "No entry found for query {}", key.queryStringFromAst());
+ LOG_TRACE(&Poco::Logger::get("QueryCache"), "No entry found for query {}", key.query_string);
return;
}
@@ -397,13 +397,13 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar
if (!entry_key.is_shared && entry_key.user_name != key.user_name)
{
- LOG_TRACE(&Poco::Logger::get("QueryCache"), "Inaccessible entry found for query {}", key.queryStringFromAst());
+ LOG_TRACE(&Poco::Logger::get("QueryCache"), "Inaccessible entry found for query {}", key.query_string);
return;
}
if (IsStale()(entry_key))
{
- LOG_TRACE(&Poco::Logger::get("QueryCache"), "Stale entry found for query {}", key.queryStringFromAst());
+ LOG_TRACE(&Poco::Logger::get("QueryCache"), "Stale entry found for query {}", key.query_string);
return;
}
@@ -441,7 +441,7 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar
buildSourceFromChunks(entry_key.header, std::move(decompressed_chunks), entry_mapped->totals, entry_mapped->extremes);
}
- LOG_TRACE(&Poco::Logger::get("QueryCache"), "Entry found for query {}", key.queryStringFromAst());
+ LOG_TRACE(&Poco::Logger::get("QueryCache"), "Entry found for query {}", key.query_string);
}
bool QueryCache::Reader::hasCacheEntryForKey() const
@@ -496,6 +496,16 @@ void QueryCache::reset()
cache_size_in_bytes = 0;
}
+size_t QueryCache::weight() const
+{
+ return cache.weight();
+}
+
+size_t QueryCache::count() const
+{
+ return cache.count();
+}
+
size_t QueryCache::recordQueryRun(const Key & key)
{
std::lock_guard lock(mutex);
diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h
index 6ef7cc60918..eaa54c503fa 100644
--- a/src/Interpreters/Cache/QueryCache.h
+++ b/src/Interpreters/Cache/QueryCache.h
@@ -24,13 +24,21 @@ bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context);
class QueryCache
{
public:
+ enum class Usage
+ {
+ Unknown, /// we don't know what happened
+ None, /// query result neither written nor read into/from query cache
+ Write, /// query result written into query cache
+ Read, /// query result read from query cache
+ };
+
/// Represents a query result in the cache.
struct Key
{
/// ----------------------------------------------------
/// The actual key (data which gets hashed):
- /// Unlike the query string, the AST is agnostic to lower/upper case (SELECT vs. select)
+ /// Unlike the query string, the AST is agnostic to lower/upper case (SELECT vs. select).
const ASTPtr ast;
/// Note: For a transactionally consistent cache, we would need to include the system settings in the cache key or invalidate the
@@ -58,6 +66,11 @@ public:
/// (we could theoretically apply compression also to the totals and extremes but it's an obscure use case)
const bool is_compressed;
+ /// The SELECT query as plain string, displayed in SYSTEM.QUERY_CACHE. Stored explicitly, i.e. not constructed from the AST, for the
+ /// sole reason that QueryCache-related SETTINGS are pruned from the AST (see removeQueryCacheSettings()), which would otherwise
+ /// look ugly in SYSTEM.QUERY_CACHE.
+ const String query_string;
+
/// Ctor to construct a Key for writing into query cache.
Key(ASTPtr ast_,
Block header_,
@@ -69,7 +82,6 @@ public:
Key(ASTPtr ast_, const String & user_name_);
bool operator==(const Key & other) const;
- String queryStringFromAst() const;
};
struct Entry
@@ -174,6 +186,9 @@ public:
void reset();
+ size_t weight() const;
+ size_t count() const;
+
/// Record new execution of query represented by key. Returns number of executions so far.
size_t recordQueryRun(const Key & key);
@@ -181,7 +196,7 @@ public:
std::vector<Cache::KeyMapped> dump() const;
private:
- Cache cache;
+ Cache cache; /// has its own locking --> not protected by mutex
mutable std::mutex mutex;
TimesExecuted times_executed TSA_GUARDED_BY(mutex);
diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h
index de10a445d01..b90acd1d576 100644
--- a/src/Interpreters/Cluster.h
+++ b/src/Interpreters/Cluster.h
@@ -144,12 +144,6 @@ public:
UInt32 shard_index_ = 0,
UInt32 replica_index_ = 0);
- Address(
- const String & host_port_,
- const ClusterConnectionParameters & params,
- UInt32 shard_index_,
- UInt32 replica_index_);
-
Address(
const DatabaseReplicaInfo & info,
const ClusterConnectionParameters & params,
diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
index 0cf3f360994..953e38d56cd 100644
--- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
+++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
@@ -124,6 +124,7 @@ void SelectStreamFactory::createForShard(
{
remote_shards.emplace_back(Shard{
.query = query_ast,
+ .main_table = main_table,
.header = header,
.shard_info = shard_info,
.lazy = lazy,
diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h
index 030c0b77dd5..1cc5a3b1a77 100644
--- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h
+++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h
@@ -50,6 +50,8 @@ public:
{
/// Query and header may be changed depending on shard.
ASTPtr query;
+ /// Used to check the table existence on remote node
+ StorageID main_table;
Block header;
Cluster::ShardInfo shard_info;
diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index e2f1dfe8ba7..2fed626ffb7 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -35,7 +35,12 @@ namespace ErrorCodes
namespace ClusterProxy
{
-ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info, Poco::Logger * log)
+ContextMutablePtr updateSettingsForCluster(bool interserver_mode,
+ ContextPtr context,
+ const Settings & settings,
+ const StorageID & main_table,
+ const SelectQueryInfo * query_info,
+ Poco::Logger * log)
{
Settings new_settings = settings;
new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time);
@@ -43,7 +48,7 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr c
/// If "secret" (in remote_servers) is not in use,
/// user on the shard is not the same as the user on the initiator,
/// hence per-user limits should not be applied.
- if (cluster.getSecret().empty())
+ if (!interserver_mode)
{
/// Does not matter on remote servers, because queries are sent under different user.
new_settings.max_concurrent_queries_for_user = 0;
@@ -170,17 +175,15 @@ void executeQuery(
std::vector<QueryPlanPtr> plans;
SelectStreamFactory::Shards remote_shards;
- auto new_context = updateSettingsForCluster(*query_info.getCluster(), context, settings, main_table, &query_info, log);
- new_context->getClientInfo().distributed_depth += 1;
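+ /// A non-empty cluster secret means interserver mode: shards execute under the same user as the initiator, so per-user limits must stay applied.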
+ auto new_context = updateSettingsForCluster(!query_info.getCluster()->getSecret().empty(), context, settings, main_table, &query_info, log);
+ new_context->increaseDistributedDepth();
size_t shards = query_info.getCluster()->getShardCount();
for (const auto & shard_info : query_info.getCluster()->getShardsInfo())
{
- ASTPtr query_ast_for_shard;
- if (query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1)
+ ASTPtr query_ast_for_shard = query_ast->clone();
+ if (sharding_key_expr && query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1)
{
- query_ast_for_shard = query_ast->clone();
-
OptimizeShardingKeyRewriteInVisitor::Data visitor_data{
sharding_key_expr,
sharding_key_expr->getSampleBlock().getByPosition(0).type,
@@ -191,8 +194,6 @@ void executeQuery(
OptimizeShardingKeyRewriteInVisitor visitor(visitor_data);
visitor.visit(query_ast_for_shard);
}
- else
- query_ast_for_shard = query_ast->clone();
if (shard_filter_generator)
{
diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h
index 41f6da55686..511914e99e4 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.h
+++ b/src/Interpreters/ClusterProxy/executeQuery.h
@@ -34,8 +34,12 @@ class SelectStreamFactory;
/// - optimize_skip_unused_shards_nesting
///
/// @return new Context with adjusted settings
-ContextMutablePtr updateSettingsForCluster(
- const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info = nullptr, Poco::Logger * log = nullptr);
+ContextMutablePtr updateSettingsForCluster(bool interserver_mode,
+ ContextPtr context,
+ const Settings & settings,
+ const StorageID & main_table,
+ const SelectQueryInfo * query_info = nullptr,
+ Poco::Logger * log = nullptr);
using AdditionalShardFilterGenerator = std::function<ASTPtr(uint64_t)>;
/// Execute a distributed query, creating a query plan, from which the query pipeline can be built.
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index a9055bbb1b9..7c3646a9583 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -21,6 +21,7 @@
#include
#include
#include
+#include <Server/ServerType.h>
#include
#include
#include
@@ -357,6 +358,9 @@ struct ContextSharedPart : boost::noncopyable
Context::ConfigReloadCallback config_reload_callback;
+ Context::StartStopServersCallback start_servers_callback;
+ Context::StartStopServersCallback stop_servers_callback;
+
bool is_server_completely_started = false;
#if USE_ROCKSDB
@@ -784,15 +788,32 @@ Strings Context::getWarnings() const
auto lock = getLock();
common_warnings = shared->warnings;
}
+ /// Collect the changed obsolete settings, with their names ordered.
+ std::set<String> obsolete_settings;
for (const auto & setting : settings)
{
if (setting.isValueChanged() && setting.isObsolete())
- {
- common_warnings.emplace_back("Some obsolete setting is changed. "
- "Check 'select * from system.settings where changed' and read the changelog.");
- break;
- }
+ obsolete_settings.emplace(setting.getName());
}
+
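+ /// Build a single warning that names every changed obsolete setting.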
+ if (!obsolete_settings.empty())
+ {
+ bool single_element = obsolete_settings.size() == 1;
+ String res = single_element ? "Obsolete setting [" : "Obsolete settings [";
+
+ bool first = true;
+ for (const auto & setting : obsolete_settings)
+ {
+ res += first ? "" : ", ";
+ res += "'" + setting + "'";
+ first = false;
+ }
+ res = res + "]" + (single_element ? " is" : " are")
+ + " changed. "
+ "Please check 'select * from system.settings where changed and is_obsolete' and read the changelog.";
+ common_warnings.emplace_back(res);
+ }
+
return common_warnings;
}
@@ -1059,25 +1080,54 @@ ConfigurationPtr Context::getUsersConfig()
return shared->users_config;
}
-void Context::setUser(const UUID & user_id_)
+void Context::setUser(const UUID & user_id_, bool set_current_profiles_, bool set_current_roles_, bool set_current_database_)
{
+ /// Prepare lists of user's profiles, constraints, settings, roles.
+
+ std::shared_ptr<const User> user;
+ std::shared_ptr<const ContextAccess> temp_access;
+ if (set_current_profiles_ || set_current_roles_ || set_current_database_)
+ {
+ std::optional<ContextAccessParams> params;
+ {
+ auto lock = getLock();
+ params.emplace(ContextAccessParams{user_id_, /* full_access= */ false, /* use_default_roles = */ true, {}, settings, current_database, client_info});
+ }
+ /// `temp_access` is used here only to extract information about the user, not to actually check access.
+ /// NOTE: AccessControl::getContextAccess() may require some IO work, so Context::getLock() must be unlocked while we're doing this.
+ temp_access = getAccessControl().getContextAccess(*params);
+ user = temp_access->getUser();
+ }
+
+ std::shared_ptr<const SettingsProfilesInfo> profiles;
+ if (set_current_profiles_)
+ profiles = temp_access->getDefaultProfileInfo();
+
+ std::optional<std::vector<UUID>> roles;
+ if (set_current_roles_)
+ roles = user->granted_roles.findGranted(user->default_roles);
+
+ String database;
+ if (set_current_database_)
+ database = user->default_database;
+
+ /// Apply user's profiles, constraints, settings, roles.
auto lock = getLock();
- user_id = user_id_;
+ setUserID(user_id_);
- access = getAccessControl().getContextAccess(
- user_id_, /* current_roles = */ {}, /* use_default_roles = */ true, settings, current_database, client_info);
+ if (profiles)
+ {
+ /// A profile can specify a value and a readonly constraint for same setting at the same time,
+ /// so we shouldn't check constraints here.
+ setCurrentProfiles(*profiles, /* check_constraints= */ false);
+ }
- auto user = access->getUser();
+ if (roles)
+ setCurrentRoles(*roles);
- current_roles = std::make_shared<std::vector<UUID>>(user->granted_roles.findGranted(user->default_roles));
-
- auto default_profile_info = access->getDefaultProfileInfo();
- settings_constraints_and_current_profiles = default_profile_info->getConstraintsAndProfileIDs();
- applySettingsChanges(default_profile_info->settings);
-
- if (!user->default_database.empty())
- setCurrentDatabase(user->default_database);
+ if (!database.empty())
+ setCurrentDatabase(database);
}
std::shared_ptr<const User> Context::getUser() const
@@ -1090,6 +1140,13 @@ String Context::getUserName() const
return getAccess()->getUserName();
}
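+/// Sets only the user ID; profiles, roles, and the default database are not applied here, so access rights are recalculated lazily on first use.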
+void Context::setUserID(const UUID & user_id_)
+{
+ auto lock = getLock();
+ user_id = user_id_;
+ need_recalculate_access = true;
+}
+
std::optional<UUID> Context::getUserID() const
{
auto lock = getLock();
@@ -1107,10 +1164,11 @@ void Context::setQuotaKey(String quota_key_)
void Context::setCurrentRoles(const std::vector<UUID> & current_roles_)
{
auto lock = getLock();
- if (current_roles ? (*current_roles == current_roles_) : current_roles_.empty())
- return;
- current_roles = std::make_shared<std::vector<UUID>>(current_roles_);
- calculateAccessRights();
+ if (current_roles_.empty())
+ current_roles = nullptr;
+ else
+ current_roles = std::make_shared<std::vector<UUID>>(current_roles_);
+ need_recalculate_access = true;
}
void Context::setCurrentRolesDefault()
@@ -1135,20 +1193,6 @@ std::shared_ptr<const EnabledRolesInfo> Context::getRolesInfo() const
}
-void Context::calculateAccessRights()
-{
- auto lock = getLock();
- if (user_id)
- access = getAccessControl().getContextAccess(
- *user_id,
- current_roles ? *current_roles : std::vector<UUID>{},
- /* use_default_roles = */ false,
- settings,
- current_database,
- client_info);
-}
-
-
template <typename... Args>
void Context::checkAccessImpl(const Args &... args) const
{
@@ -1168,32 +1212,55 @@ void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id,
void Context::checkAccess(const AccessRightsElement & element) const { return checkAccessImpl(element); }
void Context::checkAccess(const AccessRightsElements & elements) const { return checkAccessImpl(elements); }
-
std::shared_ptr<const ContextAccess> Context::getAccess() const
{
- auto lock = getLock();
- return access ? access : ContextAccess::getFullAccess();
+ /// A helper function to collect parameters for calculating access rights, called with Context::getLock() acquired.
+ auto get_params = [this]()
+ {
+ /// If setUserID() was never called then this must be the global context with the full access.
+ bool full_access = !user_id;
+
+ return ContextAccessParams{user_id, full_access, /* use_default_roles= */ false, current_roles, settings, current_database, client_info};
+ };
+
+ /// Check if the current access rights are still valid, otherwise get parameters for recalculating access rights.
+ std::optional<ContextAccessParams> params;
+
+ {
+ auto lock = getLock();
+ if (access && !need_recalculate_access)
+ return access; /// No need to recalculate access rights.
+
+ params.emplace(get_params());
+
+ if (access && (access->getParams() == *params))
+ {
+ need_recalculate_access = false;
+ return access; /// No need to recalculate access rights.
+ }
+ }
+
+ /// Calculate new access rights according to the collected parameters.
+ /// NOTE: AccessControl::getContextAccess() may require some IO work, so Context::getLock() must be unlocked while we're doing this.
+ auto res = getAccessControl().getContextAccess(*params);
+
+ {
+ /// If the parameters of access rights were not changed while we were calculating them,
+ /// then we store the new access rights in the Context to allow reusing it later.
+ auto lock = getLock();
+ if (get_params() == *params)
+ {
+ access = res;
+ need_recalculate_access = false;
+ }
+ }
+
+ return res;
}
RowPolicyFilterPtr Context::getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const
{
- auto lock = getLock();
- RowPolicyFilterPtr row_filter_of_initial_user;
- if (row_policies_of_initial_user)
- row_filter_of_initial_user = row_policies_of_initial_user->getFilter(database, table_name, filter_type);
- return getAccess()->getRowPolicyFilter(database, table_name, filter_type, row_filter_of_initial_user);
-}
-
-void Context::enableRowPoliciesOfInitialUser()
-{
- auto lock = getLock();
- row_policies_of_initial_user = nullptr;
- if (client_info.initial_user == client_info.current_user)
- return;
- auto initial_user_id = getAccessControl().find(client_info.initial_user);
- if (!initial_user_id)
- return;
- row_policies_of_initial_user = getAccessControl().tryGetDefaultRowPolicies(*initial_user_id);
+ return getAccess()->getRowPolicyFilter(database, table_name, filter_type);
}
@@ -1209,13 +1276,12 @@ std::optional<QuotaUsage> Context::getQuotaUsage() const
}
-void Context::setCurrentProfile(const String & profile_name)
+void Context::setCurrentProfile(const String & profile_name, bool check_constraints)
{
- auto lock = getLock();
try
{
UUID profile_id = getAccessControl().getID(profile_name);
- setCurrentProfile(profile_id);
+ setCurrentProfile(profile_id, check_constraints);
}
catch (Exception & e)
{
@@ -1224,15 +1290,20 @@ void Context::setCurrentProfile(const String & profile_name)
}
}
-void Context::setCurrentProfile(const UUID & profile_id)
+void Context::setCurrentProfile(const UUID & profile_id, bool check_constraints)
{
- auto lock = getLock();
auto profile_info = getAccessControl().getSettingsProfileInfo(profile_id);
- checkSettingsConstraints(profile_info->settings);
- applySettingsChanges(profile_info->settings);
- settings_constraints_and_current_profiles = profile_info->getConstraintsAndProfileIDs(settings_constraints_and_current_profiles);
+ setCurrentProfiles(*profile_info, check_constraints);
}
+void Context::setCurrentProfiles(const SettingsProfilesInfo & profiles_info, bool check_constraints)
+{
+ auto lock = getLock();
+ if (check_constraints)
+ checkSettingsConstraints(profiles_info.settings);
+ applySettingsChanges(profiles_info.settings);
+ settings_constraints_and_current_profiles = profiles_info.getConstraintsAndProfileIDs(settings_constraints_and_current_profiles);
+}
std::vector Context::getCurrentProfiles() const
{
@@ -1411,15 +1482,24 @@ void Context::addQueryAccessInfo(
void Context::addQueryAccessInfo(const Names & partition_names)
{
if (isGlobalContext())
- {
throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info");
- }
std::lock_guard lock(query_access_info.mutex);
for (const auto & partition_name : partition_names)
- {
query_access_info.partitions.emplace(partition_name);
- }
+}
+
+void Context::addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name)
+{
+ if (!qualified_projection_name)
+ return;
+
+ if (isGlobalContext())
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info");
+
+ std::lock_guard lock(query_access_info.mutex);
+ query_access_info.projections.emplace(fmt::format(
+ "{}.{}", qualified_projection_name.storage_id.getFullTableName(), backQuoteIfNeed(qualified_projection_name.projection_name)));
}
void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String & created_object) const
@@ -1706,27 +1786,8 @@ Settings Context::getSettings() const
void Context::setSettings(const Settings & settings_)
{
auto lock = getLock();
- const auto old_readonly = settings.readonly;
- const auto old_allow_ddl = settings.allow_ddl;
- const auto old_allow_introspection_functions = settings.allow_introspection_functions;
- const auto old_display_secrets = settings.format_display_secrets_in_show_and_select;
-
settings = settings_;
-
- if ((settings.readonly != old_readonly)
- || (settings.allow_ddl != old_allow_ddl)
- || (settings.allow_introspection_functions != old_allow_introspection_functions)
- || (settings.format_display_secrets_in_show_and_select != old_display_secrets))
- calculateAccessRights();
-}
-
-void Context::recalculateAccessRightsIfNeeded(std::string_view name)
-{
- if (name == "readonly"
- || name == "allow_ddl"
- || name == "allow_introspection_functions"
- || name == "format_display_secrets_in_show_and_select")
- calculateAccessRights();
+ need_recalculate_access = true;
}
void Context::setSetting(std::string_view name, const String & value)
@@ -1738,7 +1799,8 @@ void Context::setSetting(std::string_view name, const String & value)
return;
}
settings.set(name, value);
- recalculateAccessRightsIfNeeded(name);
+ if (ContextAccessParams::dependsOnSettingName(name))
+ need_recalculate_access = true;
}
void Context::setSetting(std::string_view name, const Field & value)
@@ -1750,7 +1812,8 @@ void Context::setSetting(std::string_view name, const Field & value)
return;
}
settings.set(name, value);
- recalculateAccessRightsIfNeeded(name);
+ if (ContextAccessParams::dependsOnSettingName(name))
+ need_recalculate_access = true;
}
void Context::applySettingChange(const SettingChange & change)
@@ -1859,7 +1922,7 @@ void Context::setCurrentDatabase(const String & name)
DatabaseCatalog::instance().assertDatabaseExists(name);
auto lock = getLock();
current_database = name;
- calculateAccessRights();
+ need_recalculate_access = true;
}
void Context::setCurrentQueryId(const String & query_id)
@@ -2172,9 +2235,9 @@ BackupsWorker & Context::getBackupsWorker() const
const bool allow_concurrent_restores = this->getConfigRef().getBool("backups.allow_concurrent_restores", true);
const auto & config = getConfigRef();
- const auto & settings_ = getSettingsRef();
- UInt64 backup_threads = config.getUInt64("backup_threads", settings_.backup_threads);
- UInt64 restore_threads = config.getUInt64("restore_threads", settings_.restore_threads);
+ const auto & settings_ref = getSettingsRef();
+ UInt64 backup_threads = config.getUInt64("backup_threads", settings_ref.backup_threads);
+ UInt64 restore_threads = config.getUInt64("restore_threads", settings_ref.restore_threads);
if (!shared->backups_worker)
shared->backups_worker.emplace(backup_threads, restore_threads, allow_concurrent_backups, allow_concurrent_restores);
@@ -2840,16 +2903,6 @@ std::map Context::getAuxiliaryZooKeepers() const
}
#if USE_ROCKSDB
-MergeTreeMetadataCachePtr Context::getMergeTreeMetadataCache() const
-{
- auto cache = tryGetMergeTreeMetadataCache();
- if (!cache)
- throw Exception(
- ErrorCodes::LOGICAL_ERROR,
- "Merge tree metadata cache is not initialized, please add config merge_tree_metadata_cache in config.xml and restart");
- return cache;
-}
-
MergeTreeMetadataCachePtr Context::tryGetMergeTreeMetadataCache() const
{
return shared->merge_tree_metadata_cache;
@@ -3147,6 +3200,12 @@ void Context::initializeMergeTreeMetadataCache(const String & dir, size_t size)
}
#endif
+/// Called after an unexpected crash happens.
+void Context::handleCrash() const
+{
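+ /// Let the system logs flush their in-memory buffers (see flush_on_crash) before the process dies.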
+ shared->system_logs->handleCrash();
+}
+
bool Context::hasTraceCollector() const
{
return shared->hasTraceCollector();
@@ -3629,6 +3688,36 @@ void Context::reloadConfig() const
shared->config_reload_callback();
}
+void Context::setStartServersCallback(StartStopServersCallback && callback)
+{
+ /// Is initialized at server startup, so lock isn't required. Otherwise use mutex.
+ shared->start_servers_callback = std::move(callback);
+}
+
+void Context::setStopServersCallback(StartStopServersCallback && callback)
+{
+ /// Is initialized at server startup, so lock isn't required. Otherwise use mutex.
+ shared->stop_servers_callback = std::move(callback);
+}
+
+void Context::startServers(const ServerType & server_type) const
+{
+ /// Use mutex if callback may be changed after startup.
+ if (!shared->start_servers_callback)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't start servers because start_servers_callback is not set.");
+
+ shared->start_servers_callback(server_type);
+}
+
+void Context::stopServers(const ServerType & server_type) const
+{
+ /// Use mutex if callback may be changed after startup.
+ if (!shared->stop_servers_callback)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't stop servers because stop_servers_callback is not set.");
+
+ shared->stop_servers_callback(server_type);
+}
+
void Context::shutdown()
{
@@ -3833,6 +3922,129 @@ void Context::resetInputCallbacks()
}
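+/// The setters below replace direct mutation of getClientInfo(); fields that affect access rights also trigger a lazy recalculation.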
+void Context::setClientInfo(const ClientInfo & client_info_)
+{
+ client_info = client_info_;
+ need_recalculate_access = true;
+}
+
+void Context::setClientName(const String & client_name)
+{
+ client_info.client_name = client_name;
+}
+
+void Context::setClientInterface(ClientInfo::Interface interface)
+{
+ client_info.interface = interface;
+ need_recalculate_access = true;
+}
+
+void Context::setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version)
+{
+ client_info.client_version_major = client_version_major;
+ client_info.client_version_minor = client_version_minor;
+ client_info.client_version_patch = client_version_patch;
+ client_info.client_tcp_protocol_version = client_tcp_protocol_version;
+}
+
+void Context::setClientConnectionId(uint32_t connection_id_)
+{
+ client_info.connection_id = connection_id_;
+}
+
+void Context::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer)
+{
+ client_info.http_method = http_method;
+ client_info.http_user_agent = http_user_agent;
+ client_info.http_referer = http_referer;
+ need_recalculate_access = true;
+}
+
+void Context::setForwardedFor(const String & forwarded_for)
+{
+ client_info.forwarded_for = forwarded_for;
+ need_recalculate_access = true;
+}
+
+void Context::setQueryKind(ClientInfo::QueryKind query_kind)
+{
+ client_info.query_kind = query_kind;
+}
+
+void Context::setQueryKindInitial()
+{
+ /// TODO: Try to combine this function with setQueryKind().
+ client_info.setInitialQuery();
+}
+
+void Context::setQueryKindReplicatedDatabaseInternal()
+{
+ /// TODO: Try to combine this function with setQueryKind().
+ client_info.is_replicated_database_internal = true;
+}
+
+void Context::setCurrentUserName(const String & current_user_name)
+{
+ /// TODO: Try to combine this function with setUser().
+ client_info.current_user = current_user_name;
+ need_recalculate_access = true;
+}
+
+void Context::setCurrentAddress(const Poco::Net::SocketAddress & current_address)
+{
+ client_info.current_address = current_address;
+ need_recalculate_access = true;
+}
+
+void Context::setInitialUserName(const String & initial_user_name)
+{
+ client_info.initial_user = initial_user_name;
+ need_recalculate_access = true;
+}
+
+void Context::setInitialAddress(const Poco::Net::SocketAddress & initial_address)
+{
+ client_info.initial_address = initial_address;
+}
+
+void Context::setInitialQueryId(const String & initial_query_id)
+{
+ client_info.initial_query_id = initial_query_id;
+}
+
+void Context::setInitialQueryStartTime(std::chrono::time_point<std::chrono::system_clock> initial_query_start_time)
+{
+ client_info.initial_query_start_time = timeInSeconds(initial_query_start_time);
+ client_info.initial_query_start_time_microseconds = timeInMicroseconds(initial_query_start_time);
+}
+
+void Context::setQuotaClientKey(const String & quota_key_)
+{
+ client_info.quota_key = quota_key_;
+ need_recalculate_access = true;
+}
+
+void Context::setConnectionClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version)
+{
+ client_info.connection_client_version_major = client_version_major;
+ client_info.connection_client_version_minor = client_version_minor;
+ client_info.connection_client_version_patch = client_version_patch;
+ client_info.connection_tcp_protocol_version = client_tcp_protocol_version;
+}
+
+void Context::setReplicaInfo(bool collaborate_with_initiator, size_t all_replicas_count, size_t number_of_current_replica)
+{
+ client_info.collaborate_with_initiator = collaborate_with_initiator;
+ client_info.count_participating_replicas = all_replicas_count;
+ client_info.number_of_current_replica = number_of_current_replica;
+}
+
+void Context::increaseDistributedDepth()
+{
+ ++client_info.distributed_depth;
+}
+
+
StorageID Context::resolveStorageID(StorageID storage_id, StorageNamespace where) const
{
if (storage_id.uuid != UUIDHelpers::Nil)
@@ -4322,10 +4534,10 @@ ReadSettings Context::getReadSettings() const
ReadSettings Context::getBackupReadSettings() const
{
- ReadSettings settings_ = getReadSettings();
- settings_.remote_throttler = getBackupsThrottler();
- settings_.local_throttler = getBackupsThrottler();
- return settings_;
+ ReadSettings read_settings = getReadSettings();
+ read_settings.remote_throttler = getBackupsThrottler();
+ read_settings.local_throttler = getBackupsThrottler();
+ return read_settings;
}
WriteSettings Context::getWriteSettings() const
@@ -4354,14 +4566,13 @@ std::shared_ptr<AsyncReadCounters> Context::getAsyncReadCounters() const
Context::ParallelReplicasMode Context::getParallelReplicasMode() const
{
- const auto & settings_ = getSettingsRef();
+ const auto & settings_ref = getSettingsRef();
using enum Context::ParallelReplicasMode;
- if (!settings_.parallel_replicas_custom_key.value.empty())
+ if (!settings_ref.parallel_replicas_custom_key.value.empty())
return CUSTOM_KEY;
- if (settings_.allow_experimental_parallel_reading_from_replicas > 0
- && !settings_.use_hedged_requests)
+ if (settings_ref.allow_experimental_parallel_reading_from_replicas > 0 && !settings_ref.use_hedged_requests)
return READ_TASKS;
return SAMPLE_KEY;
@@ -4369,17 +4580,15 @@ Context::ParallelReplicasMode Context::getParallelReplicasMode() const
bool Context::canUseParallelReplicasOnInitiator() const
{
- const auto & settings_ = getSettingsRef();
- return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS
- && settings_.max_parallel_replicas > 1
+ const auto & settings_ref = getSettingsRef();
+ return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS && settings_ref.max_parallel_replicas > 1
&& !getClientInfo().collaborate_with_initiator;
}
bool Context::canUseParallelReplicasOnFollower() const
{
- const auto & settings_ = getSettingsRef();
- return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS
- && settings_.max_parallel_replicas > 1
+ const auto & settings_ref = getSettingsRef();
+ return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS && settings_ref.max_parallel_replicas > 1
&& getClientInfo().collaborate_with_initiator;
}
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index c92e4a39b92..0d567816ec9 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -51,8 +51,8 @@ struct ContextSharedPart;
class ContextAccess;
struct User;
using UserPtr = std::shared_ptr<const User>;
+struct SettingsProfilesInfo;
struct EnabledRolesInfo;
-class EnabledRowPolicies;
struct RowPolicyFilter;
using RowPolicyFilterPtr = std::shared_ptr<const RowPolicyFilter>;
class EnabledQuota;
@@ -134,6 +134,7 @@ using StoragePolicyPtr = std::shared_ptr<const IStoragePolicy>;
using StoragePoliciesMap = std::map<String, StoragePolicyPtr>;
class StoragePolicySelector;
using StoragePolicySelectorPtr = std::shared_ptr<const StoragePolicySelector>;
+class ServerType;
template <class Queue>
class MergeTreeBackgroundExecutor;
@@ -249,8 +250,8 @@ private:
std::optional<UUID> user_id;
std::shared_ptr<std::vector<UUID>> current_roles;
std::shared_ptr<const SettingsConstraintsAndProfileIDs> settings_constraints_and_current_profiles;
- std::shared_ptr<const ContextAccess> access;
- std::shared_ptr<const EnabledRowPolicies> row_policies_of_initial_user;
+ mutable std::shared_ptr<const ContextAccess> access;
+ mutable bool need_recalculate_access = true;
String current_database;
Settings settings; /// Setting for query execution.
@@ -530,12 +531,14 @@ public:
/// Sets the current user assuming that he/she is already authenticated.
/// WARNING: This function doesn't check password!
- void setUser(const UUID & user_id_);
-
+ void setUser(const UUID & user_id_, bool set_current_profiles_ = true, bool set_current_roles_ = true, bool set_current_database_ = true);
UserPtr getUser() const;
- String getUserName() const;
+
+ void setUserID(const UUID & user_id_);
std::optional<UUID> getUserID() const;
+ String getUserName() const;
+
void setQuotaKey(String quota_key_);
void setCurrentRoles(const std::vector<UUID> & current_roles_);
@@ -544,8 +547,9 @@ public:
boost::container::flat_set<UUID> getEnabledRoles() const;
std::shared_ptr<const EnabledRolesInfo> getRolesInfo() const;
- void setCurrentProfile(const String & profile_name);
- void setCurrentProfile(const UUID & profile_id);
+ void setCurrentProfile(const String & profile_name, bool check_constraints = true);
+ void setCurrentProfile(const UUID & profile_id, bool check_constraints = true);
+ void setCurrentProfiles(const SettingsProfilesInfo & profiles_info, bool check_constraints = true);
std::vector<UUID> getCurrentProfiles() const;
std::vector<UUID> getEnabledProfiles() const;
@@ -568,13 +572,6 @@ public:
RowPolicyFilterPtr getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const;
- /// Finds and sets extra row policies to be used based on `client_info.initial_user`,
- /// if the initial user exists.
- /// TODO: we need a better solution here. It seems we should pass the initial row policy
- /// because a shard is allowed to not have the initial user or it might be another user
- /// with the same name.
- void enableRowPoliciesOfInitialUser();
-
std::shared_ptr<const EnabledQuota> getQuota() const;
std::optional<QuotaUsage> getQuotaUsage() const;
@@ -598,9 +595,33 @@ public:
InputBlocksReader getInputBlocksReaderCallback() const;
void resetInputCallbacks();
- ClientInfo & getClientInfo() { return client_info; }
+ /// Returns information about the client executing a query.
const ClientInfo & getClientInfo() const { return client_info; }
+ /// Modify stored in the context information about the client executing a query.
+ void setClientInfo(const ClientInfo & client_info_);
+ void setClientName(const String & client_name);
+ void setClientInterface(ClientInfo::Interface interface);
+ void setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version);
+ void setClientConnectionId(uint32_t connection_id);
+ void setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer);
+ void setForwardedFor(const String & forwarded_for);
+ void setQueryKind(ClientInfo::QueryKind query_kind);
+ void setQueryKindInitial();
+ void setQueryKindReplicatedDatabaseInternal();
+ void setCurrentUserName(const String & current_user_name);
+ void setCurrentAddress(const Poco::Net::SocketAddress & current_address);
+ void setInitialUserName(const String & initial_user_name);
+ void setInitialAddress(const Poco::Net::SocketAddress & initial_address);
+ void setInitialQueryId(const String & initial_query_id);
+ void setInitialQueryStartTime(std::chrono::time_point<std::chrono::system_clock> initial_query_start_time);
+ void setQuotaClientKey(const String & quota_key);
+ void setConnectionClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version);
+ void setReplicaInfo(bool collaborate_with_initiator, size_t all_replicas_count, size_t number_of_current_replica);
+ void increaseDistributedDepth();
+ const OpenTelemetry::TracingContext & getClientTraceContext() const { return client_info.client_trace_context; }
+ OpenTelemetry::TracingContext & getClientTraceContext() { return client_info.client_trace_context; }
+
enum StorageNamespace
{
ResolveGlobal = 1u, /// Database name must be specified
@@ -638,6 +659,14 @@ public:
const String & view_name = {});
void addQueryAccessInfo(const Names & partition_names);
+ struct QualifiedProjectionName
+ {
+ StorageID storage_id = StorageID::createEmpty();
+ String projection_name;
+ explicit operator bool() const { return !projection_name.empty(); }
+ };
+ void addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name);
+
/// Supported factories for records in query_log
enum class QueryLogFactories
@@ -860,7 +889,6 @@ public:
void setClientProtocolVersion(UInt64 version);
#if USE_ROCKSDB
- MergeTreeMetadataCachePtr getMergeTreeMetadataCache() const;
MergeTreeMetadataCachePtr tryGetMergeTreeMetadataCache() const;
#endif
@@ -969,6 +997,9 @@ public:
void initializeMergeTreeMetadataCache(const String & dir, size_t size);
#endif
+ /// Called after an unexpected crash happens.
+ void handleCrash() const;
+
bool hasTraceCollector() const;
/// Nullptr if the query log is not ready for this moment.
@@ -1029,6 +1060,13 @@ public:
void setConfigReloadCallback(ConfigReloadCallback && callback);
void reloadConfig() const;
+ using StartStopServersCallback = std::function<void(const ServerType &)>;
+ void setStartServersCallback(StartStopServersCallback && callback);
+ void setStopServersCallback(StartStopServersCallback && callback);
+
+ void startServers(const ServerType & server_type) const;
+ void stopServers(const ServerType & server_type) const;
+
void shutdown();
bool isInternalQuery() const { return is_internal_query; }
@@ -1154,10 +1192,6 @@ private:
void initGlobal();
- /// Compute and set actual user settings, client_info.current_user should be set
- void calculateAccessRights();
- void recalculateAccessRightsIfNeeded(std::string_view setting_name);
-
template <typename... Args>
void checkAccessImpl(const Args &... args) const;
diff --git a/src/Interpreters/CrashLog.cpp b/src/Interpreters/CrashLog.cpp
index 379c9122cc8..ec693eb7931 100644
--- a/src/Interpreters/CrashLog.cpp
+++ b/src/Interpreters/CrashLog.cpp
@@ -83,9 +83,6 @@ void collectCrashLog(Int32 signal, UInt64 thread_id, const String & query_id, co
stack_trace.toStringEveryLine([&trace_full](std::string_view line) { trace_full.push_back(line); });
CrashLogElement element{static_cast<time_t>(time / 1000000000), time, signal, thread_id, query_id, trace, trace_full};
- crash_log_owned->add(element);
- /// Notify savingThreadFunction to start flushing crash log
- /// Crash log is storing in parallel with the signal processing thread.
- crash_log_owned->notifyFlush(true);
+ crash_log_owned->add(std::move(element));
}
}
diff --git a/src/Interpreters/CrashLog.h b/src/Interpreters/CrashLog.h
index 78794574c82..65714295be4 100644
--- a/src/Interpreters/CrashLog.h
+++ b/src/Interpreters/CrashLog.h
@@ -45,6 +45,11 @@ public:
{
crash_log = crash_log_;
}
+
+ static consteval size_t getDefaultMaxSize() { return 1024; }
+ static consteval size_t getDefaultReservedSize() { return 1024; }
+ static consteval size_t getDefaultFlushIntervalMilliseconds() { return 1000; }
+ static consteval bool shouldNotifyFlushOnCrash() { return true; }
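+
+ /// A crashed server cannot rely on background flushes, so the crash log keeps small buffers and wakes the flushing thread on crash.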
};
}
diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index b24856a6146..4e684f5899f 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -199,7 +199,7 @@ ContextMutablePtr DDLTaskBase::makeQueryContext(ContextPtr from_context, const Z
auto query_context = Context::createCopy(from_context);
query_context->makeQueryContext();
query_context->setCurrentQueryId(""); // generate random query_id
- query_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
+ query_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY);
if (entry.settings)
query_context->applySettingsChanges(*entry.settings);
return query_context;
@@ -439,8 +439,8 @@ void DatabaseReplicatedTask::parseQueryFromEntry(ContextPtr context)
ContextMutablePtr DatabaseReplicatedTask::makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & zookeeper)
{
auto query_context = DDLTaskBase::makeQueryContext(from_context, zookeeper);
- query_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
- query_context->getClientInfo().is_replicated_database_internal = true;
+ query_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY);
+ query_context->setQueryKindReplicatedDatabaseInternal();
query_context->setCurrentDatabase(database->getDatabaseName());
auto txn = std::make_shared(zookeeper, database->zookeeper_path, is_initial_query, entry_path);
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 81c78000ac3..92e6bcb326c 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -476,7 +476,7 @@ bool DDLWorker::tryExecuteQuery(DDLTaskBase & task, const ZooKeeperPtr & zookeep
query_context->setSetting("implicit_transaction", Field{0});
}
- query_context->getClientInfo().initial_query_id = task.entry.initial_query_id;
+ query_context->setInitialQueryId(task.entry.initial_query_id);
if (!task.is_initial_query)
query_scope.emplace(query_context);
@@ -551,7 +551,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
chassert(!task.completely_processed);
/// Setup tracing context on current thread for current DDL
- OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__ ,
+ OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__,
task.entry.tracing_context,
this->context->getOpenTelemetrySpanLog());
tracing_ctx_holder.root_span.kind = OpenTelemetry::CONSUMER;
diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp
index 23a67f4bc2f..13cac5afb1b 100644
--- a/src/Interpreters/DatabaseCatalog.cpp
+++ b/src/Interpreters/DatabaseCatalog.cpp
@@ -349,6 +349,15 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
DatabasePtr database;
{
+ // Callers assume that this method doesn't throw exceptions, but getDatabaseName() will throw if there is no database part.
+ // So, fail early and gracefully...
+ if (!table_id.hasDatabase())
+ {
+ if (exception)
+ exception->emplace(Exception(ErrorCodes::UNKNOWN_DATABASE, "Empty database name"));
+ return {};
+ }
+
std::lock_guard lock{databases_mutex};
auto it = databases.find(table_id.getDatabaseName());
if (databases.end() == it)
@@ -697,6 +706,7 @@ DatabaseCatalog::DatabaseCatalog(ContextMutablePtr global_context_)
, loading_dependencies{"LoadingDeps"}
, view_dependencies{"ViewDeps"}
, log(&Poco::Logger::get("DatabaseCatalog"))
+ , first_async_drop_in_queue(tables_marked_dropped.end())
{
}
@@ -959,9 +969,17 @@ void DatabaseCatalog::enqueueDroppedTableCleanup(StorageID table_id, StoragePtr
std::lock_guard lock(tables_marked_dropped_mutex);
if (ignore_delay)
- tables_marked_dropped.push_front({table_id, table, dropped_metadata_path, drop_time});
+ {
+ /// Insert it before first_async_drop_in_queue, so sync drop queries will have priority over async ones,
+ /// but the queue will remain fair for multiple sync drop queries.
+ tables_marked_dropped.emplace(first_async_drop_in_queue, TableMarkedAsDropped{table_id, table, dropped_metadata_path, drop_time});
+ }
else
+ {
tables_marked_dropped.push_back({table_id, table, dropped_metadata_path, drop_time + drop_delay_sec});
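+ /// If there were no async entries yet, the freshly appended tail becomes the first async drop.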
+ if (first_async_drop_in_queue == tables_marked_dropped.end())
+ --first_async_drop_in_queue;
+ }
tables_marked_dropped_ids.insert(table_id.uuid);
CurrentMetrics::add(CurrentMetrics::TablesToDropQueueSize, 1);
@@ -1012,6 +1030,8 @@ void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id)
/// This maybe throw exception.
renameNoReplace(latest_metadata_dropped_path, table_metadata_path);
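+ /// Keep the boundary iterator valid: if the erased element is the first async entry, advance the boundary.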
+ if (first_async_drop_in_queue == it_dropped_table)
+ ++first_async_drop_in_queue;
tables_marked_dropped.erase(it_dropped_table);
[[maybe_unused]] auto removed = tables_marked_dropped_ids.erase(dropped_table.table_id.uuid);
assert(removed);
@@ -1074,6 +1094,8 @@ void DatabaseCatalog::dropTableDataTask()
table = std::move(*it);
LOG_INFO(log, "Have {} tables in drop queue ({} of them are in use), will try drop {}",
tables_marked_dropped.size(), tables_in_use_count, table.table_id.getNameForLogs());
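+ /// Erasing the element the boundary iterator points at would invalidate it, so move it forward first.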
+ if (first_async_drop_in_queue == it)
+ ++first_async_drop_in_queue;
tables_marked_dropped.erase(it);
/// Schedule the task as soon as possible, while there are suitable tables to drop.
schedule_after_ms = 0;
@@ -1110,6 +1132,8 @@ void DatabaseCatalog::dropTableDataTask()
table.drop_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) + drop_error_cooldown_sec;
std::lock_guard lock(tables_marked_dropped_mutex);
tables_marked_dropped.emplace_back(std::move(table));
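+ /// The retried table is re-enqueued as an async drop; if no async entries existed, it becomes the first one.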
+ if (first_async_drop_in_queue == tables_marked_dropped.end())
+ --first_async_drop_in_queue;
/// If list of dropped tables was empty, schedule a task to retry deletion.
if (tables_marked_dropped.size() == 1)
{
diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h
index d502505027f..805d7786569 100644
--- a/src/Interpreters/DatabaseCatalog.h
+++ b/src/Interpreters/DatabaseCatalog.h
@@ -323,6 +323,7 @@ private:
mutable std::mutex ddl_guards_mutex;
TablesMarkedAsDropped tables_marked_dropped TSA_GUARDED_BY(tables_marked_dropped_mutex);
+ TablesMarkedAsDropped::iterator first_async_drop_in_queue TSA_GUARDED_BY(tables_marked_dropped_mutex);
std::unordered_set<UUID> tables_marked_dropped_ids TSA_GUARDED_BY(tables_marked_dropped_mutex);
mutable std::mutex tables_marked_dropped_mutex;
diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp
index 9a450fabd5b..9aee61eb8f0 100644
--- a/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/src/Interpreters/ExpressionAnalyzer.cpp
@@ -548,15 +548,17 @@ void ExpressionAnalyzer::getRootActionsForWindowFunctions(const ASTPtr & ast, bo
void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, AggregateDescriptions & descriptions)
{
- for (const ASTFunction * node : aggregates())
+ for (const ASTPtr & ast : aggregates())
{
+ const ASTFunction & node = typeid_cast<const ASTFunction &>(*ast);
+
AggregateDescription aggregate;
- if (node->arguments)
- getRootActionsNoMakeSet(node->arguments, actions);
+ if (node.arguments)
+ getRootActionsNoMakeSet(node.arguments, actions);
- aggregate.column_name = node->getColumnName();
+ aggregate.column_name = node.getColumnName();
- const ASTs & arguments = node->arguments ? node->arguments->children : ASTs();
+ const ASTs & arguments = node.arguments ? node.arguments->children : ASTs();
aggregate.argument_names.resize(arguments.size());
DataTypes types(arguments.size());
@@ -568,7 +570,7 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, Aggr
{
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Unknown identifier '{}' in aggregate function '{}'",
- name, node->formatForErrorMessage());
+ name, node.formatForErrorMessage());
}
types[i] = dag_node->result_type;
@@ -576,8 +578,8 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, Aggr
}
AggregateFunctionProperties properties;
- aggregate.parameters = (node->parameters) ? getAggregateFunctionParametersArray(node->parameters, "", getContext()) : Array();
- aggregate.function = AggregateFunctionFactory::instance().get(node->name, types, aggregate.parameters, properties);
+ aggregate.parameters = (node.parameters) ? getAggregateFunctionParametersArray(node.parameters, "", getContext()) : Array();
+ aggregate.function = AggregateFunctionFactory::instance().get(node.name, types, aggregate.parameters, properties);
descriptions.push_back(aggregate);
}
@@ -744,12 +746,13 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
}
// Window functions
- for (const ASTFunction * function_node : syntax->window_function_asts)
+ for (const ASTPtr & ast : syntax->window_function_asts)
{
- assert(function_node->is_window_function);
+ const ASTFunction & function_node = typeid_cast<const ASTFunction &>(*ast);
+ assert(function_node.is_window_function);
WindowFunctionDescription window_function;
- window_function.function_node = function_node;
+ window_function.function_node = &function_node;
window_function.column_name
= window_function.function_node->getColumnName();
window_function.function_parameters
@@ -760,7 +763,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
// Requiring a constant reference to a shared pointer to non-const AST
// doesn't really look sane, but the visitor does indeed require it.
- // Hence we clone the node (not very sane either, I know).
+ // Hence, we clone the node (not very sane either, I know).
getRootActionsNoMakeSet(window_function.function_node->clone(), actions);
const ASTs & arguments
@@ -793,22 +796,22 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
// Find the window corresponding to this function. It may be either
// referenced by name and previously defined in WINDOW clause, or it
// may be defined inline.
- if (!function_node->window_name.empty())
+ if (!function_node.window_name.empty())
{
- auto it = window_descriptions.find(function_node->window_name);
+ auto it = window_descriptions.find(function_node.window_name);
if (it == std::end(window_descriptions))
{
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Window '{}' is not defined (referenced by '{}')",
- function_node->window_name,
- function_node->formatForErrorMessage());
+ function_node.window_name,
+ function_node.formatForErrorMessage());
}
it->second.window_functions.push_back(window_function);
}
else
{
- const auto & definition = function_node->window_definition->as<
+ const auto & definition = function_node.window_definition->as<
const ASTWindowDefinition &>();
WindowDescription desc;
desc.window_name = definition.getDefaultWindowName();
@@ -1323,10 +1326,13 @@ void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(Expression
GetAggregatesVisitor(data).visit(select_query->orderBy());
/// TODO: data.aggregates -> aggregates()
- for (const ASTFunction * node : data.aggregates)
- if (node->arguments)
- for (auto & argument : node->arguments->children)
+ for (const ASTPtr & ast : data.aggregates)
+ {
+ const ASTFunction & node = typeid_cast<const ASTFunction &>(*ast);
+ if (node.arguments)
+ for (auto & argument : node.arguments->children)
getRootActions(argument, only_types, step.actions());
+ }
}
void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h
index 271c3943afc..941194e69ff 100644
--- a/src/Interpreters/ExpressionAnalyzer.h
+++ b/src/Interpreters/ExpressionAnalyzer.h
@@ -168,7 +168,7 @@ protected:
const ConstStoragePtr & storage() const { return syntax->storage; } /// The main table in FROM clause, if exists.
const TableJoin & analyzedJoin() const { return *syntax->analyzed_join; }
const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; }
- const std::vector<const ASTFunction *> & aggregates() const { return syntax->aggregates; }
+ const ASTs & aggregates() const { return syntax->aggregates; }
/// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
void initGlobalSubqueriesAndExternalTables(bool do_global, bool is_explain);
diff --git a/src/Interpreters/GetAggregatesVisitor.h b/src/Interpreters/GetAggregatesVisitor.h
index fdf54de3e57..7bf6591af69 100644
--- a/src/Interpreters/GetAggregatesVisitor.h
+++ b/src/Interpreters/GetAggregatesVisitor.h
@@ -26,8 +26,8 @@ public:
// Explicit empty initializers are needed to make designated initializers
// work on GCC 10.
std::unordered_set<String> uniq_names {};
- std::vector<const ASTFunction *> aggregates {};
- std::vector<const ASTFunction *> window_functions {};
+ ASTs aggregates;
+ ASTs window_functions;
};
static bool needChildVisit(const ASTPtr & node, const ASTPtr & child)
@@ -61,7 +61,7 @@ public:
}
private:
- static void visit(const ASTFunction & node, const ASTPtr &, Data & data)
+ static void visit(const ASTFunction & node, const ASTPtr & ast, Data & data)
{
if (isAggregateFunction(node))
{
@@ -74,7 +74,7 @@ private:
return;
data.uniq_names.insert(column_name);
- data.aggregates.push_back(&node);
+ data.aggregates.push_back(ast);
}
else if (node.is_window_function)
{
@@ -87,7 +87,7 @@ private:
return;
data.uniq_names.insert(column_name);
- data.window_functions.push_back(&node);
+ data.window_functions.push_back(ast);
}
}
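
The switch from std::vector<const ASTFunction *> to ASTs in the visitor data is an ownership fix: the collected entries now share ownership of their nodes instead of pointing into an AST that may be rewritten later. A hedged sketch of the consuming pattern, with toy stand-in types and dynamic_cast standing in for ClickHouse's typeid_cast:

    #include <memory>
    #include <vector>

    struct IAST { virtual ~IAST() = default; };
    using ASTPtr = std::shared_ptr<IAST>;
    using ASTs = std::vector<ASTPtr>;

    struct ASTFunction : IAST { bool is_window_function = false; };

    int main()
    {
        /// Collecting ASTPtr (shared ownership) keeps the nodes alive even if
        /// the surrounding query AST is rewritten after the visitor ran.
        ASTs aggregates;
        aggregates.push_back(std::make_shared<ASTFunction>());

        for (const ASTPtr & ast : aggregates)
        {
            /// Consumers recover the concrete node type on use.
            const auto & node = dynamic_cast<const ASTFunction &>(*ast);
            (void)node.is_window_function;
        }
    }
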
diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp
index edf604bc0b4..5d72cf20740 100644
--- a/src/Interpreters/GraceHashJoin.cpp
+++ b/src/Interpreters/GraceHashJoin.cpp
@@ -302,7 +302,8 @@ void GraceHashJoin::initBuckets()
bool GraceHashJoin::isSupported(const std::shared_ptr<TableJoin> & table_join)
{
bool is_asof = (table_join->strictness() == JoinStrictness::Asof);
- return !is_asof && isInnerOrLeft(table_join->kind()) && table_join->oneDisjunct();
+ auto kind = table_join->kind();
+ return !is_asof && (isInner(kind) || isLeft(kind) || isRight(kind) || isFull(kind)) && table_join->oneDisjunct();
}
GraceHashJoin::~GraceHashJoin() = default;
@@ -322,7 +323,6 @@ bool GraceHashJoin::hasMemoryOverflow(size_t total_rows, size_t total_bytes) con
/// One row can't be split, avoid loop
if (total_rows < 2)
return false;
-
bool has_overflow = !table_join->sizeLimits().softCheck(total_rows, total_bytes);
if (has_overflow)
@@ -494,17 +494,30 @@ bool GraceHashJoin::alwaysReturnsEmptySet() const
return hash_join_is_empty;
}
-IBlocksStreamPtr GraceHashJoin::getNonJoinedBlocks(const Block &, const Block &, UInt64) const
+/// Each bucket is handled by the following steps:
+/// 1. build a hash_join from the right side blocks.
+/// 2. join the left side with the hash_join.
+/// 3. read the right non-joined blocks from the hash_join.
+/// Buckets are handled one by one; a hash_join is not released before its right non-joined blocks are emitted.
+///
+/// There is a finished counter in JoiningTransform/DelayedJoinedBlocksWorkerTransform:
+/// only one processor can take the non-joined blocks from the right stream, and it ensures that all rows from
+/// the left stream have been emitted before this.
+IBlocksStreamPtr
+GraceHashJoin::getNonJoinedBlocks(const Block & left_sample_block_, const Block & result_sample_block_, UInt64 max_block_size_) const
{
- /// We do no support returning non joined blocks here.
- /// TODO: They _should_ be reported by getDelayedBlocks instead
- return nullptr;
+ return hash_join->getNonJoinedBlocks(left_sample_block_, result_sample_block_, max_block_size_);
}
class GraceHashJoin::DelayedBlocks : public IBlocksStream
{
public:
- explicit DelayedBlocks(size_t current_bucket_, Buckets buckets_, InMemoryJoinPtr hash_join_, const Names & left_key_names_, const Names & right_key_names_)
+ explicit DelayedBlocks(
+ size_t current_bucket_,
+ Buckets buckets_,
+ InMemoryJoinPtr hash_join_,
+ const Names & left_key_names_,
+ const Names & right_key_names_)
: current_bucket(current_bucket_)
, buckets(std::move(buckets_))
, hash_join(std::move(hash_join_))
@@ -522,12 +535,15 @@ public:
do
{
+ // One DelayedBlocks is shared among multiple DelayedJoinedBlocksWorkerTransform.
+ // There is a lock inside left_reader.read().
block = left_reader.read();
if (!block)
{
return {};
}
+ // The block comes from left_reader and needs to be joined with the right table to get the result.
Blocks blocks = JoinCommon::scatterBlockByHash(left_key_names, block, num_buckets);
block = std::move(blocks[current_idx]);
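
A compact sketch of the per-bucket lifecycle described in the comment above (toy functions; the real code wires these phases through JoiningTransform and DelayedJoinedBlocksWorkerTransform):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct Block { int id; };

    static void buildHashJoin(const std::vector<Block> & right) { std::printf("build: %zu right blocks\n", right.size()); }
    static void probeLeft(const std::vector<Block> & left) { std::printf("probe: %zu left blocks\n", left.size()); }
    static void emitRightNonJoined() { std::printf("emit right non-joined rows\n"); }

    int main()
    {
        const std::size_t num_buckets = 3;
        std::vector<std::vector<Block>> left(num_buckets), right(num_buckets);
        right[0] = {{1}, {2}};
        left[0] = {{3}};

        /// Buckets are processed one at a time; the per-bucket hash join must
        /// stay alive until step 3, which is what RIGHT/FULL support needs.
        for (std::size_t bucket = 0; bucket < num_buckets; ++bucket)
        {
            buildHashJoin(right[bucket]);   /// 1. build from right side blocks
            probeLeft(left[bucket]);        /// 2. join the left side against it
            emitRightNonJoined();           /// 3. read right non-joined blocks
        }
    }
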
diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h
index bce04ee6b04..ce519892b0e 100644
--- a/src/Interpreters/GraceHashJoin.h
+++ b/src/Interpreters/GraceHashJoin.h
@@ -13,7 +13,6 @@
namespace DB
{
-
class TableJoin;
class HashJoin;
@@ -79,7 +78,7 @@ public:
bool supportTotals() const override { return false; }
IBlocksStreamPtr
- getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const override;
+ getNonJoinedBlocks(const Block & left_sample_block_, const Block & result_sample_block_, UInt64 max_block_size) const override;
/// Open iterator over joined blocks.
/// Must be called after all @joinBlock calls.
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index 0beb4492aef..ed927d550a8 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -361,7 +361,7 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query,
std::vector<std::pair<String, bool>> tables_to_drop;
for (auto iterator = database->getTablesIterator(table_context); iterator->isValid(); iterator->next())
{
- iterator->table()->flush();
+ iterator->table()->flushAndPrepareForShutdown();
tables_to_drop.push_back({iterator->name(), iterator->table()->isDictionary()});
}
@@ -451,11 +451,11 @@ void InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind kind, ContextPtr
auto drop_context = Context::createCopy(global_context);
if (ignore_sync_setting)
drop_context->setSetting("database_atomic_wait_for_drop_and_detach_synchronously", false);
- drop_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
+ drop_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY);
if (auto txn = current_context->getZooKeeperMetadataTransaction())
{
/// For Replicated database
- drop_context->getClientInfo().is_replicated_database_internal = true;
+ drop_context->setQueryKindReplicatedDatabaseInternal();
drop_context->setQueryContext(std::const_pointer_cast(current_context));
drop_context->initZooKeeperMetadataTransaction(txn, true);
}
diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp
index 75d43b541e1..ae79b3f932e 100644
--- a/src/Interpreters/InterpreterRenameQuery.cpp
+++ b/src/Interpreters/InterpreterRenameQuery.cpp
@@ -193,7 +193,7 @@ AccessRightsElements InterpreterRenameQuery::getRequiredAccess(InterpreterRename
required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT, elem.to.getDatabase(), elem.to.getTable());
if (rename.exchange)
{
- required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT , elem.from.getDatabase(), elem.from.getTable());
+ required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT, elem.from.getDatabase(), elem.from.getTable());
required_access.emplace_back(AccessType::SELECT | AccessType::DROP_TABLE, elem.to.getDatabase(), elem.to.getTable());
}
}
diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index 32812151b59..fc3ea3a13ca 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -2274,8 +2274,7 @@ std::optional InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle
&& !settings.allow_experimental_query_deduplication
&& !settings.empty_result_for_aggregation_by_empty_set
&& storage
- && storage->getName() != "MaterializedMySQL"
- && !storage->hasLightweightDeletedMask()
+ && storage->supportsTrivialCountOptimization()
&& query_info.filter_asts.empty()
&& query_analyzer->hasAggregation()
&& (query_analyzer->aggregates().size() == 1)
@@ -3183,7 +3182,7 @@ void InterpreterSelectQuery::initSettings()
if (query.settings())
InterpreterSetQuery(query.settings(), context).executeForCurrentContext(options.ignore_setting_constraints);
- auto & client_info = context->getClientInfo();
+ const auto & client_info = context->getClientInfo();
auto min_major = DBMS_MIN_MAJOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD;
auto min_minor = DBMS_MIN_MINOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD;
diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp
index 6db57a4f950..e9118b747e5 100644
--- a/src/Interpreters/InterpreterSetQuery.cpp
+++ b/src/Interpreters/InterpreterSetQuery.cpp
@@ -65,6 +65,9 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta
}
else if (const auto * explain_query = ast->as<ASTExplainQuery>())
{
+ if (explain_query->settings_ast)
+ InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext();
+
applySettingsFromQuery(explain_query->getExplainedQuery(), context_);
}
else if (const auto * query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get()))
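
The EXPLAIN branch now applies the EXPLAIN statement's own SETTINGS clause before recursing into the explained query. A minimal sketch of that ordering, with a toy Query type standing in for the real AST classes:

    #include <iostream>
    #include <memory>
    #include <string>
    #include <utility>
    #include <vector>

    struct Query
    {
        std::vector<std::pair<std::string, std::string>> settings;   /// SETTINGS clause, if any
        std::shared_ptr<Query> explained;                            /// set for EXPLAIN <query>
    };

    static void applySettingsFromQuery(const std::shared_ptr<Query> & query)
    {
        if (!query)
            return;
        /// First the EXPLAIN's own SETTINGS, then the explained query's,
        /// so the inner clause wins on conflicts. Same order as the diff.
        for (const auto & [name, value] : query->settings)
            std::cout << "SET " << name << " = " << value << '\n';
        applySettingsFromQuery(query->explained);
    }

    int main()
    {
        auto inner = std::make_shared<Query>();
        inner->settings = {{"max_threads", "1"}};
        auto explain = std::make_shared<Query>();
        explain->settings = {{"allow_experimental_analyzer", "1"}};
        explain->explained = inner;
        applySettingsFromQuery(explain);
    }
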
diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp
index 51311c82eeb..149420006fb 100644
--- a/src/Interpreters/InterpreterShowIndexesQuery.cpp
+++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp
@@ -40,20 +40,20 @@ SELECT *
FROM (
(SELECT
name AS table,
- 0 AS non_unique,
+ 1 AS non_unique,
'PRIMARY' AS key_name,
- NULL AS seq_in_index,
- NULL AS column_name,
+ row_number() over (order by column_name) AS seq_in_index,
+ arrayJoin(splitByString(', ', primary_key)) AS column_name,
'A' AS collation,
- NULL AS cardinality,
+ 0 AS cardinality,
NULL AS sub_part,
NULL AS packed,
NULL AS null,
- 'primary' AS index_type,
- NULL AS comment,
- NULL AS index_comment,
+ 'PRIMARY' AS index_type,
+ '' AS comment,
+ '' AS index_comment,
'YES' AS visible,
- primary_key AS expression
+ '' AS expression
FROM system.tables
WHERE
database = '{0}'
@@ -61,18 +61,18 @@ FROM (
UNION ALL (
SELECT
table AS table,
- 0 AS non_unique,
+ 1 AS non_unique,
name AS key_name,
- NULL AS seq_in_index,
- NULL AS column_name,
+ 1 AS seq_in_index,
+ '' AS column_name,
NULL AS collation,
- NULL AS cardinality,
+ 0 AS cardinality,
NULL AS sub_part,
NULL AS packed,
NULL AS null,
- type AS index_type,
- NULL AS comment,
- NULL AS index_comment,
+ upper(type) AS index_type,
+ '' AS comment,
+ '' AS index_comment,
'YES' AS visible,
expr AS expression
FROM system.data_skipping_indices
@@ -80,12 +80,27 @@ FROM (
database = '{0}'
AND table = '{1}'))
{2}
-ORDER BY index_type, expression;)", database, table, where_expression);
+ORDER BY index_type, expression, column_name, seq_in_index;)", database, table, where_expression);
/// Sorting is strictly speaking not necessary but 1. it is convenient for users, 2. SQL currently does not allow to
/// sort the output of SHOW INDEXES otherwise (SELECT * FROM (SHOW INDEXES ...) ORDER BY ... is rejected) and 3. some
/// SQL tests can take advantage of this.
+ /// Note about compatibility of fields 'column_name', 'seq_in_index' and 'expression' with MySQL:
+ /// MySQL has non-functional and functional indexes.
+ /// - Non-functional indexes only reference columns, e.g. 'col1, col2'. In this case, `SHOW INDEX` produces as many result rows as there
+ /// are indexed columns. 'column_name' and 'seq_in_index' (an ascending integer 1, 2, ...) are filled, 'expression' is empty.
+ /// - Functional indexes can reference arbitrary expressions, e.g. 'col1 + 1, concat(col2, col3)'. 'SHOW INDEX' produces a single row
+ /// with `column_name` and `seq_in_index` empty and `expression` filled with the entire index expression. Only non-primary-key indexes
+ /// can be functional indexes.
+ /// Above SELECT tries to emulate that. Caveats:
+ /// 1. The primary key index sub-SELECT assumes the primary key expression is non-functional. Non-functional primary key indexes in
+ /// ClickHouse are possible but quite obscure. In MySQL they are not possible at all.
+ /// 2. Related to 1.: Poor man's tuple parsing with splitByString() in the PK sub-SELECT messes up for functional primary key index
+ /// expressions where the comma is not only used as separator between tuple components, e.g. in 'col1 + 1, concat(col2, col3)'.
+ /// 3. The data skipping index sub-SELECT assumes the index expression is functional. 3rd party tools that expect MySQL semantics from
+ /// SHOW INDEX will probably not care as MySQL has no skipping indexes and they only use the result to figure out the primary key.
+
return rewritten_query;
}
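
Caveat 2 above is worth seeing concretely. A small stand-alone C++ analogue of splitByString(', ', primary_key) shows how comma splitting breaks a functional expression (the function below is illustrative, not ClickHouse's implementation):

    #include <iostream>
    #include <string>
    #include <vector>

    /// Naive ", "-splitting: fine for 'col1, col2', wrong for expressions
    /// that themselves contain commas.
    static std::vector<std::string> splitByCommaSpace(const std::string & s)
    {
        std::vector<std::string> parts;
        size_t pos = 0;
        while (true)
        {
            size_t next = s.find(", ", pos);
            parts.push_back(s.substr(pos, next - pos));
            if (next == std::string::npos)
                break;
            pos = next + 2;
        }
        return parts;
    }

    int main()
    {
        for (const auto & p : splitByCommaSpace("col1 + 1, concat(col2, col3)"))
            std::cout << '[' << p << "]\n";
        /// Prints three parts instead of the two expression components:
        /// [col1 + 1] [concat(col2] [col3)]
    }
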
diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp
index 02cdeb0154e..3207da9941a 100644
--- a/src/Interpreters/InterpreterSystemQuery.cpp
+++ b/src/Interpreters/InterpreterSystemQuery.cpp
@@ -38,6 +38,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -555,9 +556,25 @@ BlockIO InterpreterSystemQuery::execute()
);
break;
}
- case Type::STOP_LISTEN_QUERIES:
- case Type::START_LISTEN_QUERIES:
- throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not supported yet", query.type);
+ case Type::STOP_LISTEN:
+ getContext()->checkAccess(AccessType::SYSTEM_LISTEN);
+ getContext()->stopServers(query.server_type);
+ break;
+ case Type::START_LISTEN:
+ getContext()->checkAccess(AccessType::SYSTEM_LISTEN);
+ getContext()->startServers(query.server_type);
+ break;
+ case Type::FLUSH_ASYNC_INSERT_QUEUE:
+ {
+ getContext()->checkAccess(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE);
+ auto * queue = getContext()->getAsynchronousInsertQueue();
+ if (!queue)
+ throw Exception(ErrorCodes::BAD_ARGUMENTS,
+ "Cannot flush asynchronous insert queue because it is not initialized");
+
+ queue->flushAll();
+ break;
+ }
case Type::STOP_THREAD_FUZZER:
getContext()->checkAccess(AccessType::SYSTEM_THREAD_FUZZER);
ThreadFuzzer::stop();
@@ -1149,6 +1166,11 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
required_access.emplace_back(AccessType::SYSTEM_FLUSH_LOGS);
break;
}
+ case Type::FLUSH_ASYNC_INSERT_QUEUE:
+ {
+ required_access.emplace_back(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE);
+ break;
+ }
case Type::RESTART_DISK:
{
required_access.emplace_back(AccessType::SYSTEM_RESTART_DISK);
@@ -1164,8 +1186,12 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
required_access.emplace_back(AccessType::SYSTEM_SYNC_FILE_CACHE);
break;
}
- case Type::STOP_LISTEN_QUERIES:
- case Type::START_LISTEN_QUERIES:
+ case Type::STOP_LISTEN:
+ case Type::START_LISTEN:
+ {
+ required_access.emplace_back(AccessType::SYSTEM_LISTEN);
+ break;
+ }
case Type::STOP_THREAD_FUZZER:
case Type::START_THREAD_FUZZER:
case Type::ENABLE_FAILPOINT:
diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp
index ee5c288afbb..29add31fd5d 100644
--- a/src/Interpreters/JoinedTables.cpp
+++ b/src/Interpreters/JoinedTables.cpp
@@ -337,6 +337,11 @@ std::shared_ptr JoinedTables::makeTableJoin(const ASTSelectQuery & se
LOG_TRACE(&Poco::Logger::get("JoinedTables"), "Can't use dictionary join: dictionary '{}' was not found", dictionary_name);
return nullptr;
}
+ if (dictionary->getSpecialKeyType() == DictionarySpecialKeyType::Range)
+ {
+ LOG_TRACE(&Poco::Logger::get("JoinedTables"), "Can't use dictionary join: dictionary '{}' is a range dictionary", dictionary_name);
+ return nullptr;
+ }
auto dictionary_kv = std::dynamic_pointer_cast<const IKeyValueEntity>(dictionary);
table_join->setStorageJoin(dictionary_kv);
diff --git a/src/Interpreters/MetricLog.cpp b/src/Interpreters/MetricLog.cpp
index 24f77f7d0ba..24e9e4487ae 100644
--- a/src/Interpreters/MetricLog.cpp
+++ b/src/Interpreters/MetricLog.cpp
@@ -113,7 +113,7 @@ void MetricLog::metricThreadFunction()
elem.current_metrics[i] = CurrentMetrics::values[i];
}
- this->add(elem);
+ this->add(std::move(elem));
/// We will record current time into table but align it to regular time intervals to avoid time drift.
/// We may drop some time points if the server is overloaded and recording took too much time.
diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp
index 881fcae4de6..a97f1f405bc 100644
--- a/src/Interpreters/PartLog.cpp
+++ b/src/Interpreters/PartLog.cpp
@@ -242,7 +242,7 @@ bool PartLog::addNewParts(
elem.profile_counters = part_log_entry.profile_counters;
- part_log->add(elem);
+ part_log->add(std::move(elem));
}
}
catch (...)
diff --git a/src/Interpreters/ProcessorsProfileLog.cpp b/src/Interpreters/ProcessorsProfileLog.cpp
index e78a07bb752..14159ad3438 100644
--- a/src/Interpreters/ProcessorsProfileLog.cpp
+++ b/src/Interpreters/ProcessorsProfileLog.cpp
@@ -73,12 +73,5 @@ void ProcessorProfileLogElement::appendToBlock(MutableColumns & columns) const
columns[i++]->insert(output_bytes);
}
-ProcessorsProfileLog::ProcessorsProfileLog(ContextPtr context_, const String & database_name_,
- const String & table_name_, const String & storage_def_,
- size_t flush_interval_milliseconds_)
- : SystemLog<ProcessorProfileLogElement>(context_, database_name_, table_name_,
- storage_def_, flush_interval_milliseconds_)
-{
-}
}
diff --git a/src/Interpreters/ProcessorsProfileLog.h b/src/Interpreters/ProcessorsProfileLog.h
index 81d58edd913..63791c0374c 100644
--- a/src/Interpreters/ProcessorsProfileLog.h
+++ b/src/Interpreters/ProcessorsProfileLog.h
@@ -45,12 +45,7 @@ struct ProcessorProfileLogElement
class ProcessorsProfileLog : public SystemLog<ProcessorProfileLogElement>
{
public:
- ProcessorsProfileLog(
- ContextPtr context_,
- const String & database_name_,
- const String & table_name_,
- const String & storage_def_,
- size_t flush_interval_milliseconds_);
+ using SystemLog::SystemLog;
};
}
diff --git a/src/Interpreters/ProfileEventsExt.cpp b/src/Interpreters/ProfileEventsExt.cpp
index bf8d060bd3c..bd421ae8e33 100644
--- a/src/Interpreters/ProfileEventsExt.cpp
+++ b/src/Interpreters/ProfileEventsExt.cpp
@@ -86,9 +86,16 @@ static void dumpMemoryTracker(ProfileEventsSnapshot const & snapshot, DB::Mutabl
columns[i++]->insert(static_cast<UInt64>(snapshot.current_time));
columns[i++]->insert(static_cast<UInt64>(snapshot.thread_id));
columns[i++]->insert(Type::GAUGE);
-
columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME));
- columns[i++]->insert(snapshot.memory_usage);
+ columns[i]->insert(snapshot.memory_usage);
+
+ i = 0;
+ columns[i++]->insertData(host_name.data(), host_name.size());
+ columns[i++]->insert(static_cast<UInt64>(snapshot.current_time));
+ columns[i++]->insert(static_cast<UInt64>(snapshot.thread_id));
+ columns[i++]->insert(Type::GAUGE);
+ columns[i++]->insertData(MemoryTracker::PEAK_USAGE_EVENT_NAME, strlen(MemoryTracker::PEAK_USAGE_EVENT_NAME));
+ columns[i]->insert(snapshot.peak_memory_usage);
}
void getProfileEvents(
@@ -121,6 +128,7 @@ void getProfileEvents(
group_snapshot.thread_id = 0;
group_snapshot.current_time = time(nullptr);
group_snapshot.memory_usage = thread_group->memory_tracker.get();
+ group_snapshot.peak_memory_usage = thread_group->memory_tracker.getPeak();
auto group_counters = thread_group->performance_counters.getPartiallyAtomicSnapshot();
auto prev_group_snapshot = last_sent_snapshots.find(0);
group_snapshot.counters =
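
The ProfileEventsExt change emits a second GAUGE row per snapshot for peak memory. A rough sketch of the resulting row shape, with toy column names (the real code uses MemoryTracker::USAGE_EVENT_NAME and MemoryTracker::PEAK_USAGE_EVENT_NAME as the metric names):

    #include <cstdint>
    #include <ctime>
    #include <string>
    #include <vector>

    struct GaugeRow
    {
        std::string host;
        std::time_t time;
        std::uint64_t thread_id;
        std::string name;     /// metric name column
        std::int64_t value;
    };

    int main()
    {
        std::vector<GaugeRow> rows;
        std::time_t now = std::time(nullptr);

        std::int64_t memory_usage = 123;        /// snapshot.memory_usage
        std::int64_t peak_memory_usage = 456;   /// snapshot.peak_memory_usage (new field)

        /// Two GAUGE rows per snapshot with identical identifying columns,
        /// mirroring the second column-filling pass added above.
        rows.push_back({"localhost", now, 0, "memory_usage", memory_usage});
        rows.push_back({"localhost", now, 0, "peak_memory_usage", peak_memory_usage});
        return rows.size() == 2 ? 0 : 1;
    }
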
diff --git a/src/Interpreters/ProfileEventsExt.h b/src/Interpreters/ProfileEventsExt.h
index 7d9fc512d15..cc338530510 100644
--- a/src/Interpreters/ProfileEventsExt.h
+++ b/src/Interpreters/ProfileEventsExt.h
@@ -16,6 +16,7 @@ struct ProfileEventsSnapshot
UInt64 thread_id;
CountersIncrement counters;
Int64 memory_usage;
+ Int64 peak_memory_usage;
time_t current_time;
};
diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp
index ec0315c2f95..df21e82305a 100644
--- a/src/Interpreters/QueryLog.cpp
+++ b/src/Interpreters/QueryLog.cpp
@@ -41,6 +41,15 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes()
{"ExceptionWhileProcessing", static_cast(EXCEPTION_WHILE_PROCESSING)}
});
+ auto query_cache_usage_datatype = std::make_shared<DataTypeEnum8>(
+ DataTypeEnum8::Values
+ {
+ {"Unknown", static_cast<Int8>(QueryCache::Usage::Unknown)},
+ {"None", static_cast<Int8>(QueryCache::Usage::None)},
+ {"Write", static_cast<Int8>(QueryCache::Usage::Write)},
+ {"Read", static_cast<Int8>(QueryCache::Usage::Read)}
+ });
+
auto low_cardinality_string = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
auto array_low_cardinality_string = std::make_shared<DataTypeArray>(low_cardinality_string);
@@ -126,6 +135,8 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes()
{"transaction_id", getTransactionIDDataType()},
+ {"query_cache_usage", std::move(query_cache_usage_datatype)},
+
{"asynchronous_read_counters", std::make_shared(low_cardinality_string, std::make_shared())},
};
}
@@ -277,6 +288,8 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const
columns[i++]->insert(Tuple{tid.start_csn, tid.local_tid, tid.host_id});
+ columns[i++]->insert(query_cache_usage);
+
if (async_read_counters)
async_read_counters->dumpToMapColumn(columns[i++].get());
else
diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h
index 570d1297239..5bc80280eac 100644
--- a/src/Interpreters/QueryLog.h
+++ b/src/Interpreters/QueryLog.h
@@ -4,8 +4,9 @@
#include
#include
#include
-#include
+#include
#include
+#include
#include
#include
#include
@@ -96,6 +97,8 @@ struct QueryLogElement
TransactionID tid;
+ QueryCache::Usage query_cache_usage = QueryCache::Usage::Unknown;
+
static std::string name() { return "QueryLog"; }
static NamesAndTypesList getNamesAndTypes();
diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp
index 0fbcfc9e6a1..68411e80755 100644
--- a/src/Interpreters/ServerAsynchronousMetrics.cpp
+++ b/src/Interpreters/ServerAsynchronousMetrics.cpp
@@ -92,6 +92,12 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values
" The files opened with `mmap` are kept in the cache to avoid costly TLB flushes."};
}
+ if (auto query_cache = getContext()->getQueryCache())
+ {
+ new_values["QueryCacheBytes"] = { query_cache->weight(), "Total size of the query cache in bytes." };
+ new_values["QueryCacheEntries"] = { query_cache->count(), "Total number of entries in the query cache." };
+ }
+
{
auto caches = FileCacheFactory::instance().getAll();
size_t total_bytes = 0;
diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp
index 64f7b4fc934..cadf619700c 100644
--- a/src/Interpreters/Session.cpp
+++ b/src/Interpreters/Session.cpp
@@ -240,7 +240,7 @@ private:
if (session != sessions.end() && session->second->close_cycle <= current_cycle)
{
- if (!session->second.unique())
+ if (session->second.use_count() != 1)
{
LOG_TEST(log, "Delay closing session with session_id: {}, user_id: {}", key.second, key.first);
@@ -299,7 +299,10 @@ Session::~Session()
if (notified_session_log_about_login)
{
if (auto session_log = getSessionLog())
+ {
+ /// TODO: We have to ensure that the same info is added to the session log on a LoginSuccess event and on the corresponding Logout event.
session_log->addLogOut(auth_id, user, getClientInfo());
+ }
}
}
@@ -368,17 +371,117 @@ void Session::onAuthenticationFailure(const std::optional & user_name, c
}
}
-ClientInfo & Session::getClientInfo()
-{
- /// FIXME it may produce different info for LoginSuccess and the corresponding Logout entries in the session log
- return session_context ? session_context->getClientInfo() : *prepared_client_info;
-}
-
const ClientInfo & Session::getClientInfo() const
{
return session_context ? session_context->getClientInfo() : *prepared_client_info;
}
+void Session::setClientInfo(const ClientInfo & client_info)
+{
+ if (session_context)
+ session_context->setClientInfo(client_info);
+ else
+ prepared_client_info = client_info;
+}
+
+void Session::setClientName(const String & client_name)
+{
+ if (session_context)
+ session_context->setClientName(client_name);
+ else
+ prepared_client_info->client_name = client_name;
+}
+
+void Session::setClientInterface(ClientInfo::Interface interface)
+{
+ if (session_context)
+ session_context->setClientInterface(interface);
+ else
+ prepared_client_info->interface = interface;
+}
+
+void Session::setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version)
+{
+ if (session_context)
+ {
+ session_context->setClientVersion(client_version_major, client_version_minor, client_version_patch, client_tcp_protocol_version);
+ }
+ else
+ {
+ prepared_client_info->client_version_major = client_version_major;
+ prepared_client_info->client_version_minor = client_version_minor;
+ prepared_client_info->client_version_patch = client_version_patch;
+ prepared_client_info->client_tcp_protocol_version = client_tcp_protocol_version;
+ }
+}
+
+void Session::setClientConnectionId(uint32_t connection_id)
+{
+ if (session_context)
+ session_context->setClientConnectionId(connection_id);
+ else
+ prepared_client_info->connection_id = connection_id;
+}
+
+void Session::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer)
+{
+ if (session_context)
+ {
+ session_context->setHttpClientInfo(http_method, http_user_agent, http_referer);
+ }
+ else
+ {
+ prepared_client_info->http_method = http_method;
+ prepared_client_info->http_user_agent = http_user_agent;
+ prepared_client_info->http_referer = http_referer;
+ }
+}
+
+void Session::setForwardedFor(const String & forwarded_for)
+{
+ if (session_context)
+ session_context->setForwardedFor(forwarded_for);
+ else
+ prepared_client_info->forwarded_for = forwarded_for;
+}
+
+void Session::setQuotaClientKey(const String & quota_key)
+{
+ if (session_context)
+ session_context->setQuotaClientKey(quota_key);
+ else
+ prepared_client_info->quota_key = quota_key;
+}
+
+void Session::setConnectionClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version)
+{
+ if (session_context)
+ {
+ session_context->setConnectionClientVersion(client_version_major, client_version_minor, client_version_patch, client_tcp_protocol_version);
+ }
+ else
+ {
+ prepared_client_info->connection_client_version_major = client_version_major;
+ prepared_client_info->connection_client_version_minor = client_version_minor;
+ prepared_client_info->connection_client_version_patch = client_version_patch;
+ prepared_client_info->connection_tcp_protocol_version = client_tcp_protocol_version;
+ }
+}
+
+const OpenTelemetry::TracingContext & Session::getClientTraceContext() const
+{
+ if (session_context)
+ return session_context->getClientTraceContext();
+ return prepared_client_info->client_trace_context;
+}
+
+OpenTelemetry::TracingContext & Session::getClientTraceContext()
+{
+ if (session_context)
+ return session_context->getClientTraceContext();
+ return prepared_client_info->client_trace_context;
+}
+
ContextMutablePtr Session::makeSessionContext()
{
if (session_context)
@@ -396,8 +499,7 @@ ContextMutablePtr Session::makeSessionContext()
new_session_context->makeSessionContext();
/// Copy prepared client info to the new session context.
- auto & res_client_info = new_session_context->getClientInfo();
- res_client_info = std::move(prepared_client_info).value();
+ new_session_context->setClientInfo(*prepared_client_info);
prepared_client_info.reset();
/// Set user information for the new context: current profiles, roles, access rights.
@@ -436,8 +538,7 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std:
/// Copy prepared client info to the session context, no matter whether it has just been created or not.
/// If we continue using a previously created session context found by session ID
/// it's necessary to replace the client info in it anyway, because it contains actual connection information (client address, etc.)
- auto & res_client_info = new_session_context->getClientInfo();
- res_client_info = std::move(prepared_client_info).value();
+ new_session_context->setClientInfo(*prepared_client_info);
prepared_client_info.reset();
/// Set user information for the new context: current profiles, roles, access rights.
@@ -492,32 +593,28 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t
}
/// Copy the specified client info to the new query context.
- auto & res_client_info = query_context->getClientInfo();
if (client_info_to_move)
- res_client_info = std::move(*client_info_to_move);
+ query_context->setClientInfo(*client_info_to_move);
else if (client_info_to_copy && (client_info_to_copy != &getClientInfo()))
- res_client_info = *client_info_to_copy;
+ query_context->setClientInfo(*client_info_to_copy);
/// Copy current user's name and address if it was authenticated after query_client_info was initialized.
if (prepared_client_info && !prepared_client_info->current_user.empty())
{
- res_client_info.current_user = prepared_client_info->current_user;
- res_client_info.current_address = prepared_client_info->current_address;
+ query_context->setCurrentUserName(prepared_client_info->current_user);
+ query_context->setCurrentAddress(prepared_client_info->current_address);
}
/// Set parameters of initial query.
- if (res_client_info.query_kind == ClientInfo::QueryKind::NO_QUERY)
- res_client_info.query_kind = ClientInfo::QueryKind::INITIAL_QUERY;
+ if (query_context->getClientInfo().query_kind == ClientInfo::QueryKind::NO_QUERY)
+ query_context->setQueryKind(ClientInfo::QueryKind::INITIAL_QUERY);
- if (res_client_info.query_kind == ClientInfo::QueryKind::INITIAL_QUERY)
+ if (query_context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY)
{
- res_client_info.initial_user = res_client_info.current_user;
- res_client_info.initial_address = res_client_info.current_address;
+ query_context->setInitialUserName(query_context->getClientInfo().current_user);
+ query_context->setInitialAddress(query_context->getClientInfo().current_address);
}
- /// Sets that row policies of the initial user should be used too.
- query_context->enableRowPoliciesOfInitialUser();
-
/// Set user information for the new context: current profiles, roles, access rights.
if (user_id && !query_context->getAccess()->tryGetUser())
query_context->setUser(*user_id);
@@ -566,4 +663,3 @@ void Session::closeSession(const String & session_id)
}
}
-
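
All of the new Session setters follow one shape: before a session context exists, mutations land in prepared_client_info; afterwards they are forwarded to the context (which later receives the prepared info in makeSessionContext()). A minimal sketch of that delegation, with toy Context and ClientInfo types:

    #include <optional>
    #include <string>

    struct ClientInfo { std::string client_name; };

    struct Context
    {
        ClientInfo client_info;
        void setClientName(const std::string & name) { client_info.client_name = name; }
    };

    class Session
    {
    public:
        void setClientName(const std::string & name)
        {
            /// Same shape as every setter added above: forward to the session
            /// context once it exists, otherwise stash in prepared_client_info.
            if (session_context)
                session_context->setClientName(name);
            else
                prepared_client_info->client_name = name;
        }

    private:
        Context * session_context = nullptr;
        std::optional<ClientInfo> prepared_client_info{ClientInfo{}};
    };

    int main()
    {
        Session s;
        s.setClientName("clickhouse-client");
    }
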
diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h
index d7c06a60464..36f811ccd24 100644
--- a/src/Interpreters/Session.h
+++ b/src/Interpreters/Session.h
@@ -54,10 +54,23 @@ public:
/// Writes a row about login failure into session log (if enabled)
void onAuthenticationFailure(const std::optional & user_name, const Poco::Net::SocketAddress & address_, const Exception & e);
- /// Returns a reference to session ClientInfo.
- ClientInfo & getClientInfo();
+ /// Returns a reference to the session's ClientInfo.
const ClientInfo & getClientInfo() const;
+ /// Modify the session's ClientInfo.
+ void setClientInfo(const ClientInfo & client_info);
+ void setClientName(const String & client_name);
+ void setClientInterface(ClientInfo::Interface interface);
+ void setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version);
+ void setClientConnectionId(uint32_t connection_id);
+ void setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer);
+ void setForwardedFor(const String & forwarded_for);
+ void setQuotaClientKey(const String & quota_key);
+ void setConnectionClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version);
+
+ const OpenTelemetry::TracingContext & getClientTraceContext() const;
+ OpenTelemetry::TracingContext & getClientTraceContext();
+
/// Makes a session context, can be used one or zero times.
/// The function also assigns an user to this context.
ContextMutablePtr makeSessionContext();
diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp
index c930013e52b..0a8a7fc18c5 100644
--- a/src/Interpreters/SessionLog.cpp
+++ b/src/Interpreters/SessionLog.cpp
@@ -227,7 +227,7 @@ void SessionLog::addLoginSuccess(const UUID & auth_id, std::optional ses
for (const auto & s : settings.allChanged())
log_entry.settings.emplace_back(s.getName(), s.getValueString());
- add(log_entry);
+ add(std::move(log_entry));
}
void SessionLog::addLoginFailure(
@@ -243,7 +243,7 @@ void SessionLog::addLoginFailure(
log_entry.client_info = info;
log_entry.user_identified_with = AuthenticationType::NO_PASSWORD;
- add(log_entry);
+ add(std::move(log_entry));
}
void SessionLog::addLogOut(const UUID & auth_id, const UserPtr & login_user, const ClientInfo & client_info)
@@ -257,7 +257,7 @@ void SessionLog::addLogOut(const UUID & auth_id, const UserPtr & login_user, con
log_entry.external_auth_server = login_user ? login_user->auth_data.getLDAPServerName() : "";
log_entry.client_info = client_info;
- add(log_entry);
+ add(std::move(log_entry));
}
}
diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp
index 3fd0297f5b8..be0468aa876 100644
--- a/src/Interpreters/SystemLog.cpp
+++ b/src/Interpreters/SystemLog.cpp
@@ -101,7 +101,6 @@ namespace
namespace
{
-constexpr size_t DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS = 7500;
constexpr size_t DEFAULT_METRIC_LOG_COLLECT_INTERVAL_MILLISECONDS = 1000;
/// Creates a system log with MergeTree engine using parameters from config
@@ -124,18 +123,23 @@ std::shared_ptr<TSystemLog> createSystemLog(
LOG_DEBUG(&Poco::Logger::get("SystemLog"),
"Creating {}.{} from {}", default_database_name, default_table_name, config_prefix);
- String database = config.getString(config_prefix + ".database", default_database_name);
- String table = config.getString(config_prefix + ".table", default_table_name);
+ SystemLogSettings log_settings;
+ log_settings.queue_settings.database = config.getString(config_prefix + ".database", default_database_name);
+ log_settings.queue_settings.table = config.getString(config_prefix + ".table", default_table_name);
- if (database != default_database_name)
+ if (log_settings.queue_settings.database != default_database_name)
{
/// System tables must be loaded before other tables, but loading order is undefined for all databases except `system`
- LOG_ERROR(&Poco::Logger::get("SystemLog"), "Custom database name for a system table specified in config."
- " Table `{}` will be created in `system` database instead of `{}`", table, database);
- database = default_database_name;
+ LOG_ERROR(
+ &Poco::Logger::get("SystemLog"),
+ "Custom database name for a system table specified in config."
+ " Table `{}` will be created in `system` database instead of `{}`",
+ log_settings.queue_settings.table,
+ log_settings.queue_settings.database);
+
+ log_settings.queue_settings.database = default_database_name;
}
- String engine;
if (config.has(config_prefix + ".engine"))
{
if (config.has(config_prefix + ".partition_by"))
@@ -159,26 +163,26 @@ std::shared_ptr createSystemLog(
"If 'engine' is specified for system table, SETTINGS parameters should "
"be specified directly inside 'engine' and 'settings' setting doesn't make sense");
- engine = config.getString(config_prefix + ".engine");
+ log_settings.engine = config.getString(config_prefix + ".engine");
}
else
{
/// ENGINE expr is necessary.
- engine = "ENGINE = MergeTree";
+ log_settings.engine = "ENGINE = MergeTree";
/// PARTITION expr is not necessary.
String partition_by = config.getString(config_prefix + ".partition_by", "toYYYYMM(event_date)");
if (!partition_by.empty())
- engine += " PARTITION BY (" + partition_by + ")";
+ log_settings.engine += " PARTITION BY (" + partition_by + ")";
/// TTL expr is not necessary.
String ttl = config.getString(config_prefix + ".ttl", "");
if (!ttl.empty())
- engine += " TTL " + ttl;
+ log_settings.engine += " TTL " + ttl;
/// ORDER BY expr is necessary.
String order_by = config.getString(config_prefix + ".order_by", TSystemLog::getDefaultOrderBy());
- engine += " ORDER BY (" + order_by + ")";
+ log_settings.engine += " ORDER BY (" + order_by + ")";
/// SETTINGS expr is not necessary.
/// https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#settings
@@ -188,24 +192,52 @@ std::shared_ptr createSystemLog(
String settings = config.getString(config_prefix + ".settings", "");
if (!storage_policy.empty() || !settings.empty())
{
- engine += " SETTINGS";
+ log_settings.engine += " SETTINGS";
/// If 'storage_policy' is repeated, the 'settings' configuration is preferred.
if (!storage_policy.empty())
- engine += " storage_policy = " + quoteString(storage_policy);
+ log_settings.engine += " storage_policy = " + quoteString(storage_policy);
if (!settings.empty())
- engine += (storage_policy.empty() ? " " : ", ") + settings;
+ log_settings.engine += (storage_policy.empty() ? " " : ", ") + settings;
}
}
/// Validate engine definition syntax to prevent some configuration errors.
ParserStorageWithComment storage_parser;
- parseQuery(storage_parser, engine.data(), engine.data() + engine.size(),
+ parseQuery(storage_parser, log_settings.engine.data(), log_settings.engine.data() + log_settings.engine.size(),
"Storage to create table for " + config_prefix, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
- size_t flush_interval_milliseconds = config.getUInt64(config_prefix + ".flush_interval_milliseconds",
- DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS);
+ log_settings.queue_settings.flush_interval_milliseconds = config.getUInt64(config_prefix + ".flush_interval_milliseconds",
+ TSystemLog::getDefaultFlushIntervalMilliseconds());
- return std::make_shared<TSystemLog>(context, database, table, engine, flush_interval_milliseconds);
+ log_settings.queue_settings.max_size_rows = config.getUInt64(config_prefix + ".max_size_rows",
+ TSystemLog::getDefaultMaxSize());
+
+ if (log_settings.queue_settings.max_size_rows < 1)
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "{0}.max_size_rows {1} should be at least 1",
+ config_prefix,
+ log_settings.queue_settings.max_size_rows);
+
+ log_settings.queue_settings.reserved_size_rows = config.getUInt64(config_prefix + ".reserved_size_rows",
+ TSystemLog::getDefaultReservedSize());
+
+ if (log_settings.queue_settings.max_size_rows < log_settings.queue_settings.reserved_size_rows)
+ {
+ throw Exception(ErrorCodes::BAD_ARGUMENTS,
+ "{0}.max_size_rows {1} should be greater or equal to {0}.reserved_size_rows {2}",
+ config_prefix,
+ log_settings.queue_settings.max_size_rows,
+ log_settings.queue_settings.reserved_size_rows);
+ }
+
+ log_settings.queue_settings.buffer_size_rows_flush_threshold = config.getUInt64(config_prefix + ".buffer_size_rows_flush_threshold",
+ log_settings.queue_settings.max_size_rows / 2);
+
+ log_settings.queue_settings.notify_flush_on_crash = config.getBool(config_prefix + ".flush_on_crash",
+ TSystemLog::shouldNotifyFlushOnCrash());
+
+ log_settings.queue_settings.turn_off_logger = TSystemLog::shouldTurnOffLogger();
+
+ return std::make_shared<TSystemLog>(context, log_settings);
}
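
The new queue settings come with validation rules: max_size_rows must be at least 1 and at least reserved_size_rows, and the flush threshold defaults to half of max_size_rows. A stand-alone sketch of those checks (the free function and default values are illustrative, not the actual API):

    #include <cstdint>
    #include <stdexcept>
    #include <string>

    struct QueueSettings
    {
        uint64_t max_size_rows = 1048576;
        uint64_t reserved_size_rows = 8192;
        uint64_t buffer_size_rows_flush_threshold = 0;   /// 0 = not set in config
    };

    static void validate(QueueSettings & s, const std::string & prefix)
    {
        if (s.max_size_rows < 1)
            throw std::invalid_argument(prefix + ".max_size_rows should be at least 1");
        if (s.max_size_rows < s.reserved_size_rows)
            throw std::invalid_argument(prefix + ".max_size_rows should be >= " + prefix + ".reserved_size_rows");
        if (s.buffer_size_rows_flush_threshold == 0)
            s.buffer_size_rows_flush_threshold = s.max_size_rows / 2;   /// default: half of max
    }

    int main()
    {
        QueueSettings s;
        validate(s, "query_log");
    }
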
@@ -325,22 +357,25 @@ void SystemLogs::shutdown()
log->shutdown();
}
+void SystemLogs::handleCrash()
+{
+ for (auto & log : logs)
+ log->handleCrash();
+}
template <typename LogElement>
SystemLog<LogElement>::SystemLog(
ContextPtr context_,
- const String & database_name_,
- const String & table_name_,
- const String & storage_def_,
- size_t flush_interval_milliseconds_)
- : WithContext(context_)
- , table_id(database_name_, table_name_)
- , storage_def(storage_def_)
+ const SystemLogSettings & settings_,
+ std::shared_ptr<SystemLogQueue<LogElement>> queue_)
+ : Base(settings_.queue_settings, queue_)
+ , WithContext(context_)
+ , log(&Poco::Logger::get("SystemLog (" + settings_.queue_settings.database + "." + settings_.queue_settings.table + ")"))
+ , table_id(settings_.queue_settings.database, settings_.queue_settings.table)
+ , storage_def(settings_.engine)
, create_query(serializeAST(*getCreateTableQuery()))
- , flush_interval_milliseconds(flush_interval_milliseconds_)
{
- assert(database_name_ == DatabaseCatalog::SYSTEM_DATABASE);
- log = &Poco::Logger::get("SystemLog (" + database_name_ + "." + table_name_ + ")");
+ assert(settings_.queue_settings.database == DatabaseCatalog::SYSTEM_DATABASE);
}
template <typename LogElement>
@@ -353,6 +388,26 @@ void SystemLog<LogElement>::shutdown()
table->flushAndShutdown();
}
+template <typename LogElement>
+void SystemLog<LogElement>::stopFlushThread()
+{
+ {
+ std::lock_guard lock(thread_mutex);
+
+ if (!saving_thread || !saving_thread->joinable())
+ return;
+
+ if (is_shutdown)
+ return;
+
+ is_shutdown = true;
+ queue->shutdown();
+ }
+
+ saving_thread->join();
+}
+
+
template <typename LogElement>
void SystemLog<LogElement>::savingThreadFunction()
{
@@ -370,27 +425,7 @@ void SystemLog<LogElement>::savingThreadFunction()
// Should we prepare table even if there are no new messages.
bool should_prepare_tables_anyway = false;
- {
- std::unique_lock lock(mutex);
- flush_event.wait_for(lock,
- std::chrono::milliseconds(flush_interval_milliseconds),
- [&] ()
- {
- return requested_flush_up_to > flushed_up_to || is_shutdown || is_force_prepare_tables;
- }
- );
-
- queue_front_index += queue.size();
- to_flush_end = queue_front_index;
- // Swap with existing array from previous flush, to save memory
- // allocations.
- to_flush.resize(0);
- queue.swap(to_flush);
-
- should_prepare_tables_anyway = is_force_prepare_tables;
-
- exit_this_thread = is_shutdown;
- }
+ to_flush_end = queue->pop(to_flush, should_prepare_tables_anyway, exit_this_thread);
if (to_flush.empty())
{
@@ -399,9 +434,7 @@ void SystemLog::savingThreadFunction()
prepareTable();
LOG_TRACE(log, "Table created (force)");
- std::lock_guard lock(mutex);
- is_force_prepare_tables = false;
- flush_event.notify_all();
+ queue->confirm(to_flush_end);
}
}
else
@@ -473,12 +506,7 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
tryLogCurrentException(__PRETTY_FUNCTION__);
}
- {
- std::lock_guard lock(mutex);
- flushed_up_to = to_flush_end;
- is_force_prepare_tables = false;
- flush_event.notify_all();
- }
+ queue->confirm(to_flush_end);
LOG_TRACE(log, "Flushed system log up to offset {}", to_flush_end);
}
@@ -618,7 +646,6 @@ ASTPtr SystemLog<LogElement>::getCreateTableQuery()
return create;
}
-
#define INSTANTIATE_SYSTEM_LOG(ELEMENT) template class SystemLog<ELEMENT>;
SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG)
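
stopFlushThread() above follows the standard shutdown shape for a worker thread: flip the shutdown state under the mutex, then join outside the lock so the saving thread can still acquire the mutex while draining. A self-contained sketch of that pattern (toy Worker type; a condition variable stands in for the queue's wakeup):

    #include <condition_variable>
    #include <mutex>
    #include <optional>
    #include <thread>

    struct Worker
    {
        std::mutex thread_mutex;
        std::optional<std::thread> saving_thread;
        bool is_shutdown = false;
        std::condition_variable cv;

        void start()
        {
            saving_thread.emplace([this]
            {
                std::unique_lock lock(thread_mutex);
                cv.wait(lock, [this] { return is_shutdown; });
            });
        }

        void stop()
        {
            {
                std::lock_guard lock(thread_mutex);
                if (!saving_thread || !saving_thread->joinable() || is_shutdown)
                    return;
                is_shutdown = true;
            }
            cv.notify_all();
            saving_thread->join();   /// join outside the lock
        }
    };

    int main()
    {
        Worker w;
        w.start();
        w.stop();
    }
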
diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h
index 84b70c67e2a..437b1b2a6bb 100644
--- a/src/Interpreters/SystemLog.h
+++ b/src/Interpreters/SystemLog.h
@@ -58,6 +58,7 @@ struct SystemLogs
~SystemLogs();
void shutdown();
+ void handleCrash();
std::shared_ptr query_log; /// Used to log queries.
std::shared_ptr query_thread_log; /// Used to log query threads.
@@ -87,6 +88,12 @@ struct SystemLogs
std::vector logs;
};
+struct SystemLogSettings
+{
+ SystemLogQueueSettings queue_settings;
+
+ String engine;
+};
template <typename LogElement>
class SystemLog : public SystemLogBase<LogElement>, private boost::noncopyable, WithContext
@@ -103,37 +110,35 @@ public:
* where N is the minimal number starting from 1 such that a table with the corresponding name doesn't exist yet;
* and a new table gets created, as if the previous table did not exist.
*/
- SystemLog(
- ContextPtr context_,
- const String & database_name_,
- const String & table_name_,
- const String & storage_def_,
- size_t flush_interval_milliseconds_);
+ SystemLog(ContextPtr context_,
+ const SystemLogSettings& settings_,
+ std::shared_ptr<SystemLogQueue<LogElement>> queue_ = nullptr);
+
+ /** Append a record to the log.
+ * Writing to the table is done asynchronously and, in case of failure, the record could be lost.
+ */
void shutdown() override;
+ void stopFlushThread() override;
+
protected:
- using ISystemLog::mutex;
+ Poco::Logger * log;
+
using ISystemLog::is_shutdown;
- using ISystemLog::flush_event;
- using ISystemLog::stopFlushThread;
- using Base::log;
+ using ISystemLog::saving_thread;
+ using ISystemLog::thread_mutex;
using Base::queue;
- using Base::queue_front_index;
- using Base::is_force_prepare_tables;
- using Base::requested_flush_up_to;
- using Base::flushed_up_to;
- using Base::logged_queue_full_at_index;
private:
+
/* Saving thread data */
const StorageID table_id;
const String storage_def;
String create_query;
String old_create_query;
bool is_prepared = false;
- const size_t flush_interval_milliseconds;
/** Creates new table if it does not exist.
* Renames old table if its structure is not suitable.
diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h
index ba3befab59b..5d14a57759f 100644
--- a/src/Interpreters/TableJoin.h
+++ b/src/Interpreters/TableJoin.h
@@ -223,10 +223,10 @@ public:
{
/// When join_algorithm = 'default' (not specified by user) we use hash or direct algorithm.
/// It's behaviour that was initially supported by clickhouse.
- bool is_enbaled_by_default = val == JoinAlgorithm::DEFAULT
+ bool is_enabled_by_default = val == JoinAlgorithm::DEFAULT
|| val == JoinAlgorithm::HASH
|| val == JoinAlgorithm::DIRECT;
- if (join_algorithm.isSet(JoinAlgorithm::DEFAULT) && is_enbaled_by_default)
+ if (join_algorithm.isSet(JoinAlgorithm::DEFAULT) && is_enabled_by_default)
return true;
return join_algorithm.isSet(val);
}
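
For reference, the (now correctly spelled) flag logic reads: an algorithm is enabled either because the user selected it explicitly, or because the setting is still DEFAULT and the algorithm is one of the historical defaults (hash or direct). A toy version with a std::set in place of the real flag set:

    #include <cassert>
    #include <set>

    enum class JoinAlgorithm { DEFAULT, HASH, DIRECT, GRACE_HASH };

    static bool isEnabledAlgorithm(const std::set<JoinAlgorithm> & setting, JoinAlgorithm val)
    {
        /// DEFAULT in the setting enables the algorithms that were
        /// historically the defaults (hash and direct).
        bool is_enabled_by_default = val == JoinAlgorithm::DEFAULT
            || val == JoinAlgorithm::HASH
            || val == JoinAlgorithm::DIRECT;
        if (setting.count(JoinAlgorithm::DEFAULT) && is_enabled_by_default)
            return true;
        return setting.count(val) > 0;
    }

    int main()
    {
        std::set<JoinAlgorithm> setting{JoinAlgorithm::DEFAULT};
        assert(isEnabledAlgorithm(setting, JoinAlgorithm::HASH));
        assert(!isEnabledAlgorithm(setting, JoinAlgorithm::GRACE_HASH));
    }
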
diff --git a/src/Interpreters/TextLog.cpp b/src/Interpreters/TextLog.cpp
index 45d5a7b2344..3951a41f0c5 100644
--- a/src/Interpreters/TextLog.cpp
+++ b/src/Interpreters/TextLog.cpp
@@ -80,15 +80,10 @@ void TextLogElement::appendToBlock(MutableColumns & columns) const
columns[i++]->insert(message_format_string);
}
-TextLog::TextLog(ContextPtr context_, const String & database_name_,
- const String & table_name_, const String & storage_def_,
- size_t flush_interval_milliseconds_)
- : SystemLog<TextLogElement>(context_, database_name_, table_name_,
- storage_def_, flush_interval_milliseconds_)
+TextLog::TextLog(ContextPtr context_,
+ const SystemLogSettings & settings)
+ : SystemLog<TextLogElement>(context_, settings, getLogQueue(settings.queue_settings))
{
- // SystemLog methods may write text logs, so we disable logging for the text
- // log table to avoid recursion.
- log->setLevel(0);
}
}
diff --git a/src/Interpreters/TextLog.h b/src/Interpreters/TextLog.h
index 6efc1c906d4..4bfed5327f3 100644
--- a/src/Interpreters/TextLog.h
+++ b/src/Interpreters/TextLog.h
@@ -40,12 +40,17 @@ struct TextLogElement
class TextLog : public SystemLog<TextLogElement>