diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index ef554a1b0ff..6b05f1fe9f4 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -138,19 +138,26 @@ jobs: ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ - DockerServerImages: + DockerServerImage: needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Docker server and keeper images + test_name: Docker server image runner_type: style-checker data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 # It MUST BE THE SAME for all dependencies and the job itself run_command: | - cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_server.py --release-type head --no-push \ --image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse + DockerKeeperImage: + needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_test.yml + with: + test_name: Docker keeper image + runner_type: style-checker + data: ${{ needs.RunConfig.outputs.data }} + run_command: | python3 docker_server.py --release-type head --no-push \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse ############################################################################################ diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 6d150f37a27..fff058ecf87 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -242,20 +242,26 @@ jobs: ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ - DockerServerImages: + DockerServerImage: needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Docker server and keeper images + test_name: Docker server image runner_type: style-checker data: ${{ needs.RunConfig.outputs.data }} - # FIXME: avoid using 0 checkout - checkout_depth: 0 # It MUST BE THE SAME for all dependencies and the job itself run_command: | - cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_server.py --release-type head \ --image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse + DockerKeeperImage: + needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_test.yml + with: + test_name: Docker keeper image + runner_type: style-checker + data: ${{ needs.RunConfig.outputs.data }} + run_command: | python3 docker_server.py --release-type head \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse ############################################################################################ diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index b3ac2135e50..56617294fb6 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -104,7 +104,7 @@ jobs: if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Fast tests + test_name: Fast test runner_type: builder data: ${{ needs.RunConfig.outputs.data }} run_command: | @@ -273,19 +273,26 @@ jobs: ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ - DockerServerImages: + DockerServerImage: needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Docker server and keeper images + test_name: Docker server image runner_type: style-checker data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 # It MUST BE THE SAME for all dependencies and the job itself run_command: | - cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_server.py --release-type head --no-push \ --image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse + DockerKeeperImage: + needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_test.yml + with: + test_name: Docker keeper image + runner_type: style-checker + data: ${{ needs.RunConfig.outputs.data }} + run_command: | python3 docker_server.py --release-type head --no-push \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse ############################################################################################ diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 69229ef75df..6d54e558b70 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -153,19 +153,26 @@ jobs: ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ - DockerServerImages: + DockerServerImage: needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Docker server and keeper images + test_name: Docker server image runner_type: style-checker data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 run_command: | - cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_server.py --release-type head --no-push \ --image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse + DockerKeeperImage: + needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_test.yml + with: + test_name: Docker keeper image + runner_type: style-checker + data: ${{ needs.RunConfig.outputs.data }} + run_command: | python3 docker_server.py --release-type head --no-push \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse ############################################################################################ @@ -456,7 +463,8 @@ jobs: FinishCheck: if: ${{ !failure() && !cancelled() }} needs: - - DockerServerImages + - DockerServerImage + - DockerKeeperImage - BuilderReport - BuilderSpecialReport - MarkReleaseReady diff --git a/CHANGELOG.md b/CHANGELOG.md index ea3c954776a..50db3292ca8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,18 +21,18 @@ #### New Feature * Implement Variant data type that represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value). Variant type is available under a setting `allow_experimental_variant_type`. Reference: [#54864](https://github.com/ClickHouse/ClickHouse/issues/54864). [#58047](https://github.com/ClickHouse/ClickHouse/pull/58047) ([Kruglov Pavel](https://github.com/Avogar)). * Certain settings (currently `min_compress_block_size` and `max_compress_block_size`) can now be specified at column-level where they take precedence over the corresponding table-level setting. Example: `CREATE TABLE tab (col String SETTINGS (min_compress_block_size = 81920, max_compress_block_size = 163840)) ENGINE = MergeTree ORDER BY tuple();`. [#55201](https://github.com/ClickHouse/ClickHouse/pull/55201) ([Duc Canh Le](https://github.com/canhld94)). -* Add `quantileDDSketch` aggregate function as well as the corresponding `quantilesDDSketch` and `medianDDSketch`. It is based on the DDSketch https://www.vldb.org/pvldb/vol12/p2195-masson.pdf. ### Documentation entry for user-facing changes. [#56342](https://github.com/ClickHouse/ClickHouse/pull/56342) ([Srikanth Chekuri](https://github.com/srikanthccv)). +* Add `quantileDD` aggregate function as well as the corresponding `quantilesDD` and `medianDD`. It is based on the DDSketch https://www.vldb.org/pvldb/vol12/p2195-masson.pdf. ### Documentation entry for user-facing changes. [#56342](https://github.com/ClickHouse/ClickHouse/pull/56342) ([Srikanth Chekuri](https://github.com/srikanthccv)). * Allow to configure any kind of object storage with any kind of metadata type. [#58357](https://github.com/ClickHouse/ClickHouse/pull/58357) ([Kseniia Sumarokova](https://github.com/kssenii)). * Added `null_status_on_timeout_only_active` and `throw_only_active` modes for `distributed_ddl_output_mode` that allow to avoid waiting for inactive replicas. [#58350](https://github.com/ClickHouse/ClickHouse/pull/58350) ([Alexander Tokmakov](https://github.com/tavplubix)). * Allow partitions from tables with different partition expressions to be attached when the destination table partition expression doesn't re-partition/split the part. [#39507](https://github.com/ClickHouse/ClickHouse/pull/39507) ([Arthur Passos](https://github.com/arthurpassos)). -* Add function `arrayShingles()` to compute subarrays, e.g. `arrayShingles([1, 2, 3, 4, 5], 3)` returns `[[1,2,3],[2,3,4],[3,4,5]]`. [#58396](https://github.com/ClickHouse/ClickHouse/pull/58396) ([Zheng Miao](https://github.com/zenmiao7)). -* Added functions `punycodeEncode()`, `punycodeDecode()`, `idnaEncode()` and `idnaDecode()` which are useful for translating international domain names to an ASCII representation according to the IDNA standard. [#58454](https://github.com/ClickHouse/ClickHouse/pull/58454) ([Robert Schulze](https://github.com/rschu1ze)). -* Added string similarity functions `dramerauLevenshteinDistance()`, `jaroSimilarity()` and `jaroWinklerSimilarity()`. [#58531](https://github.com/ClickHouse/ClickHouse/pull/58531) ([Robert Schulze](https://github.com/rschu1ze)). +* Add function `arrayShingles` to compute subarrays, e.g. `arrayShingles([1, 2, 3, 4, 5], 3)` returns `[[1,2,3],[2,3,4],[3,4,5]]`. [#58396](https://github.com/ClickHouse/ClickHouse/pull/58396) ([Zheng Miao](https://github.com/zenmiao7)). +* Added functions `punycodeEncode`, `punycodeDecode`, `idnaEncode` and `idnaDecode` which are useful for translating international domain names to an ASCII representation according to the IDNA standard. [#58454](https://github.com/ClickHouse/ClickHouse/pull/58454) ([Robert Schulze](https://github.com/rschu1ze)). +* Added string similarity functions `dramerauLevenshteinDistance`, `jaroSimilarity` and `jaroWinklerSimilarity`. [#58531](https://github.com/ClickHouse/ClickHouse/pull/58531) ([Robert Schulze](https://github.com/rschu1ze)). * Add two settings `output_format_compression_level` to change output compression level and `output_format_compression_zstd_window_log` to explicitly set compression window size and enable long-range mode for zstd compression if output compression method is `zstd`. Applied for `INTO OUTFILE` and when writing to table functions `file`, `url`, `hdfs`, `s3`, and `azureBlobStorage`. [#58539](https://github.com/ClickHouse/ClickHouse/pull/58539) ([Duc Canh Le](https://github.com/canhld94)). * Automatically disable ANSI escape sequences in Pretty formats if the output is not a terminal. Add new `auto` mode to setting `output_format_pretty_color`. [#58614](https://github.com/ClickHouse/ClickHouse/pull/58614) ([Shaun Struwig](https://github.com/Blargian)). -* Added function `sqidDecode()` which decodes [Sqids](https://sqids.org/). [#58544](https://github.com/ClickHouse/ClickHouse/pull/58544) ([Robert Schulze](https://github.com/rschu1ze)). +* Added function `sqidDecode` which decodes [Sqids](https://sqids.org/). [#58544](https://github.com/ClickHouse/ClickHouse/pull/58544) ([Robert Schulze](https://github.com/rschu1ze)). * Allow to read Bool values into String in JSON input formats. It's done under a setting `input_format_json_read_bools_as_strings` that is enabled by default. [#58561](https://github.com/ClickHouse/ClickHouse/pull/58561) ([Kruglov Pavel](https://github.com/Avogar)). -* Added function `seriesDecomposeSTL()` which decomposes a time series into a season, a trend and a residual component. [#57078](https://github.com/ClickHouse/ClickHouse/pull/57078) ([Bhavna Jindal](https://github.com/bhavnajindal)). +* Added function `seriesDecomposeSTL` which decomposes a time series into a season, a trend and a residual component. [#57078](https://github.com/ClickHouse/ClickHouse/pull/57078) ([Bhavna Jindal](https://github.com/bhavnajindal)). * Introduced MySQL Binlog Client for MaterializedMySQL: One binlog connection for many databases. [#57323](https://github.com/ClickHouse/ClickHouse/pull/57323) ([Val Doroshchuk](https://github.com/valbok)). * Intel QuickAssist Technology (QAT) provides hardware-accelerated compression and cryptograpy. ClickHouse got a new compression codec `ZSTD_QAT` which utilizes QAT for zstd compression. The codec uses [Intel's QATlib](https://github.com/intel/qatlib) and [Inte's QAT ZSTD Plugin](https://github.com/intel/QAT-ZSTD-Plugin). Right now, only compression can be accelerated in hardware (a software fallback kicks in in case QAT could not be initialized), decompression always runs in software. [#57509](https://github.com/ClickHouse/ClickHouse/pull/57509) ([jasperzhu](https://github.com/jinjunzh)). * Implementing the new way how object storage keys are generated for s3 disks. Now the format could be defined in terms of `re2` regex syntax with `key_template` option in disc description. [#57663](https://github.com/ClickHouse/ClickHouse/pull/57663) ([Sema Checherinda](https://github.com/CheSema)). diff --git a/SECURITY.md b/SECURITY.md index a200e172a3b..79ca0269838 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 24.1 | ✔️ | | 23.12 | ✔️ | | 23.11 | ✔️ | -| 23.10 | ✔️ | +| 23.10 | ❌ | | 23.9 | ❌ | | 23.8 | ✔️ | | 23.7 | ❌ | diff --git a/base/base/sort.h b/base/base/sort.h index 1a814587763..99bf8a0830e 100644 --- a/base/base/sort.h +++ b/base/base/sort.h @@ -64,19 +64,14 @@ using ComparatorWrapper = Comparator; #include -template -void nth_element(RandomIt first, RandomIt nth, RandomIt last) +template +void nth_element(RandomIt first, RandomIt nth, RandomIt last, Compare compare) { - using value_type = typename std::iterator_traits::value_type; - using comparator = std::less; - - comparator compare; - ComparatorWrapper compare_wrapper = compare; - #ifndef NDEBUG ::shuffle(first, last); #endif + ComparatorWrapper compare_wrapper = compare; ::miniselect::floyd_rivest_select(first, nth, last, compare_wrapper); #ifndef NDEBUG @@ -87,6 +82,15 @@ void nth_element(RandomIt first, RandomIt nth, RandomIt last) #endif } +template +void nth_element(RandomIt first, RandomIt nth, RandomIt last) +{ + using value_type = typename std::iterator_traits::value_type; + using comparator = std::less; + + ::nth_element(first, nth, last, comparator()); +} + template void partial_sort(RandomIt first, RandomIt middle, RandomIt last, Compare compare) { diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index e5a8c064808..885080a3e38 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54482) +SET(VERSION_REVISION 54483) SET(VERSION_MAJOR 24) -SET(VERSION_MINOR 1) +SET(VERSION_MINOR 2) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH a2faa65b080a587026c86844f3a20c74d23a86f8) -SET(VERSION_DESCRIBE v24.1.1.1-testing) -SET(VERSION_STRING 24.1.1.1) +SET(VERSION_GITHASH 5a024dfc0936e062770d0cfaad0805b57c1fba17) +SET(VERSION_DESCRIBE v24.2.1.1-testing) +SET(VERSION_STRING 24.2.1.1) # end of autochange diff --git a/contrib/corrosion-cmake/CMakeLists.txt b/contrib/corrosion-cmake/CMakeLists.txt index 9b98ed6efb3..4f60304d74d 100644 --- a/contrib/corrosion-cmake/CMakeLists.txt +++ b/contrib/corrosion-cmake/CMakeLists.txt @@ -16,29 +16,30 @@ message(STATUS "Checking Rust toolchain for current target") # See https://doc.rust-lang.org/nightly/rustc/platform-support.html -if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le") - set(Rust_CARGO_TARGET "powerpc64le-unknown-linux-gnu") -elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl")) - set(Rust_CARGO_TARGET "x86_64-unknown-linux-musl") -elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") - set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu") -elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl")) - set(Rust_CARGO_TARGET "aarch64-unknown-linux-musl") -elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") - set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu") -elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) - set(Rust_CARGO_TARGET "x86_64-apple-darwin") -elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "aarch64")) - set(Rust_CARGO_TARGET "aarch64-apple-darwin") -elseif((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) - set(Rust_CARGO_TARGET "x86_64-unknown-freebsd") -elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-riscv64") - set(Rust_CARGO_TARGET "riscv64gc-unknown-linux-gnu") -else() - message(FATAL_ERROR "Unsupported rust target") -endif() - -message(STATUS "Switched Rust target to ${Rust_CARGO_TARGET}") +if(DEFINED CMAKE_TOOLCHAIN_FILE) + if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le") + set(Rust_CARGO_TARGET "powerpc64le-unknown-linux-gnu") + elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl")) + set(Rust_CARGO_TARGET "x86_64-unknown-linux-musl") + elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") + set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu") + elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl")) + set(Rust_CARGO_TARGET "aarch64-unknown-linux-musl") + elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") + set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu") + elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) + set(Rust_CARGO_TARGET "x86_64-apple-darwin") + elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "aarch64")) + set(Rust_CARGO_TARGET "aarch64-apple-darwin") + elseif((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) + set(Rust_CARGO_TARGET "x86_64-unknown-freebsd") + elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-riscv64") + set(Rust_CARGO_TARGET "riscv64gc-unknown-linux-gnu") + else() + message(FATAL_ERROR "Unsupported rust target") + endif() + message(STATUS "Switched Rust target to ${Rust_CARGO_TARGET}") +endif () # FindRust.cmake list(APPEND CMAKE_MODULE_PATH "${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake") diff --git a/contrib/curl b/contrib/curl index d755a5f7c00..7161cb17c01 160000 --- a/contrib/curl +++ b/contrib/curl @@ -1 +1 @@ -Subproject commit d755a5f7c009dd63a61b2c745180d8ba937cbfeb +Subproject commit 7161cb17c01dcff1dc5bf89a18437d9d729f1ecd diff --git a/contrib/libxml2 b/contrib/libxml2 index 223cb03a5d2..8292f361458 160000 --- a/contrib/libxml2 +++ b/contrib/libxml2 @@ -1 +1 @@ -Subproject commit 223cb03a5d27b1b2393b266a8657443d046139d6 +Subproject commit 8292f361458fcffe0bff515a385be02e9d35582c diff --git a/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h b/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h index c2faeb47cb1..d8535e91a0e 100644 --- a/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h +++ b/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h @@ -21,7 +21,7 @@ extern "C" { * your library and includes mismatch */ #ifndef LIBXML2_COMPILING_MSCCDEF -XMLPUBFUN void XMLCALL xmlCheckVersion(int version); +XMLPUBFUN void xmlCheckVersion(int version); #endif /* LIBXML2_COMPILING_MSCCDEF */ /** @@ -29,28 +29,28 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); * * the version string like "1.2.3" */ -#define LIBXML_DOTTED_VERSION "2.10.3" +#define LIBXML_DOTTED_VERSION "2.12.4" /** * LIBXML_VERSION: * * the version number: 1.2.3 value is 10203 */ -#define LIBXML_VERSION 21003 +#define LIBXML_VERSION 21204 /** * LIBXML_VERSION_STRING: * * the version number string, 1.2.3 value is "10203" */ -#define LIBXML_VERSION_STRING "21003" +#define LIBXML_VERSION_STRING "21204" /** * LIBXML_VERSION_EXTRA: * * extra version information, used to show a git commit description */ -#define LIBXML_VERSION_EXTRA "" +#define LIBXML_VERSION_EXTRA "-GITv2.12.4" /** * LIBXML_TEST_VERSION: @@ -58,7 +58,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); * Macro to check that the libxml version in use is compatible with * the version the software has been compiled against */ -#define LIBXML_TEST_VERSION xmlCheckVersion(21003); +#define LIBXML_TEST_VERSION xmlCheckVersion(21204); #ifndef VMS #if 0 @@ -270,7 +270,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); * * Whether iconv support is available */ -#if 0 +#if 1 #define LIBXML_ICONV_ENABLED #endif @@ -313,7 +313,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); /** * LIBXML_DEBUG_RUNTIME: * - * Whether the runtime debugging is configured in + * Removed */ #if 0 #define LIBXML_DEBUG_RUNTIME @@ -409,12 +409,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); #endif #ifdef __GNUC__ - -/** - * ATTRIBUTE_UNUSED: - * - * Macro used to signal to GCC unused function parameters - */ +/** DOC_DISABLE */ #ifndef ATTRIBUTE_UNUSED # if ((__GNUC__ > 2) || ((__GNUC__ == 2) && (__GNUC_MINOR__ >= 7))) @@ -424,12 +419,6 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); # endif #endif -/** - * LIBXML_ATTR_ALLOC_SIZE: - * - * Macro used to indicate to GCC this is an allocator function - */ - #ifndef LIBXML_ATTR_ALLOC_SIZE # if (!defined(__clang__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 3)))) # define LIBXML_ATTR_ALLOC_SIZE(x) __attribute__((alloc_size(x))) @@ -440,12 +429,6 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); # define LIBXML_ATTR_ALLOC_SIZE(x) #endif -/** - * LIBXML_ATTR_FORMAT: - * - * Macro used to indicate to GCC the parameter are printf like - */ - #ifndef LIBXML_ATTR_FORMAT # if ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3))) # define LIBXML_ATTR_FORMAT(fmt,args) __attribute__((__format__(__printf__,fmt,args))) @@ -457,44 +440,69 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); #endif #ifndef XML_DEPRECATED -# ifdef IN_LIBXML +# if defined (IN_LIBXML) || (__GNUC__ * 100 + __GNUC_MINOR__ < 301) # define XML_DEPRECATED -# else /* Available since at least GCC 3.1 */ +# else # define XML_DEPRECATED __attribute__((deprecated)) # endif #endif +#if defined(__clang__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 406) + #if defined(__clang__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 800) + #define XML_IGNORE_FPTR_CAST_WARNINGS \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wpedantic\"") \ + _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") + #else + #define XML_IGNORE_FPTR_CAST_WARNINGS \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wpedantic\"") + #endif + #define XML_POP_WARNINGS \ + _Pragma("GCC diagnostic pop") +#else + #define XML_IGNORE_FPTR_CAST_WARNINGS + #define XML_POP_WARNINGS +#endif + #else /* ! __GNUC__ */ -/** - * ATTRIBUTE_UNUSED: - * - * Macro used to signal to GCC unused function parameters - */ #define ATTRIBUTE_UNUSED -/** - * LIBXML_ATTR_ALLOC_SIZE: - * - * Macro used to indicate to GCC this is an allocator function - */ #define LIBXML_ATTR_ALLOC_SIZE(x) -/** - * LIBXML_ATTR_FORMAT: - * - * Macro used to indicate to GCC the parameter are printf like - */ #define LIBXML_ATTR_FORMAT(fmt,args) -/** - * XML_DEPRECATED: - * - * Macro used to indicate that a function, variable, type or struct member - * is deprecated. - */ #ifndef XML_DEPRECATED -#define XML_DEPRECATED +# if defined (IN_LIBXML) || !defined (_MSC_VER) +# define XML_DEPRECATED +/* Available since Visual Studio 2005 */ +# elif defined (_MSC_VER) && (_MSC_VER >= 1400) +# define XML_DEPRECATED __declspec(deprecated) +# endif +#endif +#if defined (_MSC_VER) && (_MSC_VER >= 1400) +# define XML_IGNORE_FPTR_CAST_WARNINGS __pragma(warning(push)) +#else +# define XML_IGNORE_FPTR_CAST_WARNINGS +#endif +#ifndef XML_POP_WARNINGS +# if defined (_MSC_VER) && (_MSC_VER >= 1400) +# define XML_POP_WARNINGS __pragma(warning(pop)) +# else +# define XML_POP_WARNINGS +# endif #endif #endif /* __GNUC__ */ +#define XML_NO_ATTR + +#ifdef LIBXML_THREAD_ENABLED + #define XML_DECLARE_GLOBAL(name, type, attrs) \ + attrs XMLPUBFUN type *__##name(void); + #define XML_GLOBAL_MACRO(name) (*__##name()) +#else + #define XML_DECLARE_GLOBAL(name, type, attrs) \ + attrs XMLPUBVAR type name; +#endif + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/contrib/llvm-project-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt index d09060912d8..76e620314a2 100644 --- a/contrib/llvm-project-cmake/CMakeLists.txt +++ b/contrib/llvm-project-cmake/CMakeLists.txt @@ -1,5 +1,6 @@ -if (APPLE OR SANITIZE STREQUAL "undefined" OR SANITIZE STREQUAL "memory") - # llvm-tblgen, that is used during LLVM build, doesn't work with UBSan. +if (APPLE OR SANITIZE STREQUAL "memory") + # llvm-tblgen, that is used during LLVM build, will throw MSAN errors when running (breaking the build) + # TODO: Retest when upgrading LLVM or build only llvm-tblgen without sanitizers set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF) set (ENABLE_DWARF_PARSER_DEFAULT OFF) else() diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 4b5e8cd3970..82405d63df9 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.12.2.59" +ARG VERSION="24.1.1.2048" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 452d8539a48..f48e14aba50 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.12.2.59" +ARG VERSION="24.1.1.2048" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 0cefa3c14cb..60ef7a67563 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.12.2.59" +ARG VERSION="24.1.1.2048" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 05b9ec2a06f..ea76447aef2 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -293,10 +293,10 @@ if [ $failed_to_save_logs -ne 0 ]; then # for files >64MB, we want this files to be compressed explicitly for table in query_log zookeeper_log trace_log transactions_info_log metric_log do - clickhouse-local "$data_path_config" --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: + clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then - clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||: - clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||: + clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||: + clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||: fi done fi diff --git a/docs/changelogs/v24.1.1.2048-stable.md b/docs/changelogs/v24.1.1.2048-stable.md new file mode 100644 index 00000000000..8e4647da86e --- /dev/null +++ b/docs/changelogs/v24.1.1.2048-stable.md @@ -0,0 +1,438 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.1.1.2048-stable (5a024dfc093) FIXME as compared to v23.12.1.1368-stable (a2faa65b080) + +#### Backward Incompatible Change +* The setting `print_pretty_type_names` is turned on by default. You can turn it off to keep the old behavior or `SET compatibility = '23.12'`. [#57726](https://github.com/ClickHouse/ClickHouse/pull/57726) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The MergeTree setting `clean_deleted_rows` is deprecated, it has no effect anymore. The `CLEANUP` keyword for `OPTIMIZE` is not allowed by default (unless `allow_experimental_replacing_merge_with_cleanup` is enabled). [#58316](https://github.com/ClickHouse/ClickHouse/pull/58316) ([Alexander Tokmakov](https://github.com/tavplubix)). +* The function `reverseDNSQuery` is no longer available. This closes [#58368](https://github.com/ClickHouse/ClickHouse/issues/58368). [#58369](https://github.com/ClickHouse/ClickHouse/pull/58369) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable various changes to improve the access control in the configuration file. These changes affect the behavior, and you check the `config.xml` in the `access_control_improvements` section. In case you are not confident, keep the values in the configuration file as they were in the previous version. [#58584](https://github.com/ClickHouse/ClickHouse/pull/58584) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow queries without aliases for subqueries for `PASTE JOIN`. [#58654](https://github.com/ClickHouse/ClickHouse/pull/58654) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix sumMapFiltered with NaN values. NaN values are now placed at the end (instead of randomly) and considered different from any values. `-0` is now also treated as equal to `0`; since 0 values are discarded, `-0` values are discarded too. [#58959](https://github.com/ClickHouse/ClickHouse/pull/58959) ([Raúl Marín](https://github.com/Algunenano)). +* The function `visibleWidth` will behave according to the docs. In previous versions, it simply counted code points after string serialization, like the `lengthUTF8` function, but didn't consider zero-width and combining characters, full-width characters, tabs, and deletes. Now the behavior is changed accordingly. If you want to keep the old behavior, set `function_visible_width_behavior` to `0`, or set `compatibility` to `23.12` or lower. [#59022](https://github.com/ClickHouse/ClickHouse/pull/59022) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Kusto dialect is disabled until these two bugs will be fixed: [#59037](https://github.com/ClickHouse/ClickHouse/issues/59037) and [#59036](https://github.com/ClickHouse/ClickHouse/issues/59036). [#59305](https://github.com/ClickHouse/ClickHouse/pull/59305) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Allow partitions from tables with different partition expressions to be attached when the destination table partition expression doesn't re-partition/ split the part. [#39507](https://github.com/ClickHouse/ClickHouse/pull/39507) ([Arthur Passos](https://github.com/arthurpassos)). +* Added statement `SYSTEM RELOAD ASYNCHRONOUS METRICS` which updates the asynchronous metrics. Mostly useful for testing and development. [#53710](https://github.com/ClickHouse/ClickHouse/pull/53710) ([Robert Schulze](https://github.com/rschu1ze)). +* Certain settings (currently `min_compress_block_size` and `max_compress_block_size`) can now be specified at column-level where they take precedence over the corresponding table-level setting. Example: `CREATE TABLE tab (col String SETTINGS (min_compress_block_size = 81920, max_compress_block_size = 163840)) ENGINE = MergeTree ORDER BY tuple();`. [#55201](https://github.com/ClickHouse/ClickHouse/pull/55201) ([Duc Canh Le](https://github.com/canhld94)). +* Add `quantileDDSketch` aggregate function as well as the corresponding `quantilesDDSketch` and `medianDDSketch`. It is based on the DDSketch https://www.vldb.org/pvldb/vol12/p2195-masson.pdf. ### Documentation entry for user-facing changes. [#56342](https://github.com/ClickHouse/ClickHouse/pull/56342) ([Srikanth Chekuri](https://github.com/srikanthccv)). +* Added function `seriesDecomposeSTL()` which decomposes a time series into a season, a trend and a residual component. [#57078](https://github.com/ClickHouse/ClickHouse/pull/57078) ([Bhavna Jindal](https://github.com/bhavnajindal)). +* Introduced MySQL Binlog Client for MaterializedMySQL: One binlog connection for many databases. [#57323](https://github.com/ClickHouse/ClickHouse/pull/57323) ([Val Doroshchuk](https://github.com/valbok)). +* Intel QuickAssist Technology (QAT) provides hardware-accelerated compression and cryptograpy. ClickHouse got a new compression codec `ZSTD_QAT` which utilizes QAT for zstd compression. The codec uses [Intel's QATlib](https://github.com/intel/qatlib) and [Inte's QAT ZSTD Plugin](https://github.com/intel/QAT-ZSTD-Plugin). Right now, only compression can be accelerated in hardware (a software fallback kicks in in case QAT could not be initialized), decompression always runs in software. [#57509](https://github.com/ClickHouse/ClickHouse/pull/57509) ([jasperzhu](https://github.com/jinjunzh)). +* Implementing the new way how object storage keys are generated for s3 disks. Now the format could be defined in terms of `re2` regex syntax with `key_template` option in disc description. [#57663](https://github.com/ClickHouse/ClickHouse/pull/57663) ([Sema Checherinda](https://github.com/CheSema)). +* Table system.dropped_tables_parts contains parts of system.dropped_tables tables (dropped but not yet removed tables). [#58038](https://github.com/ClickHouse/ClickHouse/pull/58038) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Implement Variant data type that represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value). Variant type is available under a setting `allow_experimental_variant_type`. Reference: [#54864](https://github.com/ClickHouse/ClickHouse/issues/54864). [#58047](https://github.com/ClickHouse/ClickHouse/pull/58047) ([Kruglov Pavel](https://github.com/Avogar)). +* Add settings `max_materialized_views_size_for_table` to limit the number of materialized views attached to a table. [#58068](https://github.com/ClickHouse/ClickHouse/pull/58068) ([zhongyuankai](https://github.com/zhongyuankai)). +* `clickhouse-format` improvements: * support INSERT queries with `VALUES` * support comments (use `--comments` to output them) * support `--max_line_length` option to format only long queries in multiline. [#58246](https://github.com/ClickHouse/ClickHouse/pull/58246) ([vdimir](https://github.com/vdimir)). +* Added `null_status_on_timeout_only_active` and `throw_only_active` modes for `distributed_ddl_output_mode` that allow to avoid waiting for inactive replicas. [#58350](https://github.com/ClickHouse/ClickHouse/pull/58350) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add table `system.database_engines`. [#58390](https://github.com/ClickHouse/ClickHouse/pull/58390) ([Bharat Nallan](https://github.com/bharatnc)). +* Added FROM modifier for SYSTEM SYNC REPLICA LIGHTWEIGHT query. The FROM modifier ensures we wait for for fetches and drop-ranges only for the specified source replicas, as well as any replica not in zookeeper or with an empty source_replica. [#58393](https://github.com/ClickHouse/ClickHouse/pull/58393) ([Jayme Bird](https://github.com/jaymebrd)). +* Add function `arrayShingles()` to compute subarrays, e.g. `arrayShingles([1, 2, 3, 4, 5], 3)` returns `[[1,2,3],[2,3,4],[3,4,5]]`. [#58396](https://github.com/ClickHouse/ClickHouse/pull/58396) ([Zheng Miao](https://github.com/zenmiao7)). +* Added functions `punycodeEncode()`, `punycodeDecode()`, `idnaEncode()` and `idnaDecode()` which are useful for translating international domain names to an ASCII representation according to the IDNA standard. [#58454](https://github.com/ClickHouse/ClickHouse/pull/58454) ([Robert Schulze](https://github.com/rschu1ze)). +* Added string similarity functions `dramerauLevenshteinDistance()`, `jaroSimilarity()` and `jaroWinklerSimilarity()`. [#58531](https://github.com/ClickHouse/ClickHouse/pull/58531) ([Robert Schulze](https://github.com/rschu1ze)). +* Add two settings `output_format_compression_level` to change output compression level and `output_format_compression_zstd_window_log` to explicitly set compression window size and enable long-range mode for zstd compression if output compression method is `zstd`. Applied for `INTO OUTFILE` and when writing to table functions `file`, `url`, `hdfs`, `s3`, and `azureBlobStorage`. [#58539](https://github.com/ClickHouse/ClickHouse/pull/58539) ([Duc Canh Le](https://github.com/canhld94)). +* Automatically disable ANSI escape sequences in Pretty formats if the output is not a terminal. Add new `auto` mode to setting `output_format_pretty_color`. [#58614](https://github.com/ClickHouse/ClickHouse/pull/58614) ([Shaun Struwig](https://github.com/Blargian)). +* Added setting `update_insert_deduplication_token_in_dependent_materialized_views`. This setting allows to update insert deduplication token with table identifier during insert in dependent materialized views. Closes [#59165](https://github.com/ClickHouse/ClickHouse/issues/59165). [#59238](https://github.com/ClickHouse/ClickHouse/pull/59238) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Performance Improvement +* More cache-friendly final implementation. Note on the behaviour change: previously queries with `FINAL` modifier that read with a single stream (e.g. `max_threads=1`) produced sorted output without explicitly provided `ORDER BY` clause. This behaviour no longer exists when `enable_vertical_final = true` (and it is so by default). [#54366](https://github.com/ClickHouse/ClickHouse/pull/54366) ([Duc Canh Le](https://github.com/canhld94)). +* Optimize array element function when input is array(map)/array(array(num)/array(array(string))/array(bigint)/array(decimal). Current implementation causes too many reallocs. The optimization speed up by ~6x especially when input type is array(map). [#56403](https://github.com/ClickHouse/ClickHouse/pull/56403) ([李扬](https://github.com/taiyang-li)). +* Bypass `Poco::BasicBufferedStreamBuf` abstraction when reading from S3 (namely `ReadBufferFromIStream`) to avoid extra copying of data. [#56961](https://github.com/ClickHouse/ClickHouse/pull/56961) ([Nikita Taranov](https://github.com/nickitat)). +* Read column once while reading more that one subcolumn from it in Compact parts. [#57631](https://github.com/ClickHouse/ClickHouse/pull/57631) ([Kruglov Pavel](https://github.com/Avogar)). +* Rewrite the AST of sum(column + literal) function. [#57853](https://github.com/ClickHouse/ClickHouse/pull/57853) ([Jiebin Sun](https://github.com/jiebinn)). +* The evaluation of function `match()` now utilizes skipping indices `ngrambf_v1` and `tokenbf_v1`. [#57882](https://github.com/ClickHouse/ClickHouse/pull/57882) ([凌涛](https://github.com/lingtaolf)). +* Default coordinator for parallel replicas is rewritten for better cache locality (same mark ranges are almost always assigned to the same replicas). Consistent hashing is used also during work stealing, so better tail latency is expected. It has been tested for linear scalability on a hundred of replicas. [#57968](https://github.com/ClickHouse/ClickHouse/pull/57968) ([Nikita Taranov](https://github.com/nickitat)). +* MergeTree FINAL to not compare rows from same non-L0 part. [#58142](https://github.com/ClickHouse/ClickHouse/pull/58142) ([Duc Canh Le](https://github.com/canhld94)). +* Speed up iota calls (filling array with consecutive numbers). [#58271](https://github.com/ClickHouse/ClickHouse/pull/58271) ([Raúl Marín](https://github.com/Algunenano)). +* The evaluation of function `match()` now utilizes inverted indices. [#58284](https://github.com/ClickHouse/ClickHouse/pull/58284) ([凌涛](https://github.com/lingtaolf)). +* Speedup MIN/MAX for non numeric types. [#58334](https://github.com/ClickHouse/ClickHouse/pull/58334) ([Raúl Marín](https://github.com/Algunenano)). +* Enable JIT compilation for aggregation without a key. Closes [#41461](https://github.com/ClickHouse/ClickHouse/issues/41461). Originally [#53757](https://github.com/ClickHouse/ClickHouse/issues/53757). [#58440](https://github.com/ClickHouse/ClickHouse/pull/58440) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The performance experiments of **OnTime** on the Intel server with up to AVX2 (and BMI2) support show that this change could effectively improve the QPS of **Q2** and **Q3** by **5.0%** and **3.7%** through reducing the cycle ratio of the hotspot, **_DB::MergeTreeRangeReader::ReadResult::optimize_**, **from 11.48% to 1.09%** and **from 8.09% to 0.67%** respectively while having no impact on others. [#58800](https://github.com/ClickHouse/ClickHouse/pull/58800) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Use one thread less in `clickhouse-local`. [#58968](https://github.com/ClickHouse/ClickHouse/pull/58968) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Large aggregation states of `uniqExact` will be merged in parallel in distrubuted queries. [#59009](https://github.com/ClickHouse/ClickHouse/pull/59009) ([Nikita Taranov](https://github.com/nickitat)). +* Lower memory usage after reading from `MergeTree` tables. [#59290](https://github.com/ClickHouse/ClickHouse/pull/59290) ([Anton Popov](https://github.com/CurtizJ)). +* Lower memory usage in vertical merges. [#59340](https://github.com/ClickHouse/ClickHouse/pull/59340) ([Anton Popov](https://github.com/CurtizJ)). + +#### Improvement +* Enable MySQL/MariaDB on macOS. This closes [#21191](https://github.com/ClickHouse/ClickHouse/issues/21191). [#46316](https://github.com/ClickHouse/ClickHouse/pull/46316) ([Robert Schulze](https://github.com/rschu1ze)). +* Do not interpret numbers with leading zeroes as octals. [#55575](https://github.com/ClickHouse/ClickHouse/pull/55575) ([Joanna Hulboj](https://github.com/jh0x)). +* Replace HTTP outgoing buffering based on std ostream with CH Buffer. Add bytes counting metrics for interfaces. [#56064](https://github.com/ClickHouse/ClickHouse/pull/56064) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Disable `max_rows_in_set_to_optimize_join` by default. [#56396](https://github.com/ClickHouse/ClickHouse/pull/56396) ([vdimir](https://github.com/vdimir)). +* Add `` config parameter that allows avoiding resolving hostnames in DDLWorker. This mitigates the possibility of the queue being stuck in case of a change in cluster definition. Closes [#57573](https://github.com/ClickHouse/ClickHouse/issues/57573). [#57603](https://github.com/ClickHouse/ClickHouse/pull/57603) ([Nikolay Degterinsky](https://github.com/evillique)). +* Increase `load_metadata_threads` to 16 for the filesystem cache. It will make the server start up faster. [#57732](https://github.com/ClickHouse/ClickHouse/pull/57732) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve the `multiIf` function performance when the type is Nullable. [#57745](https://github.com/ClickHouse/ClickHouse/pull/57745) ([KevinyhZou](https://github.com/KevinyhZou)). +* Add ability to throttle merges/mutations (`max_mutations_bandwidth_for_server`/`max_merges_bandwidth_for_server`). [#57877](https://github.com/ClickHouse/ClickHouse/pull/57877) ([Azat Khuzhin](https://github.com/azat)). +* Replaced undocumented (boolean) column `is_hot_reloadable` in system table `system.server_settings` by (Enum8) column `changeable_without_restart` with possible values `No`, `Yes`, `IncreaseOnly` and `DecreaseOnly`. Also documented the column. [#58029](https://github.com/ClickHouse/ClickHouse/pull/58029) ([skyoct](https://github.com/skyoct)). +* ClusterDiscovery supports setting username and password, close [#58063](https://github.com/ClickHouse/ClickHouse/issues/58063). [#58123](https://github.com/ClickHouse/ClickHouse/pull/58123) ([vdimir](https://github.com/vdimir)). +* Support query parameters in ALTER TABLE ... PART. [#58297](https://github.com/ClickHouse/ClickHouse/pull/58297) ([Azat Khuzhin](https://github.com/azat)). +* Create consumers for Kafka tables on fly (but keep them for some period - `kafka_consumers_pool_ttl_ms`, since last used), this should fix problem with statistics for `system.kafka_consumers` (that does not consumed when nobody reads from Kafka table, which leads to live memory leak and slow table detach) and also this PR enables stats for `system.kafka_consumers` by default again. [#58310](https://github.com/ClickHouse/ClickHouse/pull/58310) ([Azat Khuzhin](https://github.com/azat)). +* Sparkbar as an alias to sparkbar. [#58335](https://github.com/ClickHouse/ClickHouse/pull/58335) ([凌涛](https://github.com/lingtaolf)). +* Avoid sending ComposeObject requests after upload to GCS. [#58343](https://github.com/ClickHouse/ClickHouse/pull/58343) ([Azat Khuzhin](https://github.com/azat)). +* Correctly handle keys with dot in the name in configurations XMLs. [#58354](https://github.com/ClickHouse/ClickHouse/pull/58354) ([Azat Khuzhin](https://github.com/azat)). +* Added comments (brief descriptions) to all columns of system tables. The are several reasons fro this: - We use system tables a lot and sometimes is could be very difficult for developer to understand the purpose and the meaning of a particular column. - We change (add new ones or modify existing) system tables a lot and the documentation for them is always outdated. For example take a look at the documentation page for [`system.parts`](https://clickhouse.com/docs/en/operations/system-tables/parts). It misses a lot of columns - We would like to eventually generate documentation directly from ClickHouse. [#58356](https://github.com/ClickHouse/ClickHouse/pull/58356) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Allow to configure any kind of object storage with any kind of metadata type. [#58357](https://github.com/ClickHouse/ClickHouse/pull/58357) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Make function `format` return constant on constant arguments. This closes [#58355](https://github.com/ClickHouse/ClickHouse/issues/58355). [#58358](https://github.com/ClickHouse/ClickHouse/pull/58358) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Attach all system tables in `clickhouse-local`, including `system.parts`. This closes [#58312](https://github.com/ClickHouse/ClickHouse/issues/58312). [#58359](https://github.com/ClickHouse/ClickHouse/pull/58359) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support for `Enum` data types in function `transform`. This closes [#58241](https://github.com/ClickHouse/ClickHouse/issues/58241). [#58360](https://github.com/ClickHouse/ClickHouse/pull/58360) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow registering database engines independently. [#58365](https://github.com/ClickHouse/ClickHouse/pull/58365) ([Bharat Nallan](https://github.com/bharatnc)). +* Adding a setting `max_estimated_execution_time` to separate `max_execution_time` and `max_estimated_execution_time`. [#58402](https://github.com/ClickHouse/ClickHouse/pull/58402) ([Zhang Yifan](https://github.com/zhangyifan27)). +* Allow registering interpreters independently. [#58443](https://github.com/ClickHouse/ClickHouse/pull/58443) ([Bharat Nallan](https://github.com/bharatnc)). +* Provide hint when an invalid database engine name is used. [#58444](https://github.com/ClickHouse/ClickHouse/pull/58444) ([Bharat Nallan](https://github.com/bharatnc)). +* Avoid huge memory consumption during Keeper startup for more cases. [#58455](https://github.com/ClickHouse/ClickHouse/pull/58455) ([Antonio Andelic](https://github.com/antonio2368)). +* Add settings for better control of indexes type in Arrow dictionary. Use signed integer type for indexes by default as Arrow recommends. Closes [#57401](https://github.com/ClickHouse/ClickHouse/issues/57401). [#58519](https://github.com/ClickHouse/ClickHouse/pull/58519) ([Kruglov Pavel](https://github.com/Avogar)). +* Added function `sqidDecode()` which decodes [Sqids](https://sqids.org/). [#58544](https://github.com/ClickHouse/ClickHouse/pull/58544) ([Robert Schulze](https://github.com/rschu1ze)). +* Allow to read Bool values into String in JSON input formats. It's done under a setting `input_format_json_read_bools_as_strings` that is enabled by default. [#58561](https://github.com/ClickHouse/ClickHouse/pull/58561) ([Kruglov Pavel](https://github.com/Avogar)). +* Implement [#58575](https://github.com/ClickHouse/ClickHouse/issues/58575) Support `CLICKHOUSE_PASSWORD_FILE ` environment variable when running the docker image. [#58583](https://github.com/ClickHouse/ClickHouse/pull/58583) ([Eyal Halpern Shalev](https://github.com/Eyal-Shalev)). +* When executing some queries, which require a lot of streams for reading data, the error `"Paste JOIN requires sorted tables only"` was previously thrown. Now the numbers of streams resize to 1 in that case. [#58608](https://github.com/ClickHouse/ClickHouse/pull/58608) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add `SYSTEM JEMALLOC PURGE` for purging unused jemalloc pages, `SYSTEM JEMALLOC [ ENABLE | DISABLE | FLUSH ] PROFILE` for controlling jemalloc profile if the profiler is enabled. Add jemalloc-related 4LW command in Keeper: `jmst` for dumping jemalloc stats, `jmfp`, `jmep`, `jmdp` for controlling jemalloc profile if the profiler is enabled. [#58665](https://github.com/ClickHouse/ClickHouse/pull/58665) ([Antonio Andelic](https://github.com/antonio2368)). +* Better message for INVALID_IDENTIFIER error. [#58703](https://github.com/ClickHouse/ClickHouse/pull/58703) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Improved handling of signed numeric literals in normalizeQuery. [#58710](https://github.com/ClickHouse/ClickHouse/pull/58710) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Support Point data type for MySQL. [#58721](https://github.com/ClickHouse/ClickHouse/pull/58721) ([Kseniia Sumarokova](https://github.com/kssenii)). +* When comparing a Float32 column and a const string, read the string as Float32 (instead of Float64). [#58724](https://github.com/ClickHouse/ClickHouse/pull/58724) ([Raúl Marín](https://github.com/Algunenano)). +* Improve S3 compatible, add Ecloud EOS storage support. [#58786](https://github.com/ClickHouse/ClickHouse/pull/58786) ([xleoken](https://github.com/xleoken)). +* Allow `KILL QUERY` to cancel backups / restores. This PR also makes running backups and restores visible in `system.processes`. Also there is a new setting in the server configuration now - `shutdown_wait_backups_and_restores` (default=true) which makes the server either wait on shutdown for all running backups and restores to finish or just cancel them. [#58804](https://github.com/ClickHouse/ClickHouse/pull/58804) ([Vitaly Baranov](https://github.com/vitlibar)). +* Avro format support Zstd codec. Closes [#58735](https://github.com/ClickHouse/ClickHouse/issues/58735). [#58805](https://github.com/ClickHouse/ClickHouse/pull/58805) ([flynn](https://github.com/ucasfl)). +* MySQL interface gained support for `net_write_timeout` and `net_read_timeout` settings. `net_write_timeout` is translated into the native `send_timeout` ClickHouse setting and, similarly, `net_read_timeout` into `receive_timeout`. Fixed an issue where it was possible to set MySQL `sql_select_limit` setting only if the entire statement was in upper case. [#58835](https://github.com/ClickHouse/ClickHouse/pull/58835) ([Serge Klochkov](https://github.com/slvrtrn)). +* Fixing a problem described in [#58719](https://github.com/ClickHouse/ClickHouse/issues/58719). [#58841](https://github.com/ClickHouse/ClickHouse/pull/58841) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Make sure that for custom (created from SQL) disks ether `filesystem_caches_path` (a common directory prefix for all filesystem caches) or `custom_cached_disks_base_directory` (a common directory prefix for only filesystem caches created from custom disks) is specified in server config. `custom_cached_disks_base_directory` has higher priority for custom disks over `filesystem_caches_path`, which is used if the former one is absent. Filesystem cache setting `path` must lie inside that directory, otherwise exception will be thrown preventing disk to be created. This will not affect disks created on an older version and server was upgraded - then the exception will not be thrown to allow the server to successfully start). `custom_cached_disks_base_directory` is added to default server config as `/var/lib/clickhouse/caches/`. Closes [#57825](https://github.com/ClickHouse/ClickHouse/issues/57825). [#58869](https://github.com/ClickHouse/ClickHouse/pull/58869) ([Kseniia Sumarokova](https://github.com/kssenii)). +* MySQL interface gained compatibility with `SHOW WARNINGS`/`SHOW COUNT(*) WARNINGS` queries, though the returned result is always an empty set. [#58929](https://github.com/ClickHouse/ClickHouse/pull/58929) ([Serge Klochkov](https://github.com/slvrtrn)). +* Skip unavailable replicas when executing parallel distributed `INSERT SELECT`. [#58931](https://github.com/ClickHouse/ClickHouse/pull/58931) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Display word-descriptive log level while enabling structured log formatting in json. [#58936](https://github.com/ClickHouse/ClickHouse/pull/58936) ([Tim Liou](https://github.com/wheatdog)). +* MySQL interface gained support for `CAST(x AS SIGNED)` and `CAST(x AS UNSIGNED)` statements via data type aliases: `SIGNED` for Int64, and `UNSIGNED` for UInt64. This improves compatibility with BI tools such as Looker Studio. [#58954](https://github.com/ClickHouse/ClickHouse/pull/58954) ([Serge Klochkov](https://github.com/slvrtrn)). +* Function `seriesDecomposeSTL()` now returns a baseline component as season + trend components. [#58961](https://github.com/ClickHouse/ClickHouse/pull/58961) ([Bhavna Jindal](https://github.com/bhavnajindal)). +* Fix memory management in copyDataToS3File. [#58962](https://github.com/ClickHouse/ClickHouse/pull/58962) ([Vitaly Baranov](https://github.com/vitlibar)). +* Change working directory to data path in docker container. [#58975](https://github.com/ClickHouse/ClickHouse/pull/58975) ([cangyin](https://github.com/cangyin)). +* Added setting for Azure Blob Storage `azure_max_unexpected_write_error_retries` , can also be set from config under azure section. [#59001](https://github.com/ClickHouse/ClickHouse/pull/59001) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Keeper improvement: reduce Keeper's memory usage for stored nodes. [#59002](https://github.com/ClickHouse/ClickHouse/pull/59002) ([Antonio Andelic](https://github.com/antonio2368)). +* Allow server to start with broken data lake table. Closes [#58625](https://github.com/ClickHouse/ClickHouse/issues/58625). [#59080](https://github.com/ClickHouse/ClickHouse/pull/59080) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixes https://github.com/ClickHouse/ClickHouse/pull/59120#issuecomment-1906177350. [#59122](https://github.com/ClickHouse/ClickHouse/pull/59122) ([Arthur Passos](https://github.com/arthurpassos)). +* The state of URL's #hash in the dashboard is now compressed using [lz-string](https://github.com/pieroxy/lz-string). The default size of the state string is compressed from 6856B to 2823B. [#59124](https://github.com/ClickHouse/ClickHouse/pull/59124) ([Amos Bird](https://github.com/amosbird)). +* Allow to ignore schema evolution in Iceberg table engine and read all data using schema specified by the user on table creation or latest schema parsed from metadata on table creation. This is done under a setting `iceberg_engine_ignore_schema_evolution` that is disabled by default. Note that enabling this setting can lead to incorrect result as in case of evolved schema all data files will be read using the same schema. [#59133](https://github.com/ClickHouse/ClickHouse/pull/59133) ([Kruglov Pavel](https://github.com/Avogar)). +* Prohibit mutable operations (`INSERT`/`ALTER`/`OPTIMIZE`/...) on read-only/write-once storages with a proper `TABLE_IS_READ_ONLY` error (to avoid leftovers). Avoid leaving left-overs on write-once disks (`format_version.txt`) on `CREATE`/`ATTACH`. Ignore `DROP` for `ReplicatedMergeTree` (so as for `MergeTree`). Fix iterating over `s3_plain` (`MetadataStorageFromPlainObjectStorage::iterateDirectory`). Note read-only is `web` disk, and write-once is `s3_plain`. [#59170](https://github.com/ClickHouse/ClickHouse/pull/59170) ([Azat Khuzhin](https://github.com/azat)). +* MySQL interface gained support for `net_write_timeout` and `net_read_timeout` settings. `net_write_timeout` is translated into the native `send_timeout` ClickHouse setting and, similarly, `net_read_timeout` into `receive_timeout`. Fixed an issue where it was possible to set MySQL `sql_select_limit` setting only if the entire statement was in upper case. [#59293](https://github.com/ClickHouse/ClickHouse/pull/59293) ([Serge Klochkov](https://github.com/slvrtrn)). +* Fix bug in experimental `_block_number` column which could lead to logical error during complex combination of `ALTER`s and `merge`s. Fixes [#56202](https://github.com/ClickHouse/ClickHouse/issues/56202). Replaces [#58601](https://github.com/ClickHouse/ClickHouse/issues/58601). CC @SmitaRKulkarni. [#59295](https://github.com/ClickHouse/ClickHouse/pull/59295) ([alesapin](https://github.com/alesapin)). +* Play UI understands when an exception is returned inside JSON. Adjustment for [#52853](https://github.com/ClickHouse/ClickHouse/issues/52853). [#59303](https://github.com/ClickHouse/ClickHouse/pull/59303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `/binary` HTTP handler allows to specify user, host, and optionally, password in the query string. [#59311](https://github.com/ClickHouse/ClickHouse/pull/59311) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support backups for compressed in-memory tables. This closes [#57893](https://github.com/ClickHouse/ClickHouse/issues/57893). [#59315](https://github.com/ClickHouse/ClickHouse/pull/59315) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve exception message of function regexp_extract, close [#56393](https://github.com/ClickHouse/ClickHouse/issues/56393). [#59319](https://github.com/ClickHouse/ClickHouse/pull/59319) ([李扬](https://github.com/taiyang-li)). +* Support the FORMAT clause in BACKUP and RESTORE queries. [#59338](https://github.com/ClickHouse/ClickHouse/pull/59338) ([Vitaly Baranov](https://github.com/vitlibar)). +* Function `concatWithSeparator()` now supports arbitrary argument types (instead of only `String` and `FixedString` arguments). For example, `SELECT concatWithSeparator('.', 'number', 1)` now returns `number.1`. [#59341](https://github.com/ClickHouse/ClickHouse/pull/59341) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Build/Testing/Packaging Improvement +* Improve aliases for clickhouse binary (now `ch`/`clickhouse` is `clickhouse-local` or `clickhouse` depends on the arguments) and add bash completion for new aliases. [#58344](https://github.com/ClickHouse/ClickHouse/pull/58344) ([Azat Khuzhin](https://github.com/azat)). +* Add settings changes check to CI to check that all settings changes are reflected in settings changes history. [#58555](https://github.com/ClickHouse/ClickHouse/pull/58555) ([Kruglov Pavel](https://github.com/Avogar)). +* Use tables directly attached from S3 in stateful tests. [#58791](https://github.com/ClickHouse/ClickHouse/pull/58791) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Save the whole `fuzzer.log` as an archive instead of the last 100k lines. `tail -n 100000` often removes lines with table definitions. Example:. [#58821](https://github.com/ClickHouse/ClickHouse/pull/58821) ([Dmitry Novik](https://github.com/novikd)). +* Enable Rust on OSX ARM64 (this will add fuzzy search in client with skim and prql language, though I don't think that are people who hosts ClickHouse on darwin, so it is mostly for fuzzy search in client I would say). [#59272](https://github.com/ClickHouse/ClickHouse/pull/59272) ([Azat Khuzhin](https://github.com/azat)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Add join keys conversion for nested lowcardinality [#51550](https://github.com/ClickHouse/ClickHouse/pull/51550) ([vdimir](https://github.com/vdimir)). +* Flatten only true Nested type if flatten_nested=1, not all Array(Tuple) [#56132](https://github.com/ClickHouse/ClickHouse/pull/56132) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix a bug with projections and the aggregate_functions_null_for_empty setting during insertion. [#56944](https://github.com/ClickHouse/ClickHouse/pull/56944) ([Amos Bird](https://github.com/amosbird)). +* Fixed potential exception due to stale profile UUID [#57263](https://github.com/ClickHouse/ClickHouse/pull/57263) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)). +* Ignore MVs with dropped target table during pushing to views [#57520](https://github.com/ClickHouse/ClickHouse/pull/57520) ([Kruglov Pavel](https://github.com/Avogar)). +* [RFC] Eliminate possible race between ALTER_METADATA and MERGE_PARTS [#57755](https://github.com/ClickHouse/ClickHouse/pull/57755) ([Azat Khuzhin](https://github.com/azat)). +* Fix the exprs order bug in group by with rollup [#57786](https://github.com/ClickHouse/ClickHouse/pull/57786) ([Chen768959](https://github.com/Chen768959)). +* Fix lost blobs after dropping a replica with broken detached parts [#58333](https://github.com/ClickHouse/ClickHouse/pull/58333) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Allow users to work with symlinks in user_files_path (again) [#58447](https://github.com/ClickHouse/ClickHouse/pull/58447) ([Duc Canh Le](https://github.com/canhld94)). +* Fix segfault when graphite table does not have agg function [#58453](https://github.com/ClickHouse/ClickHouse/pull/58453) ([Duc Canh Le](https://github.com/canhld94)). +* Delay reading from StorageKafka to allow multiple reads in materialized views [#58477](https://github.com/ClickHouse/ClickHouse/pull/58477) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix a stupid case of intersecting parts [#58482](https://github.com/ClickHouse/ClickHouse/pull/58482) ([Alexander Tokmakov](https://github.com/tavplubix)). +* MergeTreePrefetchedReadPool disable for LIMIT only queries [#58505](https://github.com/ClickHouse/ClickHouse/pull/58505) ([Maksim Kita](https://github.com/kitaisreal)). +* Enable ordinary databases while restoration [#58520](https://github.com/ClickHouse/ClickHouse/pull/58520) ([Jihyuk Bok](https://github.com/tomahawk28)). +* Fix hive threadpool read ORC/Parquet/... Failed [#58537](https://github.com/ClickHouse/ClickHouse/pull/58537) ([sunny](https://github.com/sunny19930321)). +* Hide credentials in system.backup_log base_backup_name column [#58550](https://github.com/ClickHouse/ClickHouse/pull/58550) ([Daniel Pozo Escalona](https://github.com/danipozo)). +* toStartOfInterval for milli- microsencods values rounding [#58557](https://github.com/ClickHouse/ClickHouse/pull/58557) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Disable max_joined_block_rows in ConcurrentHashJoin [#58595](https://github.com/ClickHouse/ClickHouse/pull/58595) ([vdimir](https://github.com/vdimir)). +* Fix join using nullable in old analyzer [#58596](https://github.com/ClickHouse/ClickHouse/pull/58596) ([vdimir](https://github.com/vdimir)). +* `makeDateTime64()`: Allow non-const fraction argument [#58597](https://github.com/ClickHouse/ClickHouse/pull/58597) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix possible NULL dereference during symbolizing inline frames [#58607](https://github.com/ClickHouse/ClickHouse/pull/58607) ([Azat Khuzhin](https://github.com/azat)). +* Improve isolation of query cache entries under re-created users or role switches [#58611](https://github.com/ClickHouse/ClickHouse/pull/58611) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix broken partition key analysis when doing projection optimization [#58638](https://github.com/ClickHouse/ClickHouse/pull/58638) ([Amos Bird](https://github.com/amosbird)). +* Query cache: Fix per-user quota [#58731](https://github.com/ClickHouse/ClickHouse/pull/58731) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix stream partitioning in parallel window functions [#58739](https://github.com/ClickHouse/ClickHouse/pull/58739) ([Dmitry Novik](https://github.com/novikd)). +* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)). +* Don't process requests in Keeper during shutdown [#58765](https://github.com/ClickHouse/ClickHouse/pull/58765) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix Segfault in `SlabsPolygonIndex::find` [#58771](https://github.com/ClickHouse/ClickHouse/pull/58771) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix JSONExtract function for LowCardinality(Nullable) columns [#58808](https://github.com/ClickHouse/ClickHouse/pull/58808) ([vdimir](https://github.com/vdimir)). +* Table CREATE DROP Poco::Logger memory leak fix [#58831](https://github.com/ClickHouse/ClickHouse/pull/58831) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix HTTP compressors finalization [#58846](https://github.com/ClickHouse/ClickHouse/pull/58846) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Multiple read file log storage in mv [#58877](https://github.com/ClickHouse/ClickHouse/pull/58877) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Restriction for the access key id for s3. [#58900](https://github.com/ClickHouse/ClickHouse/pull/58900) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix possible crash in clickhouse-local during loading suggestions [#58907](https://github.com/ClickHouse/ClickHouse/pull/58907) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash when indexHint() is used [#58911](https://github.com/ClickHouse/ClickHouse/pull/58911) ([Dmitry Novik](https://github.com/novikd)). +* Fix StorageURL forgetting headers on server restart [#58933](https://github.com/ClickHouse/ClickHouse/pull/58933) ([Michael Kolupaev](https://github.com/al13n321)). +* Analyzer: fix storage replacement with insertion block [#58958](https://github.com/ClickHouse/ClickHouse/pull/58958) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix seek in ReadBufferFromZipArchive [#58966](https://github.com/ClickHouse/ClickHouse/pull/58966) ([Michael Kolupaev](https://github.com/al13n321)). +* `DROP INDEX` of inverted index now removes all relevant files from persistence [#59040](https://github.com/ClickHouse/ClickHouse/pull/59040) ([mochi](https://github.com/MochiXu)). +* Fix data race on query_factories_info [#59049](https://github.com/ClickHouse/ClickHouse/pull/59049) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable "Too many redirects" error retry [#59099](https://github.com/ClickHouse/ClickHouse/pull/59099) ([skyoct](https://github.com/skyoct)). +* Fix aggregation issue in mixed x86_64 and ARM clusters [#59132](https://github.com/ClickHouse/ClickHouse/pull/59132) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Fix not started database shutdown deadlock [#59137](https://github.com/ClickHouse/ClickHouse/pull/59137) ([Sergei Trifonov](https://github.com/serxa)). +* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix crash with nullable timezone for `toString` [#59190](https://github.com/ClickHouse/ClickHouse/pull/59190) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix abort in iceberg metadata on bad file paths [#59275](https://github.com/ClickHouse/ClickHouse/pull/59275) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix architecture name in select of Rust target [#59307](https://github.com/ClickHouse/ClickHouse/pull/59307) ([p1rattttt](https://github.com/p1rattttt)). +* Fix not-ready set for system.tables [#59351](https://github.com/ClickHouse/ClickHouse/pull/59351) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix lazy initialization in RabbitMQ [#59352](https://github.com/ClickHouse/ClickHouse/pull/59352) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Refreshable materialized views (takeover)"'. [#58296](https://github.com/ClickHouse/ClickHouse/pull/58296) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Fix an error in the release script - it didn't allow to make 23.12."'. [#58381](https://github.com/ClickHouse/ClickHouse/pull/58381) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* NO CL ENTRY: 'Revert "Use CH Buffer for HTTP out stream, add metrics for interfaces"'. [#58450](https://github.com/ClickHouse/ClickHouse/pull/58450) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Second attempt: Use CH Buffer for HTTP out stream, add metrics for interfaces'. [#58475](https://github.com/ClickHouse/ClickHouse/pull/58475) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* NO CL ENTRY: 'Revert "Merging [#53757](https://github.com/ClickHouse/ClickHouse/issues/53757)"'. [#58542](https://github.com/ClickHouse/ClickHouse/pull/58542) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Add support for MySQL `net_write_timeout` and `net_read_timeout` settings"'. [#58872](https://github.com/ClickHouse/ClickHouse/pull/58872) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Extend performance test norm_dist.xml"'. [#58989](https://github.com/ClickHouse/ClickHouse/pull/58989) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Add a test for [#47892](https://github.com/ClickHouse/ClickHouse/issues/47892)"'. [#58990](https://github.com/ClickHouse/ClickHouse/pull/58990) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Allow parallel replicas for JOIN with analyzer [part 1]."'. [#59059](https://github.com/ClickHouse/ClickHouse/pull/59059) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Consume leading zeroes when parsing a number in ConstantExpressionTemplate"'. [#59070](https://github.com/ClickHouse/ClickHouse/pull/59070) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Revert "Allow parallel replicas for JOIN with analyzer [part 1].""'. [#59076](https://github.com/ClickHouse/ClickHouse/pull/59076) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* NO CL ENTRY: 'Revert "Allow to attach partition from table with different partition expression when destination partition expression doesn't re-partition"'. [#59120](https://github.com/ClickHouse/ClickHouse/pull/59120) ([Robert Schulze](https://github.com/rschu1ze)). +* NO CL ENTRY: 'DisksApp.cpp: fix typo (specifiged → specified)'. [#59140](https://github.com/ClickHouse/ClickHouse/pull/59140) ([Nikolay Edigaryev](https://github.com/edigaryev)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Analyzer: Fix resolving subcolumns in JOIN [#49703](https://github.com/ClickHouse/ClickHouse/pull/49703) ([vdimir](https://github.com/vdimir)). +* Analyzer: always qualify execution names [#53705](https://github.com/ClickHouse/ClickHouse/pull/53705) ([Dmitry Novik](https://github.com/novikd)). +* Insert quorum: check host node version in addition [#55528](https://github.com/ClickHouse/ClickHouse/pull/55528) ([Igor Nikonov](https://github.com/devcrafter)). +* Remove more old code of projection analysis [#55579](https://github.com/ClickHouse/ClickHouse/pull/55579) ([Anton Popov](https://github.com/CurtizJ)). +* Better exception messages in input formats [#57053](https://github.com/ClickHouse/ClickHouse/pull/57053) ([Kruglov Pavel](https://github.com/Avogar)). +* Parallel replicas custom key: skip unavailable replicas [#57235](https://github.com/ClickHouse/ClickHouse/pull/57235) ([Igor Nikonov](https://github.com/devcrafter)). +* Small change in log message in MergeTreeDataMergerMutator [#57550](https://github.com/ClickHouse/ClickHouse/pull/57550) ([Nikita Taranov](https://github.com/nickitat)). +* fs cache: small optimization [#57615](https://github.com/ClickHouse/ClickHouse/pull/57615) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Customizable dependency failure handling for AsyncLoader [#57697](https://github.com/ClickHouse/ClickHouse/pull/57697) ([Sergei Trifonov](https://github.com/serxa)). +* Bring test back [#57700](https://github.com/ClickHouse/ClickHouse/pull/57700) ([Nikita Taranov](https://github.com/nickitat)). +* Change default database name in clickhouse-local to 'default' [#57774](https://github.com/ClickHouse/ClickHouse/pull/57774) ([Kruglov Pavel](https://github.com/Avogar)). +* Add option `--show-whitespaces-in-diff` to clickhouse-test [#57870](https://github.com/ClickHouse/ClickHouse/pull/57870) ([vdimir](https://github.com/vdimir)). +* Update `query_masking_rules` when reloading the config, attempt 2 [#57993](https://github.com/ClickHouse/ClickHouse/pull/57993) ([Mikhail Koviazin](https://github.com/mkmkme)). +* Remove unneeded parameter `use_external_buffer` from `AsynchronousReadBuffer*` [#58077](https://github.com/ClickHouse/ClickHouse/pull/58077) ([Nikita Taranov](https://github.com/nickitat)). +* Print another message in Bugfix check if internal check had been failed [#58091](https://github.com/ClickHouse/ClickHouse/pull/58091) ([vdimir](https://github.com/vdimir)). +* Refactor StorageMerge virtual columns filtering. [#58255](https://github.com/ClickHouse/ClickHouse/pull/58255) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Analyzer: fix tuple comparison when result is always null [#58266](https://github.com/ClickHouse/ClickHouse/pull/58266) ([vdimir](https://github.com/vdimir)). +* Fix an error in the release script - it didn't allow to make 23.12. [#58288](https://github.com/ClickHouse/ClickHouse/pull/58288) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v23.12.1.1368-stable [#58290](https://github.com/ClickHouse/ClickHouse/pull/58290) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix test_storage_s3_queue/test.py::test_drop_table [#58293](https://github.com/ClickHouse/ClickHouse/pull/58293) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix timeout in 01732_race_condition_storage_join_long [#58298](https://github.com/ClickHouse/ClickHouse/pull/58298) ([vdimir](https://github.com/vdimir)). +* Handle another case for preprocessing in Keeper [#58308](https://github.com/ClickHouse/ClickHouse/pull/58308) ([Antonio Andelic](https://github.com/antonio2368)). +* Disable max_bytes_before_external* in 00172_hits_joins [#58309](https://github.com/ClickHouse/ClickHouse/pull/58309) ([vdimir](https://github.com/vdimir)). +* Analyzer: support functional arguments in USING clause [#58317](https://github.com/ClickHouse/ClickHouse/pull/58317) ([Dmitry Novik](https://github.com/novikd)). +* Fixed logical error in CheckSortedTransform [#58318](https://github.com/ClickHouse/ClickHouse/pull/58318) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Refreshable materialized views again [#58320](https://github.com/ClickHouse/ClickHouse/pull/58320) ([Michael Kolupaev](https://github.com/al13n321)). +* Organize symbols from src/* into DB namespace [#58336](https://github.com/ClickHouse/ClickHouse/pull/58336) ([Amos Bird](https://github.com/amosbird)). +* Add a style check against DOS and Windows [#58345](https://github.com/ClickHouse/ClickHouse/pull/58345) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check what happen if remove array joined columns from KeyCondition [#58346](https://github.com/ClickHouse/ClickHouse/pull/58346) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Upload time of the perf tests into artifacts as test_duration_ms [#58348](https://github.com/ClickHouse/ClickHouse/pull/58348) ([Azat Khuzhin](https://github.com/azat)). +* Keep exception format string in retries ctl [#58351](https://github.com/ClickHouse/ClickHouse/pull/58351) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix replication.lib helper (system.mutations has database not current_database) [#58352](https://github.com/ClickHouse/ClickHouse/pull/58352) ([Azat Khuzhin](https://github.com/azat)). +* Refactor StorageHDFS and StorageFile virtual columns filtering [#58353](https://github.com/ClickHouse/ClickHouse/pull/58353) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix suspended workers for AsyncLoader [#58362](https://github.com/ClickHouse/ClickHouse/pull/58362) ([Sergei Trifonov](https://github.com/serxa)). +* Remove stale events from README [#58364](https://github.com/ClickHouse/ClickHouse/pull/58364) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Do not fail the CI on an expired token [#58384](https://github.com/ClickHouse/ClickHouse/pull/58384) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add a test for [#38534](https://github.com/ClickHouse/ClickHouse/issues/38534) [#58391](https://github.com/ClickHouse/ClickHouse/pull/58391) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fix database engine validation inside database factory [#58395](https://github.com/ClickHouse/ClickHouse/pull/58395) ([Bharat Nallan](https://github.com/bharatnc)). +* Fix bad formatting of the `timeDiff` compatibility alias [#58398](https://github.com/ClickHouse/ClickHouse/pull/58398) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix a comment; remove unused method; stop using pointers [#58399](https://github.com/ClickHouse/ClickHouse/pull/58399) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test_user_valid_until [#58409](https://github.com/ClickHouse/ClickHouse/pull/58409) ([Nikolay Degterinsky](https://github.com/evillique)). +* Make a test not depend on the lack of floating point associativity [#58439](https://github.com/ClickHouse/ClickHouse/pull/58439) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `02944_dynamically_change_filesystem_cache_size` [#58445](https://github.com/ClickHouse/ClickHouse/pull/58445) ([Nikolay Degterinsky](https://github.com/evillique)). +* Analyzer: Fix LOGICAL_ERROR with LowCardinality [#58457](https://github.com/ClickHouse/ClickHouse/pull/58457) ([Dmitry Novik](https://github.com/novikd)). +* Replace `std::regex` by re2 [#58458](https://github.com/ClickHouse/ClickHouse/pull/58458) ([Robert Schulze](https://github.com/rschu1ze)). +* Improve perf tests [#58478](https://github.com/ClickHouse/ClickHouse/pull/58478) ([Raúl Marín](https://github.com/Algunenano)). +* Check if I can remove KeyCondition analysis on AST. [#58480](https://github.com/ClickHouse/ClickHouse/pull/58480) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix some thread pool settings not updating at runtime [#58485](https://github.com/ClickHouse/ClickHouse/pull/58485) ([Michael Kolupaev](https://github.com/al13n321)). +* Lower log levels for some Raft logs to new test level [#58487](https://github.com/ClickHouse/ClickHouse/pull/58487) ([Antonio Andelic](https://github.com/antonio2368)). +* PartsSplitter small refactoring [#58506](https://github.com/ClickHouse/ClickHouse/pull/58506) ([Maksim Kita](https://github.com/kitaisreal)). +* Sync content of the docker test images [#58507](https://github.com/ClickHouse/ClickHouse/pull/58507) ([Max K.](https://github.com/maxknv)). +* CI: move ci-specifics from job scripts to ci.py [#58516](https://github.com/ClickHouse/ClickHouse/pull/58516) ([Max K.](https://github.com/maxknv)). +* Minor fixups for `sqid()` [#58517](https://github.com/ClickHouse/ClickHouse/pull/58517) ([Robert Schulze](https://github.com/rschu1ze)). +* Update version_date.tsv and changelogs after v23.12.2.59-stable [#58545](https://github.com/ClickHouse/ClickHouse/pull/58545) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.11.4.24-stable [#58546](https://github.com/ClickHouse/ClickHouse/pull/58546) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.8.9.54-lts [#58547](https://github.com/ClickHouse/ClickHouse/pull/58547) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.10.6.60-stable [#58548](https://github.com/ClickHouse/ClickHouse/pull/58548) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.3.19.32-lts [#58549](https://github.com/ClickHouse/ClickHouse/pull/58549) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update CHANGELOG.md [#58559](https://github.com/ClickHouse/ClickHouse/pull/58559) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Fix test 02932_kill_query_sleep [#58560](https://github.com/ClickHouse/ClickHouse/pull/58560) ([Vitaly Baranov](https://github.com/vitlibar)). +* CI fix. Add packager script to build digest [#58571](https://github.com/ClickHouse/ClickHouse/pull/58571) ([Max K.](https://github.com/maxknv)). +* fix and test that S3Clients are reused [#58573](https://github.com/ClickHouse/ClickHouse/pull/58573) ([Sema Checherinda](https://github.com/CheSema)). +* Follow-up to [#58482](https://github.com/ClickHouse/ClickHouse/issues/58482) [#58574](https://github.com/ClickHouse/ClickHouse/pull/58574) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not load database engines in suggest [#58586](https://github.com/ClickHouse/ClickHouse/pull/58586) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wrong message in Keeper [#58588](https://github.com/ClickHouse/ClickHouse/pull/58588) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add some missing LLVM includes [#58594](https://github.com/ClickHouse/ClickHouse/pull/58594) ([Raúl Marín](https://github.com/Algunenano)). +* Small fix in Keeper [#58598](https://github.com/ClickHouse/ClickHouse/pull/58598) ([Antonio Andelic](https://github.com/antonio2368)). +* Update analyzer_tech_debt.txt [#58599](https://github.com/ClickHouse/ClickHouse/pull/58599) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Simplify release.py script [#58600](https://github.com/ClickHouse/ClickHouse/pull/58600) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update analyzer_tech_debt.txt [#58602](https://github.com/ClickHouse/ClickHouse/pull/58602) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Refactor stacktrace symbolizer to avoid copy-paste [#58610](https://github.com/ClickHouse/ClickHouse/pull/58610) ([Azat Khuzhin](https://github.com/azat)). +* Add intel AMX checking [#58617](https://github.com/ClickHouse/ClickHouse/pull/58617) ([Roman Glinskikh](https://github.com/omgronny)). +* Optional `client` argument for `S3Helper` [#58619](https://github.com/ClickHouse/ClickHouse/pull/58619) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add sorting to 02366_kql_summarize.sql [#58621](https://github.com/ClickHouse/ClickHouse/pull/58621) ([Raúl Marín](https://github.com/Algunenano)). +* Fix possible race in ManyAggregatedData dtor. [#58624](https://github.com/ClickHouse/ClickHouse/pull/58624) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove more projections code [#58628](https://github.com/ClickHouse/ClickHouse/pull/58628) ([Anton Popov](https://github.com/CurtizJ)). +* Remove finalize() from ~WriteBufferFromEncryptedFile [#58629](https://github.com/ClickHouse/ClickHouse/pull/58629) ([Vitaly Baranov](https://github.com/vitlibar)). +* Update test_replicated_database/test.py [#58647](https://github.com/ClickHouse/ClickHouse/pull/58647) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Try disabling `muzzy_decay_ms` in jemalloc [#58648](https://github.com/ClickHouse/ClickHouse/pull/58648) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix test_replicated_database::test_startup_without_zk flakiness [#58649](https://github.com/ClickHouse/ClickHouse/pull/58649) ([Azat Khuzhin](https://github.com/azat)). +* Fix 01600_remerge_sort_lowered_memory_bytes_ratio flakiness (due to settings randomization) [#58650](https://github.com/ClickHouse/ClickHouse/pull/58650) ([Azat Khuzhin](https://github.com/azat)). +* Analyzer: Fix assertion in HashJoin with duplicate columns [#58652](https://github.com/ClickHouse/ClickHouse/pull/58652) ([vdimir](https://github.com/vdimir)). +* Document that `match()` can use `ngrambf_v1` and `tokenbf_v1` indexes [#58655](https://github.com/ClickHouse/ClickHouse/pull/58655) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix perf tests duration (checks.test_duration_ms) [#58656](https://github.com/ClickHouse/ClickHouse/pull/58656) ([Azat Khuzhin](https://github.com/azat)). +* Analyzer: Correctly handle constant set in index [#58657](https://github.com/ClickHouse/ClickHouse/pull/58657) ([Dmitry Novik](https://github.com/novikd)). +* fix a typo in stress randomization setting [#58658](https://github.com/ClickHouse/ClickHouse/pull/58658) ([Sema Checherinda](https://github.com/CheSema)). +* Small follow-up to `std::regex` --> `re2` conversion ([#58458](https://github.com/ClickHouse/ClickHouse/issues/58458)) [#58678](https://github.com/ClickHouse/ClickHouse/pull/58678) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove `` from libcxx [#58681](https://github.com/ClickHouse/ClickHouse/pull/58681) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix bad log message [#58698](https://github.com/ClickHouse/ClickHouse/pull/58698) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Some small improvements to version_helper from [#57203](https://github.com/ClickHouse/ClickHouse/issues/57203) [#58712](https://github.com/ClickHouse/ClickHouse/pull/58712) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Small fixes in different helpers [#58717](https://github.com/ClickHouse/ClickHouse/pull/58717) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix bug in new (not released yet) parallel replicas coordinator [#58722](https://github.com/ClickHouse/ClickHouse/pull/58722) ([Nikita Taranov](https://github.com/nickitat)). +* Analyzer: Fix LOGICAL_ERROR in CountDistinctPass [#58723](https://github.com/ClickHouse/ClickHouse/pull/58723) ([Dmitry Novik](https://github.com/novikd)). +* Fix reading of offsets subcolumn (`size0`) from `Nested` [#58729](https://github.com/ClickHouse/ClickHouse/pull/58729) ([Anton Popov](https://github.com/CurtizJ)). +* Fix Mac OS X [#58733](https://github.com/ClickHouse/ClickHouse/pull/58733) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fix stress with generate-template-key [#58740](https://github.com/ClickHouse/ClickHouse/pull/58740) ([Sema Checherinda](https://github.com/CheSema)). +* more relaxed check [#58751](https://github.com/ClickHouse/ClickHouse/pull/58751) ([Sema Checherinda](https://github.com/CheSema)). +* Fix usage of small buffers for remote reading [#58768](https://github.com/ClickHouse/ClickHouse/pull/58768) ([Nikita Taranov](https://github.com/nickitat)). +* Add missing includes when _LIBCPP_REMOVE_TRANSITIVE_INCLUDES enabled [#58770](https://github.com/ClickHouse/ClickHouse/pull/58770) ([Artem Alperin](https://github.com/hdnpth)). +* Remove some code [#58772](https://github.com/ClickHouse/ClickHouse/pull/58772) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove some code [#58790](https://github.com/ClickHouse/ClickHouse/pull/58790) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix trash in performance tests [#58794](https://github.com/ClickHouse/ClickHouse/pull/58794) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data race in Keeper [#58806](https://github.com/ClickHouse/ClickHouse/pull/58806) ([Antonio Andelic](https://github.com/antonio2368)). +* Increase log level to trace to help debug `00993_system_parts_race_condition_drop_zookeeper` [#58809](https://github.com/ClickHouse/ClickHouse/pull/58809) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* DatabaseCatalog background tasks add log names [#58832](https://github.com/ClickHouse/ClickHouse/pull/58832) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer: Resolve GROUPING function on shards [#58833](https://github.com/ClickHouse/ClickHouse/pull/58833) ([Dmitry Novik](https://github.com/novikd)). +* Allow parallel replicas for JOIN with analyzer [part 1]. [#58838](https://github.com/ClickHouse/ClickHouse/pull/58838) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix `isRetry` method [#58839](https://github.com/ClickHouse/ClickHouse/pull/58839) ([alesapin](https://github.com/alesapin)). +* fs cache: fix data race in slru [#58842](https://github.com/ClickHouse/ClickHouse/pull/58842) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix reading from an invisible part in new (not released yet) parallel replicas coordinator [#58844](https://github.com/ClickHouse/ClickHouse/pull/58844) ([Nikita Taranov](https://github.com/nickitat)). +* Fix bad log message [#58849](https://github.com/ClickHouse/ClickHouse/pull/58849) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Set max_bytes_before_external_group_by in 01961_roaring_memory_tracking [#58863](https://github.com/ClickHouse/ClickHouse/pull/58863) ([vdimir](https://github.com/vdimir)). +* Fix `00089_group_by_arrays_of_fixed` with external aggregation [#58873](https://github.com/ClickHouse/ClickHouse/pull/58873) ([Antonio Andelic](https://github.com/antonio2368)). +* DiskWeb minor improvement in loading [#58874](https://github.com/ClickHouse/ClickHouse/pull/58874) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix RPN construction for indexHint [#58875](https://github.com/ClickHouse/ClickHouse/pull/58875) ([Dmitry Novik](https://github.com/novikd)). +* Analyzer: add test with GROUP BY on shards [#58876](https://github.com/ClickHouse/ClickHouse/pull/58876) ([Dmitry Novik](https://github.com/novikd)). +* Jepsen job to reuse builds [#58881](https://github.com/ClickHouse/ClickHouse/pull/58881) ([Max K.](https://github.com/maxknv)). +* Fix ambiguity in the setting description [#58883](https://github.com/ClickHouse/ClickHouse/pull/58883) ([Denny Crane](https://github.com/den-crane)). +* Less error prone interface of read buffers [#58886](https://github.com/ClickHouse/ClickHouse/pull/58886) ([Anton Popov](https://github.com/CurtizJ)). +* Add metric for keeper memory soft limit [#58890](https://github.com/ClickHouse/ClickHouse/pull/58890) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Add a test for [#47988](https://github.com/ClickHouse/ClickHouse/issues/47988) [#58893](https://github.com/ClickHouse/ClickHouse/pull/58893) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Whitespaces [#58894](https://github.com/ClickHouse/ClickHouse/pull/58894) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data race in `AggregatingTransform` [#58896](https://github.com/ClickHouse/ClickHouse/pull/58896) ([Antonio Andelic](https://github.com/antonio2368)). +* Update SLRUFileCachePriority.cpp [#58898](https://github.com/ClickHouse/ClickHouse/pull/58898) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add tests for [#57193](https://github.com/ClickHouse/ClickHouse/issues/57193) [#58899](https://github.com/ClickHouse/ClickHouse/pull/58899) ([Raúl Marín](https://github.com/Algunenano)). +* Add log for already download binary in Jepsen [#58901](https://github.com/ClickHouse/ClickHouse/pull/58901) ([Antonio Andelic](https://github.com/antonio2368)). +* fs cache: minor refactoring [#58902](https://github.com/ClickHouse/ClickHouse/pull/58902) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Checking on flaky test_parallel_replicas_custom_key_failover [#58909](https://github.com/ClickHouse/ClickHouse/pull/58909) ([Igor Nikonov](https://github.com/devcrafter)). +* Style fix [#58913](https://github.com/ClickHouse/ClickHouse/pull/58913) ([Dmitry Novik](https://github.com/novikd)). +* Opentelemetry spans to analyze CPU and S3 bottlenecks on inserts [#58914](https://github.com/ClickHouse/ClickHouse/pull/58914) ([Alexander Gololobov](https://github.com/davenger)). +* Fix fault handler in case of thread (for fault handler) cannot be spawned [#58917](https://github.com/ClickHouse/ClickHouse/pull/58917) ([Azat Khuzhin](https://github.com/azat)). +* Analyzer: Support GROUP BY injective function elimination [#58919](https://github.com/ClickHouse/ClickHouse/pull/58919) ([Dmitry Novik](https://github.com/novikd)). +* Cancel MasterCI in PRs [#58920](https://github.com/ClickHouse/ClickHouse/pull/58920) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix and test for azure [#58697](https://github.com/ClickHouse/ClickHouse/issues/58697) [#58921](https://github.com/ClickHouse/ClickHouse/pull/58921) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Extend performance test norm_dist.xml [#58922](https://github.com/ClickHouse/ClickHouse/pull/58922) ([Robert Schulze](https://github.com/rschu1ze)). +* Add regression test for parallel replicas (follow up [#58722](https://github.com/ClickHouse/ClickHouse/issues/58722), [#58844](https://github.com/ClickHouse/ClickHouse/issues/58844)) [#58923](https://github.com/ClickHouse/ClickHouse/pull/58923) ([Nikita Taranov](https://github.com/nickitat)). +* Add a test for [#47892](https://github.com/ClickHouse/ClickHouse/issues/47892) [#58927](https://github.com/ClickHouse/ClickHouse/pull/58927) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `FunctionToSubcolumnsPass` in debug build [#58930](https://github.com/ClickHouse/ClickHouse/pull/58930) ([Anton Popov](https://github.com/CurtizJ)). +* Call `getMaxFileDescriptorCount` once in Keeper [#58938](https://github.com/ClickHouse/ClickHouse/pull/58938) ([Antonio Andelic](https://github.com/antonio2368)). +* Add missing files to digests [#58942](https://github.com/ClickHouse/ClickHouse/pull/58942) ([Raúl Marín](https://github.com/Algunenano)). +* Analyzer: fix join column not found with compound identifiers [#58943](https://github.com/ClickHouse/ClickHouse/pull/58943) ([vdimir](https://github.com/vdimir)). +* CI: pr_info to provide event_type for job scripts [#58947](https://github.com/ClickHouse/ClickHouse/pull/58947) ([Max K.](https://github.com/maxknv)). +* Using the destination object for paths generation in S3copy. [#58949](https://github.com/ClickHouse/ClickHouse/pull/58949) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix data race in slru (2) [#58950](https://github.com/ClickHouse/ClickHouse/pull/58950) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix flaky test_postgresql_replica_database_engine_2/test.py::test_dependent_loading [#58951](https://github.com/ClickHouse/ClickHouse/pull/58951) ([Kseniia Sumarokova](https://github.com/kssenii)). +* More safe way to dump system logs in tests [#58955](https://github.com/ClickHouse/ClickHouse/pull/58955) ([alesapin](https://github.com/alesapin)). +* Add a comment about sparse checkout [#58960](https://github.com/ClickHouse/ClickHouse/pull/58960) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Follow up to [#58357](https://github.com/ClickHouse/ClickHouse/issues/58357) [#58963](https://github.com/ClickHouse/ClickHouse/pull/58963) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Better error message about tuples [#58971](https://github.com/ClickHouse/ClickHouse/pull/58971) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix timeout for prometheus exporter for HTTP/1.1 (due to keep-alive) [#58981](https://github.com/ClickHouse/ClickHouse/pull/58981) ([Azat Khuzhin](https://github.com/azat)). +* Fix 02891_array_shingles with analyzer [#58982](https://github.com/ClickHouse/ClickHouse/pull/58982) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix script name in SQL example in executable.md [#58984](https://github.com/ClickHouse/ClickHouse/pull/58984) ([Lino Uruñuela](https://github.com/Wachynaky)). +* Fix typo [#58986](https://github.com/ClickHouse/ClickHouse/pull/58986) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Revert flaky [#58992](https://github.com/ClickHouse/ClickHouse/pull/58992) ([Raúl Marín](https://github.com/Algunenano)). +* Revive: Parallel replicas custom key: skip unavailable replicas [#58993](https://github.com/ClickHouse/ClickHouse/pull/58993) ([Igor Nikonov](https://github.com/devcrafter)). +* Make performance test `test norm_dist.xml` more realistic [#58995](https://github.com/ClickHouse/ClickHouse/pull/58995) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix 02404_memory_bound_merging with analyzer (follow up [#56419](https://github.com/ClickHouse/ClickHouse/issues/56419)) [#58996](https://github.com/ClickHouse/ClickHouse/pull/58996) ([Nikita Taranov](https://github.com/nickitat)). +* Add test for [#58930](https://github.com/ClickHouse/ClickHouse/issues/58930) [#58999](https://github.com/ClickHouse/ClickHouse/pull/58999) ([Anton Popov](https://github.com/CurtizJ)). +* initialization ConnectionTimeouts [#59000](https://github.com/ClickHouse/ClickHouse/pull/59000) ([Sema Checherinda](https://github.com/CheSema)). +* DiskWeb fix loading [#59006](https://github.com/ClickHouse/ClickHouse/pull/59006) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update log level for http buffer [#59008](https://github.com/ClickHouse/ClickHouse/pull/59008) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Change log level for super imporant message in Keeper [#59010](https://github.com/ClickHouse/ClickHouse/pull/59010) ([alesapin](https://github.com/alesapin)). +* Fix async loader stress test [#59011](https://github.com/ClickHouse/ClickHouse/pull/59011) ([Sergei Trifonov](https://github.com/serxa)). +* Remove `StaticResourceManager` [#59013](https://github.com/ClickHouse/ClickHouse/pull/59013) ([Sergei Trifonov](https://github.com/serxa)). +* preserve 'amz-sdk-invocation-id' and 'amz-sdk-request' headers with gcp [#59015](https://github.com/ClickHouse/ClickHouse/pull/59015) ([Sema Checherinda](https://github.com/CheSema)). +* Update rename.md [#59017](https://github.com/ClickHouse/ClickHouse/pull/59017) ([filimonov](https://github.com/filimonov)). +* очепятка [#59024](https://github.com/ClickHouse/ClickHouse/pull/59024) ([edpyt](https://github.com/edpyt)). +* Split resource scheduler off `IO/` into `Common/Scheduler/` [#59025](https://github.com/ClickHouse/ClickHouse/pull/59025) ([Sergei Trifonov](https://github.com/serxa)). +* Add a parameter for testing purposes [#59027](https://github.com/ClickHouse/ClickHouse/pull/59027) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix test 02932_kill_query_sleep when running with query cache [#59041](https://github.com/ClickHouse/ClickHouse/pull/59041) ([Vitaly Baranov](https://github.com/vitlibar)). +* CI: Jepsen: fix sanity check in ci.py [#59043](https://github.com/ClickHouse/ClickHouse/pull/59043) ([Max K.](https://github.com/maxknv)). +* CI: add ci_config classes for job and build names [#59046](https://github.com/ClickHouse/ClickHouse/pull/59046) ([Max K.](https://github.com/maxknv)). +* remove flaky test [#59066](https://github.com/ClickHouse/ClickHouse/pull/59066) ([Sema Checherinda](https://github.com/CheSema)). +* Followup to 57853 [#59068](https://github.com/ClickHouse/ClickHouse/pull/59068) ([Dmitry Novik](https://github.com/novikd)). +* Follow-up to [#59027](https://github.com/ClickHouse/ClickHouse/issues/59027) [#59075](https://github.com/ClickHouse/ClickHouse/pull/59075) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `test_parallel_replicas_invisible_parts` [#59077](https://github.com/ClickHouse/ClickHouse/pull/59077) ([Nikita Taranov](https://github.com/nickitat)). +* Increase max_bytes_before_external_group_by for 00165_jit_aggregate_functions [#59078](https://github.com/ClickHouse/ClickHouse/pull/59078) ([Raúl Marín](https://github.com/Algunenano)). +* Fix stateless/run.sh [#59079](https://github.com/ClickHouse/ClickHouse/pull/59079) ([Kseniia Sumarokova](https://github.com/kssenii)). +* CI: hot fix for reuse [#59081](https://github.com/ClickHouse/ClickHouse/pull/59081) ([Max K.](https://github.com/maxknv)). +* Fix server shutdown due to exception while loading metadata [#59083](https://github.com/ClickHouse/ClickHouse/pull/59083) ([Sergei Trifonov](https://github.com/serxa)). +* Coordinator returns ranges for reading in sorted order [#59089](https://github.com/ClickHouse/ClickHouse/pull/59089) ([Nikita Taranov](https://github.com/nickitat)). +* Raise timeout in 02294_decimal_second_errors [#59090](https://github.com/ClickHouse/ClickHouse/pull/59090) ([Raúl Marín](https://github.com/Algunenano)). +* Add `[[nodiscard]]` to a couple of methods [#59093](https://github.com/ClickHouse/ClickHouse/pull/59093) ([Nikita Taranov](https://github.com/nickitat)). +* Docs: Update integer and float aliases [#59100](https://github.com/ClickHouse/ClickHouse/pull/59100) ([Robert Schulze](https://github.com/rschu1ze)). +* Avoid election timeouts during startup in Keeper [#59102](https://github.com/ClickHouse/ClickHouse/pull/59102) ([Antonio Andelic](https://github.com/antonio2368)). +* Add missing setting max_estimated_execution_time in SettingsChangesHistory [#59104](https://github.com/ClickHouse/ClickHouse/pull/59104) ([Kruglov Pavel](https://github.com/Avogar)). +* Rename some inverted index test files [#59106](https://github.com/ClickHouse/ClickHouse/pull/59106) ([Robert Schulze](https://github.com/rschu1ze)). +* Further reduce runtime of `norm_distance.xml` [#59108](https://github.com/ClickHouse/ClickHouse/pull/59108) ([Robert Schulze](https://github.com/rschu1ze)). +* Minor follow-up to [#53710](https://github.com/ClickHouse/ClickHouse/issues/53710) [#59109](https://github.com/ClickHouse/ClickHouse/pull/59109) ([Robert Schulze](https://github.com/rschu1ze)). +* Update stateless/run.sh [#59116](https://github.com/ClickHouse/ClickHouse/pull/59116) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Followup 57875 [#59117](https://github.com/ClickHouse/ClickHouse/pull/59117) ([Dmitry Novik](https://github.com/novikd)). +* Fixing build [#59130](https://github.com/ClickHouse/ClickHouse/pull/59130) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Capability check for `s3_plain` [#59145](https://github.com/ClickHouse/ClickHouse/pull/59145) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix `02015_async_inserts_stress_long` [#59146](https://github.com/ClickHouse/ClickHouse/pull/59146) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix AggregateFunctionNothing result type issues introducing it with different names [#59147](https://github.com/ClickHouse/ClickHouse/pull/59147) ([vdimir](https://github.com/vdimir)). +* Fix url encoding issue [#59162](https://github.com/ClickHouse/ClickHouse/pull/59162) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Upgrade simdjson to v3.6.3 [#59166](https://github.com/ClickHouse/ClickHouse/pull/59166) ([Robert Schulze](https://github.com/rschu1ze)). +* Decrease log level for one log message [#59168](https://github.com/ClickHouse/ClickHouse/pull/59168) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix broken cache for non-existing temp_path [#59172](https://github.com/ClickHouse/ClickHouse/pull/59172) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Move some headers [#59175](https://github.com/ClickHouse/ClickHouse/pull/59175) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Analyzer: Fix CTE name clash resolution [#59177](https://github.com/ClickHouse/ClickHouse/pull/59177) ([Dmitry Novik](https://github.com/novikd)). +* Fix another place with special symbols in the URL [#59184](https://github.com/ClickHouse/ClickHouse/pull/59184) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Actions dag build filter actions refactoring [#59228](https://github.com/ClickHouse/ClickHouse/pull/59228) ([Maksim Kita](https://github.com/kitaisreal)). +* Minor cleanup of msan usage [#59229](https://github.com/ClickHouse/ClickHouse/pull/59229) ([Robert Schulze](https://github.com/rschu1ze)). +* Load server configs in clickhouse local [#59231](https://github.com/ClickHouse/ClickHouse/pull/59231) ([pufit](https://github.com/pufit)). +* Make libssh build dependent on `-DENABLE_LIBRARIES` [#59242](https://github.com/ClickHouse/ClickHouse/pull/59242) ([Robert Schulze](https://github.com/rschu1ze)). +* Disable copy constructor for MultiVersion [#59244](https://github.com/ClickHouse/ClickHouse/pull/59244) ([Vitaly Baranov](https://github.com/vitlibar)). +* CI: fix ci configuration for nightly job [#59252](https://github.com/ClickHouse/ClickHouse/pull/59252) ([Max K.](https://github.com/maxknv)). +* Fix 02475_bson_each_row_format flakiness (due to small parsing block) [#59253](https://github.com/ClickHouse/ClickHouse/pull/59253) ([Azat Khuzhin](https://github.com/azat)). +* Improve pytest --pdb experience by preserving dockerd on SIGINT (v2) [#59255](https://github.com/ClickHouse/ClickHouse/pull/59255) ([Azat Khuzhin](https://github.com/azat)). +* Fix fasttest by pinning pip dependencies [#59256](https://github.com/ClickHouse/ClickHouse/pull/59256) ([Azat Khuzhin](https://github.com/azat)). +* Added AtomicLogger [#59273](https://github.com/ClickHouse/ClickHouse/pull/59273) ([Maksim Kita](https://github.com/kitaisreal)). +* Update test_reload_after_fail_in_cache_dictionary for analyzer [#59274](https://github.com/ClickHouse/ClickHouse/pull/59274) ([vdimir](https://github.com/vdimir)). +* Update run.sh [#59280](https://github.com/ClickHouse/ClickHouse/pull/59280) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add missing setting optimize_injective_functions_in_group_by to SettingsChangesHistory [#59283](https://github.com/ClickHouse/ClickHouse/pull/59283) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix perf tests (after sumMap starts to filter out -0.) [#59287](https://github.com/ClickHouse/ClickHouse/pull/59287) ([Azat Khuzhin](https://github.com/azat)). +* Use fresh ZooKeeper client on DROP (to have higher chances on success) [#59288](https://github.com/ClickHouse/ClickHouse/pull/59288) ([Azat Khuzhin](https://github.com/azat)). +* Additional check [#59292](https://github.com/ClickHouse/ClickHouse/pull/59292) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* No debug symbols in Rust [#59306](https://github.com/ClickHouse/ClickHouse/pull/59306) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix deadlock in `AsyncLoader::stop()` [#59308](https://github.com/ClickHouse/ClickHouse/pull/59308) ([Sergei Trifonov](https://github.com/serxa)). +* Speed up `00165_jit_aggregate_functions` [#59312](https://github.com/ClickHouse/ClickHouse/pull/59312) ([Nikita Taranov](https://github.com/nickitat)). +* CI: WA for issue with perf test with artifact reuse [#59325](https://github.com/ClickHouse/ClickHouse/pull/59325) ([Max K.](https://github.com/maxknv)). +* Fix typo [#59329](https://github.com/ClickHouse/ClickHouse/pull/59329) ([Raúl Marín](https://github.com/Algunenano)). +* Simplify query_run_metric_arrays in perf tests [#59333](https://github.com/ClickHouse/ClickHouse/pull/59333) ([Raúl Marín](https://github.com/Algunenano)). +* IVolume constructor improve exception message [#59335](https://github.com/ClickHouse/ClickHouse/pull/59335) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix upgrade check for new setting [#59343](https://github.com/ClickHouse/ClickHouse/pull/59343) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix sccache when building without coverage [#59345](https://github.com/ClickHouse/ClickHouse/pull/59345) ([Raúl Marín](https://github.com/Algunenano)). +* Loggers initialization fix [#59347](https://github.com/ClickHouse/ClickHouse/pull/59347) ([Maksim Kita](https://github.com/kitaisreal)). +* Add setting update_insert_deduplication_token_in_dependent_materialized_views to settings changes history [#59349](https://github.com/ClickHouse/ClickHouse/pull/59349) ([Maksim Kita](https://github.com/kitaisreal)). +* Slightly better memory usage in `AsynchronousBoundedReadBuffer` [#59354](https://github.com/ClickHouse/ClickHouse/pull/59354) ([Anton Popov](https://github.com/CurtizJ)). +* Try to make variant tests a bit faster [#59355](https://github.com/ClickHouse/ClickHouse/pull/59355) ([Kruglov Pavel](https://github.com/Avogar)). +* Minor typos in Settings.h [#59371](https://github.com/ClickHouse/ClickHouse/pull/59371) ([Jordi Villar](https://github.com/jrdi)). +* Rename `quantileDDSketch` to `quantileDD` [#59372](https://github.com/ClickHouse/ClickHouse/pull/59372) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 1d3e7d4964e..efbce54d44b 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -109,6 +109,9 @@ Do not check for a particular wording of error message, it may change in the fut If you want to use distributed queries in functional tests, you can leverage `remote` table function with `127.0.0.{1..2}` addresses for the server to query itself; or you can use predefined test clusters in server configuration file like `test_shard_localhost`. Remember to add the words `shard` or `distributed` to the test name, so that it is run in CI in correct configurations, where the server is configured to support distributed queries. +### Working with Temporary Files + +Sometimes in a shell test you may need to create a file on the fly to work with. Keep in mind that some CI checks run tests in parallel, so if you are creating or removing a temporary file in your script without a unique name this can cause some of the CI checks, such as Flaky, to fail. To get around this you should use environment variable `$CLICKHOUSE_TEST_UNIQUE_NAME` to give temporary files a name unique to the test that is running. That way you can be sure that the file you are creating during setup or removing during cleanup is the file only in use by that test and not some other test which is running in parallel. ## Known Bugs {#known-bugs} diff --git a/docs/en/getting-started/example-datasets/noaa.md b/docs/en/getting-started/example-datasets/noaa.md new file mode 100644 index 00000000000..9a3ec7791b6 --- /dev/null +++ b/docs/en/getting-started/example-datasets/noaa.md @@ -0,0 +1,342 @@ +--- +slug: /en/getting-started/example-datasets/noaa +sidebar_label: NOAA Global Historical Climatology Network +sidebar_position: 1 +description: 2.5 billion rows of climate data for the last 120 yrs +--- + +# NOAA Global Historical Climatology Network + +This dataset contains weather measurements for the last 120 years. Each row is a measurement for a point in time and station. + +More precisely and according to the [origin of this data](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn): + +> GHCN-Daily is a dataset that contains daily observations over global land areas. It contains station-based measurements from land-based stations worldwide, about two-thirds of which are for precipitation measurements only (Menne et al., 2012). GHCN-Daily is a composite of climate records from numerous sources that were merged together and subjected to a common suite of quality assurance reviews (Durre et al., 2010). The archive includes the following meteorological elements: + + - Daily maximum temperature + - Daily minimum temperature + - Temperature at the time of observation + - Precipitation (i.e., rain, melted snow) + - Snowfall + - Snow depth + - Other elements where available + +## Downloading the data + +- A [pre-prepared version](#pre-prepared-data) of the data for ClickHouse, which has been cleansed, re-structured, and enriched. This data covers the years 1900 to 2022. +- [Download the original data](#original-data) and convert to the format required by ClickHouse. Users wanting to add their own columns may wish to explore this approach. + +### Pre-prepared data + +More specifically, rows have been removed that did not fail any quality assurance checks by Noaa. The data has also been restructured from a measurement per line to a row per station id and date, i.e. + +```csv +"station_id","date","tempAvg","tempMax","tempMin","precipitation","snowfall","snowDepth","percentDailySun","averageWindSpeed","maxWindSpeed","weatherType" +"AEM00041194","2022-07-30",347,0,308,0,0,0,0,0,0,0 +"AEM00041194","2022-07-31",371,413,329,0,0,0,0,0,0,0 +"AEM00041194","2022-08-01",384,427,357,0,0,0,0,0,0,0 +"AEM00041194","2022-08-02",381,424,352,0,0,0,0,0,0,0 +``` + +This is simpler to query and ensures the resulting table is less sparse. Finally, the data has also been enriched with latitude and longitude. + +This data is available in the following S3 location. Either download the data to your local filesystem (and insert using the ClickHouse client) or insert directly into ClickHouse (see [Inserting from S3](#inserting-from-s3)). + +To download: + +```bash +wget https://datasets-documentation.s3.eu-west-3.amazonaws.com/noaa/noaa_enriched.parquet +``` + +### Original data + +The following details the steps to download and transform the original data in preparation for loading into ClickHouse. + +#### Download + +To download the original data: + +```bash +for i in {1900..2023}; do wget https://noaa-ghcn-pds.s3.amazonaws.com/csv.gz/${i}.csv.gz; done +``` + +#### Sampling the data + +```bash +$ clickhouse-local --query "SELECT * FROM '2021.csv.gz' LIMIT 10" --format PrettyCompact +┌─c1──────────┬───────c2─┬─c3───┬──c4─┬─c5───┬─c6───┬─c7─┬───c8─┐ +│ AE000041196 │ 20210101 │ TMAX │ 278 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │ +│ AE000041196 │ 20210101 │ PRCP │ 0 │ D │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │ +│ AE000041196 │ 20210101 │ TAVG │ 214 │ H │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │ +│ AEM00041194 │ 20210101 │ TMAX │ 266 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │ +│ AEM00041194 │ 20210101 │ TMIN │ 178 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │ +│ AEM00041194 │ 20210101 │ PRCP │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │ +│ AEM00041194 │ 20210101 │ TAVG │ 217 │ H │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │ +│ AEM00041217 │ 20210101 │ TMAX │ 262 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │ +│ AEM00041217 │ 20210101 │ TMIN │ 155 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │ +│ AEM00041217 │ 20210101 │ TAVG │ 202 │ H │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │ +└─────────────┴──────────┴──────┴─────┴──────┴──────┴────┴──────┘ +``` + +Summarizing the [format documentation](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn): + + +Summarizing the format documentation and the columns in order: + + - An 11 character station identification code. This itself encodes some useful information + - YEAR/MONTH/DAY = 8 character date in YYYYMMDD format (e.g. 19860529 = May 29, 1986) + - ELEMENT = 4 character indicator of element type. Effectively the measurement type. While there are many measurements available, we select the following: + - PRCP - Precipitation (tenths of mm) + - SNOW - Snowfall (mm) + - SNWD - Snow depth (mm) + - TMAX - Maximum temperature (tenths of degrees C) + - TAVG - Average temperature (tenths of a degree C) + - TMIN - Minimum temperature (tenths of degrees C) + - PSUN - Daily percent of possible sunshine (percent) + - AWND - Average daily wind speed (tenths of meters per second) + - WSFG - Peak gust wind speed (tenths of meters per second) + - WT** = Weather Type where ** defines the weather type. Full list of weather types here. +- DATA VALUE = 5 character data value for ELEMENT i.e. the value of the measurement. +- M-FLAG = 1 character Measurement Flag. This has 10 possible values. Some of these values indicate questionable data accuracy. We accept data where this is set to “P” - identified as missing presumed zero, as this is only relevant to the PRCP, SNOW and SNWD measurements. +- Q-FLAG is the measurement quality flag with 14 possible values. We are only interested in data with an empty value i.e. it did not fail any quality assurance checks. +- S-FLAG is the source flag for the observation. Not useful for our analysis and ignored. +- OBS-TIME = 4-character time of observation in hour-minute format (i.e. 0700 =7:00 am). Typically not present in older data. We ignore this for our purposes. + +A measurement per line would result in a sparse table structure in ClickHouse. We should transform to a row per time and station, with measurements as columns. First, we limit the dataset to those rows without issues i.e. where `qFlag` is equal to an empty string. + +#### Clean the data + +Using [ClickHouse local](https://clickhouse.com/blog/extracting-converting-querying-local-files-with-sql-clickhouse-local) we can filter rows that represent measurements of interest and pass our quality requirements: + +```bash +clickhouse local --query "SELECT count() +FROM file('*.csv.gz', CSV, 'station_id String, date String, measurement String, value Int64, mFlag String, qFlag String, sFlag String, obsTime String') WHERE qFlag = '' AND (measurement IN ('PRCP', 'SNOW', 'SNWD', 'TMAX', 'TAVG', 'TMIN', 'PSUN', 'AWND', 'WSFG') OR startsWith(measurement, 'WT'))" + +2679264563 +``` + +With over 2.6 billion rows, this isn’t a fast query since it involves parsing all the files. On our 8 core machine, this takes around 160 seconds. + + +### Pivot data + +While the measurement per line structure can be used with ClickHouse, it will unnecessarily complicate future queries. Ideally, we need a row per station id and date, where each measurement type and associated value are a column i.e. + +```csv +"station_id","date","tempAvg","tempMax","tempMin","precipitation","snowfall","snowDepth","percentDailySun","averageWindSpeed","maxWindSpeed","weatherType" +"AEM00041194","2022-07-30",347,0,308,0,0,0,0,0,0,0 +"AEM00041194","2022-07-31",371,413,329,0,0,0,0,0,0,0 +"AEM00041194","2022-08-01",384,427,357,0,0,0,0,0,0,0 +"AEM00041194","2022-08-02",381,424,352,0,0,0,0,0,0,0 +``` + +Using ClickHouse local and a simple `GROUP BY`, we can repivot our data to this structure. To limit memory overhead, we do this one file at a time. + +```bash +for i in {1900..2022} +do +clickhouse-local --query "SELECT station_id, + toDate32(date) as date, + anyIf(value, measurement = 'TAVG') as tempAvg, + anyIf(value, measurement = 'TMAX') as tempMax, + anyIf(value, measurement = 'TMIN') as tempMin, + anyIf(value, measurement = 'PRCP') as precipitation, + anyIf(value, measurement = 'SNOW') as snowfall, + anyIf(value, measurement = 'SNWD') as snowDepth, + anyIf(value, measurement = 'PSUN') as percentDailySun, + anyIf(value, measurement = 'AWND') as averageWindSpeed, + anyIf(value, measurement = 'WSFG') as maxWindSpeed, + toUInt8OrZero(replaceOne(anyIf(measurement, startsWith(measurement, 'WT') AND value = 1), 'WT', '')) as weatherType +FROM file('$i.csv.gz', CSV, 'station_id String, date String, measurement String, value Int64, mFlag String, qFlag String, sFlag String, obsTime String') + WHERE qFlag = '' AND (measurement IN ('PRCP', 'SNOW', 'SNWD', 'TMAX', 'TAVG', 'TMIN', 'PSUN', 'AWND', 'WSFG') OR startsWith(measurement, 'WT')) +GROUP BY station_id, date +ORDER BY station_id, date FORMAT CSV" >> "noaa.csv"; +done +``` + +This query produces a single 50GB file `noaa.csv`. + +### Enriching the data + +The data has no indication of location aside from a station id, which includes a prefix country code. Ideally, each station would have a latitude and longitude associated with it. To achieve this, NOAA conveniently provides the details of each station as a separate [ghcnd-stations.txt](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn#format-of-ghcnd-stationstxt-file). This file has [several columns](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn#format-of-ghcnd-stationstxt-file), of which five are useful to our future analysis: id, latitude, longitude, elevation, and name. + +```bash +wget http://noaa-ghcn-pds.s3.amazonaws.com/ghcnd-stations.txt +``` + +```bash +clickhouse local --query "WITH stations AS (SELECT id, lat, lon, elevation, splitByString(' GSN ',name)[1] as name FROM file('ghcnd-stations.txt', Regexp, 'id String, lat Float64, lon Float64, elevation Float32, name String')) +SELECT station_id, + date, + tempAvg, + tempMax, + tempMin, + precipitation, + snowfall, + snowDepth, + percentDailySun, + averageWindSpeed, + maxWindSpeed, + weatherType, + tuple(lon, lat) as location, + elevation, + name +FROM file('noaa.csv', CSV, + 'station_id String, date Date32, tempAvg Int32, tempMax Int32, tempMin Int32, precipitation Int32, snowfall Int32, snowDepth Int32, percentDailySun Int8, averageWindSpeed Int32, maxWindSpeed Int32, weatherType UInt8') as noaa LEFT OUTER + JOIN stations ON noaa.station_id = stations.id INTO OUTFILE 'noaa_enriched.parquet' FORMAT Parquet SETTINGS format_regexp='^(.{11})\s+(\-?\d{1,2}\.\d{4})\s+(\-?\d{1,3}\.\d{1,4})\s+(\-?\d*\.\d*)\s+(.*)\s+(?:[\d]*)'" +``` +This query takes a few minutes to run and produces a 6.4 GB file, `noaa_enriched.parquet`. + +## Create table + +Create a MergeTree table in ClickHouse (from the ClickHouse client). + +```sql +CREATE TABLE noaa +( + `station_id` LowCardinality(String), + `date` Date32, + `tempAvg` Int32 COMMENT 'Average temperature (tenths of a degrees C)', + `tempMax` Int32 COMMENT 'Maximum temperature (tenths of degrees C)', + `tempMin` Int32 COMMENT 'Minimum temperature (tenths of degrees C)', + `precipitation` UInt32 COMMENT 'Precipitation (tenths of mm)', + `snowfall` UInt32 COMMENT 'Snowfall (mm)', + `snowDepth` UInt32 COMMENT 'Snow depth (mm)', + `percentDailySun` UInt8 COMMENT 'Daily percent of possible sunshine (percent)', + `averageWindSpeed` UInt32 COMMENT 'Average daily wind speed (tenths of meters per second)', + `maxWindSpeed` UInt32 COMMENT 'Peak gust wind speed (tenths of meters per second)', + `weatherType` Enum8('Normal' = 0, 'Fog' = 1, 'Heavy Fog' = 2, 'Thunder' = 3, 'Small Hail' = 4, 'Hail' = 5, 'Glaze' = 6, 'Dust/Ash' = 7, 'Smoke/Haze' = 8, 'Blowing/Drifting Snow' = 9, 'Tornado' = 10, 'High Winds' = 11, 'Blowing Spray' = 12, 'Mist' = 13, 'Drizzle' = 14, 'Freezing Drizzle' = 15, 'Rain' = 16, 'Freezing Rain' = 17, 'Snow' = 18, 'Unknown Precipitation' = 19, 'Ground Fog' = 21, 'Freezing Fog' = 22), + `location` Point, + `elevation` Float32, + `name` LowCardinality(String) +) ENGINE = MergeTree() ORDER BY (station_id, date); + +``` + +## Inserting into ClickHouse + +### Inserting from local file + +Data can be inserted from a local file as follows (from the ClickHouse client): + +```sql +INSERT INTO noaa FROM INFILE '/noaa_enriched.parquet' +``` + +where `` represents the full path to the local file on disk. + +See [here](https://clickhouse.com/blog/real-world-data-noaa-climate-data#load-the-data) for how to speed this load up. + +### Inserting from S3 + +```sql +INSERT INTO noaa SELECT * +FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/noaa/noaa_enriched.parquet') + +``` +For how to speed this up, see our blog post on [tuning large data loads](https://clickhouse.com/blog/supercharge-your-clickhouse-data-loads-part2). + +## Sample queries + +### Highest temperature ever + +```sql +SELECT + tempMax / 10 AS maxTemp, + location, + name, + date +FROM blogs.noaa +WHERE tempMax > 500 +ORDER BY + tempMax DESC, + date ASC +LIMIT 5 + +┌─maxTemp─┬─location──────────┬─name───────────────────────────────────────────┬───────date─┐ +│ 56.7 │ (-116.8667,36.45) │ CA GREENLAND RCH │ 1913-07-10 │ +│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1949-08-20 │ +│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1949-09-18 │ +│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1952-07-17 │ +│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1952-09-04 │ +└─────────┴───────────────────┴────────────────────────────────────────────────┴────────────┘ + +5 rows in set. Elapsed: 0.514 sec. Processed 1.06 billion rows, 4.27 GB (2.06 billion rows/s., 8.29 GB/s.) +``` + +Reassuringly consistent with the [documented record](https://en.wikipedia.org/wiki/List_of_weather_records#Highest_temperatures_ever_recorded) at [Furnace Creek](https://www.google.com/maps/place/36%C2%B027'00.0%22N+116%C2%B052'00.1%22W/@36.1329666,-116.1104099,8.95z/data=!4m5!3m4!1s0x0:0xf2ed901b860f4446!8m2!3d36.45!4d-116.8667) as of 2023. + +### Best ski resorts + +Using a [list of ski resorts](https://gist.githubusercontent.com/gingerwizard/dd022f754fd128fdaf270e58fa052e35/raw/622e03c37460f17ef72907afe554cb1c07f91f23/ski_resort_stats.csv) in the united states and their respective locations, we join these against the top 1000 weather stations with the most in any month in the last 5 yrs. Sorting this join by [geoDistance](https://clickhouse.com/docs/en/sql-reference/functions/geo/coordinates/#geodistance) and restricting the results to those where the distance is less than 20km, we select the top result per resort and sort this by total snow. Note we also restrict resorts to those above 1800m, as a broad indicator of good skiing conditions. + +```sql +SELECT + resort_name, + total_snow / 1000 AS total_snow_m, + resort_location, + month_year +FROM +( + WITH resorts AS + ( + SELECT + resort_name, + state, + (lon, lat) AS resort_location, + 'US' AS code + FROM url('https://gist.githubusercontent.com/gingerwizard/dd022f754fd128fdaf270e58fa052e35/raw/622e03c37460f17ef72907afe554cb1c07f91f23/ski_resort_stats.csv', CSVWithNames) + ) + SELECT + resort_name, + highest_snow.station_id, + geoDistance(resort_location.1, resort_location.2, station_location.1, station_location.2) / 1000 AS distance_km, + highest_snow.total_snow, + resort_location, + station_location, + month_year + FROM + ( + SELECT + sum(snowfall) AS total_snow, + station_id, + any(location) AS station_location, + month_year, + substring(station_id, 1, 2) AS code + FROM noaa + WHERE (date > '2017-01-01') AND (code = 'US') AND (elevation > 1800) + GROUP BY + station_id, + toYYYYMM(date) AS month_year + ORDER BY total_snow DESC + LIMIT 1000 + ) AS highest_snow + INNER JOIN resorts ON highest_snow.code = resorts.code + WHERE distance_km < 20 + ORDER BY + resort_name ASC, + total_snow DESC + LIMIT 1 BY + resort_name, + station_id +) +ORDER BY total_snow DESC +LIMIT 5 + +┌─resort_name──────────┬─total_snow_m─┬─resort_location─┬─month_year─┐ +│ Sugar Bowl, CA │ 7.799 │ (-120.3,39.27) │ 201902 │ +│ Donner Ski Ranch, CA │ 7.799 │ (-120.34,39.31) │ 201902 │ +│ Boreal, CA │ 7.799 │ (-120.35,39.33) │ 201902 │ +│ Homewood, CA │ 4.926 │ (-120.17,39.08) │ 201902 │ +│ Alpine Meadows, CA │ 4.926 │ (-120.22,39.17) │ 201902 │ +└──────────────────────┴──────────────┴─────────────────┴────────────┘ + +5 rows in set. Elapsed: 0.750 sec. Processed 689.10 million rows, 3.20 GB (918.20 million rows/s., 4.26 GB/s.) +Peak memory usage: 67.66 MiB. +``` + +## Credits + +We would like to acknowledge the efforts of the Global Historical Climatology Network for preparing, cleansing, and distributing this data. We appreciate your efforts. + +Menne, M.J., I. Durre, B. Korzeniewski, S. McNeal, K. Thomas, X. Yin, S. Anthony, R. Ray, R.S. Vose, B.E.Gleason, and T.G. Houston, 2012: Global Historical Climatology Network - Daily (GHCN-Daily), Version 3. [indicate subset used following decimal, e.g. Version 3.25]. NOAA National Centers for Environmental Information. http://doi.org/10.7289/V5D21VHZ [17/08/2020] diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index a53844e792f..518037a2c7c 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -197,6 +197,29 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va Instead of `--host`, `--port`, `--user` and `--password` options, ClickHouse client also supports connection strings (see next section). +## Aliases {#cli_aliases} + +- `\l` - SHOW DATABASES +- `\d` - SHOW TABLES +- `\c ` - USE DATABASE +- `.` - repeat the last query + + +## Shortkeys {#shortkeys_aliases} + +- `Alt (Option) + Shift + e` - open editor with current query. It is possible to set up an environment variable - `EDITOR`, by default vim is used. +- `Alt (Option) + #` - comment line. +- `Ctrl + r` - fuzzy history search. + +:::tip +To configure the correct work of meta key (Option) on MacOS: + +iTerm2: Go to Preferences -> Profile -> Keys -> Left Option key and click Esc+ +::: + +The full list with all available shortkeys - [replxx](https://github.com/AmokHuginnsson/replxx/blob/1f149bf/src/replxx_impl.cxx#L262). + + ## Connection string {#connection_string} clickhouse-client alternatively supports connecting to clickhouse server using a connection string similar to [MongoDB](https://www.mongodb.com/docs/manual/reference/connection-string/), [PostgreSQL](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING), [MySQL](https://dev.mysql.com/doc/refman/8.0/en/connecting-using-uri-or-key-value-pairs.html#connecting-using-uri). It has the following syntax: diff --git a/docs/en/operations/system-tables/quota_usage.md b/docs/en/operations/system-tables/quota_usage.md index 0dca7c525f2..3d4b8f62d2d 100644 --- a/docs/en/operations/system-tables/quota_usage.md +++ b/docs/en/operations/system-tables/quota_usage.md @@ -25,6 +25,8 @@ Columns: - `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries. - `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of bytes read from all tables and table functions participated in queries. - `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum of bytes read from all tables and table functions. +- `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — The total count of sequential authentication failures. If the user entered the correct password before exceed `failed_sequential_authentications` threshold then the counter will be reset. +- `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — Maximum count of sequential authentication failures. - `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — The total query execution time, in seconds (wall time). - `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of query execution time. diff --git a/docs/en/operations/system-tables/quotas_usage.md b/docs/en/operations/system-tables/quotas_usage.md index a04018ac2c8..960903fa25f 100644 --- a/docs/en/operations/system-tables/quotas_usage.md +++ b/docs/en/operations/system-tables/quotas_usage.md @@ -28,8 +28,10 @@ Columns: - `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries. - `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of bytes read from all tables and table functions participated in queries. - `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum of bytes read from all tables and table functions. -- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — The total query execution time, in seconds (wall time). -- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of query execution time. +- `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — The total count of sequential authentication failures. If the user entered the correct password before exceed `failed_sequential_authentications` threshold then the counter will be reset. +- `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum count of sequential authentication failures. +- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — The total query execution time, in seconds (wall time). +- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — Maximum of query execution time. ## See Also {#see-also} diff --git a/docs/en/operations/utilities/clickhouse-benchmark.md b/docs/en/operations/utilities/clickhouse-benchmark.md index 8b7d7f85552..6d5148ad965 100644 --- a/docs/en/operations/utilities/clickhouse-benchmark.md +++ b/docs/en/operations/utilities/clickhouse-benchmark.md @@ -45,11 +45,11 @@ clickhouse-benchmark [keys] < queries_file; - `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1. - `-d N`, `--delay=N` — Interval in seconds between intermediate reports (to disable reports set 0). Default value: 1. - `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys. -- `-p N`, `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys. - `-i N`, `--iterations=N` — Total number of queries. Default value: 0 (repeat forever). - `-r`, `--randomize` — Random order of queries execution if there is more than one input query. - `-s`, `--secure` — Using `TLS` connection. - `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` stops sending queries when the specified time limit is reached. Default value: 0 (time limit disabled). +- `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `--port` keys. - `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Student’s t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) to determine whether the two distributions aren’t different with the selected level of confidence. - `--cumulative` — Printing cumulative data instead of data per interval. - `--database=DATABASE_NAME` — ClickHouse database name. Default value: `default`. diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md new file mode 100644 index 00000000000..cc601c097fe --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md @@ -0,0 +1,48 @@ + --- + toc_priority: 112 + --- + + # groupArraySorted {#groupArraySorted} + + Returns an array with the first N items in ascending order. + + ``` sql + groupArraySorted(N)(column) + ``` + + **Arguments** + + - `N` – The number of elements to return. + + If the parameter is omitted, default value is the size of input. + + - `column` – The value (Integer, String, Float and other Generic types). + + **Example** + + Gets the first 10 numbers: + + ``` sql + SELECT groupArraySorted(10)(number) FROM numbers(100) + ``` + + ``` text + ┌─groupArraySorted(10)(number)─┐ + │ [0,1,2,3,4,5,6,7,8,9] │ + └──────────────────────────────┘ + ``` + + + Gets all the String implementations of all numbers in column: + + ``` sql +SELECT groupArraySorted(str) FROM (SELECT toString(number) as str FROM numbers(5)); + + ``` + + ``` text + ┌─groupArraySorted(str)────────┐ + │ ['0','1','2','3','4'] │ + └──────────────────────────────┘ + ``` + \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index 10bd3e11064..93d4282c32b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -54,6 +54,7 @@ ClickHouse-specific aggregate functions: - [groupArrayMovingAvg](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md) - [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md) - [groupArraySample](./grouparraysample.md) +- [groupArraySorted](/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md) - [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md) - [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md) - [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md) @@ -88,7 +89,7 @@ ClickHouse-specific aggregate functions: - [quantileTDigestWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md) - [quantileBFloat16](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16) - [quantileBFloat16Weighted](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted) -- [quantileDDSketch](/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch) +- [quantileDD](/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch) - [simpleLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md) - [stochasticLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md) - [stochasticLogisticRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md) @@ -105,4 +106,3 @@ ClickHouse-specific aggregate functions: - [sparkBar](./sparkbar.md) - [sumCount](./sumcount.md) - [largestTriangleThreeBuckets](./largestTriangleThreeBuckets.md) - diff --git a/docs/en/sql-reference/aggregate-functions/reference/median.md b/docs/en/sql-reference/aggregate-functions/reference/median.md index 7467a47cf5f..2a166c83dad 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/median.md +++ b/docs/en/sql-reference/aggregate-functions/reference/median.md @@ -18,7 +18,7 @@ Functions: - `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest). - `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted). - `medianBFloat16` — Alias for [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16). -- `medianDDSketch` — Alias for [quantileDDSketch](../../../sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch). +- `medianDD` — Alias for [quantileDD](../../../sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch). **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md b/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md index 9cb73dfc9d8..f9acd2e20cb 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantileddsketch sidebar_position: 211 -title: quantileDDSketch +title: quantileDD --- -Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a sample with relative-error guarantees. It works by building a [DDSketch](https://www.vldb.org/pvldb/vol12/p2195-masson.pdf). +Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a sample with relative-error guarantees. It works by building a [DD](https://www.vldb.org/pvldb/vol12/p2195-masson.pdf). **Syntax** @@ -44,13 +44,13 @@ Input table has an integer and a float columns: Query to calculate 0.75-quantile (third quartile): ``` sql -SELECT quantileDDSketch(0.01, 0.75)(a), quantileDDSketch(0.01, 0.75)(b) FROM example_table; +SELECT quantileDD(0.01, 0.75)(a), quantileDD(0.01, 0.75)(b) FROM example_table; ``` Result: ``` text -┌─quantileDDSketch(0.01, 0.75)(a)─┬─quantileDDSketch(0.01, 0.75)(b)─┐ +┌─quantileDD(0.01, 0.75)(a)─┬─quantileDD(0.01, 0.75)(b)─┐ │ 2.974233423476717 │ 1.01 │ └─────────────────────────────────┴─────────────────────────────────┘ ``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index e5da6a9c1de..e2a5bc53e32 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -9,7 +9,7 @@ sidebar_position: 201 Syntax: `quantiles(level1, level2, …)(x)` -All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDDSketch`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. +All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDD`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. ## quantilesExactExclusive diff --git a/docs/en/sql-reference/data-types/variant.md b/docs/en/sql-reference/data-types/variant.md index 0058e13b4ca..17d51878420 100644 --- a/docs/en/sql-reference/data-types/variant.md +++ b/docs/en/sql-reference/data-types/variant.md @@ -156,6 +156,35 @@ SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantEleme └───────────────┴─────────────────────────────┴─────────────────────────────┴────────────────────────────────────┘ ``` +To know what variant is stored in each row function `variantType(variant_column)` can be used. It returns `Enum` with variant type name for each row (or `'None'` if row is `NULL`). + +Example: + +```sql +CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT variantType(v) from test; +``` + +```text +┌─variantType(v)─┐ +│ None │ +│ UInt64 │ +│ String │ +│ Array(UInt64) │ +└────────────────┘ +``` + +```sql +SELECT toTypeName(variantType(v)) FROM test LIMIT 1; +``` + +```text +┌─toTypeName(variantType(v))──────────────────────────────────────────┐ +│ Enum8('None' = -1, 'Array(UInt64)' = 0, 'String' = 1, 'UInt64' = 2) │ +└─────────────────────────────────────────────────────────────────────┘ +``` + ## Conversion between Variant column and other columns There are 3 possible conversions that can be performed with Variant column. diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index ebc80e4d308..d05e7bbfe51 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2869,6 +2869,51 @@ SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantEleme └───────────────┴─────────────────────────────┴─────────────────────────────┴────────────────────────────────────┘ ``` +## variantType + +Returns the variant type name for each row of `Variant` column. If row contains NULL, it returns `'None'` for it. + +**Syntax** + +```sql +variantType(variant) +``` + +**Arguments** + +- `variant` — Variant column. [Variant](../../sql-reference/data-types/variant.md). + +**Returned value** + +- Enum8 column with variant type name for each row. + +**Example** + +```sql +CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT variantType(v) FROM test; +``` + +```text +┌─variantType(v)─┐ +│ None │ +│ UInt64 │ +│ String │ +│ Array(UInt64) │ +└────────────────┘ +``` + +```sql +SELECT toTypeName(variantType(v)) FROM test LIMIT 1; +``` + +```text +┌─toTypeName(variantType(v))──────────────────────────────────────────┐ +│ Enum8('None' = -1, 'Array(UInt64)' = 0, 'String' = 1, 'UInt64' = 2) │ +└─────────────────────────────────────────────────────────────────────┘ +``` + ## minSampleSizeConversion Calculates minimum required sample size for an A/B test comparing conversions (proportions) in two samples. diff --git a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md index a6ced870c18..d16b40876c7 100644 --- a/docs/en/sql-reference/statements/create/quota.md +++ b/docs/en/sql-reference/statements/create/quota.md @@ -21,7 +21,7 @@ CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name] Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table. -Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table. +Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time`, `failed_sequential_authentications` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table. `ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md). diff --git a/docs/ru/operations/system-tables/quota_usage.md b/docs/ru/operations/system-tables/quota_usage.md index 96f6debd24e..46305e59da6 100644 --- a/docs/ru/operations/system-tables/quota_usage.md +++ b/docs/ru/operations/system-tables/quota_usage.md @@ -26,8 +26,11 @@ slug: /ru/operations/system-tables/quota_usage - `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах. - `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах. - `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байт, считываемых из всех таблиц и табличных функций. -- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах. -- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса. +- `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Общее количество неудачных попыток подряд ввести пароль. Если пользователь ввел верный пароль до преодоления порогового значения `max_failed_sequential_authentications` то счетчик неудачных попыток будет сброшен. +- `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Максимальное количество неудачных попыток подряд ввести пароль. +- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах. +- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса. + ## Смотрите также {#see-also} diff --git a/docs/ru/operations/system-tables/quotas_usage.md b/docs/ru/operations/system-tables/quotas_usage.md index 27e7cdf8abe..4bc0f2e81ca 100644 --- a/docs/ru/operations/system-tables/quotas_usage.md +++ b/docs/ru/operations/system-tables/quotas_usage.md @@ -29,9 +29,10 @@ slug: /ru/operations/system-tables/quotas_usage - `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах. - `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах. - `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байт, считываемых из всех таблиц и табличных функций. +- `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — Общее количество неудачных попыток подряд ввести пароль. Если пользователь ввел верный пароль до преодоления порогового значения `max_failed_sequential_authentications` то счетчик неудачных попыток будет сброшен. +- `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — Максимальное количество неудачных попыток подряд ввести пароль. - `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах. - `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса. - ## Смотрите также {#see-also} - [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement) diff --git a/docs/ru/sql-reference/statements/alter/quota.md b/docs/ru/sql-reference/statements/alter/quota.md index 709baea6af0..c14b81c9bf3 100644 --- a/docs/ru/sql-reference/statements/alter/quota.md +++ b/docs/ru/sql-reference/statements/alter/quota.md @@ -22,7 +22,7 @@ ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name] Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md). -Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md). +Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time`, `failed_sequential_authentications` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md). В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md). diff --git a/docs/ru/sql-reference/statements/create/quota.md b/docs/ru/sql-reference/statements/create/quota.md index 18eba6b5b1a..398c52fdc73 100644 --- a/docs/ru/sql-reference/statements/create/quota.md +++ b/docs/ru/sql-reference/statements/create/quota.md @@ -20,7 +20,7 @@ CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name] ``` Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md). -Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md). +Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time`, `failed_sequential_authentications` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md). В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md). diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index 71ad219110d..d02af01126a 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -559,12 +559,33 @@ AccessChangesNotifier & AccessControl::getChangesNotifier() } -AuthResult AccessControl::authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const +AuthResult AccessControl::authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const String & forwarded_address) const { + // NOTE: In the case where the user has never been logged in using LDAP, + // Then user_id is not generated, and the authentication quota will always be nullptr. + auto authentication_quota = getAuthenticationQuota(credentials.getUserName(), address, forwarded_address); + if (authentication_quota) + { + /// Reserve a single try from the quota to check whether we have another authentication try. + /// This is required for correct behavior in this situation: + /// User has 1 login failures quota. + /// * At the first login with an invalid password: Increase the quota counter. 1 (used) > 1 (max) is false. + ///   Then try to authenticate the user and throw an AUTHENTICATION_FAILED error. + /// * In case of the second try: increase quota counter, 2 (used) > 1 (max), then throw QUOTA_EXCEED + ///   and don't let the user authenticate. + /// + /// The authentication failures counter will be reset after successful authentication. + authentication_quota->used(QuotaType::FAILED_SEQUENTIAL_AUTHENTICATIONS, 1); + } + try { - return MultipleAccessStorage::authenticate(credentials, address, *external_authenticators, allow_no_password, - allow_plaintext_password); + const auto auth_result = MultipleAccessStorage::authenticate(credentials, address, *external_authenticators, allow_no_password, + allow_plaintext_password); + if (authentication_quota) + authentication_quota->reset(QuotaType::FAILED_SEQUENTIAL_AUTHENTICATIONS); + + return auth_result; } catch (...) { @@ -763,7 +784,34 @@ std::shared_ptr AccessControl::getEnabledQuota( const String & forwarded_address, const String & custom_quota_key) const { - return quota_cache->getEnabledQuota(user_id, user_name, enabled_roles, address, forwarded_address, custom_quota_key); + return quota_cache->getEnabledQuota(user_id, user_name, enabled_roles, address, forwarded_address, custom_quota_key, true); +} + +std::shared_ptr AccessControl::getAuthenticationQuota( + const String & user_name, const Poco::Net::IPAddress & address, const std::string & forwarded_address) const +{ + auto user_id = find(user_name); + UserPtr user; + if (user_id && (user = tryRead(*user_id))) + { + const auto new_current_roles = user->granted_roles.findGranted(user->default_roles); + const auto roles_info = getEnabledRolesInfo(new_current_roles, {}); + + // client_key is not received at the moment of authentication during TCP connection + // if key type is set to QuotaKeyType::CLIENT_KEY + // QuotaCache::QuotaInfo::calculateKey will throw exception without throw_if_client_key_empty = false + String quota_key; + bool throw_if_client_key_empty = false; + return quota_cache->getEnabledQuota(*user_id, + user->getName(), + roles_info->enabled_roles, + address, + forwarded_address, + quota_key, + throw_if_client_key_empty); + } + else + return nullptr; } diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index 76431ab4928..904f77faf90 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -118,7 +118,7 @@ public: scope_guard subscribeForChanges(const UUID & id, const OnChangedHandler & handler) const; scope_guard subscribeForChanges(const std::vector & ids, const OnChangedHandler & handler) const; - AuthResult authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const; + AuthResult authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const String & forwarded_address) const; /// Makes a backup of access entities. void restoreFromBackup(RestorerFromBackup & restorer) override; @@ -206,6 +206,11 @@ public: const String & forwarded_address, const String & custom_quota_key) const; + std::shared_ptr getAuthenticationQuota( + const String & user_name, + const Poco::Net::IPAddress & address, + const std::string & forwarded_address) const; + std::vector getAllQuotasUsage() const; std::shared_ptr getEnabledSettings( diff --git a/src/Access/Common/QuotaDefs.cpp b/src/Access/Common/QuotaDefs.cpp index 4136fd52a00..0e9a4d5a365 100644 --- a/src/Access/Common/QuotaDefs.cpp +++ b/src/Access/Common/QuotaDefs.cpp @@ -111,6 +111,11 @@ const QuotaTypeInfo & QuotaTypeInfo::get(QuotaType type) static const auto info = make_info("WRITTEN_BYTES", 1); return info; } + case QuotaType::FAILED_SEQUENTIAL_AUTHENTICATIONS: + { + static const auto info = make_info("FAILED_SEQUENTIAL_AUTHENTICATIONS", 1); + return info; + } case QuotaType::MAX: break; } throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected quota type: {}", static_cast(type)); diff --git a/src/Access/Common/QuotaDefs.h b/src/Access/Common/QuotaDefs.h index dfe2b56ef31..4f849a72b43 100644 --- a/src/Access/Common/QuotaDefs.h +++ b/src/Access/Common/QuotaDefs.h @@ -11,16 +11,17 @@ using QuotaValue = UInt64; /// Kinds of resource what we wish to quota. enum class QuotaType { - QUERIES, /// Number of queries. - QUERY_SELECTS, /// Number of select queries. - QUERY_INSERTS, /// Number of insert queries. - ERRORS, /// Number of queries with exceptions. - RESULT_ROWS, /// Number of rows returned as result. - RESULT_BYTES, /// Number of bytes returned as result. - READ_ROWS, /// Number of rows read from tables. - READ_BYTES, /// Number of bytes read from tables. - EXECUTION_TIME, /// Total amount of query execution time in nanoseconds. - WRITTEN_BYTES, /// Number of bytes written to tables. + QUERIES, /// Number of queries. + QUERY_SELECTS, /// Number of select queries. + QUERY_INSERTS, /// Number of insert queries. + ERRORS, /// Number of queries with exceptions. + RESULT_ROWS, /// Number of rows returned as result. + RESULT_BYTES, /// Number of bytes returned as result. + READ_ROWS, /// Number of rows read from tables. + READ_BYTES, /// Number of bytes read from tables. + EXECUTION_TIME, /// Total amount of query execution time in nanoseconds. + WRITTEN_BYTES, /// Number of bytes written to tables. + FAILED_SEQUENTIAL_AUTHENTICATIONS, /// Number of recent failed authentications. MAX }; diff --git a/src/Access/EnabledQuota.cpp b/src/Access/EnabledQuota.cpp index a61e795f741..98cce586a54 100644 --- a/src/Access/EnabledQuota.cpp +++ b/src/Access/EnabledQuota.cpp @@ -105,6 +105,16 @@ struct EnabledQuota::Impl std::uniform_int_distribution distribution{0, count - 1}; return std::chrono::system_clock::duration(distribution(thread_local_rng)); } + + static void resetQuotaValue(const Intervals & intervals, QuotaType quota_type, QuotaValue value, std::chrono::system_clock::time_point current_time) + { + const auto quota_type_i = static_cast(quota_type); + for (const auto & interval : intervals.intervals) + { + interval.used[quota_type_i] = value; + interval.getEndOfInterval(current_time); + } + } }; @@ -285,6 +295,12 @@ void EnabledQuota::checkExceeded(QuotaType quota_type) const } +void EnabledQuota::reset(QuotaType quota_type) const +{ + const auto loaded = intervals.load(); + Impl::resetQuotaValue(*loaded, quota_type, 0, std::chrono::system_clock::now()); +} + std::optional EnabledQuota::getUsage() const { auto loaded = intervals.load(); diff --git a/src/Access/EnabledQuota.h b/src/Access/EnabledQuota.h index ed1e06fa1f2..c4665da5dc6 100644 --- a/src/Access/EnabledQuota.h +++ b/src/Access/EnabledQuota.h @@ -52,6 +52,8 @@ public: void checkExceeded() const; void checkExceeded(QuotaType quota_type) const; + void reset(QuotaType quota_type) const; + /// Returns the information about quota consumption. std::optional getUsage() const; diff --git a/src/Access/QuotaCache.cpp b/src/Access/QuotaCache.cpp index 3c25c8833f3..dc49e9680ad 100644 --- a/src/Access/QuotaCache.cpp +++ b/src/Access/QuotaCache.cpp @@ -30,7 +30,7 @@ void QuotaCache::QuotaInfo::setQuota(const QuotaPtr & quota_, const UUID & quota } -String QuotaCache::QuotaInfo::calculateKey(const EnabledQuota & enabled) const +String QuotaCache::QuotaInfo::calculateKey(const EnabledQuota & enabled, bool throw_if_client_key_empty) const { const auto & params = enabled.params; switch (quota->key_type) @@ -55,8 +55,15 @@ String QuotaCache::QuotaInfo::calculateKey(const EnabledQuota & enabled) const { if (!params.client_key.empty()) return params.client_key; - throw Exception(ErrorCodes::QUOTA_REQUIRES_CLIENT_KEY, "Quota {} (for user {}) requires a client supplied key.", - quota->getName(), params.user_name); + + if (throw_if_client_key_empty) + throw Exception( + ErrorCodes::QUOTA_REQUIRES_CLIENT_KEY, + "Quota {} (for user {}) requires a client supplied key.", + quota->getName(), + params.user_name); + else + return ""; // Authentication quota has no client key at time of authentication. } case QuotaKeyType::CLIENT_KEY_OR_USER_NAME: { @@ -165,7 +172,14 @@ QuotaCache::QuotaCache(const AccessControl & access_control_) QuotaCache::~QuotaCache() = default; -std::shared_ptr QuotaCache::getEnabledQuota(const UUID & user_id, const String & user_name, const boost::container::flat_set & enabled_roles, const Poco::Net::IPAddress & client_address, const String & forwarded_address, const String & client_key) +std::shared_ptr QuotaCache::getEnabledQuota( + const UUID & user_id, + const String & user_name, + const boost::container::flat_set & enabled_roles, + const Poco::Net::IPAddress & client_address, + const String & forwarded_address, + const String & client_key, + bool throw_if_client_key_empty) { std::lock_guard lock{mutex}; ensureAllQuotasRead(); @@ -188,11 +202,10 @@ std::shared_ptr QuotaCache::getEnabledQuota(const UUID & use auto res = std::shared_ptr(new EnabledQuota(params)); enabled_quotas.emplace(std::move(params), res); - chooseQuotaToConsumeFor(*res); + chooseQuotaToConsumeFor(*res, throw_if_client_key_empty); return res; } - void QuotaCache::ensureAllQuotasRead() { /// `mutex` is already locked. @@ -257,13 +270,13 @@ void QuotaCache::chooseQuotaToConsume() i = enabled_quotas.erase(i); else { - chooseQuotaToConsumeFor(*elem); + chooseQuotaToConsumeFor(*elem, true); ++i; } } } -void QuotaCache::chooseQuotaToConsumeFor(EnabledQuota & enabled) +void QuotaCache::chooseQuotaToConsumeFor(EnabledQuota & enabled, bool throw_if_client_key_empty) { /// `mutex` is already locked. boost::shared_ptr intervals; @@ -271,7 +284,7 @@ void QuotaCache::chooseQuotaToConsumeFor(EnabledQuota & enabled) { if (info.roles->match(enabled.params.user_id, enabled.params.enabled_roles)) { - String key = info.calculateKey(enabled); + String key = info.calculateKey(enabled, throw_if_client_key_empty); intervals = info.getOrBuildIntervals(key); break; } diff --git a/src/Access/QuotaCache.h b/src/Access/QuotaCache.h index 883ddfe47a8..97cf2d19c2f 100644 --- a/src/Access/QuotaCache.h +++ b/src/Access/QuotaCache.h @@ -28,7 +28,8 @@ public: const boost::container::flat_set & enabled_roles, const Poco::Net::IPAddress & address, const String & forwarded_address, - const String & client_key); + const String & client_key, + bool throw_if_client_key_empty); std::vector getAllQuotasUsage() const; @@ -41,7 +42,7 @@ private: QuotaInfo(const QuotaPtr & quota_, const UUID & quota_id_) { setQuota(quota_, quota_id_); } void setQuota(const QuotaPtr & quota_, const UUID & quota_id_); - String calculateKey(const EnabledQuota & enabled_quota) const; + String calculateKey(const EnabledQuota & enabled_quota, bool throw_if_client_key_empty) const; boost::shared_ptr getOrBuildIntervals(const String & key); boost::shared_ptr rebuildIntervals(const String & key, std::chrono::system_clock::time_point current_time); void rebuildAllIntervals(); @@ -56,7 +57,7 @@ private: void quotaAddedOrChanged(const UUID & quota_id, const std::shared_ptr & new_quota); void quotaRemoved(const UUID & quota_id); void chooseQuotaToConsume(); - void chooseQuotaToConsumeFor(EnabledQuota & enabled_quota); + void chooseQuotaToConsumeFor(EnabledQuota & enabled_quota, bool throw_if_client_key_empty); const AccessControl & access_control; mutable std::mutex mutex; diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index f27c3c21a73..d72ddb42d9e 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -291,8 +291,17 @@ public: const UInt64 size = value.size(); checkArraySize(size, max_elems); writeVarUInt(size, buf); - for (const auto & element : value) - writeBinaryLittleEndian(element, buf); + + + if constexpr (std::endian::native == std::endian::little) + { + buf.write(reinterpret_cast(value.data()), size * sizeof(value[0])); + } + else + { + for (const auto & element : value) + writeBinaryLittleEndian(element, buf); + } if constexpr (Trait::last) writeBinaryLittleEndian(this->data(place).total_values, buf); @@ -315,8 +324,16 @@ public: auto & value = this->data(place).value; value.resize_exact(size, arena); - for (auto & element : value) - readBinaryLittleEndian(element, buf); + + if constexpr (std::endian::native == std::endian::little) + { + buf.readStrict(reinterpret_cast(value.data()), size * sizeof(value[0])); + } + else + { + for (auto & element : value) + readBinaryLittleEndian(element, buf); + } if constexpr (Trait::last) readBinaryLittleEndian(this->data(place).total_values, buf); diff --git a/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp b/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp new file mode 100644 index 00000000000..0e9856cfab9 --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp @@ -0,0 +1,414 @@ +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +struct Settings; + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; + extern const int TOO_LARGE_ARRAY_SIZE; +} + +namespace +{ + +enum class GroupArraySortedStrategy +{ + heap, + sort +}; + +constexpr size_t group_array_sorted_sort_strategy_max_elements_threshold = 1000000; + +template +struct GroupArraySortedData +{ + using Allocator = MixedAlignedArenaAllocator; + using Array = PODArray; + + static constexpr size_t partial_sort_max_elements_factor = 2; + + static constexpr bool is_value_generic_field = std::is_same_v; + + Array values; + + static bool compare(const T & lhs, const T & rhs) + { + if constexpr (is_value_generic_field) + { + return lhs < rhs; + } + else + { + return CompareHelper::less(lhs, rhs, -1); + } + } + + struct Comparator + { + bool operator()(const T & lhs, const T & rhs) + { + return compare(lhs, rhs); + } + }; + + ALWAYS_INLINE void heapReplaceTop() + { + size_t size = values.size(); + if (size < 2) + return; + + size_t child_index = 1; + + if (values.size() > 2 && compare(values[1], values[2])) + ++child_index; + + /// Check if we are in order + if (compare(values[child_index], values[0])) + return; + + size_t current_index = 0; + auto current = values[current_index]; + + do + { + /// We are not in heap-order, swap the parent with it's largest child. + values[current_index] = values[child_index]; + current_index = child_index; + + // Recompute the child based off of the updated parent + child_index = 2 * child_index + 1; + + if (child_index >= size) + break; + + if ((child_index + 1) < size && compare(values[child_index], values[child_index + 1])) + { + /// Right child exists and is greater than left child. + ++child_index; + } + + /// Check if we are in order. + } while (!compare(values[child_index], current)); + + values[current_index] = current; + } + + ALWAYS_INLINE void sortAndLimit(size_t max_elements, Arena * arena) + { + if constexpr (is_value_generic_field) + { + ::sort(values.begin(), values.end(), Comparator()); + } + else + { + bool try_sort = trySort(values.begin(), values.end(), Comparator()); + if (!try_sort) + RadixSort>::executeLSD(values.data(), values.size()); + } + + if (values.size() > max_elements) + values.resize(max_elements, arena); + } + + ALWAYS_INLINE void partialSortAndLimitIfNeeded(size_t max_elements, Arena * arena) + { + if (values.size() < max_elements * partial_sort_max_elements_factor) + return; + + ::nth_element(values.begin(), values.begin() + max_elements, values.end(), Comparator()); + values.resize(max_elements, arena); + } + + ALWAYS_INLINE void addElement(T && element, size_t max_elements, Arena * arena) + { + if constexpr (strategy == GroupArraySortedStrategy::heap) + { + if (values.size() >= max_elements) + { + /// Element is greater or equal than current max element, it cannot be in k min elements + if (!compare(element, values[0])) + return; + + values[0] = std::move(element); + heapReplaceTop(); + return; + } + + values.push_back(std::move(element), arena); + std::push_heap(values.begin(), values.end(), Comparator()); + } + else + { + values.push_back(std::move(element), arena); + partialSortAndLimitIfNeeded(max_elements, arena); + } + } + + ALWAYS_INLINE void insertResultInto(IColumn & to, size_t max_elements, Arena * arena) + { + auto & result_array = assert_cast(to); + auto & result_array_offsets = result_array.getOffsets(); + + sortAndLimit(max_elements, arena); + + result_array_offsets.push_back(result_array_offsets.back() + values.size()); + + if (values.empty()) + return; + + if constexpr (is_value_generic_field) + { + auto & result_array_data = result_array.getData(); + for (auto & value : values) + result_array_data.insert(value); + } + else + { + auto & result_array_data = assert_cast &>(result_array.getData()).getData(); + + size_t result_array_data_insert_begin = result_array_data.size(); + result_array_data.resize(result_array_data_insert_begin + values.size()); + + for (size_t i = 0; i < values.size(); ++i) + result_array_data[result_array_data_insert_begin + i] = values[i]; + } + } +}; + +template +using GroupArraySortedDataHeap = GroupArraySortedData; + +template +using GroupArraySortedDataSort = GroupArraySortedData; + +constexpr UInt64 aggregate_function_group_array_sorted_max_element_size = 0xFFFFFF; + +template +class GroupArraySorted final + : public IAggregateFunctionDataHelper> +{ +public: + explicit GroupArraySorted( + const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elements_) + : IAggregateFunctionDataHelper>( + {data_type_}, parameters_, std::make_shared(data_type_)) + , max_elements(max_elements_) + , serialization(data_type_->getDefaultSerialization()) + { + if (max_elements > aggregate_function_group_array_sorted_max_element_size) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Too large limit parameter for groupArraySorted aggregate function, it should not exceed {}", + aggregate_function_group_array_sorted_max_element_size); + } + + String getName() const override { return "groupArraySorted"; } + + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override + { + if constexpr (std::is_same_v) + { + auto row_value = (*columns[0])[row_num]; + this->data(place).addElement(std::move(row_value), max_elements, arena); + } + else + { + auto row_value = assert_cast &>(*columns[0]).getData()[row_num]; + this->data(place).addElement(std::move(row_value), max_elements, arena); + } + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override + { + auto & rhs_values = this->data(rhs).values; + for (auto rhs_element : rhs_values) + this->data(place).addElement(std::move(rhs_element), max_elements, arena); + } + + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override + { + auto & values = this->data(place).values; + size_t size = values.size(); + writeVarUInt(size, buf); + + if constexpr (std::is_same_v) + { + for (const Field & element : values) + { + if (element.isNull()) + { + writeBinary(false, buf); + } + else + { + writeBinary(true, buf); + serialization->serializeBinary(element, buf, {}); + } + } + } + else + { + if constexpr (std::endian::native == std::endian::little) + { + buf.write(reinterpret_cast(values.data()), size * sizeof(values[0])); + } + else + { + for (const auto & element : values) + writeBinaryLittleEndian(element, buf); + } + } + } + + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override + { + size_t size = 0; + readVarUInt(size, buf); + + if (unlikely(size > max_elements)) + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size, it should not exceed {}", max_elements); + + auto & values = this->data(place).values; + values.resize_exact(size, arena); + + if constexpr (std::is_same_v) + { + for (Field & element : values) + { + UInt8 is_null = 0; + readBinary(is_null, buf); + if (!is_null) + serialization->deserializeBinary(element, buf, {}); + } + } + else + { + if constexpr (std::endian::native == std::endian::little) + { + buf.readStrict(reinterpret_cast(values.data()), size * sizeof(values[0])); + } + else + { + for (auto & element : values) + readBinaryLittleEndian(element, buf); + } + } + } + + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + { + this->data(place).insertResultInto(to, max_elements, arena); + } + + bool allocatesMemoryInArena() const override { return true; } + +private: + UInt64 max_elements; + SerializationPtr serialization; +}; + +template +using GroupArraySortedHeap = GroupArraySorted, T>; + +template +using GroupArraySortedSort = GroupArraySorted, T>; + +template