merge master

This commit is contained in:
taiyang-li 2024-02-01 10:58:33 +08:00
commit ddc6aad8ff
165 changed files with 4691 additions and 757 deletions

View File

@ -138,19 +138,26 @@ jobs:
############################################################################################
##################################### Docker images #######################################
############################################################################################
DockerServerImages:
DockerServerImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker server and keeper images
test_name: Docker server image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
checkout_depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
run_command: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
DockerKeeperImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker keeper image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
############################################################################################

View File

@ -242,20 +242,26 @@ jobs:
############################################################################################
##################################### Docker images #######################################
############################################################################################
DockerServerImages:
DockerServerImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker server and keeper images
test_name: Docker server image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
# FIXME: avoid using 0 checkout
checkout_depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
run_command: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head \
--image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
DockerKeeperImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker keeper image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
############################################################################################

View File

@ -104,7 +104,7 @@ jobs:
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Fast tests
test_name: Fast test
runner_type: builder
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
@ -273,19 +273,26 @@ jobs:
############################################################################################
##################################### Docker images #######################################
############################################################################################
DockerServerImages:
DockerServerImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker server and keeper images
test_name: Docker server image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
checkout_depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
run_command: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
DockerKeeperImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker keeper image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
############################################################################################

View File

@ -153,19 +153,26 @@ jobs:
############################################################################################
##################################### Docker images #######################################
############################################################################################
DockerServerImages:
DockerServerImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker server and keeper images
test_name: Docker server image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
checkout_depth: 0
run_command: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
DockerKeeperImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker keeper image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
############################################################################################
@ -456,7 +463,8 @@ jobs:
FinishCheck:
if: ${{ !failure() && !cancelled() }}
needs:
- DockerServerImages
- DockerServerImage
- DockerKeeperImage
- BuilderReport
- BuilderSpecialReport
- MarkReleaseReady

View File

@ -21,18 +21,18 @@
#### New Feature
* Implement Variant data type that represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value). Variant type is available under a setting `allow_experimental_variant_type`. Reference: [#54864](https://github.com/ClickHouse/ClickHouse/issues/54864). [#58047](https://github.com/ClickHouse/ClickHouse/pull/58047) ([Kruglov Pavel](https://github.com/Avogar)).
* Certain settings (currently `min_compress_block_size` and `max_compress_block_size`) can now be specified at column-level where they take precedence over the corresponding table-level setting. Example: `CREATE TABLE tab (col String SETTINGS (min_compress_block_size = 81920, max_compress_block_size = 163840)) ENGINE = MergeTree ORDER BY tuple();`. [#55201](https://github.com/ClickHouse/ClickHouse/pull/55201) ([Duc Canh Le](https://github.com/canhld94)).
* Add `quantileDDSketch` aggregate function as well as the corresponding `quantilesDDSketch` and `medianDDSketch`. It is based on the DDSketch https://www.vldb.org/pvldb/vol12/p2195-masson.pdf. ### Documentation entry for user-facing changes. [#56342](https://github.com/ClickHouse/ClickHouse/pull/56342) ([Srikanth Chekuri](https://github.com/srikanthccv)).
* Add `quantileDD` aggregate function as well as the corresponding `quantilesDD` and `medianDD`. It is based on the [DDSketch](https://www.vldb.org/pvldb/vol12/p2195-masson.pdf) algorithm. [#56342](https://github.com/ClickHouse/ClickHouse/pull/56342) ([Srikanth Chekuri](https://github.com/srikanthccv)).
* Allow to configure any kind of object storage with any kind of metadata type. [#58357](https://github.com/ClickHouse/ClickHouse/pull/58357) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Added `null_status_on_timeout_only_active` and `throw_only_active` modes for `distributed_ddl_output_mode` that allow to avoid waiting for inactive replicas. [#58350](https://github.com/ClickHouse/ClickHouse/pull/58350) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Allow partitions from tables with different partition expressions to be attached when the destination table partition expression doesn't re-partition/split the part. [#39507](https://github.com/ClickHouse/ClickHouse/pull/39507) ([Arthur Passos](https://github.com/arthurpassos)).
* Add function `arrayShingles()` to compute subarrays, e.g. `arrayShingles([1, 2, 3, 4, 5], 3)` returns `[[1,2,3],[2,3,4],[3,4,5]]`. [#58396](https://github.com/ClickHouse/ClickHouse/pull/58396) ([Zheng Miao](https://github.com/zenmiao7)).
* Added functions `punycodeEncode()`, `punycodeDecode()`, `idnaEncode()` and `idnaDecode()` which are useful for translating international domain names to an ASCII representation according to the IDNA standard. [#58454](https://github.com/ClickHouse/ClickHouse/pull/58454) ([Robert Schulze](https://github.com/rschu1ze)).
* Added string similarity functions `dramerauLevenshteinDistance()`, `jaroSimilarity()` and `jaroWinklerSimilarity()`. [#58531](https://github.com/ClickHouse/ClickHouse/pull/58531) ([Robert Schulze](https://github.com/rschu1ze)).
* Add function `arrayShingles` to compute subarrays, e.g. `arrayShingles([1, 2, 3, 4, 5], 3)` returns `[[1,2,3],[2,3,4],[3,4,5]]`. [#58396](https://github.com/ClickHouse/ClickHouse/pull/58396) ([Zheng Miao](https://github.com/zenmiao7)).
* Added functions `punycodeEncode`, `punycodeDecode`, `idnaEncode` and `idnaDecode` which are useful for translating international domain names to an ASCII representation according to the IDNA standard. [#58454](https://github.com/ClickHouse/ClickHouse/pull/58454) ([Robert Schulze](https://github.com/rschu1ze)).
* Added string similarity functions `damerauLevenshteinDistance`, `jaroSimilarity` and `jaroWinklerSimilarity`. [#58531](https://github.com/ClickHouse/ClickHouse/pull/58531) ([Robert Schulze](https://github.com/rschu1ze)).
* Add two settings `output_format_compression_level` to change output compression level and `output_format_compression_zstd_window_log` to explicitly set compression window size and enable long-range mode for zstd compression if output compression method is `zstd`. Applied for `INTO OUTFILE` and when writing to table functions `file`, `url`, `hdfs`, `s3`, and `azureBlobStorage`. [#58539](https://github.com/ClickHouse/ClickHouse/pull/58539) ([Duc Canh Le](https://github.com/canhld94)).
* Automatically disable ANSI escape sequences in Pretty formats if the output is not a terminal. Add new `auto` mode to setting `output_format_pretty_color`. [#58614](https://github.com/ClickHouse/ClickHouse/pull/58614) ([Shaun Struwig](https://github.com/Blargian)).
* Added function `sqidDecode()` which decodes [Sqids](https://sqids.org/). [#58544](https://github.com/ClickHouse/ClickHouse/pull/58544) ([Robert Schulze](https://github.com/rschu1ze)).
* Added function `sqidDecode` which decodes [Sqids](https://sqids.org/). [#58544](https://github.com/ClickHouse/ClickHouse/pull/58544) ([Robert Schulze](https://github.com/rschu1ze)).
* Allow to read Bool values into String in JSON input formats. It's done under a setting `input_format_json_read_bools_as_strings` that is enabled by default. [#58561](https://github.com/ClickHouse/ClickHouse/pull/58561) ([Kruglov Pavel](https://github.com/Avogar)).
* Added function `seriesDecomposeSTL()` which decomposes a time series into a season, a trend and a residual component. [#57078](https://github.com/ClickHouse/ClickHouse/pull/57078) ([Bhavna Jindal](https://github.com/bhavnajindal)).
* Added function `seriesDecomposeSTL` which decomposes a time series into a season, a trend and a residual component. [#57078](https://github.com/ClickHouse/ClickHouse/pull/57078) ([Bhavna Jindal](https://github.com/bhavnajindal)).
* Introduced MySQL Binlog Client for MaterializedMySQL: One binlog connection for many databases. [#57323](https://github.com/ClickHouse/ClickHouse/pull/57323) ([Val Doroshchuk](https://github.com/valbok)).
* Intel QuickAssist Technology (QAT) provides hardware-accelerated compression and cryptography. ClickHouse got a new compression codec `ZSTD_QAT` which utilizes QAT for zstd compression. The codec uses [Intel's QATlib](https://github.com/intel/qatlib) and [Intel's QAT ZSTD Plugin](https://github.com/intel/QAT-ZSTD-Plugin). Right now, only compression can be accelerated in hardware (a software fallback kicks in if QAT could not be initialized), decompression always runs in software. [#57509](https://github.com/ClickHouse/ClickHouse/pull/57509) ([jasperzhu](https://github.com/jinjunzh)).
* Implemented a new way of generating object storage keys for S3 disks. The key format can now be defined using `re2` regex syntax via the `key_template` option in the disk description. [#57663](https://github.com/ClickHouse/ClickHouse/pull/57663) ([Sema Checherinda](https://github.com/CheSema)).

View File

@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s
| Version | Supported |
|:-|:-|
| 24.1 | ✔️ |
| 23.12 | ✔️ |
| 23.11 | ✔️ |
| 23.10 | ✔️ |
| 23.10 | ❌ |
| 23.9 | ❌ |
| 23.8 | ✔️ |
| 23.7 | ❌ |

View File

@ -64,19 +64,14 @@ using ComparatorWrapper = Comparator;
#include <miniselect/floyd_rivest_select.h>
template <typename RandomIt>
void nth_element(RandomIt first, RandomIt nth, RandomIt last)
template <typename RandomIt, typename Compare>
void nth_element(RandomIt first, RandomIt nth, RandomIt last, Compare compare)
{
using value_type = typename std::iterator_traits<RandomIt>::value_type;
using comparator = std::less<value_type>;
comparator compare;
ComparatorWrapper<comparator> compare_wrapper = compare;
#ifndef NDEBUG
::shuffle(first, last);
#endif
ComparatorWrapper<Compare> compare_wrapper = compare;
::miniselect::floyd_rivest_select(first, nth, last, compare_wrapper);
#ifndef NDEBUG
@ -87,6 +82,15 @@ void nth_element(RandomIt first, RandomIt nth, RandomIt last)
#endif
}
/// Convenience overload of nth_element without an explicit comparator.
/// Elements are ordered with std::less over the iterator's value type; the
/// call is forwarded to the comparator-taking ::nth_element overload.
template <typename RandomIt>
void nth_element(RandomIt first, RandomIt nth, RandomIt last)
{
    using element_type = typename std::iterator_traits<RandomIt>::value_type;
    ::nth_element(first, nth, last, std::less<element_type>());
}
template <typename RandomIt, typename Compare>
void partial_sort(RandomIt first, RandomIt middle, RandomIt last, Compare compare)
{

View File

@ -2,11 +2,11 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54482)
SET(VERSION_REVISION 54483)
SET(VERSION_MAJOR 24)
SET(VERSION_MINOR 1)
SET(VERSION_MINOR 2)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH a2faa65b080a587026c86844f3a20c74d23a86f8)
SET(VERSION_DESCRIBE v24.1.1.1-testing)
SET(VERSION_STRING 24.1.1.1)
SET(VERSION_GITHASH 5a024dfc0936e062770d0cfaad0805b57c1fba17)
SET(VERSION_DESCRIBE v24.2.1.1-testing)
SET(VERSION_STRING 24.2.1.1)
# end of autochange

View File

@ -16,29 +16,30 @@ message(STATUS "Checking Rust toolchain for current target")
# See https://doc.rust-lang.org/nightly/rustc/platform-support.html
if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le")
set(Rust_CARGO_TARGET "powerpc64le-unknown-linux-gnu")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl"))
set(Rust_CARGO_TARGET "x86_64-unknown-linux-musl")
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64")
set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl"))
set(Rust_CARGO_TARGET "aarch64-unknown-linux-musl")
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64")
set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64"))
set(Rust_CARGO_TARGET "x86_64-apple-darwin")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "aarch64"))
set(Rust_CARGO_TARGET "aarch64-apple-darwin")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64"))
set(Rust_CARGO_TARGET "x86_64-unknown-freebsd")
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-riscv64")
set(Rust_CARGO_TARGET "riscv64gc-unknown-linux-gnu")
else()
message(FATAL_ERROR "Unsupported rust target")
endif()
message(STATUS "Switched Rust target to ${Rust_CARGO_TARGET}")
# Derive the Rust (cargo) target triple from the name of the CMake toolchain file.
# When no toolchain file is defined, Rust_CARGO_TARGET is not set by this block.
if(DEFINED CMAKE_TOOLCHAIN_FILE)
if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le")
set(Rust_CARGO_TARGET "powerpc64le-unknown-linux-gnu")
# The musl checks must precede the generic linux ones: a musl toolchain file
# also matches the plain "linux/toolchain-<arch>" pattern tested right after.
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl"))
set(Rust_CARGO_TARGET "x86_64-unknown-linux-musl")
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64")
set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl"))
set(Rust_CARGO_TARGET "aarch64-unknown-linux-musl")
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64")
set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64"))
set(Rust_CARGO_TARGET "x86_64-apple-darwin")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "aarch64"))
set(Rust_CARGO_TARGET "aarch64-apple-darwin")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64"))
set(Rust_CARGO_TARGET "x86_64-unknown-freebsd")
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-riscv64")
set(Rust_CARGO_TARGET "riscv64gc-unknown-linux-gnu")
else()
# A toolchain file was given but matches none of the known patterns: abort configuration.
message(FATAL_ERROR "Unsupported rust target")
endif()
message(STATUS "Switched Rust target to ${Rust_CARGO_TARGET}")
endif ()
# FindRust.cmake
list(APPEND CMAKE_MODULE_PATH "${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake")

2
contrib/curl vendored

@ -1 +1 @@
Subproject commit d755a5f7c009dd63a61b2c745180d8ba937cbfeb
Subproject commit 7161cb17c01dcff1dc5bf89a18437d9d729f1ecd

2
contrib/libxml2 vendored

@ -1 +1 @@
Subproject commit 223cb03a5d27b1b2393b266a8657443d046139d6
Subproject commit 8292f361458fcffe0bff515a385be02e9d35582c

View File

@ -21,7 +21,7 @@ extern "C" {
* your library and includes mismatch
*/
#ifndef LIBXML2_COMPILING_MSCCDEF
XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
XMLPUBFUN void xmlCheckVersion(int version);
#endif /* LIBXML2_COMPILING_MSCCDEF */
/**
@ -29,28 +29,28 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
*
* the version string like "1.2.3"
*/
#define LIBXML_DOTTED_VERSION "2.10.3"
#define LIBXML_DOTTED_VERSION "2.12.4"
/**
* LIBXML_VERSION:
*
* the version number: 1.2.3 value is 10203
*/
#define LIBXML_VERSION 21003
#define LIBXML_VERSION 21204
/**
* LIBXML_VERSION_STRING:
*
* the version number string, 1.2.3 value is "10203"
*/
#define LIBXML_VERSION_STRING "21003"
#define LIBXML_VERSION_STRING "21204"
/**
* LIBXML_VERSION_EXTRA:
*
* extra version information, used to show a git commit description
*/
#define LIBXML_VERSION_EXTRA ""
#define LIBXML_VERSION_EXTRA "-GITv2.12.4"
/**
* LIBXML_TEST_VERSION:
@ -58,7 +58,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
* Macro to check that the libxml version in use is compatible with
* the version the software has been compiled against
*/
#define LIBXML_TEST_VERSION xmlCheckVersion(21003);
#define LIBXML_TEST_VERSION xmlCheckVersion(21204);
#ifndef VMS
#if 0
@ -270,7 +270,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
*
* Whether iconv support is available
*/
#if 0
#if 1
#define LIBXML_ICONV_ENABLED
#endif
@ -313,7 +313,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
/**
* LIBXML_DEBUG_RUNTIME:
*
* Whether the runtime debugging is configured in
* Removed
*/
#if 0
#define LIBXML_DEBUG_RUNTIME
@ -409,12 +409,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
#endif
#ifdef __GNUC__
/**
* ATTRIBUTE_UNUSED:
*
* Macro used to signal to GCC unused function parameters
*/
/** DOC_DISABLE */
#ifndef ATTRIBUTE_UNUSED
# if ((__GNUC__ > 2) || ((__GNUC__ == 2) && (__GNUC_MINOR__ >= 7)))
@ -424,12 +419,6 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
# endif
#endif
/**
* LIBXML_ATTR_ALLOC_SIZE:
*
* Macro used to indicate to GCC this is an allocator function
*/
#ifndef LIBXML_ATTR_ALLOC_SIZE
# if (!defined(__clang__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 3))))
# define LIBXML_ATTR_ALLOC_SIZE(x) __attribute__((alloc_size(x)))
@ -440,12 +429,6 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
# define LIBXML_ATTR_ALLOC_SIZE(x)
#endif
/**
* LIBXML_ATTR_FORMAT:
*
* Macro used to indicate to GCC the parameter are printf like
*/
#ifndef LIBXML_ATTR_FORMAT
# if ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)))
# define LIBXML_ATTR_FORMAT(fmt,args) __attribute__((__format__(__printf__,fmt,args)))
@ -457,44 +440,69 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
#endif
#ifndef XML_DEPRECATED
# ifdef IN_LIBXML
# if defined (IN_LIBXML) || (__GNUC__ * 100 + __GNUC_MINOR__ < 301)
# define XML_DEPRECATED
# else
/* Available since at least GCC 3.1 */
# else
# define XML_DEPRECATED __attribute__((deprecated))
# endif
#endif
#if defined(__clang__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 406)
#if defined(__clang__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 800)
#define XML_IGNORE_FPTR_CAST_WARNINGS \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wpedantic\"") \
_Pragma("GCC diagnostic ignored \"-Wcast-function-type\"")
#else
#define XML_IGNORE_FPTR_CAST_WARNINGS \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wpedantic\"")
#endif
#define XML_POP_WARNINGS \
_Pragma("GCC diagnostic pop")
#else
#define XML_IGNORE_FPTR_CAST_WARNINGS
#define XML_POP_WARNINGS
#endif
#else /* ! __GNUC__ */
/**
* ATTRIBUTE_UNUSED:
*
* Macro used to signal to GCC unused function parameters
*/
#define ATTRIBUTE_UNUSED
/**
* LIBXML_ATTR_ALLOC_SIZE:
*
* Macro used to indicate to GCC this is an allocator function
*/
#define LIBXML_ATTR_ALLOC_SIZE(x)
/**
* LIBXML_ATTR_FORMAT:
*
* Macro used to indicate to GCC the parameter are printf like
*/
#define LIBXML_ATTR_FORMAT(fmt,args)
/**
* XML_DEPRECATED:
*
* Macro used to indicate that a function, variable, type or struct member
* is deprecated.
*/
#ifndef XML_DEPRECATED
#define XML_DEPRECATED
# if defined (IN_LIBXML) || !defined (_MSC_VER)
# define XML_DEPRECATED
/* Available since Visual Studio 2005 */
# elif defined (_MSC_VER) && (_MSC_VER >= 1400)
# define XML_DEPRECATED __declspec(deprecated)
# endif
#endif
#if defined (_MSC_VER) && (_MSC_VER >= 1400)
# define XML_IGNORE_FPTR_CAST_WARNINGS __pragma(warning(push))
#else
# define XML_IGNORE_FPTR_CAST_WARNINGS
#endif
#ifndef XML_POP_WARNINGS
# if defined (_MSC_VER) && (_MSC_VER >= 1400)
# define XML_POP_WARNINGS __pragma(warning(pop))
# else
# define XML_POP_WARNINGS
# endif
#endif
#endif /* __GNUC__ */
#define XML_NO_ATTR
#ifdef LIBXML_THREAD_ENABLED
#define XML_DECLARE_GLOBAL(name, type, attrs) \
attrs XMLPUBFUN type *__##name(void);
#define XML_GLOBAL_MACRO(name) (*__##name())
#else
#define XML_DECLARE_GLOBAL(name, type, attrs) \
attrs XMLPUBVAR type name;
#endif
#ifdef __cplusplus
}
#endif /* __cplusplus */

View File

@ -1,5 +1,6 @@
if (APPLE OR SANITIZE STREQUAL "undefined" OR SANITIZE STREQUAL "memory")
# llvm-tblgen, that is used during LLVM build, doesn't work with UBSan.
if (APPLE OR SANITIZE STREQUAL "memory")
# llvm-tblgen, that is used during LLVM build, will throw MSAN errors when running (breaking the build)
# TODO: Retest when upgrading LLVM or build only llvm-tblgen without sanitizers
set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
set (ENABLE_DWARF_PARSER_DEFAULT OFF)
else()

View File

@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.12.2.59"
ARG VERSION="24.1.1.2048"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.12.2.59"
ARG VERSION="24.1.1.2048"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.12.2.59"
ARG VERSION="24.1.1.2048"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -293,10 +293,10 @@ if [ $failed_to_save_logs -ne 0 ]; then
# for files >64MB, we want this files to be compressed explicitly
for table in query_log zookeeper_log trace_log transactions_info_log metric_log
do
clickhouse-local "$data_path_config" --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||:
clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||:
fi
done
fi

View File

@ -0,0 +1,438 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.1.1.2048-stable (5a024dfc093) FIXME as compared to v23.12.1.1368-stable (a2faa65b080)
#### Backward Incompatible Change
* The setting `print_pretty_type_names` is turned on by default. You can turn it off to keep the old behavior or `SET compatibility = '23.12'`. [#57726](https://github.com/ClickHouse/ClickHouse/pull/57726) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The MergeTree setting `clean_deleted_rows` is deprecated, it has no effect anymore. The `CLEANUP` keyword for `OPTIMIZE` is not allowed by default (unless `allow_experimental_replacing_merge_with_cleanup` is enabled). [#58316](https://github.com/ClickHouse/ClickHouse/pull/58316) ([Alexander Tokmakov](https://github.com/tavplubix)).
* The function `reverseDNSQuery` is no longer available. This closes [#58368](https://github.com/ClickHouse/ClickHouse/issues/58368). [#58369](https://github.com/ClickHouse/ClickHouse/pull/58369) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Enable various changes to improve the access control in the configuration file. These changes affect the behavior, and you can check them in the `access_control_improvements` section of `config.xml`. In case you are not confident, keep the values in the configuration file as they were in the previous version. [#58584](https://github.com/ClickHouse/ClickHouse/pull/58584) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allow queries without aliases for subqueries for `PASTE JOIN`. [#58654](https://github.com/ClickHouse/ClickHouse/pull/58654) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix sumMapFiltered with NaN values. NaN values are now placed at the end (instead of randomly) and considered different from any values. `-0` is now also treated as equal to `0`; since 0 values are discarded, `-0` values are discarded too. [#58959](https://github.com/ClickHouse/ClickHouse/pull/58959) ([Raúl Marín](https://github.com/Algunenano)).
* The function `visibleWidth` will behave according to the docs. In previous versions, it simply counted code points after string serialization, like the `lengthUTF8` function, but didn't consider zero-width and combining characters, full-width characters, tabs, and deletes. Now the behavior is changed accordingly. If you want to keep the old behavior, set `function_visible_width_behavior` to `0`, or set `compatibility` to `23.12` or lower. [#59022](https://github.com/ClickHouse/ClickHouse/pull/59022) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Kusto dialect is disabled until these two bugs will be fixed: [#59037](https://github.com/ClickHouse/ClickHouse/issues/59037) and [#59036](https://github.com/ClickHouse/ClickHouse/issues/59036). [#59305](https://github.com/ClickHouse/ClickHouse/pull/59305) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### New Feature
* Allow partitions from tables with different partition expressions to be attached when the destination table partition expression doesn't re-partition/split the part. [#39507](https://github.com/ClickHouse/ClickHouse/pull/39507) ([Arthur Passos](https://github.com/arthurpassos)).
* Added statement `SYSTEM RELOAD ASYNCHRONOUS METRICS` which updates the asynchronous metrics. Mostly useful for testing and development. [#53710](https://github.com/ClickHouse/ClickHouse/pull/53710) ([Robert Schulze](https://github.com/rschu1ze)).
* Certain settings (currently `min_compress_block_size` and `max_compress_block_size`) can now be specified at column-level where they take precedence over the corresponding table-level setting. Example: `CREATE TABLE tab (col String SETTINGS (min_compress_block_size = 81920, max_compress_block_size = 163840)) ENGINE = MergeTree ORDER BY tuple();`. [#55201](https://github.com/ClickHouse/ClickHouse/pull/55201) ([Duc Canh Le](https://github.com/canhld94)).
* Add `quantileDDSketch` aggregate function as well as the corresponding `quantilesDDSketch` and `medianDDSketch`. It is based on the [DDSketch paper](https://www.vldb.org/pvldb/vol12/p2195-masson.pdf). [#56342](https://github.com/ClickHouse/ClickHouse/pull/56342) ([Srikanth Chekuri](https://github.com/srikanthccv)).
* Added function `seriesDecomposeSTL()` which decomposes a time series into a season, a trend and a residual component. [#57078](https://github.com/ClickHouse/ClickHouse/pull/57078) ([Bhavna Jindal](https://github.com/bhavnajindal)).
* Introduced MySQL Binlog Client for MaterializedMySQL: One binlog connection for many databases. [#57323](https://github.com/ClickHouse/ClickHouse/pull/57323) ([Val Doroshchuk](https://github.com/valbok)).
* Intel QuickAssist Technology (QAT) provides hardware-accelerated compression and cryptography. ClickHouse got a new compression codec `ZSTD_QAT` which utilizes QAT for zstd compression. The codec uses [Intel's QATlib](https://github.com/intel/qatlib) and [Intel's QAT ZSTD Plugin](https://github.com/intel/QAT-ZSTD-Plugin). Right now, only compression can be accelerated in hardware (a software fallback kicks in if QAT could not be initialized); decompression always runs in software. [#57509](https://github.com/ClickHouse/ClickHouse/pull/57509) ([jasperzhu](https://github.com/jinjunzh)).
* Implemented a new way of generating object storage keys for s3 disks. Now the format can be defined in terms of `re2` regex syntax with the `key_template` option in the disk description. [#57663](https://github.com/ClickHouse/ClickHouse/pull/57663) ([Sema Checherinda](https://github.com/CheSema)).
* Table system.dropped_tables_parts contains parts of system.dropped_tables tables (dropped but not yet removed tables). [#58038](https://github.com/ClickHouse/ClickHouse/pull/58038) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Implement Variant data type that represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value). Variant type is available under a setting `allow_experimental_variant_type`. Reference: [#54864](https://github.com/ClickHouse/ClickHouse/issues/54864). [#58047](https://github.com/ClickHouse/ClickHouse/pull/58047) ([Kruglov Pavel](https://github.com/Avogar)).
* Add settings `max_materialized_views_size_for_table` to limit the number of materialized views attached to a table. [#58068](https://github.com/ClickHouse/ClickHouse/pull/58068) ([zhongyuankai](https://github.com/zhongyuankai)).
* `clickhouse-format` improvements: * support INSERT queries with `VALUES` * support comments (use `--comments` to output them) * support `--max_line_length` option to format only long queries in multiline. [#58246](https://github.com/ClickHouse/ClickHouse/pull/58246) ([vdimir](https://github.com/vdimir)).
* Added `null_status_on_timeout_only_active` and `throw_only_active` modes for `distributed_ddl_output_mode` that allow to avoid waiting for inactive replicas. [#58350](https://github.com/ClickHouse/ClickHouse/pull/58350) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add table `system.database_engines`. [#58390](https://github.com/ClickHouse/ClickHouse/pull/58390) ([Bharat Nallan](https://github.com/bharatnc)).
* Added `FROM <Replicas>` modifier for the `SYSTEM SYNC REPLICA LIGHTWEIGHT` query. The `FROM` modifier ensures we wait for fetches and drop-ranges only for the specified source replicas, as well as any replica not in zookeeper or with an empty `source_replica`. [#58393](https://github.com/ClickHouse/ClickHouse/pull/58393) ([Jayme Bird](https://github.com/jaymebrd)).
* Add function `arrayShingles()` to compute subarrays, e.g. `arrayShingles([1, 2, 3, 4, 5], 3)` returns `[[1,2,3],[2,3,4],[3,4,5]]`. [#58396](https://github.com/ClickHouse/ClickHouse/pull/58396) ([Zheng Miao](https://github.com/zenmiao7)).
* Added functions `punycodeEncode()`, `punycodeDecode()`, `idnaEncode()` and `idnaDecode()` which are useful for translating international domain names to an ASCII representation according to the IDNA standard. [#58454](https://github.com/ClickHouse/ClickHouse/pull/58454) ([Robert Schulze](https://github.com/rschu1ze)).
* Added string similarity functions `damerauLevenshteinDistance()`, `jaroSimilarity()` and `jaroWinklerSimilarity()`. [#58531](https://github.com/ClickHouse/ClickHouse/pull/58531) ([Robert Schulze](https://github.com/rschu1ze)).
* Add two settings `output_format_compression_level` to change output compression level and `output_format_compression_zstd_window_log` to explicitly set compression window size and enable long-range mode for zstd compression if output compression method is `zstd`. Applied for `INTO OUTFILE` and when writing to table functions `file`, `url`, `hdfs`, `s3`, and `azureBlobStorage`. [#58539](https://github.com/ClickHouse/ClickHouse/pull/58539) ([Duc Canh Le](https://github.com/canhld94)).
* Automatically disable ANSI escape sequences in Pretty formats if the output is not a terminal. Add new `auto` mode to setting `output_format_pretty_color`. [#58614](https://github.com/ClickHouse/ClickHouse/pull/58614) ([Shaun Struwig](https://github.com/Blargian)).
* Added setting `update_insert_deduplication_token_in_dependent_materialized_views`. This setting allows to update insert deduplication token with table identifier during insert in dependent materialized views. Closes [#59165](https://github.com/ClickHouse/ClickHouse/issues/59165). [#59238](https://github.com/ClickHouse/ClickHouse/pull/59238) ([Maksim Kita](https://github.com/kitaisreal)).
#### Performance Improvement
* More cache-friendly final implementation. Note on the behaviour change: previously queries with `FINAL` modifier that read with a single stream (e.g. `max_threads=1`) produced sorted output without explicitly provided `ORDER BY` clause. This behaviour no longer exists when `enable_vertical_final = true` (and it is so by default). [#54366](https://github.com/ClickHouse/ClickHouse/pull/54366) ([Duc Canh Le](https://github.com/canhld94)).
* Optimize array element function when input is array(map)/array(array(num)/array(array(string))/array(bigint)/array(decimal). Current implementation causes too many reallocs. The optimization speed up by ~6x especially when input type is array(map). [#56403](https://github.com/ClickHouse/ClickHouse/pull/56403) ([李扬](https://github.com/taiyang-li)).
* Bypass `Poco::BasicBufferedStreamBuf` abstraction when reading from S3 (namely `ReadBufferFromIStream`) to avoid extra copying of data. [#56961](https://github.com/ClickHouse/ClickHouse/pull/56961) ([Nikita Taranov](https://github.com/nickitat)).
* Read column once while reading more than one subcolumn from it in Compact parts. [#57631](https://github.com/ClickHouse/ClickHouse/pull/57631) ([Kruglov Pavel](https://github.com/Avogar)).
* Rewrite the AST of sum(column + literal) function. [#57853](https://github.com/ClickHouse/ClickHouse/pull/57853) ([Jiebin Sun](https://github.com/jiebinn)).
* The evaluation of function `match()` now utilizes skipping indices `ngrambf_v1` and `tokenbf_v1`. [#57882](https://github.com/ClickHouse/ClickHouse/pull/57882) ([凌涛](https://github.com/lingtaolf)).
* Default coordinator for parallel replicas is rewritten for better cache locality (same mark ranges are almost always assigned to the same replicas). Consistent hashing is used also during work stealing, so better tail latency is expected. It has been tested for linear scalability on a hundred replicas. [#57968](https://github.com/ClickHouse/ClickHouse/pull/57968) ([Nikita Taranov](https://github.com/nickitat)).
* MergeTree FINAL to not compare rows from same non-L0 part. [#58142](https://github.com/ClickHouse/ClickHouse/pull/58142) ([Duc Canh Le](https://github.com/canhld94)).
* Speed up iota calls (filling array with consecutive numbers). [#58271](https://github.com/ClickHouse/ClickHouse/pull/58271) ([Raúl Marín](https://github.com/Algunenano)).
* The evaluation of function `match()` now utilizes inverted indices. [#58284](https://github.com/ClickHouse/ClickHouse/pull/58284) ([凌涛](https://github.com/lingtaolf)).
* Speedup MIN/MAX for non numeric types. [#58334](https://github.com/ClickHouse/ClickHouse/pull/58334) ([Raúl Marín](https://github.com/Algunenano)).
* Enable JIT compilation for aggregation without a key. Closes [#41461](https://github.com/ClickHouse/ClickHouse/issues/41461). Originally [#53757](https://github.com/ClickHouse/ClickHouse/issues/53757). [#58440](https://github.com/ClickHouse/ClickHouse/pull/58440) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The performance experiments of **OnTime** on the Intel server with up to AVX2 (and BMI2) support show that this change could effectively improve the QPS of **Q2** and **Q3** by **5.0%** and **3.7%** through reducing the cycle ratio of the hotspot, **_DB::MergeTreeRangeReader::ReadResult::optimize_**, **from 11.48% to 1.09%** and **from 8.09% to 0.67%** respectively while having no impact on others. [#58800](https://github.com/ClickHouse/ClickHouse/pull/58800) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
* Use one thread less in `clickhouse-local`. [#58968](https://github.com/ClickHouse/ClickHouse/pull/58968) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Large aggregation states of `uniqExact` will be merged in parallel in distributed queries. [#59009](https://github.com/ClickHouse/ClickHouse/pull/59009) ([Nikita Taranov](https://github.com/nickitat)).
* Lower memory usage after reading from `MergeTree` tables. [#59290](https://github.com/ClickHouse/ClickHouse/pull/59290) ([Anton Popov](https://github.com/CurtizJ)).
* Lower memory usage in vertical merges. [#59340](https://github.com/ClickHouse/ClickHouse/pull/59340) ([Anton Popov](https://github.com/CurtizJ)).
#### Improvement
* Enable MySQL/MariaDB on macOS. This closes [#21191](https://github.com/ClickHouse/ClickHouse/issues/21191). [#46316](https://github.com/ClickHouse/ClickHouse/pull/46316) ([Robert Schulze](https://github.com/rschu1ze)).
* Do not interpret numbers with leading zeroes as octals. [#55575](https://github.com/ClickHouse/ClickHouse/pull/55575) ([Joanna Hulboj](https://github.com/jh0x)).
* Replace HTTP outgoing buffering based on std ostream with CH Buffer. Add bytes counting metrics for interfaces. [#56064](https://github.com/ClickHouse/ClickHouse/pull/56064) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Disable `max_rows_in_set_to_optimize_join` by default. [#56396](https://github.com/ClickHouse/ClickHouse/pull/56396) ([vdimir](https://github.com/vdimir)).
* Add `<host_name>` config parameter that allows avoiding resolving hostnames in DDLWorker. This mitigates the possibility of the queue being stuck in case of a change in cluster definition. Closes [#57573](https://github.com/ClickHouse/ClickHouse/issues/57573). [#57603](https://github.com/ClickHouse/ClickHouse/pull/57603) ([Nikolay Degterinsky](https://github.com/evillique)).
* Increase `load_metadata_threads` to 16 for the filesystem cache. It will make the server start up faster. [#57732](https://github.com/ClickHouse/ClickHouse/pull/57732) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve the `multiIf` function performance when the type is Nullable. [#57745](https://github.com/ClickHouse/ClickHouse/pull/57745) ([KevinyhZou](https://github.com/KevinyhZou)).
* Add ability to throttle merges/mutations (`max_mutations_bandwidth_for_server`/`max_merges_bandwidth_for_server`). [#57877](https://github.com/ClickHouse/ClickHouse/pull/57877) ([Azat Khuzhin](https://github.com/azat)).
* Replaced undocumented (boolean) column `is_hot_reloadable` in system table `system.server_settings` by (Enum8) column `changeable_without_restart` with possible values `No`, `Yes`, `IncreaseOnly` and `DecreaseOnly`. Also documented the column. [#58029](https://github.com/ClickHouse/ClickHouse/pull/58029) ([skyoct](https://github.com/skyoct)).
* ClusterDiscovery supports setting username and password, close [#58063](https://github.com/ClickHouse/ClickHouse/issues/58063). [#58123](https://github.com/ClickHouse/ClickHouse/pull/58123) ([vdimir](https://github.com/vdimir)).
* Support query parameters in ALTER TABLE ... PART. [#58297](https://github.com/ClickHouse/ClickHouse/pull/58297) ([Azat Khuzhin](https://github.com/azat)).
* Create consumers for Kafka tables on the fly (but keep them for some period - `kafka_consumers_pool_ttl_ms`, since last used). This should fix the problem with statistics for `system.kafka_consumers` (that are not consumed when nobody reads from the Kafka table, which leads to a live memory leak and slow table detach), and also this PR enables stats for `system.kafka_consumers` by default again. [#58310](https://github.com/ClickHouse/ClickHouse/pull/58310) ([Azat Khuzhin](https://github.com/azat)).
* Sparkbar as an alias to sparkbar. [#58335](https://github.com/ClickHouse/ClickHouse/pull/58335) ([凌涛](https://github.com/lingtaolf)).
* Avoid sending ComposeObject requests after upload to GCS. [#58343](https://github.com/ClickHouse/ClickHouse/pull/58343) ([Azat Khuzhin](https://github.com/azat)).
* Correctly handle keys with dot in the name in configurations XMLs. [#58354](https://github.com/ClickHouse/ClickHouse/pull/58354) ([Azat Khuzhin](https://github.com/azat)).
* Added comments (brief descriptions) to all columns of system tables. There are several reasons for this: (1) We use system tables a lot, and sometimes it could be very difficult for a developer to understand the purpose and the meaning of a particular column. (2) We change (add new ones or modify existing) system tables a lot, and the documentation for them is always outdated. For example, take a look at the documentation page for [`system.parts`](https://clickhouse.com/docs/en/operations/system-tables/parts). It misses a lot of columns. (3) We would like to eventually generate documentation directly from ClickHouse. [#58356](https://github.com/ClickHouse/ClickHouse/pull/58356) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Allow to configure any kind of object storage with any kind of metadata type. [#58357](https://github.com/ClickHouse/ClickHouse/pull/58357) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Make function `format` return constant on constant arguments. This closes [#58355](https://github.com/ClickHouse/ClickHouse/issues/58355). [#58358](https://github.com/ClickHouse/ClickHouse/pull/58358) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Attach all system tables in `clickhouse-local`, including `system.parts`. This closes [#58312](https://github.com/ClickHouse/ClickHouse/issues/58312). [#58359](https://github.com/ClickHouse/ClickHouse/pull/58359) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Support for `Enum` data types in function `transform`. This closes [#58241](https://github.com/ClickHouse/ClickHouse/issues/58241). [#58360](https://github.com/ClickHouse/ClickHouse/pull/58360) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allow registering database engines independently. [#58365](https://github.com/ClickHouse/ClickHouse/pull/58365) ([Bharat Nallan](https://github.com/bharatnc)).
* Adding a setting `max_estimated_execution_time` to separate `max_execution_time` and `max_estimated_execution_time`. [#58402](https://github.com/ClickHouse/ClickHouse/pull/58402) ([Zhang Yifan](https://github.com/zhangyifan27)).
* Allow registering interpreters independently. [#58443](https://github.com/ClickHouse/ClickHouse/pull/58443) ([Bharat Nallan](https://github.com/bharatnc)).
* Provide hint when an invalid database engine name is used. [#58444](https://github.com/ClickHouse/ClickHouse/pull/58444) ([Bharat Nallan](https://github.com/bharatnc)).
* Avoid huge memory consumption during Keeper startup for more cases. [#58455](https://github.com/ClickHouse/ClickHouse/pull/58455) ([Antonio Andelic](https://github.com/antonio2368)).
* Add settings for better control of indexes type in Arrow dictionary. Use signed integer type for indexes by default as Arrow recommends. Closes [#57401](https://github.com/ClickHouse/ClickHouse/issues/57401). [#58519](https://github.com/ClickHouse/ClickHouse/pull/58519) ([Kruglov Pavel](https://github.com/Avogar)).
* Added function `sqidDecode()` which decodes [Sqids](https://sqids.org/). [#58544](https://github.com/ClickHouse/ClickHouse/pull/58544) ([Robert Schulze](https://github.com/rschu1ze)).
* Allow to read Bool values into String in JSON input formats. It's done under a setting `input_format_json_read_bools_as_strings` that is enabled by default. [#58561](https://github.com/ClickHouse/ClickHouse/pull/58561) ([Kruglov Pavel](https://github.com/Avogar)).
* Implement [#58575](https://github.com/ClickHouse/ClickHouse/issues/58575): support the `CLICKHOUSE_PASSWORD_FILE` environment variable when running the docker image. [#58583](https://github.com/ClickHouse/ClickHouse/pull/58583) ([Eyal Halpern Shalev](https://github.com/Eyal-Shalev)).
* When executing some queries, which require a lot of streams for reading data, the error `"Paste JOIN requires sorted tables only"` was previously thrown. Now the numbers of streams resize to 1 in that case. [#58608](https://github.com/ClickHouse/ClickHouse/pull/58608) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Add `SYSTEM JEMALLOC PURGE` for purging unused jemalloc pages, `SYSTEM JEMALLOC [ ENABLE | DISABLE | FLUSH ] PROFILE` for controlling jemalloc profile if the profiler is enabled. Add jemalloc-related 4LW command in Keeper: `jmst` for dumping jemalloc stats, `jmfp`, `jmep`, `jmdp` for controlling jemalloc profile if the profiler is enabled. [#58665](https://github.com/ClickHouse/ClickHouse/pull/58665) ([Antonio Andelic](https://github.com/antonio2368)).
* Better message for INVALID_IDENTIFIER error. [#58703](https://github.com/ClickHouse/ClickHouse/pull/58703) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Improved handling of signed numeric literals in normalizeQuery. [#58710](https://github.com/ClickHouse/ClickHouse/pull/58710) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Support Point data type for MySQL. [#58721](https://github.com/ClickHouse/ClickHouse/pull/58721) ([Kseniia Sumarokova](https://github.com/kssenii)).
* When comparing a Float32 column and a const string, read the string as Float32 (instead of Float64). [#58724](https://github.com/ClickHouse/ClickHouse/pull/58724) ([Raúl Marín](https://github.com/Algunenano)).
* Improve S3 compatible, add Ecloud EOS storage support. [#58786](https://github.com/ClickHouse/ClickHouse/pull/58786) ([xleoken](https://github.com/xleoken)).
* Allow `KILL QUERY` to cancel backups / restores. This PR also makes running backups and restores visible in `system.processes`. Also there is a new setting in the server configuration now - `shutdown_wait_backups_and_restores` (default=true) which makes the server either wait on shutdown for all running backups and restores to finish or just cancel them. [#58804](https://github.com/ClickHouse/ClickHouse/pull/58804) ([Vitaly Baranov](https://github.com/vitlibar)).
* Avro format support Zstd codec. Closes [#58735](https://github.com/ClickHouse/ClickHouse/issues/58735). [#58805](https://github.com/ClickHouse/ClickHouse/pull/58805) ([flynn](https://github.com/ucasfl)).
* MySQL interface gained support for `net_write_timeout` and `net_read_timeout` settings. `net_write_timeout` is translated into the native `send_timeout` ClickHouse setting and, similarly, `net_read_timeout` into `receive_timeout`. Fixed an issue where it was possible to set MySQL `sql_select_limit` setting only if the entire statement was in upper case. [#58835](https://github.com/ClickHouse/ClickHouse/pull/58835) ([Serge Klochkov](https://github.com/slvrtrn)).
* Fixing a problem described in [#58719](https://github.com/ClickHouse/ClickHouse/issues/58719). [#58841](https://github.com/ClickHouse/ClickHouse/pull/58841) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Make sure that for custom (created from SQL) disks either `filesystem_caches_path` (a common directory prefix for all filesystem caches) or `custom_cached_disks_base_directory` (a common directory prefix for only filesystem caches created from custom disks) is specified in server config. `custom_cached_disks_base_directory` has higher priority for custom disks over `filesystem_caches_path`, which is used if the former one is absent. Filesystem cache setting `path` must lie inside that directory, otherwise an exception will be thrown, preventing the disk from being created. This will not affect disks created on an older version when the server was upgraded (in that case the exception will not be thrown, to allow the server to successfully start). `custom_cached_disks_base_directory` is added to default server config as `/var/lib/clickhouse/caches/`. Closes [#57825](https://github.com/ClickHouse/ClickHouse/issues/57825). [#58869](https://github.com/ClickHouse/ClickHouse/pull/58869) ([Kseniia Sumarokova](https://github.com/kssenii)).
* MySQL interface gained compatibility with `SHOW WARNINGS`/`SHOW COUNT(*) WARNINGS` queries, though the returned result is always an empty set. [#58929](https://github.com/ClickHouse/ClickHouse/pull/58929) ([Serge Klochkov](https://github.com/slvrtrn)).
* Skip unavailable replicas when executing parallel distributed `INSERT SELECT`. [#58931](https://github.com/ClickHouse/ClickHouse/pull/58931) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Display word-descriptive log level while enabling structured log formatting in json. [#58936](https://github.com/ClickHouse/ClickHouse/pull/58936) ([Tim Liou](https://github.com/wheatdog)).
* MySQL interface gained support for `CAST(x AS SIGNED)` and `CAST(x AS UNSIGNED)` statements via data type aliases: `SIGNED` for Int64, and `UNSIGNED` for UInt64. This improves compatibility with BI tools such as Looker Studio. [#58954](https://github.com/ClickHouse/ClickHouse/pull/58954) ([Serge Klochkov](https://github.com/slvrtrn)).
* Function `seriesDecomposeSTL()` now returns a baseline component as season + trend components. [#58961](https://github.com/ClickHouse/ClickHouse/pull/58961) ([Bhavna Jindal](https://github.com/bhavnajindal)).
* Fix memory management in copyDataToS3File. [#58962](https://github.com/ClickHouse/ClickHouse/pull/58962) ([Vitaly Baranov](https://github.com/vitlibar)).
* Change working directory to data path in docker container. [#58975](https://github.com/ClickHouse/ClickHouse/pull/58975) ([cangyin](https://github.com/cangyin)).
* Added setting for Azure Blob Storage `azure_max_unexpected_write_error_retries`, which can also be set from config under the azure section. [#59001](https://github.com/ClickHouse/ClickHouse/pull/59001) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Keeper improvement: reduce Keeper's memory usage for stored nodes. [#59002](https://github.com/ClickHouse/ClickHouse/pull/59002) ([Antonio Andelic](https://github.com/antonio2368)).
* Allow server to start with broken data lake table. Closes [#58625](https://github.com/ClickHouse/ClickHouse/issues/58625). [#59080](https://github.com/ClickHouse/ClickHouse/pull/59080) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fixes https://github.com/ClickHouse/ClickHouse/pull/59120#issuecomment-1906177350. [#59122](https://github.com/ClickHouse/ClickHouse/pull/59122) ([Arthur Passos](https://github.com/arthurpassos)).
* The state of URL's #hash in the dashboard is now compressed using [lz-string](https://github.com/pieroxy/lz-string). The default size of the state string is compressed from 6856B to 2823B. [#59124](https://github.com/ClickHouse/ClickHouse/pull/59124) ([Amos Bird](https://github.com/amosbird)).
* Allow to ignore schema evolution in Iceberg table engine and read all data using schema specified by the user on table creation or latest schema parsed from metadata on table creation. This is done under a setting `iceberg_engine_ignore_schema_evolution` that is disabled by default. Note that enabling this setting can lead to incorrect result as in case of evolved schema all data files will be read using the same schema. [#59133](https://github.com/ClickHouse/ClickHouse/pull/59133) ([Kruglov Pavel](https://github.com/Avogar)).
* Prohibit mutable operations (`INSERT`/`ALTER`/`OPTIMIZE`/...) on read-only/write-once storages with a proper `TABLE_IS_READ_ONLY` error (to avoid leftovers). Avoid leaving left-overs on write-once disks (`format_version.txt`) on `CREATE`/`ATTACH`. Ignore `DROP` for `ReplicatedMergeTree` (so as for `MergeTree`). Fix iterating over `s3_plain` (`MetadataStorageFromPlainObjectStorage::iterateDirectory`). Note read-only is `web` disk, and write-once is `s3_plain`. [#59170](https://github.com/ClickHouse/ClickHouse/pull/59170) ([Azat Khuzhin](https://github.com/azat)).
* MySQL interface gained support for `net_write_timeout` and `net_read_timeout` settings. `net_write_timeout` is translated into the native `send_timeout` ClickHouse setting and, similarly, `net_read_timeout` into `receive_timeout`. Fixed an issue where it was possible to set MySQL `sql_select_limit` setting only if the entire statement was in upper case. [#59293](https://github.com/ClickHouse/ClickHouse/pull/59293) ([Serge Klochkov](https://github.com/slvrtrn)).
* Fix bug in experimental `_block_number` column which could lead to logical error during complex combination of `ALTER`s and `merge`s. Fixes [#56202](https://github.com/ClickHouse/ClickHouse/issues/56202). Replaces [#58601](https://github.com/ClickHouse/ClickHouse/issues/58601). CC @SmitaRKulkarni. [#59295](https://github.com/ClickHouse/ClickHouse/pull/59295) ([alesapin](https://github.com/alesapin)).
* Play UI understands when an exception is returned inside JSON. Adjustment for [#52853](https://github.com/ClickHouse/ClickHouse/issues/52853). [#59303](https://github.com/ClickHouse/ClickHouse/pull/59303) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* `/binary` HTTP handler allows to specify user, host, and optionally, password in the query string. [#59311](https://github.com/ClickHouse/ClickHouse/pull/59311) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Support backups for compressed in-memory tables. This closes [#57893](https://github.com/ClickHouse/ClickHouse/issues/57893). [#59315](https://github.com/ClickHouse/ClickHouse/pull/59315) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve exception message of function regexp_extract, close [#56393](https://github.com/ClickHouse/ClickHouse/issues/56393). [#59319](https://github.com/ClickHouse/ClickHouse/pull/59319) ([李扬](https://github.com/taiyang-li)).
* Support the FORMAT clause in BACKUP and RESTORE queries. [#59338](https://github.com/ClickHouse/ClickHouse/pull/59338) ([Vitaly Baranov](https://github.com/vitlibar)).
* Function `concatWithSeparator()` now supports arbitrary argument types (instead of only `String` and `FixedString` arguments). For example, `SELECT concatWithSeparator('.', 'number', 1)` now returns `number.1`. [#59341](https://github.com/ClickHouse/ClickHouse/pull/59341) ([Robert Schulze](https://github.com/rschu1ze)).
#### Build/Testing/Packaging Improvement
* Improve aliases for clickhouse binary (now `ch`/`clickhouse` is `clickhouse-local` or `clickhouse` depends on the arguments) and add bash completion for new aliases. [#58344](https://github.com/ClickHouse/ClickHouse/pull/58344) ([Azat Khuzhin](https://github.com/azat)).
* Add settings changes check to CI to check that all settings changes are reflected in settings changes history. [#58555](https://github.com/ClickHouse/ClickHouse/pull/58555) ([Kruglov Pavel](https://github.com/Avogar)).
* Use tables directly attached from S3 in stateful tests. [#58791](https://github.com/ClickHouse/ClickHouse/pull/58791) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Save the whole `fuzzer.log` as an archive instead of the last 100k lines. `tail -n 100000` often removes lines with table definitions. Example:. [#58821](https://github.com/ClickHouse/ClickHouse/pull/58821) ([Dmitry Novik](https://github.com/novikd)).
* Enable Rust on OSX ARM64 (this will add fuzzy search in client with skim and prql language, though I don't think there are people who host ClickHouse on darwin, so it is mostly for fuzzy search in client I would say). [#59272](https://github.com/ClickHouse/ClickHouse/pull/59272) ([Azat Khuzhin](https://github.com/azat)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Add join keys conversion for nested lowcardinality [#51550](https://github.com/ClickHouse/ClickHouse/pull/51550) ([vdimir](https://github.com/vdimir)).
* Flatten only true Nested type if flatten_nested=1, not all Array(Tuple) [#56132](https://github.com/ClickHouse/ClickHouse/pull/56132) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix a bug with projections and the aggregate_functions_null_for_empty setting during insertion. [#56944](https://github.com/ClickHouse/ClickHouse/pull/56944) ([Amos Bird](https://github.com/amosbird)).
* Fixed potential exception due to stale profile UUID [#57263](https://github.com/ClickHouse/ClickHouse/pull/57263) ([Vasily Nemkov](https://github.com/Enmk)).
* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)).
* Ignore MVs with dropped target table during pushing to views [#57520](https://github.com/ClickHouse/ClickHouse/pull/57520) ([Kruglov Pavel](https://github.com/Avogar)).
* [RFC] Eliminate possible race between ALTER_METADATA and MERGE_PARTS [#57755](https://github.com/ClickHouse/ClickHouse/pull/57755) ([Azat Khuzhin](https://github.com/azat)).
* Fix the exprs order bug in group by with rollup [#57786](https://github.com/ClickHouse/ClickHouse/pull/57786) ([Chen768959](https://github.com/Chen768959)).
* Fix lost blobs after dropping a replica with broken detached parts [#58333](https://github.com/ClickHouse/ClickHouse/pull/58333) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Allow users to work with symlinks in user_files_path (again) [#58447](https://github.com/ClickHouse/ClickHouse/pull/58447) ([Duc Canh Le](https://github.com/canhld94)).
* Fix segfault when graphite table does not have agg function [#58453](https://github.com/ClickHouse/ClickHouse/pull/58453) ([Duc Canh Le](https://github.com/canhld94)).
* Delay reading from StorageKafka to allow multiple reads in materialized views [#58477](https://github.com/ClickHouse/ClickHouse/pull/58477) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix a stupid case of intersecting parts [#58482](https://github.com/ClickHouse/ClickHouse/pull/58482) ([Alexander Tokmakov](https://github.com/tavplubix)).
* MergeTreePrefetchedReadPool disable for LIMIT only queries [#58505](https://github.com/ClickHouse/ClickHouse/pull/58505) ([Maksim Kita](https://github.com/kitaisreal)).
* Enable ordinary databases while restoration [#58520](https://github.com/ClickHouse/ClickHouse/pull/58520) ([Jihyuk Bok](https://github.com/tomahawk28)).
* Fix hive threadpool read ORC/Parquet/... Failed [#58537](https://github.com/ClickHouse/ClickHouse/pull/58537) ([sunny](https://github.com/sunny19930321)).
* Hide credentials in system.backup_log base_backup_name column [#58550](https://github.com/ClickHouse/ClickHouse/pull/58550) ([Daniel Pozo Escalona](https://github.com/danipozo)).
* toStartOfInterval for milli- microseconds values rounding [#58557](https://github.com/ClickHouse/ClickHouse/pull/58557) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Disable max_joined_block_rows in ConcurrentHashJoin [#58595](https://github.com/ClickHouse/ClickHouse/pull/58595) ([vdimir](https://github.com/vdimir)).
* Fix join using nullable in old analyzer [#58596](https://github.com/ClickHouse/ClickHouse/pull/58596) ([vdimir](https://github.com/vdimir)).
* `makeDateTime64()`: Allow non-const fraction argument [#58597](https://github.com/ClickHouse/ClickHouse/pull/58597) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix possible NULL dereference during symbolizing inline frames [#58607](https://github.com/ClickHouse/ClickHouse/pull/58607) ([Azat Khuzhin](https://github.com/azat)).
* Improve isolation of query cache entries under re-created users or role switches [#58611](https://github.com/ClickHouse/ClickHouse/pull/58611) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix broken partition key analysis when doing projection optimization [#58638](https://github.com/ClickHouse/ClickHouse/pull/58638) ([Amos Bird](https://github.com/amosbird)).
* Query cache: Fix per-user quota [#58731](https://github.com/ClickHouse/ClickHouse/pull/58731) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix stream partitioning in parallel window functions [#58739](https://github.com/ClickHouse/ClickHouse/pull/58739) ([Dmitry Novik](https://github.com/novikd)).
* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)).
* Don't process requests in Keeper during shutdown [#58765](https://github.com/ClickHouse/ClickHouse/pull/58765) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix Segfault in `SlabsPolygonIndex::find` [#58771](https://github.com/ClickHouse/ClickHouse/pull/58771) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix JSONExtract function for LowCardinality(Nullable) columns [#58808](https://github.com/ClickHouse/ClickHouse/pull/58808) ([vdimir](https://github.com/vdimir)).
* Table CREATE DROP Poco::Logger memory leak fix [#58831](https://github.com/ClickHouse/ClickHouse/pull/58831) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix HTTP compressors finalization [#58846](https://github.com/ClickHouse/ClickHouse/pull/58846) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Multiple read file log storage in mv [#58877](https://github.com/ClickHouse/ClickHouse/pull/58877) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Restriction for the access key id for s3. [#58900](https://github.com/ClickHouse/ClickHouse/pull/58900) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Fix possible crash in clickhouse-local during loading suggestions [#58907](https://github.com/ClickHouse/ClickHouse/pull/58907) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix crash when indexHint() is used [#58911](https://github.com/ClickHouse/ClickHouse/pull/58911) ([Dmitry Novik](https://github.com/novikd)).
* Fix StorageURL forgetting headers on server restart [#58933](https://github.com/ClickHouse/ClickHouse/pull/58933) ([Michael Kolupaev](https://github.com/al13n321)).
* Analyzer: fix storage replacement with insertion block [#58958](https://github.com/ClickHouse/ClickHouse/pull/58958) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix seek in ReadBufferFromZipArchive [#58966](https://github.com/ClickHouse/ClickHouse/pull/58966) ([Michael Kolupaev](https://github.com/al13n321)).
* `DROP INDEX` of inverted index now removes all relevant files from persistence [#59040](https://github.com/ClickHouse/ClickHouse/pull/59040) ([mochi](https://github.com/MochiXu)).
* Fix data race on query_factories_info [#59049](https://github.com/ClickHouse/ClickHouse/pull/59049) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Disable "Too many redirects" error retry [#59099](https://github.com/ClickHouse/ClickHouse/pull/59099) ([skyoct](https://github.com/skyoct)).
* Fix aggregation issue in mixed x86_64 and ARM clusters [#59132](https://github.com/ClickHouse/ClickHouse/pull/59132) ([Harry Lee](https://github.com/HarryLeeIBM)).
* Fix not started database shutdown deadlock [#59137](https://github.com/ClickHouse/ClickHouse/pull/59137) ([Sergei Trifonov](https://github.com/serxa)).
* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix crash with nullable timezone for `toString` [#59190](https://github.com/ClickHouse/ClickHouse/pull/59190) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix abort in iceberg metadata on bad file paths [#59275](https://github.com/ClickHouse/ClickHouse/pull/59275) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix architecture name in select of Rust target [#59307](https://github.com/ClickHouse/ClickHouse/pull/59307) ([p1rattttt](https://github.com/p1rattttt)).
* Fix not-ready set for system.tables [#59351](https://github.com/ClickHouse/ClickHouse/pull/59351) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix lazy initialization in RabbitMQ [#59352](https://github.com/ClickHouse/ClickHouse/pull/59352) ([Kruglov Pavel](https://github.com/Avogar)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Refreshable materialized views (takeover)"'. [#58296](https://github.com/ClickHouse/ClickHouse/pull/58296) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Fix an error in the release script - it didn't allow to make 23.12."'. [#58381](https://github.com/ClickHouse/ClickHouse/pull/58381) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* NO CL ENTRY: 'Revert "Use CH Buffer for HTTP out stream, add metrics for interfaces"'. [#58450](https://github.com/ClickHouse/ClickHouse/pull/58450) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Second attempt: Use CH Buffer for HTTP out stream, add metrics for interfaces'. [#58475](https://github.com/ClickHouse/ClickHouse/pull/58475) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* NO CL ENTRY: 'Revert "Merging [#53757](https://github.com/ClickHouse/ClickHouse/issues/53757)"'. [#58542](https://github.com/ClickHouse/ClickHouse/pull/58542) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Revert "Add support for MySQL `net_write_timeout` and `net_read_timeout` settings"'. [#58872](https://github.com/ClickHouse/ClickHouse/pull/58872) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Extend performance test norm_dist.xml"'. [#58989](https://github.com/ClickHouse/ClickHouse/pull/58989) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Revert "Add a test for [#47892](https://github.com/ClickHouse/ClickHouse/issues/47892)"'. [#58990](https://github.com/ClickHouse/ClickHouse/pull/58990) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Revert "Allow parallel replicas for JOIN with analyzer [part 1]."'. [#59059](https://github.com/ClickHouse/ClickHouse/pull/59059) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Consume leading zeroes when parsing a number in ConstantExpressionTemplate"'. [#59070](https://github.com/ClickHouse/ClickHouse/pull/59070) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Revert "Allow parallel replicas for JOIN with analyzer [part 1].""'. [#59076](https://github.com/ClickHouse/ClickHouse/pull/59076) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* NO CL ENTRY: 'Revert "Allow to attach partition from table with different partition expression when destination partition expression doesn't re-partition"'. [#59120](https://github.com/ClickHouse/ClickHouse/pull/59120) ([Robert Schulze](https://github.com/rschu1ze)).
* NO CL ENTRY: 'DisksApp.cpp: fix typo (specifiged → specified)'. [#59140](https://github.com/ClickHouse/ClickHouse/pull/59140) ([Nikolay Edigaryev](https://github.com/edigaryev)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Analyzer: Fix resolving subcolumns in JOIN [#49703](https://github.com/ClickHouse/ClickHouse/pull/49703) ([vdimir](https://github.com/vdimir)).
* Analyzer: always qualify execution names [#53705](https://github.com/ClickHouse/ClickHouse/pull/53705) ([Dmitry Novik](https://github.com/novikd)).
* Insert quorum: check host node version in addition [#55528](https://github.com/ClickHouse/ClickHouse/pull/55528) ([Igor Nikonov](https://github.com/devcrafter)).
* Remove more old code of projection analysis [#55579](https://github.com/ClickHouse/ClickHouse/pull/55579) ([Anton Popov](https://github.com/CurtizJ)).
* Better exception messages in input formats [#57053](https://github.com/ClickHouse/ClickHouse/pull/57053) ([Kruglov Pavel](https://github.com/Avogar)).
* Parallel replicas custom key: skip unavailable replicas [#57235](https://github.com/ClickHouse/ClickHouse/pull/57235) ([Igor Nikonov](https://github.com/devcrafter)).
* Small change in log message in MergeTreeDataMergerMutator [#57550](https://github.com/ClickHouse/ClickHouse/pull/57550) ([Nikita Taranov](https://github.com/nickitat)).
* fs cache: small optimization [#57615](https://github.com/ClickHouse/ClickHouse/pull/57615) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Customizable dependency failure handling for AsyncLoader [#57697](https://github.com/ClickHouse/ClickHouse/pull/57697) ([Sergei Trifonov](https://github.com/serxa)).
* Bring test back [#57700](https://github.com/ClickHouse/ClickHouse/pull/57700) ([Nikita Taranov](https://github.com/nickitat)).
* Change default database name in clickhouse-local to 'default' [#57774](https://github.com/ClickHouse/ClickHouse/pull/57774) ([Kruglov Pavel](https://github.com/Avogar)).
* Add option `--show-whitespaces-in-diff` to clickhouse-test [#57870](https://github.com/ClickHouse/ClickHouse/pull/57870) ([vdimir](https://github.com/vdimir)).
* Update `query_masking_rules` when reloading the config, attempt 2 [#57993](https://github.com/ClickHouse/ClickHouse/pull/57993) ([Mikhail Koviazin](https://github.com/mkmkme)).
* Remove unneeded parameter `use_external_buffer` from `AsynchronousReadBuffer*` [#58077](https://github.com/ClickHouse/ClickHouse/pull/58077) ([Nikita Taranov](https://github.com/nickitat)).
* Print another message in Bugfix check if internal check has failed [#58091](https://github.com/ClickHouse/ClickHouse/pull/58091) ([vdimir](https://github.com/vdimir)).
* Refactor StorageMerge virtual columns filtering. [#58255](https://github.com/ClickHouse/ClickHouse/pull/58255) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Analyzer: fix tuple comparison when result is always null [#58266](https://github.com/ClickHouse/ClickHouse/pull/58266) ([vdimir](https://github.com/vdimir)).
* Fix an error in the release script - it didn't allow to make 23.12. [#58288](https://github.com/ClickHouse/ClickHouse/pull/58288) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update version_date.tsv and changelogs after v23.12.1.1368-stable [#58290](https://github.com/ClickHouse/ClickHouse/pull/58290) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Fix test_storage_s3_queue/test.py::test_drop_table [#58293](https://github.com/ClickHouse/ClickHouse/pull/58293) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix timeout in 01732_race_condition_storage_join_long [#58298](https://github.com/ClickHouse/ClickHouse/pull/58298) ([vdimir](https://github.com/vdimir)).
* Handle another case for preprocessing in Keeper [#58308](https://github.com/ClickHouse/ClickHouse/pull/58308) ([Antonio Andelic](https://github.com/antonio2368)).
* Disable max_bytes_before_external* in 00172_hits_joins [#58309](https://github.com/ClickHouse/ClickHouse/pull/58309) ([vdimir](https://github.com/vdimir)).
* Analyzer: support functional arguments in USING clause [#58317](https://github.com/ClickHouse/ClickHouse/pull/58317) ([Dmitry Novik](https://github.com/novikd)).
* Fixed logical error in CheckSortedTransform [#58318](https://github.com/ClickHouse/ClickHouse/pull/58318) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Refreshable materialized views again [#58320](https://github.com/ClickHouse/ClickHouse/pull/58320) ([Michael Kolupaev](https://github.com/al13n321)).
* Organize symbols from src/* into DB namespace [#58336](https://github.com/ClickHouse/ClickHouse/pull/58336) ([Amos Bird](https://github.com/amosbird)).
* Add a style check against DOS and Windows [#58345](https://github.com/ClickHouse/ClickHouse/pull/58345) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Check what happens if we remove array joined columns from KeyCondition [#58346](https://github.com/ClickHouse/ClickHouse/pull/58346) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Upload time of the perf tests into artifacts as test_duration_ms [#58348](https://github.com/ClickHouse/ClickHouse/pull/58348) ([Azat Khuzhin](https://github.com/azat)).
* Keep exception format string in retries ctl [#58351](https://github.com/ClickHouse/ClickHouse/pull/58351) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix replication.lib helper (system.mutations has database not current_database) [#58352](https://github.com/ClickHouse/ClickHouse/pull/58352) ([Azat Khuzhin](https://github.com/azat)).
* Refactor StorageHDFS and StorageFile virtual columns filtering [#58353](https://github.com/ClickHouse/ClickHouse/pull/58353) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix suspended workers for AsyncLoader [#58362](https://github.com/ClickHouse/ClickHouse/pull/58362) ([Sergei Trifonov](https://github.com/serxa)).
* Remove stale events from README [#58364](https://github.com/ClickHouse/ClickHouse/pull/58364) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Do not fail the CI on an expired token [#58384](https://github.com/ClickHouse/ClickHouse/pull/58384) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add a test for [#38534](https://github.com/ClickHouse/ClickHouse/issues/38534) [#58391](https://github.com/ClickHouse/ClickHouse/pull/58391) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* fix database engine validation inside database factory [#58395](https://github.com/ClickHouse/ClickHouse/pull/58395) ([Bharat Nallan](https://github.com/bharatnc)).
* Fix bad formatting of the `timeDiff` compatibility alias [#58398](https://github.com/ClickHouse/ClickHouse/pull/58398) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix a comment; remove unused method; stop using pointers [#58399](https://github.com/ClickHouse/ClickHouse/pull/58399) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix test_user_valid_until [#58409](https://github.com/ClickHouse/ClickHouse/pull/58409) ([Nikolay Degterinsky](https://github.com/evillique)).
* Make a test not depend on the lack of floating point associativity [#58439](https://github.com/ClickHouse/ClickHouse/pull/58439) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix `02944_dynamically_change_filesystem_cache_size` [#58445](https://github.com/ClickHouse/ClickHouse/pull/58445) ([Nikolay Degterinsky](https://github.com/evillique)).
* Analyzer: Fix LOGICAL_ERROR with LowCardinality [#58457](https://github.com/ClickHouse/ClickHouse/pull/58457) ([Dmitry Novik](https://github.com/novikd)).
* Replace `std::regex` by re2 [#58458](https://github.com/ClickHouse/ClickHouse/pull/58458) ([Robert Schulze](https://github.com/rschu1ze)).
* Improve perf tests [#58478](https://github.com/ClickHouse/ClickHouse/pull/58478) ([Raúl Marín](https://github.com/Algunenano)).
* Check if I can remove KeyCondition analysis on AST. [#58480](https://github.com/ClickHouse/ClickHouse/pull/58480) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix some thread pool settings not updating at runtime [#58485](https://github.com/ClickHouse/ClickHouse/pull/58485) ([Michael Kolupaev](https://github.com/al13n321)).
* Lower log levels for some Raft logs to new test level [#58487](https://github.com/ClickHouse/ClickHouse/pull/58487) ([Antonio Andelic](https://github.com/antonio2368)).
* PartsSplitter small refactoring [#58506](https://github.com/ClickHouse/ClickHouse/pull/58506) ([Maksim Kita](https://github.com/kitaisreal)).
* Sync content of the docker test images [#58507](https://github.com/ClickHouse/ClickHouse/pull/58507) ([Max K.](https://github.com/maxknv)).
* CI: move ci-specifics from job scripts to ci.py [#58516](https://github.com/ClickHouse/ClickHouse/pull/58516) ([Max K.](https://github.com/maxknv)).
* Minor fixups for `sqid()` [#58517](https://github.com/ClickHouse/ClickHouse/pull/58517) ([Robert Schulze](https://github.com/rschu1ze)).
* Update version_date.tsv and changelogs after v23.12.2.59-stable [#58545](https://github.com/ClickHouse/ClickHouse/pull/58545) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v23.11.4.24-stable [#58546](https://github.com/ClickHouse/ClickHouse/pull/58546) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v23.8.9.54-lts [#58547](https://github.com/ClickHouse/ClickHouse/pull/58547) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v23.10.6.60-stable [#58548](https://github.com/ClickHouse/ClickHouse/pull/58548) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v23.3.19.32-lts [#58549](https://github.com/ClickHouse/ClickHouse/pull/58549) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update CHANGELOG.md [#58559](https://github.com/ClickHouse/ClickHouse/pull/58559) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Fix test 02932_kill_query_sleep [#58560](https://github.com/ClickHouse/ClickHouse/pull/58560) ([Vitaly Baranov](https://github.com/vitlibar)).
* CI fix. Add packager script to build digest [#58571](https://github.com/ClickHouse/ClickHouse/pull/58571) ([Max K.](https://github.com/maxknv)).
* fix and test that S3Clients are reused [#58573](https://github.com/ClickHouse/ClickHouse/pull/58573) ([Sema Checherinda](https://github.com/CheSema)).
* Follow-up to [#58482](https://github.com/ClickHouse/ClickHouse/issues/58482) [#58574](https://github.com/ClickHouse/ClickHouse/pull/58574) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Do not load database engines in suggest [#58586](https://github.com/ClickHouse/ClickHouse/pull/58586) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix wrong message in Keeper [#58588](https://github.com/ClickHouse/ClickHouse/pull/58588) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add some missing LLVM includes [#58594](https://github.com/ClickHouse/ClickHouse/pull/58594) ([Raúl Marín](https://github.com/Algunenano)).
* Small fix in Keeper [#58598](https://github.com/ClickHouse/ClickHouse/pull/58598) ([Antonio Andelic](https://github.com/antonio2368)).
* Update analyzer_tech_debt.txt [#58599](https://github.com/ClickHouse/ClickHouse/pull/58599) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Simplify release.py script [#58600](https://github.com/ClickHouse/ClickHouse/pull/58600) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Update analyzer_tech_debt.txt [#58602](https://github.com/ClickHouse/ClickHouse/pull/58602) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Refactor stacktrace symbolizer to avoid copy-paste [#58610](https://github.com/ClickHouse/ClickHouse/pull/58610) ([Azat Khuzhin](https://github.com/azat)).
* Add intel AMX checking [#58617](https://github.com/ClickHouse/ClickHouse/pull/58617) ([Roman Glinskikh](https://github.com/omgronny)).
* Optional `client` argument for `S3Helper` [#58619](https://github.com/ClickHouse/ClickHouse/pull/58619) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add sorting to 02366_kql_summarize.sql [#58621](https://github.com/ClickHouse/ClickHouse/pull/58621) ([Raúl Marín](https://github.com/Algunenano)).
* Fix possible race in ManyAggregatedData dtor. [#58624](https://github.com/ClickHouse/ClickHouse/pull/58624) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Remove more projections code [#58628](https://github.com/ClickHouse/ClickHouse/pull/58628) ([Anton Popov](https://github.com/CurtizJ)).
* Remove finalize() from ~WriteBufferFromEncryptedFile [#58629](https://github.com/ClickHouse/ClickHouse/pull/58629) ([Vitaly Baranov](https://github.com/vitlibar)).
* Update test_replicated_database/test.py [#58647](https://github.com/ClickHouse/ClickHouse/pull/58647) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Try disabling `muzzy_decay_ms` in jemalloc [#58648](https://github.com/ClickHouse/ClickHouse/pull/58648) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix test_replicated_database::test_startup_without_zk flakiness [#58649](https://github.com/ClickHouse/ClickHouse/pull/58649) ([Azat Khuzhin](https://github.com/azat)).
* Fix 01600_remerge_sort_lowered_memory_bytes_ratio flakiness (due to settings randomization) [#58650](https://github.com/ClickHouse/ClickHouse/pull/58650) ([Azat Khuzhin](https://github.com/azat)).
* Analyzer: Fix assertion in HashJoin with duplicate columns [#58652](https://github.com/ClickHouse/ClickHouse/pull/58652) ([vdimir](https://github.com/vdimir)).
* Document that `match()` can use `ngrambf_v1` and `tokenbf_v1` indexes [#58655](https://github.com/ClickHouse/ClickHouse/pull/58655) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix perf tests duration (checks.test_duration_ms) [#58656](https://github.com/ClickHouse/ClickHouse/pull/58656) ([Azat Khuzhin](https://github.com/azat)).
* Analyzer: Correctly handle constant set in index [#58657](https://github.com/ClickHouse/ClickHouse/pull/58657) ([Dmitry Novik](https://github.com/novikd)).
* fix a typo in stress randomization setting [#58658](https://github.com/ClickHouse/ClickHouse/pull/58658) ([Sema Checherinda](https://github.com/CheSema)).
* Small follow-up to `std::regex` --> `re2` conversion ([#58458](https://github.com/ClickHouse/ClickHouse/issues/58458)) [#58678](https://github.com/ClickHouse/ClickHouse/pull/58678) ([Robert Schulze](https://github.com/rschu1ze)).
* Remove `<regex>` from libcxx [#58681](https://github.com/ClickHouse/ClickHouse/pull/58681) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix bad log message [#58698](https://github.com/ClickHouse/ClickHouse/pull/58698) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Some small improvements to version_helper from [#57203](https://github.com/ClickHouse/ClickHouse/issues/57203) [#58712](https://github.com/ClickHouse/ClickHouse/pull/58712) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Small fixes in different helpers [#58717](https://github.com/ClickHouse/ClickHouse/pull/58717) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix bug in new (not released yet) parallel replicas coordinator [#58722](https://github.com/ClickHouse/ClickHouse/pull/58722) ([Nikita Taranov](https://github.com/nickitat)).
* Analyzer: Fix LOGICAL_ERROR in CountDistinctPass [#58723](https://github.com/ClickHouse/ClickHouse/pull/58723) ([Dmitry Novik](https://github.com/novikd)).
* Fix reading of offsets subcolumn (`size0`) from `Nested` [#58729](https://github.com/ClickHouse/ClickHouse/pull/58729) ([Anton Popov](https://github.com/CurtizJ)).
* Fix Mac OS X [#58733](https://github.com/ClickHouse/ClickHouse/pull/58733) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* fix stress with generate-template-key [#58740](https://github.com/ClickHouse/ClickHouse/pull/58740) ([Sema Checherinda](https://github.com/CheSema)).
* more relaxed check [#58751](https://github.com/ClickHouse/ClickHouse/pull/58751) ([Sema Checherinda](https://github.com/CheSema)).
* Fix usage of small buffers for remote reading [#58768](https://github.com/ClickHouse/ClickHouse/pull/58768) ([Nikita Taranov](https://github.com/nickitat)).
* Add missing includes when _LIBCPP_REMOVE_TRANSITIVE_INCLUDES enabled [#58770](https://github.com/ClickHouse/ClickHouse/pull/58770) ([Artem Alperin](https://github.com/hdnpth)).
* Remove some code [#58772](https://github.com/ClickHouse/ClickHouse/pull/58772) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove some code [#58790](https://github.com/ClickHouse/ClickHouse/pull/58790) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix trash in performance tests [#58794](https://github.com/ClickHouse/ClickHouse/pull/58794) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix data race in Keeper [#58806](https://github.com/ClickHouse/ClickHouse/pull/58806) ([Antonio Andelic](https://github.com/antonio2368)).
* Increase log level to trace to help debug `00993_system_parts_race_condition_drop_zookeeper` [#58809](https://github.com/ClickHouse/ClickHouse/pull/58809) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* DatabaseCatalog background tasks add log names [#58832](https://github.com/ClickHouse/ClickHouse/pull/58832) ([Maksim Kita](https://github.com/kitaisreal)).
* Analyzer: Resolve GROUPING function on shards [#58833](https://github.com/ClickHouse/ClickHouse/pull/58833) ([Dmitry Novik](https://github.com/novikd)).
* Allow parallel replicas for JOIN with analyzer [part 1]. [#58838](https://github.com/ClickHouse/ClickHouse/pull/58838) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix `isRetry` method [#58839](https://github.com/ClickHouse/ClickHouse/pull/58839) ([alesapin](https://github.com/alesapin)).
* fs cache: fix data race in slru [#58842](https://github.com/ClickHouse/ClickHouse/pull/58842) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix reading from an invisible part in new (not released yet) parallel replicas coordinator [#58844](https://github.com/ClickHouse/ClickHouse/pull/58844) ([Nikita Taranov](https://github.com/nickitat)).
* Fix bad log message [#58849](https://github.com/ClickHouse/ClickHouse/pull/58849) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Set max_bytes_before_external_group_by in 01961_roaring_memory_tracking [#58863](https://github.com/ClickHouse/ClickHouse/pull/58863) ([vdimir](https://github.com/vdimir)).
* Fix `00089_group_by_arrays_of_fixed` with external aggregation [#58873](https://github.com/ClickHouse/ClickHouse/pull/58873) ([Antonio Andelic](https://github.com/antonio2368)).
* DiskWeb minor improvement in loading [#58874](https://github.com/ClickHouse/ClickHouse/pull/58874) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix RPN construction for indexHint [#58875](https://github.com/ClickHouse/ClickHouse/pull/58875) ([Dmitry Novik](https://github.com/novikd)).
* Analyzer: add test with GROUP BY on shards [#58876](https://github.com/ClickHouse/ClickHouse/pull/58876) ([Dmitry Novik](https://github.com/novikd)).
* Jepsen job to reuse builds [#58881](https://github.com/ClickHouse/ClickHouse/pull/58881) ([Max K.](https://github.com/maxknv)).
* Fix ambiguity in the setting description [#58883](https://github.com/ClickHouse/ClickHouse/pull/58883) ([Denny Crane](https://github.com/den-crane)).
* Less error prone interface of read buffers [#58886](https://github.com/ClickHouse/ClickHouse/pull/58886) ([Anton Popov](https://github.com/CurtizJ)).
* Add metric for keeper memory soft limit [#58890](https://github.com/ClickHouse/ClickHouse/pull/58890) ([Pradeep Chhetri](https://github.com/chhetripradeep)).
* Add a test for [#47988](https://github.com/ClickHouse/ClickHouse/issues/47988) [#58893](https://github.com/ClickHouse/ClickHouse/pull/58893) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Whitespaces [#58894](https://github.com/ClickHouse/ClickHouse/pull/58894) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix data race in `AggregatingTransform` [#58896](https://github.com/ClickHouse/ClickHouse/pull/58896) ([Antonio Andelic](https://github.com/antonio2368)).
* Update SLRUFileCachePriority.cpp [#58898](https://github.com/ClickHouse/ClickHouse/pull/58898) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add tests for [#57193](https://github.com/ClickHouse/ClickHouse/issues/57193) [#58899](https://github.com/ClickHouse/ClickHouse/pull/58899) ([Raúl Marín](https://github.com/Algunenano)).
* Add log for already downloaded binary in Jepsen [#58901](https://github.com/ClickHouse/ClickHouse/pull/58901) ([Antonio Andelic](https://github.com/antonio2368)).
* fs cache: minor refactoring [#58902](https://github.com/ClickHouse/ClickHouse/pull/58902) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Checking on flaky test_parallel_replicas_custom_key_failover [#58909](https://github.com/ClickHouse/ClickHouse/pull/58909) ([Igor Nikonov](https://github.com/devcrafter)).
* Style fix [#58913](https://github.com/ClickHouse/ClickHouse/pull/58913) ([Dmitry Novik](https://github.com/novikd)).
* Opentelemetry spans to analyze CPU and S3 bottlenecks on inserts [#58914](https://github.com/ClickHouse/ClickHouse/pull/58914) ([Alexander Gololobov](https://github.com/davenger)).
* Fix fault handler in case of thread (for fault handler) cannot be spawned [#58917](https://github.com/ClickHouse/ClickHouse/pull/58917) ([Azat Khuzhin](https://github.com/azat)).
* Analyzer: Support GROUP BY injective function elimination [#58919](https://github.com/ClickHouse/ClickHouse/pull/58919) ([Dmitry Novik](https://github.com/novikd)).
* Cancel MasterCI in PRs [#58920](https://github.com/ClickHouse/ClickHouse/pull/58920) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix and test for azure [#58697](https://github.com/ClickHouse/ClickHouse/issues/58697) [#58921](https://github.com/ClickHouse/ClickHouse/pull/58921) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Extend performance test norm_dist.xml [#58922](https://github.com/ClickHouse/ClickHouse/pull/58922) ([Robert Schulze](https://github.com/rschu1ze)).
* Add regression test for parallel replicas (follow up [#58722](https://github.com/ClickHouse/ClickHouse/issues/58722), [#58844](https://github.com/ClickHouse/ClickHouse/issues/58844)) [#58923](https://github.com/ClickHouse/ClickHouse/pull/58923) ([Nikita Taranov](https://github.com/nickitat)).
* Add a test for [#47892](https://github.com/ClickHouse/ClickHouse/issues/47892) [#58927](https://github.com/ClickHouse/ClickHouse/pull/58927) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix `FunctionToSubcolumnsPass` in debug build [#58930](https://github.com/ClickHouse/ClickHouse/pull/58930) ([Anton Popov](https://github.com/CurtizJ)).
* Call `getMaxFileDescriptorCount` once in Keeper [#58938](https://github.com/ClickHouse/ClickHouse/pull/58938) ([Antonio Andelic](https://github.com/antonio2368)).
* Add missing files to digests [#58942](https://github.com/ClickHouse/ClickHouse/pull/58942) ([Raúl Marín](https://github.com/Algunenano)).
* Analyzer: fix join column not found with compound identifiers [#58943](https://github.com/ClickHouse/ClickHouse/pull/58943) ([vdimir](https://github.com/vdimir)).
* CI: pr_info to provide event_type for job scripts [#58947](https://github.com/ClickHouse/ClickHouse/pull/58947) ([Max K.](https://github.com/maxknv)).
* Using the destination object for paths generation in S3copy. [#58949](https://github.com/ClickHouse/ClickHouse/pull/58949) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Fix data race in slru (2) [#58950](https://github.com/ClickHouse/ClickHouse/pull/58950) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix flaky test_postgresql_replica_database_engine_2/test.py::test_dependent_loading [#58951](https://github.com/ClickHouse/ClickHouse/pull/58951) ([Kseniia Sumarokova](https://github.com/kssenii)).
* More safe way to dump system logs in tests [#58955](https://github.com/ClickHouse/ClickHouse/pull/58955) ([alesapin](https://github.com/alesapin)).
* Add a comment about sparse checkout [#58960](https://github.com/ClickHouse/ClickHouse/pull/58960) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Follow up to [#58357](https://github.com/ClickHouse/ClickHouse/issues/58357) [#58963](https://github.com/ClickHouse/ClickHouse/pull/58963) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Better error message about tuples [#58971](https://github.com/ClickHouse/ClickHouse/pull/58971) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix timeout for prometheus exporter for HTTP/1.1 (due to keep-alive) [#58981](https://github.com/ClickHouse/ClickHouse/pull/58981) ([Azat Khuzhin](https://github.com/azat)).
* Fix 02891_array_shingles with analyzer [#58982](https://github.com/ClickHouse/ClickHouse/pull/58982) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix script name in SQL example in executable.md [#58984](https://github.com/ClickHouse/ClickHouse/pull/58984) ([Lino Uruñuela](https://github.com/Wachynaky)).
* Fix typo [#58986](https://github.com/ClickHouse/ClickHouse/pull/58986) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Revert flaky [#58992](https://github.com/ClickHouse/ClickHouse/pull/58992) ([Raúl Marín](https://github.com/Algunenano)).
* Revive: Parallel replicas custom key: skip unavailable replicas [#58993](https://github.com/ClickHouse/ClickHouse/pull/58993) ([Igor Nikonov](https://github.com/devcrafter)).
* Make performance test `test norm_dist.xml` more realistic [#58995](https://github.com/ClickHouse/ClickHouse/pull/58995) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix 02404_memory_bound_merging with analyzer (follow up [#56419](https://github.com/ClickHouse/ClickHouse/issues/56419)) [#58996](https://github.com/ClickHouse/ClickHouse/pull/58996) ([Nikita Taranov](https://github.com/nickitat)).
* Add test for [#58930](https://github.com/ClickHouse/ClickHouse/issues/58930) [#58999](https://github.com/ClickHouse/ClickHouse/pull/58999) ([Anton Popov](https://github.com/CurtizJ)).
* initialization ConnectionTimeouts [#59000](https://github.com/ClickHouse/ClickHouse/pull/59000) ([Sema Checherinda](https://github.com/CheSema)).
* DiskWeb fix loading [#59006](https://github.com/ClickHouse/ClickHouse/pull/59006) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Update log level for http buffer [#59008](https://github.com/ClickHouse/ClickHouse/pull/59008) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Change log level for super imporant message in Keeper [#59010](https://github.com/ClickHouse/ClickHouse/pull/59010) ([alesapin](https://github.com/alesapin)).
* Fix async loader stress test [#59011](https://github.com/ClickHouse/ClickHouse/pull/59011) ([Sergei Trifonov](https://github.com/serxa)).
* Remove `StaticResourceManager` [#59013](https://github.com/ClickHouse/ClickHouse/pull/59013) ([Sergei Trifonov](https://github.com/serxa)).
* preserve 'amz-sdk-invocation-id' and 'amz-sdk-request' headers with gcp [#59015](https://github.com/ClickHouse/ClickHouse/pull/59015) ([Sema Checherinda](https://github.com/CheSema)).
* Update rename.md [#59017](https://github.com/ClickHouse/ClickHouse/pull/59017) ([filimonov](https://github.com/filimonov)).
* очепятка [#59024](https://github.com/ClickHouse/ClickHouse/pull/59024) ([edpyt](https://github.com/edpyt)).
* Split resource scheduler off `IO/` into `Common/Scheduler/` [#59025](https://github.com/ClickHouse/ClickHouse/pull/59025) ([Sergei Trifonov](https://github.com/serxa)).
* Add a parameter for testing purposes [#59027](https://github.com/ClickHouse/ClickHouse/pull/59027) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix test 02932_kill_query_sleep when running with query cache [#59041](https://github.com/ClickHouse/ClickHouse/pull/59041) ([Vitaly Baranov](https://github.com/vitlibar)).
* CI: Jepsen: fix sanity check in ci.py [#59043](https://github.com/ClickHouse/ClickHouse/pull/59043) ([Max K.](https://github.com/maxknv)).
* CI: add ci_config classes for job and build names [#59046](https://github.com/ClickHouse/ClickHouse/pull/59046) ([Max K.](https://github.com/maxknv)).
* remove flaky test [#59066](https://github.com/ClickHouse/ClickHouse/pull/59066) ([Sema Checherinda](https://github.com/CheSema)).
* Followup to 57853 [#59068](https://github.com/ClickHouse/ClickHouse/pull/59068) ([Dmitry Novik](https://github.com/novikd)).
* Follow-up to [#59027](https://github.com/ClickHouse/ClickHouse/issues/59027) [#59075](https://github.com/ClickHouse/ClickHouse/pull/59075) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix `test_parallel_replicas_invisible_parts` [#59077](https://github.com/ClickHouse/ClickHouse/pull/59077) ([Nikita Taranov](https://github.com/nickitat)).
* Increase max_bytes_before_external_group_by for 00165_jit_aggregate_functions [#59078](https://github.com/ClickHouse/ClickHouse/pull/59078) ([Raúl Marín](https://github.com/Algunenano)).
* Fix stateless/run.sh [#59079](https://github.com/ClickHouse/ClickHouse/pull/59079) ([Kseniia Sumarokova](https://github.com/kssenii)).
* CI: hot fix for reuse [#59081](https://github.com/ClickHouse/ClickHouse/pull/59081) ([Max K.](https://github.com/maxknv)).
* Fix server shutdown due to exception while loading metadata [#59083](https://github.com/ClickHouse/ClickHouse/pull/59083) ([Sergei Trifonov](https://github.com/serxa)).
* Coordinator returns ranges for reading in sorted order [#59089](https://github.com/ClickHouse/ClickHouse/pull/59089) ([Nikita Taranov](https://github.com/nickitat)).
* Raise timeout in 02294_decimal_second_errors [#59090](https://github.com/ClickHouse/ClickHouse/pull/59090) ([Raúl Marín](https://github.com/Algunenano)).
* Add `[[nodiscard]]` to a couple of methods [#59093](https://github.com/ClickHouse/ClickHouse/pull/59093) ([Nikita Taranov](https://github.com/nickitat)).
* Docs: Update integer and float aliases [#59100](https://github.com/ClickHouse/ClickHouse/pull/59100) ([Robert Schulze](https://github.com/rschu1ze)).
* Avoid election timeouts during startup in Keeper [#59102](https://github.com/ClickHouse/ClickHouse/pull/59102) ([Antonio Andelic](https://github.com/antonio2368)).
* Add missing setting max_estimated_execution_time in SettingsChangesHistory [#59104](https://github.com/ClickHouse/ClickHouse/pull/59104) ([Kruglov Pavel](https://github.com/Avogar)).
* Rename some inverted index test files [#59106](https://github.com/ClickHouse/ClickHouse/pull/59106) ([Robert Schulze](https://github.com/rschu1ze)).
* Further reduce runtime of `norm_distance.xml` [#59108](https://github.com/ClickHouse/ClickHouse/pull/59108) ([Robert Schulze](https://github.com/rschu1ze)).
* Minor follow-up to [#53710](https://github.com/ClickHouse/ClickHouse/issues/53710) [#59109](https://github.com/ClickHouse/ClickHouse/pull/59109) ([Robert Schulze](https://github.com/rschu1ze)).
* Update stateless/run.sh [#59116](https://github.com/ClickHouse/ClickHouse/pull/59116) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Followup 57875 [#59117](https://github.com/ClickHouse/ClickHouse/pull/59117) ([Dmitry Novik](https://github.com/novikd)).
* Fixing build [#59130](https://github.com/ClickHouse/ClickHouse/pull/59130) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Capability check for `s3_plain` [#59145](https://github.com/ClickHouse/ClickHouse/pull/59145) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix `02015_async_inserts_stress_long` [#59146](https://github.com/ClickHouse/ClickHouse/pull/59146) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix AggregateFunctionNothing result type issues introducing it with different names [#59147](https://github.com/ClickHouse/ClickHouse/pull/59147) ([vdimir](https://github.com/vdimir)).
* Fix url encoding issue [#59162](https://github.com/ClickHouse/ClickHouse/pull/59162) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Upgrade simdjson to v3.6.3 [#59166](https://github.com/ClickHouse/ClickHouse/pull/59166) ([Robert Schulze](https://github.com/rschu1ze)).
* Decrease log level for one log message [#59168](https://github.com/ClickHouse/ClickHouse/pull/59168) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix broken cache for non-existing temp_path [#59172](https://github.com/ClickHouse/ClickHouse/pull/59172) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Move some headers [#59175](https://github.com/ClickHouse/ClickHouse/pull/59175) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Analyzer: Fix CTE name clash resolution [#59177](https://github.com/ClickHouse/ClickHouse/pull/59177) ([Dmitry Novik](https://github.com/novikd)).
* Fix another place with special symbols in the URL [#59184](https://github.com/ClickHouse/ClickHouse/pull/59184) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Actions dag build filter actions refactoring [#59228](https://github.com/ClickHouse/ClickHouse/pull/59228) ([Maksim Kita](https://github.com/kitaisreal)).
* Minor cleanup of msan usage [#59229](https://github.com/ClickHouse/ClickHouse/pull/59229) ([Robert Schulze](https://github.com/rschu1ze)).
* Load server configs in clickhouse local [#59231](https://github.com/ClickHouse/ClickHouse/pull/59231) ([pufit](https://github.com/pufit)).
* Make libssh build dependent on `-DENABLE_LIBRARIES` [#59242](https://github.com/ClickHouse/ClickHouse/pull/59242) ([Robert Schulze](https://github.com/rschu1ze)).
* Disable copy constructor for MultiVersion [#59244](https://github.com/ClickHouse/ClickHouse/pull/59244) ([Vitaly Baranov](https://github.com/vitlibar)).
* CI: fix ci configuration for nightly job [#59252](https://github.com/ClickHouse/ClickHouse/pull/59252) ([Max K.](https://github.com/maxknv)).
* Fix 02475_bson_each_row_format flakiness (due to small parsing block) [#59253](https://github.com/ClickHouse/ClickHouse/pull/59253) ([Azat Khuzhin](https://github.com/azat)).
* Improve pytest --pdb experience by preserving dockerd on SIGINT (v2) [#59255](https://github.com/ClickHouse/ClickHouse/pull/59255) ([Azat Khuzhin](https://github.com/azat)).
* Fix fasttest by pinning pip dependencies [#59256](https://github.com/ClickHouse/ClickHouse/pull/59256) ([Azat Khuzhin](https://github.com/azat)).
* Added AtomicLogger [#59273](https://github.com/ClickHouse/ClickHouse/pull/59273) ([Maksim Kita](https://github.com/kitaisreal)).
* Update test_reload_after_fail_in_cache_dictionary for analyzer [#59274](https://github.com/ClickHouse/ClickHouse/pull/59274) ([vdimir](https://github.com/vdimir)).
* Update run.sh [#59280](https://github.com/ClickHouse/ClickHouse/pull/59280) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add missing setting optimize_injective_functions_in_group_by to SettingsChangesHistory [#59283](https://github.com/ClickHouse/ClickHouse/pull/59283) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix perf tests (after sumMap starts to filter out -0.) [#59287](https://github.com/ClickHouse/ClickHouse/pull/59287) ([Azat Khuzhin](https://github.com/azat)).
* Use fresh ZooKeeper client on DROP (to have higher chances on success) [#59288](https://github.com/ClickHouse/ClickHouse/pull/59288) ([Azat Khuzhin](https://github.com/azat)).
* Additional check [#59292](https://github.com/ClickHouse/ClickHouse/pull/59292) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* No debug symbols in Rust [#59306](https://github.com/ClickHouse/ClickHouse/pull/59306) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix deadlock in `AsyncLoader::stop()` [#59308](https://github.com/ClickHouse/ClickHouse/pull/59308) ([Sergei Trifonov](https://github.com/serxa)).
* Speed up `00165_jit_aggregate_functions` [#59312](https://github.com/ClickHouse/ClickHouse/pull/59312) ([Nikita Taranov](https://github.com/nickitat)).
* CI: WA for issue with perf test with artifact reuse [#59325](https://github.com/ClickHouse/ClickHouse/pull/59325) ([Max K.](https://github.com/maxknv)).
* Fix typo [#59329](https://github.com/ClickHouse/ClickHouse/pull/59329) ([Raúl Marín](https://github.com/Algunenano)).
* Simplify query_run_metric_arrays in perf tests [#59333](https://github.com/ClickHouse/ClickHouse/pull/59333) ([Raúl Marín](https://github.com/Algunenano)).
* IVolume constructor improve exception message [#59335](https://github.com/ClickHouse/ClickHouse/pull/59335) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix upgrade check for new setting [#59343](https://github.com/ClickHouse/ClickHouse/pull/59343) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fix sccache when building without coverage [#59345](https://github.com/ClickHouse/ClickHouse/pull/59345) ([Raúl Marín](https://github.com/Algunenano)).
* Loggers initialization fix [#59347](https://github.com/ClickHouse/ClickHouse/pull/59347) ([Maksim Kita](https://github.com/kitaisreal)).
* Add setting update_insert_deduplication_token_in_dependent_materialized_views to settings changes history [#59349](https://github.com/ClickHouse/ClickHouse/pull/59349) ([Maksim Kita](https://github.com/kitaisreal)).
* Slightly better memory usage in `AsynchronousBoundedReadBuffer` [#59354](https://github.com/ClickHouse/ClickHouse/pull/59354) ([Anton Popov](https://github.com/CurtizJ)).
* Try to make variant tests a bit faster [#59355](https://github.com/ClickHouse/ClickHouse/pull/59355) ([Kruglov Pavel](https://github.com/Avogar)).
* Minor typos in Settings.h [#59371](https://github.com/ClickHouse/ClickHouse/pull/59371) ([Jordi Villar](https://github.com/jrdi)).
* Rename `quantileDDSketch` to `quantileDD` [#59372](https://github.com/ClickHouse/ClickHouse/pull/59372) ([Alexey Milovidov](https://github.com/alexey-milovidov)).

View File

@ -109,6 +109,9 @@ Do not check for a particular wording of error message, it may change in the fut
If you want to use distributed queries in functional tests, you can leverage `remote` table function with `127.0.0.{1..2}` addresses for the server to query itself; or you can use predefined test clusters in server configuration file like `test_shard_localhost`. Remember to add the words `shard` or `distributed` to the test name, so that it is run in CI in correct configurations, where the server is configured to support distributed queries.
### Working with Temporary Files
Sometimes in a shell test you may need to create a file on the fly to work with. Keep in mind that some CI checks run tests in parallel, so if you create or remove a temporary file in your script without giving it a unique name, some of the CI checks, such as Flaky, can fail. To get around this, use the environment variable `$CLICKHOUSE_TEST_UNIQUE_NAME` to give temporary files a name unique to the test that is running. That way you can be sure that the file you create during setup or remove during cleanup is used only by that test and not by some other test running in parallel.
## Known Bugs {#known-bugs}

View File

@ -0,0 +1,342 @@
---
slug: /en/getting-started/example-datasets/noaa
sidebar_label: NOAA Global Historical Climatology Network
sidebar_position: 1
description: 2.5 billion rows of climate data for the last 120 yrs
---
# NOAA Global Historical Climatology Network
This dataset contains weather measurements for the last 120 years. Each row is a measurement for a point in time and station.
More precisely and according to the [origin of this data](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn):
> GHCN-Daily is a dataset that contains daily observations over global land areas. It contains station-based measurements from land-based stations worldwide, about two-thirds of which are for precipitation measurements only (Menne et al., 2012). GHCN-Daily is a composite of climate records from numerous sources that were merged together and subjected to a common suite of quality assurance reviews (Durre et al., 2010). The archive includes the following meteorological elements:
- Daily maximum temperature
- Daily minimum temperature
- Temperature at the time of observation
- Precipitation (i.e., rain, melted snow)
- Snowfall
- Snow depth
- Other elements where available
## Downloading the data
- A [pre-prepared version](#pre-prepared-data) of the data for ClickHouse, which has been cleansed, re-structured, and enriched. This data covers the years 1900 to 2022.
- [Download the original data](#original-data) and convert to the format required by ClickHouse. Users wanting to add their own columns may wish to explore this approach.
### Pre-prepared data
More specifically, rows that failed any of NOAA's quality assurance checks have been removed. The data has also been restructured from a measurement per line to a row per station id and date, i.e.
```csv
"station_id","date","tempAvg","tempMax","tempMin","precipitation","snowfall","snowDepth","percentDailySun","averageWindSpeed","maxWindSpeed","weatherType"
"AEM00041194","2022-07-30",347,0,308,0,0,0,0,0,0,0
"AEM00041194","2022-07-31",371,413,329,0,0,0,0,0,0,0
"AEM00041194","2022-08-01",384,427,357,0,0,0,0,0,0,0
"AEM00041194","2022-08-02",381,424,352,0,0,0,0,0,0,0
```
This is simpler to query and ensures the resulting table is less sparse. Finally, the data has also been enriched with latitude and longitude.
This data is available in the following S3 location. Either download the data to your local filesystem (and insert using the ClickHouse client) or insert directly into ClickHouse (see [Inserting from S3](#inserting-from-s3)).
To download:
```bash
wget https://datasets-documentation.s3.eu-west-3.amazonaws.com/noaa/noaa_enriched.parquet
```
### Original data
The following details the steps to download and transform the original data in preparation for loading into ClickHouse.
#### Download
To download the original data:
```bash
for i in {1900..2023}; do wget https://noaa-ghcn-pds.s3.amazonaws.com/csv.gz/${i}.csv.gz; done
```
#### Sampling the data
```bash
$ clickhouse-local --query "SELECT * FROM '2021.csv.gz' LIMIT 10" --format PrettyCompact
┌─c1──────────┬───────c2─┬─c3───┬──c4─┬─c5───┬─c6───┬─c7─┬───c8─┐
│ AE000041196 │ 20210101 │ TMAX │ 278 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AE000041196 │ 20210101 │ PRCP │ 0 │ D │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AE000041196 │ 20210101 │ TAVG │ 214 │ H │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041194 │ 20210101 │ TMAX │ 266 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041194 │ 20210101 │ TMIN │ 178 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041194 │ 20210101 │ PRCP │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041194 │ 20210101 │ TAVG │ 217 │ H │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041217 │ 20210101 │ TMAX │ 262 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041217 │ 20210101 │ TMIN │ 155 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041217 │ 20210101 │ TAVG │ 202 │ H │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
└─────────────┴──────────┴──────┴─────┴──────┴──────┴────┴──────┘
```
Summarizing the [format documentation](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn), the columns are, in order:
- An 11 character station identification code. This itself encodes some useful information
- YEAR/MONTH/DAY = 8 character date in YYYYMMDD format (e.g. 19860529 = May 29, 1986)
- ELEMENT = 4 character indicator of element type. Effectively the measurement type. While there are many measurements available, we select the following:
- PRCP - Precipitation (tenths of mm)
- SNOW - Snowfall (mm)
- SNWD - Snow depth (mm)
- TMAX - Maximum temperature (tenths of degrees C)
- TAVG - Average temperature (tenths of a degree C)
- TMIN - Minimum temperature (tenths of degrees C)
- PSUN - Daily percent of possible sunshine (percent)
- AWND - Average daily wind speed (tenths of meters per second)
- WSFG - Peak gust wind speed (tenths of meters per second)
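- `WT**` = Weather Type, where `**` defines the weather type. The full list of weather types is given in the [format documentation](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn).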
- DATA VALUE = 5 character data value for ELEMENT i.e. the value of the measurement.
- M-FLAG = 1 character Measurement Flag. This has 10 possible values. Some of these values indicate questionable data accuracy. We accept data where this is set to “P” - identified as missing presumed zero, as this is only relevant to the PRCP, SNOW and SNWD measurements.
- Q-FLAG is the measurement quality flag with 14 possible values. We are only interested in data with an empty value i.e. it did not fail any quality assurance checks.
- S-FLAG is the source flag for the observation. Not useful for our analysis and ignored.
- OBS-TIME = 4-character time of observation in hour-minute format (e.g. 0700 = 7:00 am). Typically not present in older data. We ignore this for our purposes.
A measurement per line would result in a sparse table structure in ClickHouse. We should transform to a row per time and station, with measurements as columns. First, we limit the dataset to those rows without issues i.e. where `qFlag` is equal to an empty string.
#### Clean the data
Using [ClickHouse local](https://clickhouse.com/blog/extracting-converting-querying-local-files-with-sql-clickhouse-local) we can filter rows that represent measurements of interest and pass our quality requirements:
```bash
clickhouse local --query "SELECT count()
FROM file('*.csv.gz', CSV, 'station_id String, date String, measurement String, value Int64, mFlag String, qFlag String, sFlag String, obsTime String') WHERE qFlag = '' AND (measurement IN ('PRCP', 'SNOW', 'SNWD', 'TMAX', 'TAVG', 'TMIN', 'PSUN', 'AWND', 'WSFG') OR startsWith(measurement, 'WT'))"
2679264563
```
With over 2.6 billion rows, this isn't a fast query since it involves parsing all the files. On our 8 core machine, this takes around 160 seconds.
### Pivot data
While the measurement per line structure can be used with ClickHouse, it will unnecessarily complicate future queries. Ideally, we need a row per station id and date, where each measurement type and associated value are a column i.e.
```csv
"station_id","date","tempAvg","tempMax","tempMin","precipitation","snowfall","snowDepth","percentDailySun","averageWindSpeed","maxWindSpeed","weatherType"
"AEM00041194","2022-07-30",347,0,308,0,0,0,0,0,0,0
"AEM00041194","2022-07-31",371,413,329,0,0,0,0,0,0,0
"AEM00041194","2022-08-01",384,427,357,0,0,0,0,0,0,0
"AEM00041194","2022-08-02",381,424,352,0,0,0,0,0,0,0
```
Using ClickHouse local and a simple `GROUP BY`, we can repivot our data to this structure. To limit memory overhead, we do this one file at a time.
```bash
for i in {1900..2022}
do
clickhouse-local --query "SELECT station_id,
toDate32(date) as date,
anyIf(value, measurement = 'TAVG') as tempAvg,
anyIf(value, measurement = 'TMAX') as tempMax,
anyIf(value, measurement = 'TMIN') as tempMin,
anyIf(value, measurement = 'PRCP') as precipitation,
anyIf(value, measurement = 'SNOW') as snowfall,
anyIf(value, measurement = 'SNWD') as snowDepth,
anyIf(value, measurement = 'PSUN') as percentDailySun,
anyIf(value, measurement = 'AWND') as averageWindSpeed,
anyIf(value, measurement = 'WSFG') as maxWindSpeed,
toUInt8OrZero(replaceOne(anyIf(measurement, startsWith(measurement, 'WT') AND value = 1), 'WT', '')) as weatherType
FROM file('$i.csv.gz', CSV, 'station_id String, date String, measurement String, value Int64, mFlag String, qFlag String, sFlag String, obsTime String')
WHERE qFlag = '' AND (measurement IN ('PRCP', 'SNOW', 'SNWD', 'TMAX', 'TAVG', 'TMIN', 'PSUN', 'AWND', 'WSFG') OR startsWith(measurement, 'WT'))
GROUP BY station_id, date
ORDER BY station_id, date FORMAT CSV" >> "noaa.csv";
done
```
This query produces a single 50GB file `noaa.csv`.
### Enriching the data
The data has no indication of location aside from a station id, which includes a prefix country code. Ideally, each station would have a latitude and longitude associated with it. To achieve this, NOAA conveniently provides the details of each station as a separate [ghcnd-stations.txt](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn#format-of-ghcnd-stationstxt-file). This file has [several columns](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn#format-of-ghcnd-stationstxt-file), of which five are useful to our future analysis: id, latitude, longitude, elevation, and name.
```bash
wget http://noaa-ghcn-pds.s3.amazonaws.com/ghcnd-stations.txt
```
```bash
clickhouse local --query "WITH stations AS (SELECT id, lat, lon, elevation, splitByString(' GSN ',name)[1] as name FROM file('ghcnd-stations.txt', Regexp, 'id String, lat Float64, lon Float64, elevation Float32, name String'))
SELECT station_id,
date,
tempAvg,
tempMax,
tempMin,
precipitation,
snowfall,
snowDepth,
percentDailySun,
averageWindSpeed,
maxWindSpeed,
weatherType,
tuple(lon, lat) as location,
elevation,
name
FROM file('noaa.csv', CSV,
'station_id String, date Date32, tempAvg Int32, tempMax Int32, tempMin Int32, precipitation Int32, snowfall Int32, snowDepth Int32, percentDailySun Int8, averageWindSpeed Int32, maxWindSpeed Int32, weatherType UInt8') as noaa LEFT OUTER
JOIN stations ON noaa.station_id = stations.id INTO OUTFILE 'noaa_enriched.parquet' FORMAT Parquet SETTINGS format_regexp='^(.{11})\s+(\-?\d{1,2}\.\d{4})\s+(\-?\d{1,3}\.\d{1,4})\s+(\-?\d*\.\d*)\s+(.*)\s+(?:[\d]*)'"
```
This query takes a few minutes to run and produces a 6.4 GB file, `noaa_enriched.parquet`.
## Create table
Create a MergeTree table in ClickHouse (from the ClickHouse client).
```sql
CREATE TABLE noaa
(
`station_id` LowCardinality(String),
`date` Date32,
`tempAvg` Int32 COMMENT 'Average temperature (tenths of a degrees C)',
`tempMax` Int32 COMMENT 'Maximum temperature (tenths of degrees C)',
`tempMin` Int32 COMMENT 'Minimum temperature (tenths of degrees C)',
`precipitation` UInt32 COMMENT 'Precipitation (tenths of mm)',
`snowfall` UInt32 COMMENT 'Snowfall (mm)',
`snowDepth` UInt32 COMMENT 'Snow depth (mm)',
`percentDailySun` UInt8 COMMENT 'Daily percent of possible sunshine (percent)',
`averageWindSpeed` UInt32 COMMENT 'Average daily wind speed (tenths of meters per second)',
`maxWindSpeed` UInt32 COMMENT 'Peak gust wind speed (tenths of meters per second)',
`weatherType` Enum8('Normal' = 0, 'Fog' = 1, 'Heavy Fog' = 2, 'Thunder' = 3, 'Small Hail' = 4, 'Hail' = 5, 'Glaze' = 6, 'Dust/Ash' = 7, 'Smoke/Haze' = 8, 'Blowing/Drifting Snow' = 9, 'Tornado' = 10, 'High Winds' = 11, 'Blowing Spray' = 12, 'Mist' = 13, 'Drizzle' = 14, 'Freezing Drizzle' = 15, 'Rain' = 16, 'Freezing Rain' = 17, 'Snow' = 18, 'Unknown Precipitation' = 19, 'Ground Fog' = 21, 'Freezing Fog' = 22),
`location` Point,
`elevation` Float32,
`name` LowCardinality(String)
) ENGINE = MergeTree() ORDER BY (station_id, date);
```
## Inserting into ClickHouse
### Inserting from local file
Data can be inserted from a local file as follows (from the ClickHouse client):
```sql
INSERT INTO noaa FROM INFILE '<path>/noaa_enriched.parquet'
```
where `<path>` represents the full path to the local file on disk.
See [here](https://clickhouse.com/blog/real-world-data-noaa-climate-data#load-the-data) for how to speed this load up.
### Inserting from S3
```sql
INSERT INTO noaa SELECT *
FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/noaa/noaa_enriched.parquet')
```
For how to speed this up, see our blog post on [tuning large data loads](https://clickhouse.com/blog/supercharge-your-clickhouse-data-loads-part2).
## Sample queries
### Highest temperature ever
```sql
SELECT
tempMax / 10 AS maxTemp,
location,
name,
date
FROM blogs.noaa
WHERE tempMax > 500
ORDER BY
tempMax DESC,
date ASC
LIMIT 5
┌─maxTemp─┬─location──────────┬─name───────────────────────────────────────────┬───────date─┐
│ 56.7 │ (-116.8667,36.45) │ CA GREENLAND RCH │ 1913-07-10 │
│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1949-08-20 │
│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1949-09-18 │
│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1952-07-17 │
│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1952-09-04 │
└─────────┴───────────────────┴────────────────────────────────────────────────┴────────────┘
5 rows in set. Elapsed: 0.514 sec. Processed 1.06 billion rows, 4.27 GB (2.06 billion rows/s., 8.29 GB/s.)
```
Reassuringly consistent with the [documented record](https://en.wikipedia.org/wiki/List_of_weather_records#Highest_temperatures_ever_recorded) at [Furnace Creek](https://www.google.com/maps/place/36%C2%B027'00.0%22N+116%C2%B052'00.1%22W/@36.1329666,-116.1104099,8.95z/data=!4m5!3m4!1s0x0:0xf2ed901b860f4446!8m2!3d36.45!4d-116.8667) as of 2023.
### Best ski resorts
Using a [list of ski resorts](https://gist.githubusercontent.com/gingerwizard/dd022f754fd128fdaf270e58fa052e35/raw/622e03c37460f17ef72907afe554cb1c07f91f23/ski_resort_stats.csv) in the United States and their respective locations, we join these against the top 1000 weather stations with the most snow in any month in the last 5 years. Sorting this join by [geoDistance](https://clickhouse.com/docs/en/sql-reference/functions/geo/coordinates/#geodistance) and restricting the results to those where the distance is less than 20km, we select the top result per resort and sort this by total snow. Note we also restrict resorts to those above 1800m, as a broad indicator of good skiing conditions.
```sql
SELECT
resort_name,
total_snow / 1000 AS total_snow_m,
resort_location,
month_year
FROM
(
WITH resorts AS
(
SELECT
resort_name,
state,
(lon, lat) AS resort_location,
'US' AS code
FROM url('https://gist.githubusercontent.com/gingerwizard/dd022f754fd128fdaf270e58fa052e35/raw/622e03c37460f17ef72907afe554cb1c07f91f23/ski_resort_stats.csv', CSVWithNames)
)
SELECT
resort_name,
highest_snow.station_id,
geoDistance(resort_location.1, resort_location.2, station_location.1, station_location.2) / 1000 AS distance_km,
highest_snow.total_snow,
resort_location,
station_location,
month_year
FROM
(
SELECT
sum(snowfall) AS total_snow,
station_id,
any(location) AS station_location,
month_year,
substring(station_id, 1, 2) AS code
FROM noaa
WHERE (date > '2017-01-01') AND (code = 'US') AND (elevation > 1800)
GROUP BY
station_id,
toYYYYMM(date) AS month_year
ORDER BY total_snow DESC
LIMIT 1000
) AS highest_snow
INNER JOIN resorts ON highest_snow.code = resorts.code
WHERE distance_km < 20
ORDER BY
resort_name ASC,
total_snow DESC
LIMIT 1 BY
resort_name,
station_id
)
ORDER BY total_snow DESC
LIMIT 5
┌─resort_name──────────┬─total_snow_m─┬─resort_location─┬─month_year─┐
│ Sugar Bowl, CA │ 7.799 │ (-120.3,39.27) │ 201902 │
│ Donner Ski Ranch, CA │ 7.799 │ (-120.34,39.31) │ 201902 │
│ Boreal, CA │ 7.799 │ (-120.35,39.33) │ 201902 │
│ Homewood, CA │ 4.926 │ (-120.17,39.08) │ 201902 │
│ Alpine Meadows, CA │ 4.926 │ (-120.22,39.17) │ 201902 │
└──────────────────────┴──────────────┴─────────────────┴────────────┘
5 rows in set. Elapsed: 0.750 sec. Processed 689.10 million rows, 3.20 GB (918.20 million rows/s., 4.26 GB/s.)
Peak memory usage: 67.66 MiB.
```
## Credits
We would like to acknowledge the efforts of the Global Historical Climatology Network for preparing, cleansing, and distributing this data. We appreciate your efforts.
Menne, M.J., I. Durre, B. Korzeniewski, S. McNeal, K. Thomas, X. Yin, S. Anthony, R. Ray, R.S. Vose, B.E.Gleason, and T.G. Houston, 2012: Global Historical Climatology Network - Daily (GHCN-Daily), Version 3. [indicate subset used following decimal, e.g. Version 3.25]. NOAA National Centers for Environmental Information. http://doi.org/10.7289/V5D21VHZ [17/08/2020]

View File

@ -197,6 +197,29 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
Instead of `--host`, `--port`, `--user` and `--password` options, ClickHouse client also supports connection strings (see next section).
## Aliases {#cli_aliases}
- `\l` - SHOW DATABASES
- `\d` - SHOW TABLES
- `\c <DATABASE>` - USE DATABASE
- `.` - repeat the last query
## Shortkeys {#shortkeys_aliases}
- `Alt (Option) + Shift + e` - open editor with current query. The editor can be selected with the `EDITOR` environment variable; vim is used by default.
- `Alt (Option) + #` - comment line.
- `Ctrl + r` - fuzzy history search.
:::tip
To make the meta key (Option) work correctly on macOS:
iTerm2: Go to Preferences -> Profile -> Keys -> Left Option key and click Esc+
:::
The full list with all available shortkeys - [replxx](https://github.com/AmokHuginnsson/replxx/blob/1f149bf/src/replxx_impl.cxx#L262).
## Connection string {#connection_string}
clickhouse-client alternatively supports connecting to clickhouse server using a connection string similar to [MongoDB](https://www.mongodb.com/docs/manual/reference/connection-string/), [PostgreSQL](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING), [MySQL](https://dev.mysql.com/doc/refman/8.0/en/connecting-using-uri-or-key-value-pairs.html#connecting-using-uri). It has the following syntax:

View File

@ -25,6 +25,8 @@ Columns:
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of bytes read from all tables and table functions participated in queries.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum of bytes read from all tables and table functions.
- `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total count of sequential authentication failures. If the user enters the correct password before exceeding the `max_failed_sequential_authentications` threshold, the counter is reset.
- `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum count of sequential authentication failures.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — The total query execution time, in seconds (wall time).
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of query execution time.

View File

@ -28,8 +28,10 @@ Columns:
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of bytes read from all tables and table functions participated in queries.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum of bytes read from all tables and table functions.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — The total query execution time, in seconds (wall time).
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of query execution time.
- `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total count of sequential authentication failures. If the user enters the correct password before exceeding the `max_failed_sequential_authentications` threshold, the counter is reset.
- `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum count of sequential authentication failures.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — The total query execution time, in seconds (wall time).
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of query execution time.
## See Also {#see-also}

View File

@ -45,11 +45,11 @@ clickhouse-benchmark [keys] < queries_file;
- `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1.
- `-d N`, `--delay=N` — Interval in seconds between intermediate reports (to disable reports set 0). Default value: 1.
- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys.
- `-p N`, `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys.
- `-i N`, `--iterations=N` — Total number of queries. Default value: 0 (repeat forever).
- `-r`, `--randomize` — Random order of queries execution if there is more than one input query.
- `-s`, `--secure` — Using `TLS` connection.
- `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` stops sending queries when the specified time limit is reached. Default value: 0 (time limit disabled).
- `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `--port` keys.
- `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) to determine whether the two distributions aren't different with the selected level of confidence.
- `--cumulative` — Printing cumulative data instead of data per interval.
- `--database=DATABASE_NAME` — ClickHouse database name. Default value: `default`.

View File

@ -0,0 +1,48 @@
---
toc_priority: 112
---
# groupArraySorted {#groupArraySorted}
Returns an array with the first N items in ascending order.
``` sql
groupArraySorted(N)(column)
```
**Arguments**
- `N` — The number of elements to return.
If the parameter is omitted, the default value is the size of the input.
- `column` — The value (Integer, String, Float and other Generic types).
**Example**
Gets the first 10 numbers:
``` sql
SELECT groupArraySorted(10)(number) FROM numbers(100)
```
``` text
┌─groupArraySorted(10)(number)─┐
│ [0,1,2,3,4,5,6,7,8,9] │
└──────────────────────────────┘
```
Gets the String representations of all numbers in the column:
``` sql
SELECT groupArraySorted(str) FROM (SELECT toString(number) as str FROM numbers(5));
```
``` text
┌─groupArraySorted(str)────────┐
│ ['0','1','2','3','4'] │
└──────────────────────────────┘
```

View File

@ -54,6 +54,7 @@ ClickHouse-specific aggregate functions:
- [groupArrayMovingAvg](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
- [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
- [groupArraySample](./grouparraysample.md)
- [groupArraySorted](/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md)
- [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md)
- [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md)
- [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md)
@ -88,7 +89,7 @@ ClickHouse-specific aggregate functions:
- [quantileTDigestWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md)
- [quantileBFloat16](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16)
- [quantileBFloat16Weighted](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted)
- [quantileDDSketch](/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch)
- [quantileDD](/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch)
- [simpleLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md)
- [stochasticLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md)
- [stochasticLogisticRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md)
@ -105,4 +106,3 @@ ClickHouse-specific aggregate functions:
- [sparkBar](./sparkbar.md)
- [sumCount](./sumcount.md)
- [largestTriangleThreeBuckets](./largestTriangleThreeBuckets.md)

View File

@ -18,7 +18,7 @@ Functions:
- `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest).
- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted).
- `medianBFloat16` — Alias for [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16).
- `medianDDSketch` — Alias for [quantileDDSketch](../../../sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch).
- `medianDD` — Alias for [quantileDD](../../../sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch).
**Example**

View File

@ -1,10 +1,10 @@
---
slug: /en/sql-reference/aggregate-functions/reference/quantileddsketch
sidebar_position: 211
title: quantileDDSketch
title: quantileDD
---
Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a sample with relative-error guarantees. It works by building a [DDSketch](https://www.vldb.org/pvldb/vol12/p2195-masson.pdf).
Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a sample with relative-error guarantees. It works by building a [DDSketch](https://www.vldb.org/pvldb/vol12/p2195-masson.pdf).
**Syntax**
@ -44,13 +44,13 @@ Input table has an integer and a float columns:
Query to calculate 0.75-quantile (third quartile):
``` sql
SELECT quantileDDSketch(0.01, 0.75)(a), quantileDDSketch(0.01, 0.75)(b) FROM example_table;
SELECT quantileDD(0.01, 0.75)(a), quantileDD(0.01, 0.75)(b) FROM example_table;
```
Result:
``` text
┌─quantileDDSketch(0.01, 0.75)(a)─┬─quantileDDSketch(0.01, 0.75)(b)─┐
┌─quantileDD(0.01, 0.75)(a)─┬─quantileDD(0.01, 0.75)(b)─┐
│ 2.974233423476717 │ 1.01 │
└─────────────────────────────────┴─────────────────────────────────┘
```

View File

@ -9,7 +9,7 @@ sidebar_position: 201
Syntax: `quantiles(level1, level2, …)(x)`
All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDDSketch`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDD`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
## quantilesExactExclusive

View File

@ -156,6 +156,35 @@ SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantEleme
└───────────────┴─────────────────────────────┴─────────────────────────────┴────────────────────────────────────┘
```
To find out which variant is stored in each row, use the function `variantType(variant_column)`. It returns an `Enum` with the variant type name for each row (or `'None'` if the row is `NULL`).
Example:
```sql
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
SELECT variantType(v) from test;
```
```text
┌─variantType(v)─┐
│ None │
│ UInt64 │
│ String │
│ Array(UInt64) │
└────────────────┘
```
```sql
SELECT toTypeName(variantType(v)) FROM test LIMIT 1;
```
```text
┌─toTypeName(variantType(v))──────────────────────────────────────────┐
│ Enum8('None' = -1, 'Array(UInt64)' = 0, 'String' = 1, 'UInt64' = 2) │
└─────────────────────────────────────────────────────────────────────┘
```
## Conversion between Variant column and other columns
There are 3 possible conversions that can be performed with Variant column.

View File

@ -2869,6 +2869,51 @@ SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantEleme
└───────────────┴─────────────────────────────┴─────────────────────────────┴────────────────────────────────────┘
```
## variantType
Returns the variant type name for each row of a `Variant` column. If a row contains NULL, `'None'` is returned for it.
**Syntax**
```sql
variantType(variant)
```
**Arguments**
- `variant` — Variant column. [Variant](../../sql-reference/data-types/variant.md).
**Returned value**
- Enum8 column with variant type name for each row.
**Example**
```sql
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
SELECT variantType(v) FROM test;
```
```text
┌─variantType(v)─┐
│ None │
│ UInt64 │
│ String │
│ Array(UInt64) │
└────────────────┘
```
```sql
SELECT toTypeName(variantType(v)) FROM test LIMIT 1;
```
```text
┌─toTypeName(variantType(v))──────────────────────────────────────────┐
│ Enum8('None' = -1, 'Array(UInt64)' = 0, 'String' = 1, 'UInt64' = 2) │
└─────────────────────────────────────────────────────────────────────┘
```
## minSampleSizeConversion
Calculates minimum required sample size for an A/B test comparing conversions (proportions) in two samples.

View File

@ -21,7 +21,7 @@ CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time`, `failed_sequential_authentications` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
`ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).

View File

@ -26,8 +26,11 @@ slug: /ru/operations/system-tables/quota_usage
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байт, считываемых из всех таблиц и табличных функций.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса.
- `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее количество неудачных попыток подряд ввести пароль. Если пользователь ввел верный пароль до преодоления порогового значения `max_failed_sequential_authentications`, то счетчик неудачных попыток будет сброшен.
- `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество неудачных попыток подряд ввести пароль.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса.
## Смотрите также {#see-also}

View File

@ -29,9 +29,10 @@ slug: /ru/operations/system-tables/quotas_usage
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байт, считываемых из всех таблиц и табличных функций.
- `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее количество неудачных попыток подряд ввести пароль. Если пользователь ввел верный пароль до преодоления порогового значения `max_failed_sequential_authentications`, то счетчик неудачных попыток будет сброшен.
- `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество неудачных попыток подряд ввести пароль.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса.
## Смотрите также {#see-also}
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)

View File

@ -22,7 +22,7 @@ ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md).
Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time`, `failed_sequential_authentications` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).

View File

@ -20,7 +20,7 @@ CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
```
Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md).
Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time`, `failed_sequential_authentications` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).

View File

@ -559,12 +559,33 @@ AccessChangesNotifier & AccessControl::getChangesNotifier()
}
AuthResult AccessControl::authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const
AuthResult AccessControl::authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const String & forwarded_address) const
{
// NOTE: In the case where the user has never been logged in using LDAP,
// Then user_id is not generated, and the authentication quota will always be nullptr.
auto authentication_quota = getAuthenticationQuota(credentials.getUserName(), address, forwarded_address);
if (authentication_quota)
{
/// Reserve a single try from the quota to check whether we have another authentication try.
/// This is required for correct behavior in this situation:
/// User has 1 login failures quota.
/// * At the first login with an invalid password: Increase the quota counter. 1 (used) > 1 (max) is false.
///   Then try to authenticate the user and throw an AUTHENTICATION_FAILED error.
/// * In case of the second try: increase quota counter, 2 (used) > 1 (max), then throw QUOTA_EXCEED
///   and don't let the user authenticate.
///
/// The authentication failures counter will be reset after successful authentication.
authentication_quota->used(QuotaType::FAILED_SEQUENTIAL_AUTHENTICATIONS, 1);
}
try
{
return MultipleAccessStorage::authenticate(credentials, address, *external_authenticators, allow_no_password,
allow_plaintext_password);
const auto auth_result = MultipleAccessStorage::authenticate(credentials, address, *external_authenticators, allow_no_password,
allow_plaintext_password);
if (authentication_quota)
authentication_quota->reset(QuotaType::FAILED_SEQUENTIAL_AUTHENTICATIONS);
return auth_result;
}
catch (...)
{
@ -763,7 +784,34 @@ std::shared_ptr<const EnabledQuota> AccessControl::getEnabledQuota(
const String & forwarded_address,
const String & custom_quota_key) const
{
return quota_cache->getEnabledQuota(user_id, user_name, enabled_roles, address, forwarded_address, custom_quota_key);
return quota_cache->getEnabledQuota(user_id, user_name, enabled_roles, address, forwarded_address, custom_quota_key, true);
}
/// Returns the quota to charge for an authentication attempt made by `user_name`
/// from `address` / `forwarded_address`, or nullptr when the user cannot be found or read.
std::shared_ptr<const EnabledQuota> AccessControl::getAuthenticationQuota(
    const String & user_name, const Poco::Net::IPAddress & address, const std::string & forwarded_address) const
{
    const auto user_id = find<User>(user_name);
    if (!user_id)
        return nullptr;

    const UserPtr user = tryRead<User>(*user_id);
    if (!user)
        return nullptr;

    /// Only the roles that are both granted and default are enabled for the lookup.
    const auto default_granted_roles = user->granted_roles.findGranted(user->default_roles);
    const auto enabled_roles_info = getEnabledRolesInfo(default_granted_roles, {});

    /// The client key has not been received yet at the moment of authentication over a TCP connection.
    /// With key type QuotaKeyType::CLIENT_KEY, QuotaCache::QuotaInfo::calculateKey would throw on the
    /// empty key, so the lookup is done with throw_if_client_key_empty = false.
    const String empty_client_key;
    const bool throw_if_client_key_empty = false;

    return quota_cache->getEnabledQuota(
        *user_id,
        user->getName(),
        enabled_roles_info->enabled_roles,
        address,
        forwarded_address,
        empty_client_key,
        throw_if_client_key_empty);
}

View File

@ -118,7 +118,7 @@ public:
scope_guard subscribeForChanges(const UUID & id, const OnChangedHandler & handler) const;
scope_guard subscribeForChanges(const std::vector<UUID> & ids, const OnChangedHandler & handler) const;
AuthResult authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const;
AuthResult authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const String & forwarded_address) const;
/// Makes a backup of access entities.
void restoreFromBackup(RestorerFromBackup & restorer) override;
@ -206,6 +206,11 @@ public:
const String & forwarded_address,
const String & custom_quota_key) const;
/// Returns the quota that applies to `user_name` while it is being authenticated from
/// `address` / `forwarded_address`. Returns nullptr if the user cannot be found or read.
/// The lookup is done with an empty client key.
std::shared_ptr<const EnabledQuota> getAuthenticationQuota(
const String & user_name,
const Poco::Net::IPAddress & address,
const std::string & forwarded_address) const;
std::vector<QuotaUsage> getAllQuotasUsage() const;
std::shared_ptr<const EnabledSettings> getEnabledSettings(

View File

@ -111,6 +111,11 @@ const QuotaTypeInfo & QuotaTypeInfo::get(QuotaType type)
static const auto info = make_info("WRITTEN_BYTES", 1);
return info;
}
case QuotaType::FAILED_SEQUENTIAL_AUTHENTICATIONS:
{
static const auto info = make_info("FAILED_SEQUENTIAL_AUTHENTICATIONS", 1);
return info;
}
case QuotaType::MAX: break;
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected quota type: {}", static_cast<int>(type));

View File

@ -11,16 +11,17 @@ using QuotaValue = UInt64;
/// Kinds of resource what we wish to quota.
enum class QuotaType
{
QUERIES, /// Number of queries.
QUERY_SELECTS, /// Number of select queries.
QUERY_INSERTS, /// Number of insert queries.
ERRORS, /// Number of queries with exceptions.
RESULT_ROWS, /// Number of rows returned as result.
RESULT_BYTES, /// Number of bytes returned as result.
READ_ROWS, /// Number of rows read from tables.
READ_BYTES, /// Number of bytes read from tables.
EXECUTION_TIME, /// Total amount of query execution time in nanoseconds.
WRITTEN_BYTES, /// Number of bytes written to tables.
QUERIES, /// Number of queries.
QUERY_SELECTS, /// Number of select queries.
QUERY_INSERTS, /// Number of insert queries.
ERRORS, /// Number of queries with exceptions.
RESULT_ROWS, /// Number of rows returned as result.
RESULT_BYTES, /// Number of bytes returned as result.
READ_ROWS, /// Number of rows read from tables.
READ_BYTES, /// Number of bytes read from tables.
EXECUTION_TIME, /// Total amount of query execution time in nanoseconds.
WRITTEN_BYTES, /// Number of bytes written to tables.
FAILED_SEQUENTIAL_AUTHENTICATIONS, /// Number of recent failed authentications.
MAX
};

View File

@ -105,6 +105,16 @@ struct EnabledQuota::Impl
std::uniform_int_distribution<Int64> distribution{0, count - 1};
return std::chrono::system_clock::duration(distribution(thread_local_rng));
}
/// Overwrites the usage counter of `quota_type` with `value` in every interval of `intervals`.
static void resetQuotaValue(const Intervals & intervals, QuotaType quota_type, QuotaValue value, std::chrono::system_clock::time_point current_time)
{
    const size_t type_index = static_cast<size_t>(quota_type);
    for (const auto & entry : intervals.intervals)
    {
        entry.used[type_index] = value;
        /// NOTE(review): the returned time point is discarded; presumably the call is made
        /// only for its effect on the interval's own state — confirm.
        entry.getEndOfInterval(current_time);
    }
}
};
@ -285,6 +295,12 @@ void EnabledQuota::checkExceeded(QuotaType quota_type) const
}
/// Sets the usage counter of `quota_type` back to zero in all currently loaded intervals.
void EnabledQuota::reset(QuotaType quota_type) const
{
    const auto current_intervals = intervals.load();
    const auto now = std::chrono::system_clock::now();
    Impl::resetQuotaValue(*current_intervals, quota_type, 0, now);
}
std::optional<QuotaUsage> EnabledQuota::getUsage() const
{
auto loaded = intervals.load();

View File

@ -52,6 +52,8 @@ public:
void checkExceeded() const;
void checkExceeded(QuotaType quota_type) const;
/// Resets the usage counter of the given quota type to zero in all intervals.
void reset(QuotaType quota_type) const;
/// Returns the information about quota consumption.
std::optional<QuotaUsage> getUsage() const;

View File

@ -30,7 +30,7 @@ void QuotaCache::QuotaInfo::setQuota(const QuotaPtr & quota_, const UUID & quota
}
String QuotaCache::QuotaInfo::calculateKey(const EnabledQuota & enabled) const
String QuotaCache::QuotaInfo::calculateKey(const EnabledQuota & enabled, bool throw_if_client_key_empty) const
{
const auto & params = enabled.params;
switch (quota->key_type)
@ -55,8 +55,15 @@ String QuotaCache::QuotaInfo::calculateKey(const EnabledQuota & enabled) const
{
if (!params.client_key.empty())
return params.client_key;
throw Exception(ErrorCodes::QUOTA_REQUIRES_CLIENT_KEY, "Quota {} (for user {}) requires a client supplied key.",
quota->getName(), params.user_name);
if (throw_if_client_key_empty)
throw Exception(
ErrorCodes::QUOTA_REQUIRES_CLIENT_KEY,
"Quota {} (for user {}) requires a client supplied key.",
quota->getName(),
params.user_name);
else
return ""; // Authentication quota has no client key at time of authentication.
}
case QuotaKeyType::CLIENT_KEY_OR_USER_NAME:
{
@ -165,7 +172,14 @@ QuotaCache::QuotaCache(const AccessControl & access_control_)
QuotaCache::~QuotaCache() = default;
std::shared_ptr<const EnabledQuota> QuotaCache::getEnabledQuota(const UUID & user_id, const String & user_name, const boost::container::flat_set<UUID> & enabled_roles, const Poco::Net::IPAddress & client_address, const String & forwarded_address, const String & client_key)
std::shared_ptr<const EnabledQuota> QuotaCache::getEnabledQuota(
const UUID & user_id,
const String & user_name,
const boost::container::flat_set<UUID> & enabled_roles,
const Poco::Net::IPAddress & client_address,
const String & forwarded_address,
const String & client_key,
bool throw_if_client_key_empty)
{
std::lock_guard lock{mutex};
ensureAllQuotasRead();
@ -188,11 +202,10 @@ std::shared_ptr<const EnabledQuota> QuotaCache::getEnabledQuota(const UUID & use
auto res = std::shared_ptr<EnabledQuota>(new EnabledQuota(params));
enabled_quotas.emplace(std::move(params), res);
chooseQuotaToConsumeFor(*res);
chooseQuotaToConsumeFor(*res, throw_if_client_key_empty);
return res;
}
void QuotaCache::ensureAllQuotasRead()
{
/// `mutex` is already locked.
@ -257,13 +270,13 @@ void QuotaCache::chooseQuotaToConsume()
i = enabled_quotas.erase(i);
else
{
chooseQuotaToConsumeFor(*elem);
chooseQuotaToConsumeFor(*elem, true);
++i;
}
}
}
void QuotaCache::chooseQuotaToConsumeFor(EnabledQuota & enabled)
void QuotaCache::chooseQuotaToConsumeFor(EnabledQuota & enabled, bool throw_if_client_key_empty)
{
/// `mutex` is already locked.
boost::shared_ptr<const Intervals> intervals;
@ -271,7 +284,7 @@ void QuotaCache::chooseQuotaToConsumeFor(EnabledQuota & enabled)
{
if (info.roles->match(enabled.params.user_id, enabled.params.enabled_roles))
{
String key = info.calculateKey(enabled);
String key = info.calculateKey(enabled, throw_if_client_key_empty);
intervals = info.getOrBuildIntervals(key);
break;
}

View File

@ -28,7 +28,8 @@ public:
const boost::container::flat_set<UUID> & enabled_roles,
const Poco::Net::IPAddress & address,
const String & forwarded_address,
const String & client_key);
const String & client_key,
bool throw_if_client_key_empty);
std::vector<QuotaUsage> getAllQuotasUsage() const;
@ -41,7 +42,7 @@ private:
QuotaInfo(const QuotaPtr & quota_, const UUID & quota_id_) { setQuota(quota_, quota_id_); }
void setQuota(const QuotaPtr & quota_, const UUID & quota_id_);
String calculateKey(const EnabledQuota & enabled_quota) const;
String calculateKey(const EnabledQuota & enabled_quota, bool throw_if_client_key_empty) const;
boost::shared_ptr<const Intervals> getOrBuildIntervals(const String & key);
boost::shared_ptr<const Intervals> rebuildIntervals(const String & key, std::chrono::system_clock::time_point current_time);
void rebuildAllIntervals();
@ -56,7 +57,7 @@ private:
void quotaAddedOrChanged(const UUID & quota_id, const std::shared_ptr<const Quota> & new_quota);
void quotaRemoved(const UUID & quota_id);
void chooseQuotaToConsume();
void chooseQuotaToConsumeFor(EnabledQuota & enabled_quota);
void chooseQuotaToConsumeFor(EnabledQuota & enabled_quota, bool throw_if_client_key_empty);
const AccessControl & access_control;
mutable std::mutex mutex;

View File

@ -291,8 +291,17 @@ public:
const UInt64 size = value.size();
checkArraySize(size, max_elems);
writeVarUInt(size, buf);
for (const auto & element : value)
writeBinaryLittleEndian(element, buf);
if constexpr (std::endian::native == std::endian::little)
{
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
}
else
{
for (const auto & element : value)
writeBinaryLittleEndian(element, buf);
}
if constexpr (Trait::last)
writeBinaryLittleEndian(this->data(place).total_values, buf);
@ -315,8 +324,16 @@ public:
auto & value = this->data(place).value;
value.resize_exact(size, arena);
for (auto & element : value)
readBinaryLittleEndian(element, buf);
if constexpr (std::endian::native == std::endian::little)
{
buf.readStrict(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
}
else
{
for (auto & element : value)
readBinaryLittleEndian(element, buf);
}
if constexpr (Trait::last)
readBinaryLittleEndian(this->data(place).total_values, buf);

View File

@ -0,0 +1,414 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <base/sort.h>
#include <algorithm>
#include <new>
#include <type_traits>
#include <utility>
#include <Common/RadixSort.h>
#include <Common/Exception.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnConst.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
extern const int TOO_LARGE_ARRAY_SIZE;
}
namespace
{
/// Selects how GroupArraySortedData keeps track of the smallest elements it has seen.
enum class GroupArraySortedStrategy
{
/// Keep at most `max_elements` values arranged as a binary heap.
heap,
/// Append values unordered and trim the array with a partial sort once it grows too large.
sort
};
/// Limits strictly above this value make the factory choose the `sort` strategy; smaller limits use `heap`.
constexpr size_t group_array_sorted_sort_strategy_max_elements_threshold = 1000000;
/** Values accumulated so far by groupArraySorted for one aggregation state.
  *
  * `heap` strategy: `values` never grows beyond `max_elements` and is kept as a binary max-heap
  * with respect to `compare`, so `values[0]` is the largest element currently retained.
  * `sort` strategy: values are appended unordered and cut back to `max_elements` with
  * nth_element whenever the array reaches `max_elements * partial_sort_max_elements_factor`.
  */
template <typename T, GroupArraySortedStrategy strategy>
struct GroupArraySortedData
{
    using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
    using Array = PODArray<T, 32, Allocator>;

    /// The `sort` strategy lets the array grow up to `max_elements * factor` before trimming it.
    static constexpr size_t partial_sort_max_elements_factor = 2;
    static constexpr bool is_value_generic_field = std::is_same_v<T, Field>;

    Array values;

    /// Strict "less than" ordering used by every algorithm in this struct.
    /// Field values use operator<, all other types go through CompareHelper<T>::less.
    /// NOTE(review): the trailing -1 is presumably the NaN direction hint - confirm against CompareHelper.
    static bool compare(const T & lhs, const T & rhs)
    {
        if constexpr (is_value_generic_field)
        {
            return lhs < rhs;
        }
        else
        {
            return CompareHelper<T>::less(lhs, rhs, -1);
        }
    }

    /// Function object wrapper around `compare` for the standard algorithms.
    struct Comparator
    {
        bool operator()(const T & lhs, const T & rhs) const
        {
            return compare(lhs, rhs);
        }
    };

    /// Restores the heap order after `values[0]` has been overwritten with a new element:
    /// the new root is sifted down until neither of its children is greater than it.
    ALWAYS_INLINE void heapReplaceTop()
    {
        size_t size = values.size();
        if (size < 2)
            return;

        /// Pick the larger of the root's children.
        size_t child_index = 1;
        if (size > 2 && compare(values[1], values[2]))
            ++child_index;

        /// Check if we are in order
        if (compare(values[child_index], values[0]))
            return;

        size_t current_index = 0;
        /// Elements are moved rather than copied, which matters when T is Field.
        auto current = std::move(values[current_index]);
        do
        {
            /// We are not in heap-order, move the largest child into the parent's slot.
            values[current_index] = std::move(values[child_index]);
            current_index = child_index;

            // Recompute the child based off of the updated parent
            child_index = 2 * child_index + 1;
            if (child_index >= size)
                break;

            if ((child_index + 1) < size && compare(values[child_index], values[child_index + 1]))
            {
                /// Right child exists and is greater than left child.
                ++child_index;
            }

            /// Check if we are in order.
        } while (!compare(values[child_index], current));

        values[current_index] = std::move(current);
    }

    /// Sorts all values in ascending `compare` order and drops everything after the first `max_elements`.
    /// Non-Field types try `trySort` first and use LSD radix sort when it reports failure.
    ALWAYS_INLINE void sortAndLimit(size_t max_elements, Arena * arena)
    {
        if constexpr (is_value_generic_field)
        {
            ::sort(values.begin(), values.end(), Comparator());
        }
        else
        {
            bool try_sort = trySort(values.begin(), values.end(), Comparator());
            if (!try_sort)
                RadixSort<RadixSortNumTraits<T>>::executeLSD(values.data(), values.size());
        }

        if (values.size() > max_elements)
            values.resize(max_elements, arena);
    }

    /// Once the array holds `max_elements * partial_sort_max_elements_factor` values or more,
    /// moves the `max_elements` smallest ones to the front and truncates the rest.
    ALWAYS_INLINE void partialSortAndLimitIfNeeded(size_t max_elements, Arena * arena)
    {
        if (values.size() < max_elements * partial_sort_max_elements_factor)
            return;

        ::nth_element(values.begin(), values.begin() + max_elements, values.end(), Comparator());
        values.resize(max_elements, arena);
    }

    /// Offers one element to the state, keeping only the `max_elements` smallest ones (heap strategy)
    /// or appending and trimming lazily (sort strategy).
    ALWAYS_INLINE void addElement(T && element, size_t max_elements, Arena * arena)
    {
        if constexpr (strategy == GroupArraySortedStrategy::heap)
        {
            /// Nothing can be retained with a zero limit, and `values[0]` below must not be read
            /// while the array is empty.
            if (max_elements == 0)
                return;

            if (values.size() >= max_elements)
            {
                /// Element is greater or equal than current max element, it cannot be in k min elements
                if (!compare(element, values[0]))
                    return;

                values[0] = std::move(element);
                heapReplaceTop();
                return;
            }

            values.push_back(std::move(element), arena);
            std::push_heap(values.begin(), values.end(), Comparator());
        }
        else
        {
            values.push_back(std::move(element), arena);
            partialSortAndLimitIfNeeded(max_elements, arena);
        }
    }

    /// Appends the final result as one array row of `to`: sorts and truncates the values,
    /// pushes the new end offset and then copies the values into the nested column.
    ALWAYS_INLINE void insertResultInto(IColumn & to, size_t max_elements, Arena * arena)
    {
        auto & result_array = assert_cast<ColumnArray &>(to);
        auto & result_array_offsets = result_array.getOffsets();

        sortAndLimit(max_elements, arena);

        result_array_offsets.push_back(result_array_offsets.back() + values.size());

        if (values.empty())
            return;

        if constexpr (is_value_generic_field)
        {
            auto & result_array_data = result_array.getData();
            for (auto & value : values)
                result_array_data.insert(value);
        }
        else
        {
            auto & result_array_data = assert_cast<ColumnVector<T> &>(result_array.getData()).getData();

            size_t result_array_data_insert_begin = result_array_data.size();
            result_array_data.resize(result_array_data_insert_begin + values.size());

            for (size_t i = 0; i < values.size(); ++i)
                result_array_data[result_array_data_insert_begin + i] = values[i];
        }
    }
};
/// groupArraySorted state that keeps its values in a heap.
template <typename T>
using GroupArraySortedDataHeap = GroupArraySortedData<T, GroupArraySortedStrategy::heap>;
/// groupArraySorted state that appends values and trims them with a partial sort.
template <typename T>
using GroupArraySortedDataSort = GroupArraySortedData<T, GroupArraySortedStrategy::sort>;
/// Upper bound for the limit parameter; the GroupArraySorted constructor throws BAD_ARGUMENTS above it.
constexpr UInt64 aggregate_function_group_array_sorted_max_element_size = 0xFFFFFF;
/** Aggregate function groupArraySorted(max_elements)(x).
  *
  * Returns an array with at most `max_elements` smallest values of the argument in ascending order.
  * `Data` is the state type (heap or sort based), `T` is the element type stored in the state:
  * a native numeric type, or Field for everything else.
  */
template <typename Data, typename T>
class GroupArraySorted final
    : public IAggregateFunctionDataHelper<Data, GroupArraySorted<Data, T>>
{
public:
    /// Throws BAD_ARGUMENTS when `max_elements_` exceeds aggregate_function_group_array_sorted_max_element_size.
    explicit GroupArraySorted(
        const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elements_)
        : IAggregateFunctionDataHelper<Data, GroupArraySorted<Data, T>>(
            {data_type_}, parameters_, std::make_shared<DataTypeArray>(data_type_))
        , max_elements(max_elements_)
        , serialization(data_type_->getDefaultSerialization())
    {
        if (max_elements > aggregate_function_group_array_sorted_max_element_size)
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "Too large limit parameter for groupArraySorted aggregate function, it should not exceed {}",
                aggregate_function_group_array_sorted_max_element_size);
    }

    String getName() const override { return "groupArraySorted"; }

    /// Offers the value at `row_num` of the first argument column to the state.
    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        if constexpr (std::is_same_v<T, Field>)
        {
            auto row_value = (*columns[0])[row_num];
            this->data(place).addElement(std::move(row_value), max_elements, arena);
        }
        else
        {
            auto row_value = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
            this->data(place).addElement(std::move(row_value), max_elements, arena);
        }
    }

    /// Offers every value of `rhs` to `place`. Elements are copied because `rhs` is const.
    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        auto & rhs_values = this->data(rhs).values;
        for (auto rhs_element : rhs_values)
            this->data(place).addElement(std::move(rhs_element), max_elements, arena);
    }

    /** Format: varint element count, then the elements.
      * Field elements are written as a one-byte "has value" flag (false for NULL) followed,
      * when the flag is true, by the value in the default binary serialization of the data type.
      * Other element types are written as little-endian binary values.
      */
    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        auto & values = this->data(place).values;
        size_t size = values.size();
        writeVarUInt(size, buf);

        if constexpr (std::is_same_v<T, Field>)
        {
            for (const Field & element : values)
            {
                if (element.isNull())
                {
                    writeBinary(false, buf);
                }
                else
                {
                    writeBinary(true, buf);
                    serialization->serializeBinary(element, buf, {});
                }
            }
        }
        else
        {
            if constexpr (std::endian::native == std::endian::little)
            {
                /// The in-memory layout already matches the wire format, write it in one piece.
                buf.write(reinterpret_cast<const char *>(values.data()), size * sizeof(values[0]));
            }
            else
            {
                for (const auto & element : values)
                    writeBinaryLittleEndian(element, buf);
            }
        }
    }

    /// Reads the format produced by serialize().
    /// Throws TOO_LARGE_ARRAY_SIZE when the stored element count exceeds `max_elements`.
    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        size_t size = 0;
        readVarUInt(size, buf);

        if (unlikely(size > max_elements))
            throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size, it should not exceed {}", max_elements);

        auto & values = this->data(place).values;
        values.resize_exact(size, arena);

        if constexpr (std::is_same_v<T, Field>)
        {
            for (Field & element : values)
            {
                /// The slots obtained from resize_exact() have not been constructed as Field objects yet
                /// (NOTE(review): PODArray is expected not to run constructors - confirm), so construct
                /// a NULL Field in place before anything assigns to it.
                new (&element) Field;

                /// The flag written by serialize() is true when a value follows and false for NULL.
                UInt8 has_value = 0;
                readBinary(has_value, buf);
                if (has_value)
                    serialization->deserializeBinary(element, buf, {});
            }
        }
        else
        {
            if constexpr (std::endian::native == std::endian::little)
            {
                buf.readStrict(reinterpret_cast<char *>(values.data()), size * sizeof(values[0]));
            }
            else
            {
                for (auto & element : values)
                    readBinaryLittleEndian(element, buf);
            }
        }
    }

    /// Appends the sorted, truncated values of the state as one array row of `to`.
    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
    {
        this->data(place).insertResultInto(to, max_elements, arena);
    }

    bool allocatesMemoryInArena() const override { return true; }

private:
    /// Maximum number of elements in the result array.
    UInt64 max_elements;
    /// Default serialization of the argument type, used for Field elements in serialize()/deserialize().
    SerializationPtr serialization;
};
/// groupArraySorted aggregate function using the heap-based state for element type T.
template <typename T>
using GroupArraySortedHeap = GroupArraySorted<GroupArraySortedDataHeap<T>, T>;
/// groupArraySorted aggregate function using the sort-based state for element type T.
template <typename T>
using GroupArraySortedSort = GroupArraySorted<GroupArraySortedDataSort<T>, T>;
/// Instantiates AggregateFunctionTemplate for the native representation of `argument_type`:
/// Date is handled as UInt16, DateTime as UInt32, IPv4 as IPv4, and every other type is
/// delegated to createWithNumericType. The remaining arguments are forwarded to the constructor.
template <template <typename> class AggregateFunctionTemplate, typename ... TArgs>
AggregateFunctionPtr createWithNumericOrTimeType(const IDataType & argument_type, TArgs && ... args)
{
    const WhichDataType type_info(argument_type);
    const auto type_index = type_info.idx;

    if (type_index == TypeIndex::Date)
        return std::make_shared<AggregateFunctionTemplate<UInt16>>(std::forward<TArgs>(args)...);

    if (type_index == TypeIndex::DateTime)
        return std::make_shared<AggregateFunctionTemplate<UInt32>>(std::forward<TArgs>(args)...);

    if (type_index == TypeIndex::IPv4)
        return std::make_shared<AggregateFunctionTemplate<IPv4>>(std::forward<TArgs>(args)...);

    return AggregateFunctionPtr(
        createWithNumericType<AggregateFunctionTemplate, TArgs...>(argument_type, std::forward<TArgs>(args)...));
}
/// Builds the aggregate function for `argument_type`: a specialization for a native numeric or
/// time type when createWithNumericOrTimeType provides one, otherwise the generic Field-based one.
template <template <typename> class AggregateFunctionTemplate, typename ... TArgs>
inline AggregateFunctionPtr createAggregateFunctionGroupArraySortedImpl(const DataTypePtr & argument_type, const Array & parameters, TArgs ... args)
{
    auto specialized = createWithNumericOrTimeType<AggregateFunctionTemplate>(
        *argument_type, argument_type, parameters, std::forward<TArgs>(args)...);

    if (!specialized)
        return std::make_shared<AggregateFunctionTemplate<Field>>(argument_type, parameters, std::forward<TArgs>(args)...);

    return AggregateFunctionPtr(specialized);
}
/** Factory callback for groupArraySorted.
  *
  * Requires exactly one argument and exactly one parameter, the limit, which must be a positive integer.
  * Limits above group_array_sorted_sort_strategy_max_elements_threshold use the sort-based
  * implementation, all others the heap-based one.
  *
  * Throws BAD_ARGUMENTS when the limit is missing, not an integer or not positive, and
  * NUMBER_OF_ARGUMENTS_DOESNT_MATCH when more than one parameter is given.
  */
AggregateFunctionPtr createAggregateFunctionGroupArray(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
    assertUnary(name, argument_types);

    if (parameters.empty())
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should have limit argument", name);

    if (parameters.size() != 1)
        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
            "Function {} does not support this number of arguments", name);

    auto type = parameters[0].getType();
    if (type != Field::Types::Int64 && type != Field::Types::UInt64)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);

    /// Zero is rejected for both integer representations: a zero limit would make the heap-based
    /// state inspect the top of an empty heap.
    if ((type == Field::Types::Int64 && parameters[0].get<Int64>() <= 0) ||
        (type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);

    UInt64 max_elems = parameters[0].get<UInt64>();

    if (max_elems > group_array_sorted_sort_strategy_max_elements_threshold)
        return createAggregateFunctionGroupArraySortedImpl<GroupArraySortedSort>(argument_types[0], parameters, max_elems);

    return createAggregateFunctionGroupArraySortedImpl<GroupArraySortedHeap>(argument_types[0], parameters, max_elems);
}
}
/// Registers the "groupArraySorted" aggregate function in `factory`.
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory)
{
    AggregateFunctionProperties sorted_properties = {
        .returns_default_when_only_null = false,
        .is_order_dependent = false,
    };

    factory.registerFunction("groupArraySorted", { createAggregateFunctionGroupArray, sorted_properties });
}
}

View File

@ -31,7 +31,7 @@ namespace ErrorCodes
template <typename> class QuantileTiming;
template <typename> class QuantileGK;
template <typename> class QuantileDDSketch;
template <typename> class QuantileDD;
/** Generic aggregate function for calculation of quantiles.
* It depends on quantile calculation data structure. Look at Quantile*.h for various implementations.
@ -64,7 +64,7 @@ private:
using ColVecType = ColumnVectorOrDecimal<Value>;
static constexpr bool returns_float = !(std::is_same_v<FloatReturnType, void>);
static constexpr bool is_quantile_ddsketch = std::is_same_v<Data, QuantileDDSketch<Value>>;
static constexpr bool is_quantile_ddsketch = std::is_same_v<Data, QuantileDD<Value>>;
static_assert(!is_decimal<Value> || !returns_float);
QuantileLevels<Float64> levels;
@ -334,7 +334,7 @@ struct NameQuantilesBFloat16Weighted { static constexpr auto name = "quantilesBF
struct NameQuantileGK { static constexpr auto name = "quantileGK"; };
struct NameQuantilesGK { static constexpr auto name = "quantilesGK"; };
struct NameQuantileDDSketch { static constexpr auto name = "quantileDDSketch"; };
struct NameQuantilesDDSketch { static constexpr auto name = "quantilesDDSketch"; };
struct NameQuantileDD { static constexpr auto name = "quantileDD"; };
struct NameQuantilesDD { static constexpr auto name = "quantilesDD"; };
}

View File

@ -1,5 +1,5 @@
#include <AggregateFunctions/AggregateFunctionQuantile.h>
#include <AggregateFunctions/QuantileDDSketch.h>
#include <AggregateFunctions/QuantileDD.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeDate.h>
@ -19,8 +19,8 @@ namespace ErrorCodes
namespace
{
template <typename Value, bool float_return> using FuncQuantileDDSketch = AggregateFunctionQuantile<Value, QuantileDDSketch<Value>, NameQuantileDDSketch, false, std::conditional_t<float_return, Float64, void>, false, true>;
template <typename Value, bool float_return> using FuncQuantilesDDSketch = AggregateFunctionQuantile<Value, QuantileDDSketch<Value>, NameQuantilesDDSketch, false, std::conditional_t<float_return, Float64, void>, true, true>;
template <typename Value, bool float_return> using FuncQuantileDD = AggregateFunctionQuantile<Value, QuantileDD<Value>, NameQuantileDD, false, std::conditional_t<float_return, Float64, void>, false, true>;
template <typename Value, bool float_return> using FuncQuantilesDD = AggregateFunctionQuantile<Value, QuantileDD<Value>, NameQuantilesDD, false, std::conditional_t<float_return, Float64, void>, true, true>;
template <template <typename, bool> class Function>
@ -46,16 +46,16 @@ AggregateFunctionPtr createAggregateFunctionQuantile(
}
void registerAggregateFunctionsQuantileDDSketch(AggregateFunctionFactory & factory)
void registerAggregateFunctionsQuantileDD(AggregateFunctionFactory & factory)
{
/// For aggregate functions returning array we cannot return NULL on empty set.
AggregateFunctionProperties properties = { .returns_default_when_only_null = true };
factory.registerFunction(NameQuantileDDSketch::name, createAggregateFunctionQuantile<FuncQuantileDDSketch>);
factory.registerFunction(NameQuantilesDDSketch::name, { createAggregateFunctionQuantile<FuncQuantilesDDSketch>, properties });
factory.registerFunction(NameQuantileDD::name, createAggregateFunctionQuantile<FuncQuantileDD>);
factory.registerFunction(NameQuantilesDD::name, { createAggregateFunctionQuantile<FuncQuantilesDD>, properties });
/// 'median' is an alias for 'quantile'
factory.registerAlias("medianDDSketch", NameQuantileDDSketch::name);
factory.registerAlias("medianDD", NameQuantileDD::name);
}
}

View File

@ -33,14 +33,14 @@ namespace DB
*/
template <typename Value>
class QuantileDDSketch
class QuantileDD
{
public:
using Weight = UInt64;
QuantileDDSketch() = default;
QuantileDD() = default;
explicit QuantileDDSketch(Float64 relative_accuracy) : data(relative_accuracy) { }
explicit QuantileDD(Float64 relative_accuracy) : data(relative_accuracy) { }
void add(const Value & x)
{
@ -53,7 +53,7 @@ public:
data.add(x, w);
}
void merge(const QuantileDDSketch &other)
void merge(const QuantileDD &other)
{
data.merge(other.data);
}

View File

@ -1,2 +1,5 @@
clickhouse_add_executable (quantile-t-digest quantile-t-digest.cpp)
target_link_libraries (quantile-t-digest PRIVATE dbms clickhouse_aggregate_functions)
clickhouse_add_executable (group_array_sorted group_array_sorted.cpp)
target_link_libraries (group_array_sorted PRIVATE dbms clickhouse_aggregate_functions)

View File

@ -0,0 +1,205 @@
#include <algorithm>
#include <type_traits>
#include <utility>
#include <iostream>
#include "pcg_random.hpp"
#include <Columns/ColumnVector.h>
#include <Common/ArenaAllocator.h>
#include <Common/RadixSort.h>
#include <Columns/ColumnArray.h>
using namespace DB;
/** Benchmark variant of the heap-based groupArraySorted state.
  *
  * `values` holds at most `max_elements` items arranged as a binary max-heap ordered by
  * operator<, so `values[0]` is the largest retained element.
  */
template <typename T>
struct GroupArraySortedDataHeap
{
    using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
    using Array = PODArray<T, 32, Allocator>;

    Array values;

    static bool compare(const T & lhs, const T & rhs)
    {
        return lhs < rhs;
    }

    /// Function object wrapper around `compare` for the standard heap algorithms.
    struct Comparator
    {
        bool operator()(const T & lhs, const T & rhs) const
        {
            return compare(lhs, rhs);
        }
    };

    /// Restores the heap order after `values[0]` has been overwritten with a new element:
    /// the new root is sifted down until neither of its children is greater than it.
    ALWAYS_INLINE void replaceTop()
    {
        size_t size = values.size();
        if (size < 2)
            return;

        /// Pick the larger of the root's children.
        size_t child_index = 1;
        if (values.size() > 2 && compare(values[1], values[2]))
            ++child_index;

        /// Check if we are in order
        if (compare(values[child_index], values[0]))
            return;

        size_t current_index = 0;
        auto current = values[current_index];
        do
        {
            /// We are not in heap-order, move the largest child into the parent's slot.
            values[current_index] = values[child_index];
            current_index = child_index;

            // Recompute the child based off of the updated parent
            child_index = 2 * child_index + 1;
            if (child_index >= size)
                break;

            if ((child_index + 1) < size && compare(values[child_index], values[child_index + 1]))
            {
                /// Right child exists and is greater than left child.
                ++child_index;
            }

            /// Check if we are in order.
        } while (!compare(values[child_index], current));

        values[current_index] = current;
    }

    /// Offers one element to the heap, keeping only the `max_elements` smallest elements seen so far.
    ALWAYS_INLINE void addElement(const T & element, size_t max_elements, Arena * arena)
    {
        /// Nothing can be retained with a zero limit, and `values[0]` below must not be read
        /// while the array is empty.
        if (max_elements == 0)
            return;

        if (values.size() >= max_elements)
        {
            /// Element is greater or equal than current max element, it cannot be in k min elements
            if (!compare(element, values[0]))
                return;

            values[0] = element;
            replaceTop();
            return;
        }

        values.push_back(element, arena);
        std::push_heap(values.begin(), values.end(), Comparator());
    }

    /// Prints the retained elements to stderr from largest to smallest, emptying the heap.
    ALWAYS_INLINE void dump()
    {
        while (!values.empty())
        {
            std::pop_heap(values.begin(), values.end(), Comparator());
            std::cerr << values.back() << ' ';
            values.pop_back();
        }

        std::cerr << '\n';
    }
};
/** Benchmark variant of the sort-based groupArraySorted state.
  *
  * Values are appended unordered; once the array holds `max_elements * 4` values or more it is
  * cut back to the `max_elements` smallest ones with nth_element.
  */
template <typename T>
struct GroupArraySortedDataSort
{
    using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
    using Array = PODArray<T, 32, Allocator>;

    Array values;

    static bool compare(const T & lhs, const T & rhs)
    {
        return lhs < rhs;
    }

    /// Function object wrapper around `compare` for the standard algorithms.
    struct Comparator
    {
        bool operator()(const T & lhs, const T & rhs) const
        {
            return compare(lhs, rhs);
        }
    };

    /// Sorts all values with LSD radix sort and keeps at most the first `max_elements` of them.
    ALWAYS_INLINE void sortAndLimit(size_t max_elements, Arena * arena)
    {
        RadixSort<RadixSortNumTraits<T>>::executeLSD(values.data(), values.size());

        /// Only ever shrink: growing here would expose elements that were never written.
        if (values.size() > max_elements)
            values.resize(max_elements, arena);
    }

    /// Trims the array to the `max_elements` smallest values once it has grown to four times the limit.
    ALWAYS_INLINE void partialSortAndLimitIfNeeded(size_t max_elements, Arena * arena)
    {
        if (values.size() < max_elements * 4)
            return;

        std::nth_element(values.begin(), values.begin() + max_elements, values.end(), Comparator());
        values.resize(max_elements, arena);
    }

    /// Appends one element and trims the array when it has become large enough.
    ALWAYS_INLINE void addElement(const T & element, size_t max_elements, Arena * arena)
    {
        values.push_back(element, arena);
        partialSortAndLimitIfNeeded(max_elements, arena);
    }
};
/// Feeds `elements` pseudo-random 64-bit values into a fresh SortedData limited to
/// `max_elements` and prints the elapsed time in milliseconds to stderr.
template <typename SortedData>
NO_INLINE void benchmark(size_t elements, size_t max_elements)
{
    Stopwatch stopwatch;
    stopwatch.start();

    SortedData sorted_data;
    pcg64_fast generator;
    Arena arena;

    for (size_t remaining = elements; remaining != 0; --remaining)
    {
        uint64_t random_value = generator();
        sorted_data.addElement(random_value, max_elements, &arena);
    }

    stopwatch.stop();

    std::cerr << "Elapsed " << stopwatch.elapsedMilliseconds() << " milliseconds" << '\n';
}
/// Usage: ./group_array_sorted method elements max_elements
/// `method` is "heap" or "sort". Returns 1 on invalid arguments and 0 otherwise.
int main(int argc, char ** argv)
{
    if (argc != 4)
    {
        std::cerr << "./group_array_sorted method elements max_elements" << '\n';
        return 1;
    }

    std::string method = std::string(argv[1]);
    uint64_t elements = std::atol(argv[2]); /// NOLINT
    uint64_t max_elements = std::atol(argv[3]); /// NOLINT

    /// A zero limit (also the result of a non-numeric argument) would make the heap variant
    /// inspect the top of an empty heap.
    if (max_elements == 0)
    {
        std::cerr << "max_elements must be greater than zero" << '\n';
        return 1;
    }

    std::cerr << "Method " << method << " elements " << elements << " max elements " << max_elements << '\n';

    if (method == "heap")
    {
        benchmark<GroupArraySortedDataHeap<UInt64>>(elements, max_elements);
    }
    else if (method == "sort")
    {
        benchmark<GroupArraySortedDataSort<UInt64>>(elements, max_elements);
    }
    else
    {
        std::cerr << "Invalid method " << method << '\n';
        return 1;
    }

    return 0;
}

View File

@ -15,6 +15,7 @@ void registerAggregateFunctionCount(AggregateFunctionFactory &);
void registerAggregateFunctionDeltaSum(AggregateFunctionFactory &);
void registerAggregateFunctionDeltaSumTimestamp(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArray(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory);
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantile(AggregateFunctionFactory &);
@ -31,7 +32,7 @@ void registerAggregateFunctionsQuantileTimingWeighted(AggregateFunctionFactory &
void registerAggregateFunctionsQuantileTDigest(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileTDigestWeighted(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileBFloat16(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileDDSketch(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileDD(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileBFloat16Weighted(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileApprox(AggregateFunctionFactory &);
void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory &);
@ -112,6 +113,7 @@ void registerAggregateFunctions()
registerAggregateFunctionDeltaSum(factory);
registerAggregateFunctionDeltaSumTimestamp(factory);
registerAggregateFunctionGroupArray(factory);
registerAggregateFunctionGroupArraySorted(factory);
registerAggregateFunctionGroupUniqArray(factory);
registerAggregateFunctionGroupArrayInsertAt(factory);
registerAggregateFunctionsQuantile(factory);
@ -128,7 +130,7 @@ void registerAggregateFunctions()
registerAggregateFunctionsQuantileTDigest(factory);
registerAggregateFunctionsQuantileTDigestWeighted(factory);
registerAggregateFunctionsQuantileBFloat16(factory);
registerAggregateFunctionsQuantileDDSketch(factory);
registerAggregateFunctionsQuantileDD(factory);
registerAggregateFunctionsQuantileBFloat16Weighted(factory);
registerAggregateFunctionsQuantileApprox(factory);
registerAggregateFunctionsSequenceMatch(factory);

View File

@ -1214,7 +1214,7 @@ private:
static void expandGroupByAll(QueryNode & query_tree_node_typed);
static void expandOrderByAll(QueryNode & query_tree_node_typed);
static void expandOrderByAll(QueryNode & query_tree_node_typed, const Settings & settings);
static std::string
rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context);
@ -2334,8 +2334,11 @@ void QueryAnalyzer::expandGroupByAll(QueryNode & query_tree_node_typed)
query_tree_node_typed.setIsGroupByAll(false);
}
void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed)
void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed, const Settings & settings)
{
if (!settings.enable_order_by_all || !query_tree_node_typed.isOrderByAll())
return;
auto * all_node = query_tree_node_typed.getOrderBy().getNodes()[0]->as<SortNode>();
if (!all_node)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not sort node.");
@ -2361,6 +2364,7 @@ void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed)
}
query_tree_node_typed.getOrderByNode() = list_node;
query_tree_node_typed.setIsOrderByAll(false);
}
std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(
@ -7176,8 +7180,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
if (query_node_typed.hasHaving() && query_node_typed.isGroupByWithTotals() && is_rollup_or_cube)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS and WITH ROLLUP or CUBE are not supported together in presence of HAVING");
if (settings.enable_order_by_all && query_node_typed.isOrderByAll())
expandOrderByAll(query_node_typed);
expandOrderByAll(query_node_typed, settings);
/// Initialize aliases in query node scope
QueryExpressionsAliasVisitor visitor(scope);

View File

@ -119,6 +119,9 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
if (is_group_by_all)
buffer << ", is_group_by_all: " << is_group_by_all;
if (is_order_by_all)
buffer << ", is_order_by_all: " << is_order_by_all;
std::string group_by_type;
if (is_group_by_with_rollup)
group_by_type = "rollup";
@ -257,6 +260,7 @@ bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs) const
is_group_by_with_cube == rhs_typed.is_group_by_with_cube &&
is_group_by_with_grouping_sets == rhs_typed.is_group_by_with_grouping_sets &&
is_group_by_all == rhs_typed.is_group_by_all &&
is_order_by_all == rhs_typed.is_order_by_all &&
cte_name == rhs_typed.cte_name &&
projection_columns == rhs_typed.projection_columns &&
settings_changes == rhs_typed.settings_changes;
@ -288,6 +292,7 @@ void QueryNode::updateTreeHashImpl(HashState & state) const
state.update(is_group_by_with_cube);
state.update(is_group_by_with_grouping_sets);
state.update(is_group_by_all);
state.update(is_order_by_all);
state.update(settings_changes.size());
@ -306,18 +311,19 @@ QueryTreeNodePtr QueryNode::cloneImpl() const
{
auto result_query_node = std::make_shared<QueryNode>(context);
result_query_node->is_subquery = is_subquery;
result_query_node->is_cte = is_cte;
result_query_node->is_distinct = is_distinct;
result_query_node->is_limit_with_ties = is_limit_with_ties;
result_query_node->is_group_by_with_totals = is_group_by_with_totals;
result_query_node->is_group_by_with_rollup = is_group_by_with_rollup;
result_query_node->is_group_by_with_cube = is_group_by_with_cube;
result_query_node->is_subquery = is_subquery;
result_query_node->is_cte = is_cte;
result_query_node->is_distinct = is_distinct;
result_query_node->is_limit_with_ties = is_limit_with_ties;
result_query_node->is_group_by_with_totals = is_group_by_with_totals;
result_query_node->is_group_by_with_rollup = is_group_by_with_rollup;
result_query_node->is_group_by_with_cube = is_group_by_with_cube;
result_query_node->is_group_by_with_grouping_sets = is_group_by_with_grouping_sets;
result_query_node->is_group_by_all = is_group_by_all;
result_query_node->cte_name = cte_name;
result_query_node->projection_columns = projection_columns;
result_query_node->settings_changes = settings_changes;
result_query_node->is_group_by_all = is_group_by_all;
result_query_node->is_order_by_all = is_order_by_all;
result_query_node->cte_name = cte_name;
result_query_node->projection_columns = projection_columns;
result_query_node->settings_changes = settings_changes;
return result_query_node;
}
@ -332,6 +338,7 @@ ASTPtr QueryNode::toASTImpl(const ConvertToASTOptions & options) const
select_query->group_by_with_cube = is_group_by_with_cube;
select_query->group_by_with_grouping_sets = is_group_by_with_grouping_sets;
select_query->group_by_all = is_group_by_all;
select_query->order_by_all = is_order_by_all;
if (hasWith())
select_query->setExpression(ASTSelectQuery::Expression::WITH, getWith().toAST(options));

View File

@ -32,6 +32,8 @@ namespace ErrorCodes
M(UInt64, shard_num) \
M(UInt64, replica_num) \
M(Bool, check_parts) \
M(Bool, check_projection_parts) \
M(Bool, allow_backup_broken_projections) \
M(Bool, internal) \
M(String, host_id) \
M(OptionalUUID, backup_uuid)

View File

@ -62,6 +62,12 @@ struct BackupSettings
/// Check checksums of the data parts before writing them to a backup.
bool check_parts = true;
/// Check checksums of the projection data parts before writing them to a backup.
bool check_projection_parts = true;
/// Allow to create backup with broken projections.
bool allow_backup_broken_projections = false;
/// Internal, should not be specified by user.
/// Whether this backup is a part of a distributed backup created by BACKUP ON CLUSTER.
bool internal = false;

View File

@ -308,6 +308,13 @@ ColumnWithTypeAndName ColumnFunction::reduce() const
ProfileEvents::increment(ProfileEvents::CompiledFunctionExecute);
res.column = function->execute(columns, res.type, elements_size);
if (res.column->getDataType() != res.type->getColumnType())
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Unexpected return type from {}. Expected {}. Got {}",
function->getName(),
res.type->getColumnType(),
res.column->getDataType());
if (recursively_convert_result_to_full_column_if_low_cardinality)
{
res.column = recursiveRemoveLowCardinality(res.column);

View File

@ -592,6 +592,7 @@
M(710, FAULT_INJECTED) \
M(711, FILECACHE_ACCESS_DENIED) \
M(712, TOO_MANY_MATERIALIZED_VIEWS) \
M(713, BROKEN_PROJECTION) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \

View File

@ -154,7 +154,7 @@ class IColumn;
M(Float, totals_auto_threshold, 0.5, "The threshold for totals_mode = 'auto'.", 0) \
\
M(Bool, allow_suspicious_low_cardinality_types, false, "In CREATE TABLE statement allows specifying LowCardinality modifier for types of small fixed size (8 or less). Enabling this may increase merge times and memory consumption.", 0) \
M(Bool, allow_suspicious_fixed_string_types, false, "In CREATE TABLE statement allows creating columns of type FixedString(n) with n > 256. FixedString with length >= 256 is suspicious and most likely indicates misusage", 0) \
M(Bool, allow_suspicious_fixed_string_types, false, "In CREATE TABLE statement allows creating columns of type FixedString(n) with n > 256. FixedString with length >= 256 is suspicious and most likely indicates misuse", 0) \
M(Bool, allow_suspicious_indices, false, "Reject primary/secondary indexes and sorting keys with identical expressions", 0) \
M(Bool, allow_suspicious_ttl_expressions, false, "Reject TTL expressions that don't depend on any of table's columns. It indicates a user error most of the time.", 0) \
M(Bool, compile_expressions, false, "Compile some scalar functions and operators to native code.", 0) \
@ -230,7 +230,7 @@ class IColumn;
\
M(Bool, force_index_by_date, false, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \
M(Bool, force_primary_key, false, "Throw an exception if there is primary key in a table, and it is not used.", 0) \
M(Bool, use_skip_indexes, true, "Use data skinipping indexes during query execution.", 0) \
M(Bool, use_skip_indexes, true, "Use data skipping indexes during query execution.", 0) \
M(Bool, use_skip_indexes_if_final, false, "If query has FINAL, then skipping data based on indexes may produce incorrect result, hence disabled by default.", 0) \
M(String, ignore_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be excluded during query execution.", 0) \
\
@ -253,7 +253,7 @@ class IColumn;
M(LogQueriesType, log_queries_min_type, QueryLogElementType::QUERY_START, "Minimal type in query_log to log, possible values (from low to high): QUERY_START, QUERY_FINISH, EXCEPTION_BEFORE_START, EXCEPTION_WHILE_PROCESSING.", 0) \
M(Milliseconds, log_queries_min_query_duration_ms, 0, "Minimal time for the query to run, to get to the query_log/query_thread_log/query_views_log.", 0) \
M(UInt64, log_queries_cut_to_length, 100000, "If query length is greater than specified threshold (in bytes), then cut query when writing to query log. Also limit length of printed query in ordinary text log.", 0) \
M(Float, log_queries_probability, 1., "Log queries with the specified probabality.", 0) \
M(Float, log_queries_probability, 1., "Log queries with the specified probability.", 0) \
\
M(Bool, log_processors_profiles, false, "Log Processors profile events.", 0) \
M(DistributedProductMode, distributed_product_mode, DistributedProductMode::DENY, "How are distributed subqueries performed inside IN or JOIN sections?", IMPORTANT) \
@ -261,8 +261,8 @@ class IColumn;
M(UInt64, max_concurrent_queries_for_all_users, 0, "The maximum number of concurrent requests for all users.", 0) \
M(UInt64, max_concurrent_queries_for_user, 0, "The maximum number of concurrent requests per user.", 0) \
\
M(Bool, insert_deduplicate, true, "For INSERT queries in the replicated table, specifies that deduplication of insertings blocks should be performed", 0) \
M(Bool, async_insert_deduplicate, false, "For async INSERT queries in the replicated table, specifies that deduplication of insertings blocks should be performed", 0) \
M(Bool, insert_deduplicate, true, "For INSERT queries in the replicated table, specifies that deduplication of inserting blocks should be performed", 0) \
M(Bool, async_insert_deduplicate, false, "For async INSERT queries in the replicated table, specifies that deduplication of inserting blocks should be performed", 0) \
\
M(UInt64Auto, insert_quorum, 0, "For INSERT queries in the replicated table, wait writing for the specified number of replicas and linearize the addition of the data. 0 - disabled, 'auto' - use majority", 0) \
M(Milliseconds, insert_quorum_timeout, 600000, "If the quorum of replicas did not meet in specified time (in milliseconds), exception will be thrown and insertion is aborted.", 0) \
@ -587,11 +587,11 @@ class IColumn;
M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \
M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \
M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \
M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \
M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \
M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \
M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \
M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped taraget table during pushing to views", 0) \
M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \
M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW <name> REFRESH ...).", 0) \
M(Bool, stop_refreshable_materialized_views_on_startup, false, "On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW <name> afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.", 0) \
M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \
@ -765,8 +765,8 @@ class IColumn;
\
M(Bool, load_marks_asynchronously, false, "Load MergeTree marks asynchronously", 0) \
M(Bool, enable_filesystem_read_prefetches_log, false, "Log to system.filesystem prefetch_log during query. Should be used only for testing or debugging, not recommended to be turned on by default", 0) \
M(Bool, allow_prefetched_read_pool_for_remote_filesystem, true, "Prefer prefethed threadpool if all parts are on remote filesystem", 0) \
M(Bool, allow_prefetched_read_pool_for_local_filesystem, false, "Prefer prefethed threadpool if all parts are on remote filesystem", 0) \
M(Bool, allow_prefetched_read_pool_for_remote_filesystem, true, "Prefer prefetched threadpool if all parts are on remote filesystem", 0) \
M(Bool, allow_prefetched_read_pool_for_local_filesystem, false, "Prefer prefetched threadpool if all parts are on local filesystem", 0) \
\
M(UInt64, prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the prefetch buffer to read from the filesystem.", 0) \
M(UInt64, filesystem_prefetch_step_bytes, 0, "Prefetch step in bytes. Zero means `auto` - approximately the best prefetch step will be auto deduced, but might not be 100% the best. The actual value might be different because of setting filesystem_prefetch_min_bytes_for_single_read_task", 0) \
@ -811,7 +811,7 @@ class IColumn;
\
M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%a` (full original file name), `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \
\
M(Bool, parallelize_output_from_storages, true, "Parallelize output for reading step from storage. It allows parallelizing query processing right after reading from storage if possible", 0) \
M(Bool, parallelize_output_from_storages, true, "Parallelize output for reading step from storage. It allows parallelization of query processing right after reading from storage if possible", 0) \
M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \
M(Bool, count_distinct_optimization, false, "Rewrite count distinct to subquery of group by", 0) \
M(Bool, throw_if_no_data_to_insert, true, "Allows or forbids empty INSERTs, enabled by default (throws an error on an empty insert)", 0) \
@ -1105,7 +1105,7 @@ class IColumn;
M(Bool, insert_distributed_one_random_shard, false, "If setting is enabled, inserting into distributed table will choose a random shard to write when there is no sharding key", 0) \
\
M(Bool, exact_rows_before_limit, false, "When enabled, ClickHouse will provide exact value for rows_before_limit_at_least statistic, but with the cost that the data before limit will have to be read completely", 0) \
M(UInt64, cross_to_inner_join_rewrite, 1, "Use inner join instead of comma/cross join if there're joining expressions in the WHERE section. Values: 0 - no rewrite, 1 - apply if possible for comma/cross, 2 - force rewrite all comma joins, cross - if possible", 0) \
M(UInt64, cross_to_inner_join_rewrite, 1, "Use inner join instead of comma/cross join if there are joining expressions in the WHERE section. Values: 0 - no rewrite, 1 - apply if possible for comma/cross, 2 - force rewrite all comma joins, cross - if possible", 0) \
\
M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \
M(Bool, output_format_arrow_use_signed_indexes_for_dictionary, true, "Use signed integers for dictionary indexes in Arrow format", 0) \

View File

@ -12,6 +12,7 @@ public:
static constexpr auto family_name = "Date";
TypeIndex getTypeId() const override { return TypeIndex::Date; }
TypeIndex getColumnType() const override { return TypeIndex::UInt16; }
const char * getFamilyName() const override { return family_name; }
bool canBeUsedAsVersion() const override { return true; }

View File

@ -12,6 +12,7 @@ public:
static constexpr auto family_name = "Date32";
TypeIndex getTypeId() const override { return TypeIndex::Date32; }
TypeIndex getColumnType() const override { return TypeIndex::Int32; }
const char * getFamilyName() const override { return family_name; }
Field getDefault() const override

View File

@ -40,6 +40,7 @@ public:
const char * getFamilyName() const override { return family_name; }
String doGetName() const override;
TypeIndex getTypeId() const override { return TypeIndex::DateTime; }
TypeIndex getColumnType() const override { return TypeIndex::UInt32; }
bool canBeUsedAsVersion() const override { return true; }
bool canBeInsideNullable() const override { return true; }

View File

@ -54,6 +54,7 @@ public:
const char * getFamilyName() const override;
TypeIndex getTypeId() const override { return type_id; }
TypeIndex getColumnType() const override { return sizeof(FieldType) == 1 ? TypeIndex::Int8 : TypeIndex::Int16; }
FieldType readValue(ReadBuffer & istr) const
{

View File

@ -28,6 +28,7 @@ public:
std::string doGetName() const override { return fmt::format("Interval{}", kind.toString()); }
const char * getFamilyName() const override { return "Interval"; }
TypeIndex getTypeId() const override { return TypeIndex::Interval; }
TypeIndex getColumnType() const override { return TypeIndex::Int64; }
bool equals(const IDataType & rhs) const override;

View File

@ -86,6 +86,8 @@ public:
/// Data type id. It's used for runtime type checks.
virtual TypeIndex getTypeId() const = 0;
/// Storage type (e.g. Int64 for Interval)
virtual TypeIndex getColumnType() const { return getTypeId(); }
bool hasSubcolumn(std::string_view subcolumn_name) const;

View File

@ -48,12 +48,32 @@ using namespace GatherUtils;
/** Selection function by condition: if(cond, then, else).
* cond - UInt8
* then, else - numeric types for which there is a general type, or dates, datetimes, or strings, or arrays of these types.
* For better performance, try to use branch free code for numeric types(i.e. cond ? a : b --> !!cond * a + !cond * b), except floating point types because of Inf or NaN.
* For better performance, try to use branch free code for numeric types(i.e. cond ? a : b --> !!cond * a + !cond * b)
*/
/// Satisfied when ResultType is a built-in integral type, or a decimal type
/// (per is_decimal) that occupies at most 8 bytes.
template <typename ResultType>
concept is_native_int_or_decimal_v
= std::is_integral_v<ResultType> || (is_decimal<ResultType> && sizeof(ResultType) <= 8);
// Branch-free conditional assignment for floating point types:
//     vr = vc ? static_cast<ResultType>(va) : static_cast<ResultType>(vb)
// implemented with a bit mask instead of a branch.
//
// Usage notes:
//  - `ResultType` must be visible at the expansion site; TYPE is only used to pick
//    same-sized signed/unsigned integer types through NumberTraits::Construct.
//  - The macro declares local names (UIntType, IntType, mask, ...), so every expansion
//    must sit in its own `{ ... }` scope.
//  - The condition is normalized with `!!` so that ANY non-zero value selects `va`
//    (consistent with the `!!cond[i]` arithmetic used for integer types). Without the
//    normalization a condition such as 2 would produce a partial mask and mix the bits
//    of both operands.
//  - Bits are moved in and out of the floating point values with std::memcpy; reading the
//    integer through a `ResultType *` would violate strict aliasing.
#define BRANCHFREE_IF_FLOAT(TYPE, vc, va, vb, vr) \
    using UIntType = typename NumberTraits::Construct<false, false, sizeof(TYPE)>::Type; \
    using IntType = typename NumberTraits::Construct<true, false, sizeof(TYPE)>::Type; \
    static_assert(sizeof(UIntType) == sizeof(ResultType), "Bit mask type must have the same size as the result type"); \
    auto mask = static_cast<UIntType>(static_cast<IntType>(!!(vc)) - 1); \
    auto new_a = static_cast<ResultType>(va); \
    auto new_b = static_cast<ResultType>(vb); \
    UIntType uint_a; \
    std::memcpy(&uint_a, &new_a, sizeof(UIntType)); \
    UIntType uint_b; \
    std::memcpy(&uint_b, &new_b, sizeof(UIntType)); \
    UIntType tmp = (~mask & uint_a) | (mask & uint_b); \
    std::memcpy(&(vr), &tmp, sizeof(UIntType));
template <typename ArrayCond, typename ArrayA, typename ArrayB, typename ArrayResult, typename ResultType>
inline void fillVectorVector(const ArrayCond & cond, const ArrayA & a, const ArrayB & b, ArrayResult & res)
{
size_t size = cond.size();
bool a_is_short = a.size() < size;
bool b_is_short = b.size() < size;
@ -63,47 +83,68 @@ inline void fillVectorVector(const ArrayCond & cond, const ArrayA & a, const Arr
size_t a_index = 0, b_index = 0;
for (size_t i = 0; i < size; ++i)
{
if constexpr (std::is_integral_v<ResultType>)
{
if constexpr (is_native_int_or_decimal_v<ResultType>)
res[i] = !!cond[i] * static_cast<ResultType>(a[a_index]) + (!cond[i]) * static_cast<ResultType>(b[b_index]);
a_index += !!cond[i];
b_index += !cond[i];
else if constexpr (std::is_floating_point_v<ResultType>)
{
BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[a_index], b[b_index], res[i])
}
else
res[i] = cond[i] ? static_cast<ResultType>(a[a_index++]) : static_cast<ResultType>(b[b_index++]);
res[i] = cond[i] ? static_cast<ResultType>(a[a_index]) : static_cast<ResultType>(b[b_index]);
a_index += !!cond[i];
b_index += !cond[i];
}
}
else if (a_is_short)
{
size_t a_index = 0;
for (size_t i = 0; i < size; ++i)
if constexpr (std::is_integral_v<ResultType>)
{
{
if constexpr (is_native_int_or_decimal_v<ResultType>)
res[i] = !!cond[i] * static_cast<ResultType>(a[a_index]) + (!cond[i]) * static_cast<ResultType>(b[i]);
a_index += !!cond[i];
else if constexpr (std::is_floating_point_v<ResultType>)
{
BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[a_index], b[i], res[i])
}
else
res[i] = cond[i] ? static_cast<ResultType>(a[a_index++]) : static_cast<ResultType>(b[i]);
res[i] = cond[i] ? static_cast<ResultType>(a[a_index]) : static_cast<ResultType>(b[i]);
a_index += !!cond[i];
}
}
else if (b_is_short)
{
size_t b_index = 0;
for (size_t i = 0; i < size; ++i)
if constexpr (std::is_integral_v<ResultType>)
{
{
if constexpr (is_native_int_or_decimal_v<ResultType>)
res[i] = !!cond[i] * static_cast<ResultType>(a[i]) + (!cond[i]) * static_cast<ResultType>(b[b_index]);
b_index += !cond[i];
else if constexpr (std::is_floating_point_v<ResultType>)
{
BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[i], b[b_index], res[i])
}
else
res[i] = cond[i] ? static_cast<ResultType>(a[i]) : static_cast<ResultType>(b[b_index++]);
res[i] = cond[i] ? static_cast<ResultType>(a[i]) : static_cast<ResultType>(b[b_index]);
b_index += !cond[i];
}
}
else
{
for (size_t i = 0; i < size; ++i)
if constexpr (std::is_integral_v<ResultType>)
{
if constexpr (is_native_int_or_decimal_v<ResultType>)
res[i] = !!cond[i] * static_cast<ResultType>(a[i]) + (!cond[i]) * static_cast<ResultType>(b[i]);
else if constexpr (std::is_floating_point_v<ResultType>)
{
BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[i], b[i], res[i])
}
else
{
res[i] = cond[i] ? static_cast<ResultType>(a[i]) : static_cast<ResultType>(b[i]);
}
}
}
}
@ -116,21 +157,32 @@ inline void fillVectorConstant(const ArrayCond & cond, const ArrayA & a, B b, Ar
{
size_t a_index = 0;
for (size_t i = 0; i < size; ++i)
if constexpr (std::is_integral_v<ResultType>)
{
{
if constexpr (is_native_int_or_decimal_v<ResultType>)
res[i] = !!cond[i] * static_cast<ResultType>(a[a_index]) + (!cond[i]) * static_cast<ResultType>(b);
a_index += !!cond[i];
else if constexpr (std::is_floating_point_v<ResultType>)
{
BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[a_index], b, res[i])
}
else
res[i] = cond[i] ? static_cast<ResultType>(a[a_index++]) : static_cast<ResultType>(b);
res[i] = cond[i] ? static_cast<ResultType>(a[a_index]) : static_cast<ResultType>(b);
a_index += !!cond[i];
}
}
else
{
for (size_t i = 0; i < size; ++i)
if constexpr (std::is_integral_v<ResultType>)
{
if constexpr (is_native_int_or_decimal_v<ResultType>)
res[i] = !!cond[i] * static_cast<ResultType>(a[i]) + (!cond[i]) * static_cast<ResultType>(b);
else if constexpr (std::is_floating_point_v<ResultType>)
{
BRANCHFREE_IF_FLOAT(ResultType, cond[i], a[i], b, res[i])
}
else
res[i] = cond[i] ? static_cast<ResultType>(a[i]) : static_cast<ResultType>(b);
}
}
}
@ -143,21 +195,68 @@ inline void fillConstantVector(const ArrayCond & cond, A a, const ArrayB & b, Ar
{
size_t b_index = 0;
for (size_t i = 0; i < size; ++i)
if constexpr (std::is_integral_v<ResultType>)
{
{
if constexpr (is_native_int_or_decimal_v<ResultType>)
res[i] = !!cond[i] * static_cast<ResultType>(a) + (!cond[i]) * static_cast<ResultType>(b[b_index]);
b_index += !cond[i];
else if constexpr (std::is_floating_point_v<ResultType>)
{
BRANCHFREE_IF_FLOAT(ResultType, cond[i], a, b[b_index], res[i])
}
else
res[i] = cond[i] ? static_cast<ResultType>(a) : static_cast<ResultType>(b[b_index++]);
res[i] = cond[i] ? static_cast<ResultType>(a) : static_cast<ResultType>(b[b_index]);
b_index += !cond[i];
}
}
else
{
for (size_t i = 0; i < size; ++i)
if constexpr (std::is_integral_v<ResultType>)
{
if constexpr (is_native_int_or_decimal_v<ResultType>)
res[i] = !!cond[i] * static_cast<ResultType>(a) + (!cond[i]) * static_cast<ResultType>(b[i]);
else if constexpr (std::is_floating_point_v<ResultType>)
{
BRANCHFREE_IF_FLOAT(ResultType, cond[i], a, b[i], res[i])
}
else
res[i] = cond[i] ? static_cast<ResultType>(a) : static_cast<ResultType>(b[i]);
}
}
}
/// Writes, for every row of `cond`, either constant `a` (condition is non-zero) or constant `b`
/// (condition is zero) into `res`, converting the chosen constant to ResultType.
/// `res` is indexed directly, so it must already hold at least cond.size() elements.
template <typename ArrayCond, typename A, typename B, typename ArrayResult, typename ResultType>
inline void fillConstantConstant(const ArrayCond & cond, A a, B b, ArrayResult & res)
{
    const size_t rows = cond.size();

    if constexpr (std::is_same_v<ResultType, Int8> || is_over_big_int<ResultType>)
    {
        /// A plain `res[i] = cond[i] ? a : b` loop is not auto-vectorized for Int8 because of the
        /// special aliasing properties of that type, see
        /// https://gist.github.com/alexei-zaripov/dcc14c78819c5f1354afe8b70932007c
        /// Instead of branching, select from a two-element table indexed by the negated condition.
        /// Wide types like (U)Int128|256 or Decimal128/256 also benefit from this form.
        alignas(64) const ResultType choices[2] = {static_cast<ResultType>(a), static_cast<ResultType>(b)};
        for (size_t row = 0; row < rows; ++row)
            res[row] = choices[!cond[row]];
    }
    else if constexpr (std::is_same_v<ResultType, Decimal32> || std::is_same_v<ResultType, Decimal64>)
    {
        /// Convert both constants once, outside the loop, to achieve auto-vectorization.
        ResultType then_value = static_cast<ResultType>(a);
        ResultType else_value = static_cast<ResultType>(b);
        for (size_t row = 0; row < rows; ++row)
            res[row] = cond[row] ? then_value : else_value;
    }
    else
    {
        for (size_t row = 0; row < rows; ++row)
            res[row] = cond[row] ? static_cast<ResultType>(a) : static_cast<ResultType>(b);
    }
}
@ -203,8 +302,7 @@ struct NumIfImpl
auto col_res = ColVecResult::create(size);
ArrayResult & res = col_res->getData();
for (size_t i = 0; i < size; ++i)
res[i] = cond[i] ? static_cast<ResultType>(a) : static_cast<ResultType>(b);
fillConstantConstant<ArrayCond, A, B, ArrayResult, ResultType>(cond, a, b, res);
return col_res;
}
};
@ -253,8 +351,7 @@ struct NumIfImpl<Decimal<A>, Decimal<B>, Decimal<R>>
auto col_res = ColVecResult::create(size, scale);
ArrayResult & res = col_res->getData();
for (size_t i = 0; i < size; ++i)
res[i] = cond[i] ? static_cast<ResultType>(a) : static_cast<ResultType>(b);
fillConstantConstant<ArrayCond, A, B, ArrayResult, ResultType>(cond, a, b, res);
return col_res;
}
};
@ -1227,17 +1324,12 @@ public:
if (cond_const_col)
{
if (arg_then.type->equals(*arg_else.type))
{
return cond_const_col->getValue<UInt8>()
? arg_then.column
: arg_else.column;
}
UInt8 value = cond_const_col->getValue<UInt8>();
const ColumnWithTypeAndName & arg = value ? arg_then : arg_else;
if (arg.type->equals(*result_type))
return arg.column;
else
{
materialized_cond_col = cond_const_col->convertToFullColumn();
cond_col = typeid_cast<const ColumnUInt8 *>(&*materialized_cond_col);
}
return castColumn(arg, result_type);
}
if (!cond_col)
@ -1274,6 +1366,8 @@ public:
TypeIndex left_id = left_type->getTypeId();
TypeIndex right_id = right_type->getTypeId();
/// TODO optimize for map type
/// TODO optimize for nullable type
if (!(callOnBasicTypes<true, true, true, false>(left_id, right_id, call)
|| (res = executeTyped<UUID, UUID>(cond_col, arguments, result_type, input_rows_count))
|| (res = executeString(cond_col, arguments, result_type))

View File

@ -1,12 +1,15 @@
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Common/iota.h>
#include <Functions/FunctionHelpers.h>
#include <Common/HashTable/HashMap.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/UTF8Helpers.h>
#include <Common/HashTable/HashMap.h>
#include <Common/iota.h>
#include <numeric>
@ -298,7 +301,14 @@ public:
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of third argument of function {}",
arguments[2]->getName(), getName());
return std::make_shared<DataTypeString>();
if (isString(arguments[0]))
return std::make_shared<DataTypeString>();
else
{
const auto * ptr = checkAndGetDataType<DataTypeFixedString>(arguments[0].get());
chassert(ptr);
return std::make_shared<DataTypeFixedString>(ptr->getN());
}
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override

View File

@ -212,7 +212,7 @@ REGISTER_FUNCTION(VariantElement)
.description = R"(
Extracts a column with specified type from a `Variant` column.
)",
.syntax{"tupleElement(variant, type_name, [, default_value])"},
.syntax{"variantElement(variant, type_name, [, default_value])"},
.arguments{{
{"variant", "Variant column"},
{"type_name", "The name of the variant type to extract"},

View File

@ -0,0 +1,111 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeVariant.h>
#include <DataTypes/DataTypeEnum.h>
#include <Columns/ColumnVariant.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnsNumber.h>
#include <Common/assert_cast.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
namespace
{
/// Return enum with type name for each row in Variant column.
///
/// The result type is an Enum8 with one entry per variant of the argument type (entry name is
/// the variant's type name, entry value is its index in getVariants()) plus an extra entry
/// named "None" whose value is ColumnVariant::NULL_DISCRIMINATOR.
class FunctionVariantType : public IFunction
{
public:
    static constexpr auto name = "variantType";
    static constexpr auto enum_name_for_null = "None";

    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionVariantType>(); }

    String getName() const override { return name; }
    size_t getNumberOfArguments() const override { return 1; }

    bool useDefaultImplementationForConstants() const override { return true; }
    bool useDefaultImplementationForNulls() const override { return false; }
    bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }

    /// Builds the Enum8 result type from the variants of the single Variant argument.
    /// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH unless exactly one argument is passed and
    /// ILLEGAL_TYPE_OF_ARGUMENT if that argument is not a Variant.
    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
    {
        if (arguments.size() != 1)
            throw Exception(
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
                "Number of arguments for function {} doesn't match: passed {}, should be 1",
                /// Report the actual argument count (not the boolean `arguments.empty()`).
                getName(), arguments.size());

        const DataTypeVariant * variant_type = checkAndGetDataType<DataTypeVariant>(arguments[0].type.get());
        if (!variant_type)
            throw Exception(
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "First argument for function {} must be Variant, got {} instead",
                getName(), arguments[0].type->getName());

        const auto & variants = variant_type->getVariants();

        /// One enum entry per variant, plus one for NULL.
        std::vector<std::pair<String, Int8>> enum_values;
        enum_values.reserve(variants.size() + 1);
        for (ColumnVariant::Discriminator i = 0; i != variants.size(); ++i)
            enum_values.emplace_back(variants[i]->getName(), i);
        enum_values.emplace_back(enum_name_for_null, ColumnVariant::NULL_DISCRIMINATOR);

        return std::make_shared<DataTypeEnum<Int8>>(enum_values);
    }

    /// Emits, for each of the input_rows_count rows, the value returned by
    /// ColumnVariant::globalDiscriminatorAt for that row as the enum value.
    /// Throws ILLEGAL_TYPE_OF_ARGUMENT if the argument column is not a ColumnVariant.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
    {
        const ColumnVariant * variant_column = checkAndGetColumn<ColumnVariant>(arguments[0].column.get());
        if (!variant_column)
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                            "First argument for function {} must be Variant, got {} instead",
                            getName(), arguments[0].type->getName());

        auto res = result_type->createColumn();
        /// NOTE(review): relies on the column created for result_type being a ColumnInt8
        /// (the type built in getReturnTypeImpl is DataTypeEnum<Int8>) - confirm.
        auto & res_data = typeid_cast<ColumnInt8 *>(res.get())->getData();
        res_data.reserve(input_rows_count);
        for (size_t i = 0; i != input_rows_count; ++i)
            res_data.push_back(variant_column->globalDiscriminatorAt(i));

        return res;
    }
};
}
/// Registers FunctionVariantType in the function factory together with its user-facing
/// documentation (description, syntax, arguments, an example and the category).
REGISTER_FUNCTION(VariantType)
{
factory.registerFunction<FunctionVariantType>(FunctionDocumentation{
.description = R"(
Returns the variant type name for each row of `Variant` column. If row contains NULL, it returns 'None' for it.
)",
.syntax = {"variantType(variant)"},
.arguments = {{"variant", "Variant column"}},
.examples = {{{
"Example",
R"(
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
SELECT variantType(v) FROM test;)",
R"(
variantType(v)
None
UInt64
String
Array(UInt64)
)"}}},
.categories{"Variant"},
});
}
}

View File

@ -1,11 +1,12 @@
#pragma once
#include <IO/BufferWithOwnMemory.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadBufferWrapperBase.h>
namespace DB
{
class CompressedReadBufferWrapper : public BufferWithOwnMemory<ReadBuffer>
class CompressedReadBufferWrapper : public BufferWithOwnMemory<ReadBuffer>, public ReadBufferWrapperBase
{
public:
CompressedReadBufferWrapper(
@ -16,7 +17,7 @@ public:
: BufferWithOwnMemory<ReadBuffer>(buf_size, existing_memory, alignment)
, in(std::move(in_)) {}
const ReadBuffer & getWrappedReadBuffer() const { return *in; }
const ReadBuffer & getWrappedReadBuffer() const override { return *in; }
ReadBuffer & getWrappedReadBuffer() { return *in; }
void prefetch(Priority priority) override { in->prefetch(priority); }

View File

@ -1,4 +1,5 @@
#include <IO/ReadBuffer.h>
#include <IO/ReadBufferWrapperBase.h>
namespace DB
@ -7,7 +8,7 @@ namespace DB
namespace
{
template <typename CustomData>
class ReadBufferWrapper : public ReadBuffer
class ReadBufferWrapper : public ReadBuffer, public ReadBufferWrapperBase
{
public:
ReadBufferWrapper(ReadBuffer & in_, CustomData && custom_data_)
@ -15,6 +16,8 @@ namespace
{
}
const ReadBuffer & getWrappedReadBuffer() const override { return in; }
private:
ReadBuffer & in;
CustomData custom_data;

View File

@ -0,0 +1,15 @@
#pragma once
#include <IO/ReadBuffer.h>
namespace DB
{
/// Common interface of read buffers that are built on top of another ReadBuffer.
/// It lets callers reach the underlying buffer without knowing the concrete wrapper type.
class ReadBufferWrapperBase
{
public:
    /// Virtual so that implementations can be destroyed through a base pointer.
    virtual ~ReadBufferWrapperBase() = default;

    /// Returns the buffer this object wraps.
    virtual const ReadBuffer & getWrappedReadBuffer() const = 0;
};
}

View File

@ -1,5 +1,5 @@
#include <IO/WithFileName.h>
#include <IO/CompressedReadBufferWrapper.h>
#include <IO/ReadBufferWrapperBase.h>
#include <IO/ParallelReadBuffer.h>
#include <IO/PeekableReadBuffer.h>
@ -16,10 +16,10 @@ static String getFileName(const T & entry)
String getFileNameFromReadBuffer(const ReadBuffer & in)
{
if (const auto * compressed = dynamic_cast<const CompressedReadBufferWrapper *>(&in))
return getFileName(compressed->getWrappedReadBuffer());
if (const auto * wrapper = dynamic_cast<const ReadBufferWrapperBase *>(&in))
return getFileNameFromReadBuffer(wrapper->getWrappedReadBuffer());
else if (const auto * parallel = dynamic_cast<const ParallelReadBuffer *>(&in))
return getFileName(parallel->getReadBuffer());
return getFileNameFromReadBuffer(parallel->getReadBuffer());
else if (const auto * peekable = dynamic_cast<const PeekableReadBuffer *>(&in))
return getFileNameFromReadBuffer(peekable->getSubBuffer());
else

View File

@ -282,6 +282,13 @@ const ActionsDAG::Node & ActionsDAG::addFunctionImpl(
{
size_t num_rows = arguments.empty() ? 0 : arguments.front().column->size();
column = node.function->execute(arguments, node.result_type, num_rows, true);
if (column->getDataType() != node.result_type->getColumnType())
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Unexpected return type from {}. Expected {}. Got {}",
node.function->getName(),
node.result_type->getColumnType(),
column->getDataType());
}
else
{

View File

@ -1414,10 +1414,7 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool
set_key = right_in_operand->getTreeHash(/*ignore_aliases=*/ true);
if (auto set = data.prepared_sets->findSubquery(set_key))
{
set->markAsINSubquery();
return set;
}
FutureSetFromSubqueryPtr external_table_set;
@ -1464,7 +1461,7 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool
}
return data.prepared_sets->addFromSubquery(
set_key, std::move(source), nullptr, std::move(external_table_set), data.getContext()->getSettingsRef(), /*in_subquery=*/true);
set_key, std::move(source), nullptr, std::move(external_table_set), data.getContext()->getSettingsRef());
}
else
{

View File

@ -611,6 +611,13 @@ static void executeAction(const ExpressionActions::Action & action, ExecutionCon
ProfileEvents::increment(ProfileEvents::CompiledFunctionExecute);
res_column.column = action.node->function->execute(arguments, res_column.type, num_rows, dry_run);
if (res_column.column->getDataType() != res_column.type->getColumnType())
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Unexpected return type from {}. Expected {}. Got {}",
action.node->function->getName(),
res_column.type->getColumnType(),
res_column.column->getDataType());
}
break;
}

View File

@ -32,6 +32,7 @@ namespace ErrorCodes
{
extern const int WRONG_GLOBAL_SUBQUERY;
extern const int LOGICAL_ERROR;
extern const int SUPPORT_IS_DISABLED;
}
class GlobalSubqueriesMatcher
@ -200,23 +201,33 @@ public:
}
private:
static bool shouldBeExecutedGlobally(const Data & data)
{
const Settings & settings = data.getContext()->getSettingsRef();
/// For parallel replicas we reinterpret JOIN as GLOBAL JOIN as a way to broadcast data
const bool enable_parallel_processing_of_joins = data.getContext()->canUseParallelReplicasOnInitiator();
return settings.prefer_global_in_and_join || enable_parallel_processing_of_joins;
}
/// GLOBAL IN
static void visit(ASTFunction & func, ASTPtr &, Data & data)
{
if ((shouldBeExecutedGlobally(data)
const Settings & settings = data.getContext()->getSettingsRef();
const bool prefer_global = settings.prefer_global_in_and_join;
const bool enable_parallel_processing_of_joins = data.getContext()->canUseParallelReplicasOnInitiator();
if (((prefer_global || enable_parallel_processing_of_joins)
&& (func.name == "in" || func.name == "notIn" || func.name == "nullIn" || func.name == "notNullIn"))
|| func.name == "globalIn" || func.name == "globalNotIn" || func.name == "globalNullIn" || func.name == "globalNotNullIn")
{
ASTPtr & ast = func.arguments->children[1];
if (enable_parallel_processing_of_joins)
{
/// We don't enable parallel replicas for IN (subquery)
if (ast->as<ASTSubquery>())
{
if (settings.allow_experimental_parallel_reading_from_replicas == 1)
{
LOG_DEBUG(getLogger("GlobalSubqueriesMatcher"), "IN with subquery is not supported with parallel replicas");
data.getContext()->getQueryContext()->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0));
return;
}
else if (settings.allow_experimental_parallel_reading_from_replicas == 2)
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "IN with subquery is not supported with parallel replicas");
}
}
/// Literal or function can use regular IN.
/// NOTE: We don't support passing table functions to IN.
@ -241,9 +252,39 @@ private:
/// GLOBAL JOIN
static void visit(ASTTablesInSelectQueryElement & table_elem, ASTPtr &, Data & data)
{
const Settings & settings = data.getContext()->getSettingsRef();
const bool prefer_global = settings.prefer_global_in_and_join;
const bool enable_parallel_processing_of_joins = data.getContext()->canUseParallelReplicasOnInitiator();
if (table_elem.table_join
&& (table_elem.table_join->as<ASTTableJoin &>().locality == JoinLocality::Global || shouldBeExecutedGlobally(data)))
&& (table_elem.table_join->as<ASTTableJoin &>().locality == JoinLocality::Global || prefer_global
|| enable_parallel_processing_of_joins))
{
if (enable_parallel_processing_of_joins)
{
/// For parallel replicas we currently only support JOIN with subqueries
/// Note that tableA join tableB is previously converted into tableA JOIN (Select * FROM tableB) so that's ok
/// We don't support WITH cte as (subquery) Select table JOIN cte because we don't do conversion in AST
bool is_subquery = false;
if (const auto * ast_table_expr = table_elem.table_expression->as<ASTTableExpression>())
is_subquery = ast_table_expr->subquery->as<ASTSubquery>() != nullptr
&& ast_table_expr->subquery->as<ASTSubquery>()->cte_name.empty();
else if (table_elem.table_expression->as<ASTSubquery>())
is_subquery = true;
if (!is_subquery)
{
if (settings.allow_experimental_parallel_reading_from_replicas == 1)
{
LOG_DEBUG(getLogger("GlobalSubqueriesMatcher"), "JOIN with parallel replicas is only supported with subqueries");
data.getContext()->getQueryContext()->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0));
return;
}
else if (settings.allow_experimental_parallel_reading_from_replicas == 2)
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JOIN with parallel replicas is only supported with subqueries");
}
}
Names required_columns;
/// Fill required columns for GLOBAL JOIN.

View File

@ -864,38 +864,7 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis()
ASTSelectQuery & query = getSelectQuery();
/// While only_analyze we don't know anything about parts, so any decision about how many parallel replicas to use would be wrong
if (!storage || !context->canUseParallelReplicasOnInitiator())
return false;
/// check if IN operator with subquery is present in the query
/// if so, disable parallel replicas
if (query_analyzer->getPreparedSets()->hasSubqueries())
{
bool in_subqueries = false;
const auto & sets = query_analyzer->getPreparedSets();
const auto subqueries = sets->getSubqueries();
for (const auto & subquery : subqueries)
{
if (subquery->isINSubquery())
{
in_subqueries = true;
break;
}
}
if (in_subqueries)
{
if (settings.allow_experimental_parallel_reading_from_replicas == 2)
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "IN with subquery is not supported with parallel replicas");
context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0));
context->setSetting("max_parallel_replicas", UInt64{0});
LOG_DEBUG(log, "Disabling parallel replicas to execute a query with IN with subquery");
return true;
}
}
if (options.only_analyze)
if (!storage || options.only_analyze || !context->canUseParallelReplicasOnInitiator())
return false;
if (getTrivialCount(0).has_value())

View File

@ -342,6 +342,11 @@ bool MutationsInterpreter::Source::hasProjection(const String & name) const
return part && part->hasProjection(name);
}
/// Reports whether the source part has a broken projection called `name`.
/// Without an attached part the answer is always false; otherwise the question
/// is forwarded to the part's own hasBrokenProjection().
bool MutationsInterpreter::Source::hasBrokenProjection(const String & name) const
{
    if (!part)
        return false;

    return part->hasBrokenProjection(name);
}
bool MutationsInterpreter::Source::isCompactPart() const
{
return part && part->getType() == MergeTreeDataPartType::Compact;
@ -807,7 +812,7 @@ void MutationsInterpreter::prepare(bool dry_run)
{
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION);
const auto & projection = projections_desc.get(command.projection_name);
if (!source.hasProjection(projection.name))
if (!source.hasProjection(projection.name) || source.hasBrokenProjection(projection.name))
{
for (const auto & column : projection.required_columns)
dependencies.emplace(column, ColumnDependency::PROJECTION);
@ -994,6 +999,13 @@ void MutationsInterpreter::prepare(bool dry_run)
if (!source.hasProjection(projection.name))
continue;
/// Always rebuild broken projections.
if (source.hasBrokenProjection(projection.name))
{
materialized_projections.insert(projection.name);
continue;
}
if (need_rebuild_projections)
{
materialized_projections.insert(projection.name);

View File

@ -126,6 +126,7 @@ public:
bool materializeTTLRecalculateOnly() const;
bool hasSecondaryIndex(const String & name) const;
bool hasProjection(const String & name) const;
bool hasBrokenProjection(const String & name) const;
bool isCompactPart() const;
void read(

View File

@ -98,12 +98,8 @@ FutureSetFromSubquery::FutureSetFromSubquery(
std::unique_ptr<QueryPlan> source_,
StoragePtr external_table_,
std::shared_ptr<FutureSetFromSubquery> external_table_set_,
const Settings & settings,
bool in_subquery_)
: external_table(std::move(external_table_))
, external_table_set(std::move(external_table_set_))
, source(std::move(source_))
, in_subquery(in_subquery_)
const Settings & settings)
: external_table(std::move(external_table_)), external_table_set(std::move(external_table_set_)), source(std::move(source_))
{
set_and_key = std::make_shared<SetAndKey>();
set_and_key->key = std::move(key);
@ -281,16 +277,10 @@ FutureSetFromSubqueryPtr PreparedSets::addFromSubquery(
std::unique_ptr<QueryPlan> source,
StoragePtr external_table,
FutureSetFromSubqueryPtr external_table_set,
const Settings & settings,
bool in_subquery)
const Settings & settings)
{
auto from_subquery = std::make_shared<FutureSetFromSubquery>(
toString(key, {}),
std::move(source),
std::move(external_table),
std::move(external_table_set),
settings,
in_subquery);
toString(key, {}), std::move(source), std::move(external_table), std::move(external_table_set), settings);
auto [it, inserted] = sets_from_subqueries.emplace(key, from_subquery);
@ -340,15 +330,6 @@ std::shared_ptr<FutureSetFromSubquery> PreparedSets::findSubquery(const Hash & k
return it->second;
}
/// Flags the subquery-backed set registered under `key` as belonging to an IN operator.
/// Does nothing when no such set is registered.
void PreparedSets::markAsINSubquery(const Hash & key)
{
    if (auto found = sets_from_subqueries.find(key); found != sets_from_subqueries.end())
        found->second->markAsINSubquery();
}
std::shared_ptr<FutureSetFromStorage> PreparedSets::findStorage(const Hash & key) const
{
auto it = sets_from_storage.find(key);

View File

@ -101,8 +101,7 @@ public:
std::unique_ptr<QueryPlan> source_,
StoragePtr external_table_,
std::shared_ptr<FutureSetFromSubquery> external_table_set_,
const Settings & settings,
bool in_subquery_);
const Settings & settings);
FutureSetFromSubquery(
String key,
@ -118,8 +117,6 @@ public:
QueryTreeNodePtr detachQueryTree() { return std::move(query_tree); }
void setQueryPlan(std::unique_ptr<QueryPlan> source_);
void markAsINSubquery() { in_subquery = true; }
bool isINSubquery() const { return in_subquery; }
private:
SetAndKeyPtr set_and_key;
@ -128,11 +125,6 @@ private:
std::unique_ptr<QueryPlan> source;
QueryTreeNodePtr query_tree;
bool in_subquery = false; // subquery used in IN operator
// the flag can be removed after enabling new analyzer and removing interpreter
// or after enabling support IN operator with subqueries in parallel replicas
// Note: it's necessary with interpreter since prepared sets used also for GLOBAL JOINs,
// with new analyzer it's not a case
};
using FutureSetFromSubqueryPtr = std::shared_ptr<FutureSetFromSubquery>;
@ -160,8 +152,7 @@ public:
std::unique_ptr<QueryPlan> source,
StoragePtr external_table,
FutureSetFromSubqueryPtr external_table_set,
const Settings & settings,
bool in_subquery = false);
const Settings & settings);
FutureSetFromSubqueryPtr addFromSubquery(
const Hash & key,
@ -171,7 +162,6 @@ public:
FutureSetFromTuplePtr findTuple(const Hash & key, const DataTypes & types) const;
FutureSetFromStoragePtr findStorage(const Hash & key) const;
FutureSetFromSubqueryPtr findSubquery(const Hash & key) const;
void markAsINSubquery(const Hash & key);
using Subqueries = std::vector<FutureSetFromSubqueryPtr>;
Subqueries getSubqueries() const;

View File

@ -349,10 +349,9 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So
try
{
auto auth_result = global_context->getAccessControl().authenticate(credentials_, address.host());
auto auth_result = global_context->getAccessControl().authenticate(credentials_, address.host(), getClientInfo().getLastForwardedFor());
user_id = auth_result.user_id;
settings_from_auth_server = auth_result.settings;
LOG_DEBUG(log, "{} Authenticated with global context as user {}",
toString(auth_id), toString(*user_id));
}

View File

@ -159,7 +159,6 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log
if (config.getBool("logger.use_syslog", false))
{
//const std::string & cmd_name = commandName();
auto syslog_level = Poco::Logger::parseLevel(config.getString("logger.syslog_level", log_level_string));
if (syslog_level > max_log_level)
{
@ -228,22 +227,24 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log
split->open();
logger.close();
logger.setChannel(split);
// Global logging level (it can be overridden for specific loggers).
logger.setChannel(split);
logger.setLevel(max_log_level);
// Set level to all already created loggers
std::vector<std::string> names;
//logger_root = Logger::root();
logger.root().names(names);
for (const auto & name : names)
logger.root().get(name).setLevel(max_log_level);
// Attach to the root logger.
// Global logging level and channel (it can be overridden for specific loggers).
logger.root().setLevel(max_log_level);
logger.root().setChannel(logger.getChannel());
// Set level and channel to all already created loggers
std::vector<std::string> names;
logger.names(names);
for (const auto & name : names)
{
logger.get(name).setLevel(max_log_level);
logger.get(name).setChannel(split);
}
// Explicitly specified log levels for specific loggers.
{
Poco::Util::AbstractConfiguration::Keys loggers_level;

View File

@ -116,6 +116,18 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
if (!type_name_suffix.empty())
type_name = type_name_upper + " " + type_name_suffix;
/// skip trailing comma in types, e.g. Tuple(Int, String,)
if (pos->type == TokenType::Comma)
{
Expected test_expected;
auto test_pos = pos;
++test_pos;
if (ParserToken(TokenType::ClosingRoundBracket).ignore(test_pos, test_expected))
{ // the end of the type definition was reached and there was a trailing comma
++pos;
}
}
auto function_node = std::make_shared<ASTFunction>();
function_node->name = type_name;
function_node->no_empty_args = true;
@ -133,6 +145,9 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
if (!args_parser.parse(pos, expr_list_args, expected))
return false;
if (pos->type == TokenType::Comma)
// ignore trailing comma inside Nested structures like Tuple(Int, Tuple(Int, String),)
++pos;
if (pos->type != TokenType::ClosingRoundBracket)
return false;
++pos;

View File

@ -223,7 +223,7 @@ bool analyzeProjectionCandidate(
{
const auto & created_projections = part_with_ranges.data_part->getProjectionParts();
auto it = created_projections.find(candidate.projection->name);
if (it != created_projections.end())
if (it != created_projections.end() && !it->second->is_broken)
{
projection_parts.push_back(it->second);
}

View File

@ -12,7 +12,16 @@ void AsynchronousMessageProducer::start(const ContextPtr & context)
{
LOG_TEST(log, "Executing startup");
initialize();
try
{
initialize();
}
catch (...)
{
finished = true;
throw;
}
producing_task = context->getSchedulePool().createTask(getProducingTaskName(), [this]
{
LOG_TEST(log, "Starting producing task loop");

View File

@ -335,7 +335,9 @@ void DataPartStorageOnDiskBase::backup(
const ReadSettings & read_settings,
bool make_temporary_hard_links,
BackupEntries & backup_entries,
TemporaryFilesOnDisks * temp_dirs) const
TemporaryFilesOnDisks * temp_dirs,
bool is_projection_part,
bool allow_backup_broken_projection) const
{
fs::path part_path_on_disk = fs::path{root_path} / part_dir;
fs::path part_path_in_backup = fs::path{path_in_backup} / part_dir;
@ -377,7 +379,7 @@ void DataPartStorageOnDiskBase::backup(
bool copy_encrypted = !backup_settings.decrypt_files_from_encrypted_disks;
for (const auto & filepath : files_to_backup)
auto backup_file = [&](const String & filepath)
{
auto filepath_on_disk = part_path_on_disk / filepath;
auto filepath_in_backup = part_path_in_backup / filepath;
@ -385,8 +387,10 @@ void DataPartStorageOnDiskBase::backup(
if (files_without_checksums.contains(filepath))
{
backup_entries.emplace_back(filepath_in_backup, std::make_unique<BackupEntryFromSmallFile>(disk, filepath_on_disk, read_settings, copy_encrypted));
continue;
return;
}
else if (is_projection_part && allow_backup_broken_projection && !disk->exists(filepath_on_disk))
return;
if (make_temporary_hard_links)
{
@ -411,6 +415,31 @@ void DataPartStorageOnDiskBase::backup(
backup_entry = wrapBackupEntryWith(std::move(backup_entry), temp_dir_owner);
backup_entries.emplace_back(filepath_in_backup, std::move(backup_entry));
};
auto * log = &Poco::Logger::get("DataPartStorageOnDiskBase::backup");
for (const auto & filepath : files_to_backup)
{
if (is_projection_part && allow_backup_broken_projection)
{
try
{
backup_file(filepath);
}
catch (Exception & e)
{
if (e.code() != ErrorCodes::FILE_DOESNT_EXIST)
throw;
LOG_ERROR(log, "Cannot backup file {} of projection part {}. Will try to ignore it", filepath, part_dir);
continue;
}
}
else
{
backup_file(filepath);
}
}
}

View File

@ -58,7 +58,9 @@ public:
const ReadSettings & read_settings,
bool make_temporary_hard_links,
BackupEntries & backup_entries,
TemporaryFilesOnDisks * temp_dirs) const override;
TemporaryFilesOnDisks * temp_dirs,
bool is_projection_part,
bool allow_backup_broken_projection) const override;
MutableDataPartStoragePtr freeze(
const std::string & to,

View File

@ -223,7 +223,9 @@ public:
const ReadSettings & read_settings,
bool make_temporary_hard_links,
BackupEntries & backup_entries,
TemporaryFilesOnDisks * temp_dirs) const = 0;
TemporaryFilesOnDisks * temp_dirs,
bool is_projection_part,
bool allow_backup_broken_projection) const = 0;
/// Creates hardlinks into 'to/dir_path' for every file in data part.
/// Callback is called after hardlinks are created, but before 'delete-on-destroy.txt' marker is removed.

View File

@ -706,13 +706,14 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks
loadIndex(); /// Must be called after loadIndexGranularity as it uses the value of `index_granularity`
loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`.
loadPartitionAndMinMaxIndex();
bool has_broken_projections = false;
if (!parent_part)
{
loadTTLInfos();
loadProjections(require_columns_checksums, check_consistency, false /* if_not_loaded */);
loadProjections(require_columns_checksums, check_consistency, has_broken_projections, false /* if_not_loaded */);
}
if (check_consistency)
if (check_consistency && !has_broken_projections)
checkConsistency(require_columns_checksums);
loadDefaultCompressionCodec();
@ -774,7 +775,7 @@ void IMergeTreeDataPart::addProjectionPart(
projection_parts[projection_name] = std::move(projection_part);
}
void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool check_consistency, bool if_not_loaded)
void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded)
{
auto metadata_snapshot = storage.getInMemoryMetadataPtr();
for (const auto & projection : metadata_snapshot->projections)
@ -791,10 +792,34 @@ void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool ch
else
{
auto part = getProjectionPartBuilder(projection.name).withPartFormatFromDisk().build();
part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency);
try
{
part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency);
}
catch (...)
{
if (isRetryableException(std::current_exception()))
throw;
auto message = getCurrentExceptionMessage(true);
LOG_ERROR(&Poco::Logger::get("IMergeTreeDataPart"),
"Cannot load projection {}, will consider it broken. Reason: {}", projection.name, message);
has_broken_projection = true;
part->setBrokenReason(message, getCurrentExceptionCode());
}
addProjectionPart(projection.name, std::move(part));
}
}
else if (checksums.has(path))
{
auto part = getProjectionPartBuilder(projection.name).withPartFormatFromDisk().build();
part->setBrokenReason("Projection directory " + path + " does not exist while loading projections", ErrorCodes::NO_FILE_IN_DATA_PART);
addProjectionPart(projection.name, std::move(part));
has_broken_projection = true;
}
}
}
@ -1189,7 +1214,8 @@ void IMergeTreeDataPart::loadChecksums(bool require)
/// Check the data while we are at it.
LOG_WARNING(storage.log, "Checksums for part {} not found. Will calculate them from data on disk.", name);
checksums = checkDataPart(shared_from_this(), false);
bool noop;
checksums = checkDataPart(shared_from_this(), false, noop, /* is_cancelled */[]{ return false; }, /* throw_on_broken_projection */false);
writeChecksums(checksums, {});
bytes_on_disk = checksums.getTotalSizeOnDisk();
@ -2196,6 +2222,32 @@ std::optional<String> IMergeTreeDataPart::getStreamNameForColumn(
return getStreamNameOrHash(stream_name, extension, storage_);
}
/// Records a broken reason (`message` and error `code`) on the projection part
/// registered under `projection_name`.
/// Throws LOGICAL_ERROR if this part has no projection part with that name.
void IMergeTreeDataPart::markProjectionPartAsBroken(const String & projection_name, const String & message, int code) const
{
    const auto found = projection_parts.find(projection_name);
    if (found != projection_parts.end())
    {
        found->second->setBrokenReason(message, code);
        return;
    }

    throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no projection part '{}'", projection_name);
}
/// Returns true only if a projection part named `projection_name` is registered
/// in this part and its `is_broken` flag is set. An unknown name yields false.
bool IMergeTreeDataPart::hasBrokenProjection(const String & projection_name) const
{
    const auto found = projection_parts.find(projection_name);
    return found != projection_parts.end() && found->second->is_broken.load();
}
/// Marks this part as broken and remembers why.
///
/// Only the first call has an effect: once `is_broken` is set, later calls return
/// without touching the stored reason. Writers are serialized by `broken_reason_mutex`.
///
/// @param message  Human-readable description of the failure; stored in `exception`.
/// @param code     Error code of the failure; stored in `exception_code`.
void IMergeTreeDataPart::setBrokenReason(const String & message, int code) const
{
    std::lock_guard lock(broken_reason_mutex);
    if (is_broken)
        return;

    /// Fill in the reason first and publish the atomic flag last. `is_broken` is read
    /// without the mutex in other places, so a reader that observes `true` must
    /// already be able to see a fully written `exception` / `exception_code`.
    exception = message;
    exception_code = code;
    is_broken = true;
}
bool isCompactPart(const MergeTreeDataPartPtr & data_part)
{
return (data_part && data_part->getType() == MergeTreeDataPartType::Compact);

View File

@ -261,6 +261,12 @@ public:
/// Frozen by ALTER TABLE ... FREEZE ... It is used for information purposes in system.parts table.
mutable std::atomic<bool> is_frozen {false};
/// If it is a projection part, it can be broken sometimes.
mutable std::atomic<bool> is_broken {false};
mutable std::string exception;
mutable int exception_code = 0;
mutable std::mutex broken_reason_mutex;
/// Indicates that the part was marked Outdated by PartCheckThread because the part was not committed to ZooKeeper
mutable bool is_unexpected_local_part = false;
@ -424,9 +430,16 @@ public:
void addProjectionPart(const String & projection_name, std::shared_ptr<IMergeTreeDataPart> && projection_part);
void markProjectionPartAsBroken(const String & projection_name, const String & message, int code) const;
bool hasProjection(const String & projection_name) const { return projection_parts.contains(projection_name); }
void loadProjections(bool require_columns_checksums, bool check_consistency, bool if_not_loaded = false);
bool hasBrokenProjection(const String & projection_name) const;
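/// Loads projection parts. Sets `has_broken_projection` to true if any projection could not be loaded and was marked as broken (the flag is never reset to false here, so the caller must initialize it).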
void loadProjections(bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded = false);
void setBrokenReason(const String & message, int code) const;
/// Return set of metadata file names without checksums. For example,
/// columns.txt or checksums.txt itself.
@ -580,7 +593,7 @@ protected:
const IMergeTreeDataPart * parent_part;
String parent_part_name;
std::map<String, std::shared_ptr<IMergeTreeDataPart>> projection_parts;
mutable std::map<String, std::shared_ptr<IMergeTreeDataPart>> projection_parts;
mutable PartMetadataManagerPtr metadata_manager;

View File

@ -728,8 +728,9 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c
MergeTreeData::DataPartsVector projection_parts;
for (const auto & part : global_ctx->future_part->parts)
{
auto it = part->getProjectionParts().find(projection.name);
if (it != part->getProjectionParts().end())
auto actual_projection_parts = part->getProjectionParts();
auto it = actual_projection_parts.find(projection.name);
if (it != actual_projection_parts.end() && !it->second->is_broken)
projection_parts.push_back(it->second);
}
if (projection_parts.size() < global_ctx->future_part->parts.size())

View File

@ -5351,7 +5351,7 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts(
if (hold_table_lock && !table_lock)
table_lock = lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout);
if (backup_settings.check_parts)
if (backup_settings.check_projection_parts)
part->checkConsistencyWithProjections(/* require_part_metadata= */ true);
BackupEntries backup_entries_from_part;
@ -5363,7 +5363,8 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts(
read_settings,
make_temporary_hard_links,
backup_entries_from_part,
&temp_dirs);
&temp_dirs,
false, false);
auto projection_parts = part->getProjectionParts();
for (const auto & [projection_name, projection_part] : projection_parts)
@ -5376,7 +5377,9 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts(
read_settings,
make_temporary_hard_links,
backup_entries_from_part,
&temp_dirs);
&temp_dirs,
projection_part->is_broken,
backup_settings.allow_backup_broken_projections);
}
if (hold_storage_and_part_ptrs)
@ -7747,21 +7750,39 @@ MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr &
bool MergeTreeData::partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right, String & out_reason)
{
if (left->getProjectionParts().size() != right->getProjectionParts().size())
auto remove_broken_parts_from_consideration = [](auto & parts)
{
std::set<String> broken_projection_parts;
for (const auto & [name, part] : parts)
{
if (part->is_broken)
broken_projection_parts.emplace(name);
}
for (const auto & name : broken_projection_parts)
parts.erase(name);
};
auto left_projection_parts = left->getProjectionParts();
auto right_projection_parts = right->getProjectionParts();
remove_broken_parts_from_consideration(left_projection_parts);
remove_broken_parts_from_consideration(right_projection_parts);
if (left_projection_parts.size() != right_projection_parts.size())
{
out_reason = fmt::format(
"Parts have different number of projections: {} in part '{}' and {} in part '{}'",
left->getProjectionParts().size(),
left_projection_parts.size(),
left->name,
right->getProjectionParts().size(),
right_projection_parts.size(),
right->name
);
return false;
}
for (const auto & [name, _] : left->getProjectionParts())
for (const auto & [name, _] : left_projection_parts)
{
if (!right->hasProjection(name))
if (!right_projection_parts.contains(name))
{
out_reason = fmt::format(
"The part '{}' doesn't have projection '{}' while part '{}' does", right->name, name, left->name

View File

@ -469,8 +469,13 @@ public:
struct ProjectionPartsVector
{
DataPartsVector projection_parts;
DataPartsVector data_parts;
DataPartsVector projection_parts;
DataPartStateVector projection_parts_states;
DataPartsVector broken_projection_parts;
DataPartStateVector broken_projection_parts_states;
};
/// Returns a copy of the list so that the caller shouldn't worry about locks.
@ -485,7 +490,7 @@ public:
const DataPartStates & affordable_states, DataPartStateVector * out_states = nullptr) const;
/// Same as above but only returns projection parts
ProjectionPartsVector getProjectionPartsVectorForInternalUsage(
const DataPartStates & affordable_states, DataPartStateVector * out_states = nullptr) const;
const DataPartStates & affordable_states, MergeTreeData::DataPartStateVector * out_states) const;
/// Returns absolutely all parts (and snapshot of their states)

View File

@ -54,6 +54,8 @@ struct MergeTreeDataPartChecksums
/// True if `files` contains an entry for `file_name`.
bool has(const String & file_name) const { return files.count(file_name) != 0; }
/// Erases the entry for `file_name` from `files`; returns true if an entry was actually erased.
bool remove(const String & file_name)
{
    const auto erased_count = files.erase(file_name);
    return erased_count != 0;
}
/// True if `files` holds no entries.
bool empty() const { return files.empty(); }
/// Checks that the set of columns and their checksums are the same. If not, throws an exception.

View File

@ -551,7 +551,9 @@ static std::set<ProjectionDescriptionRawPtr> getProjectionsToRecalculate(
{
bool need_recalculate =
materialized_projections.contains(projection.name)
|| (!is_full_part_storage && source_part->hasProjection(projection.name));
|| (!is_full_part_storage
&& source_part->hasProjection(projection.name)
&& !source_part->hasBrokenProjection(projection.name));
if (need_recalculate)
projections_to_recalc.insert(&projection);
@ -895,7 +897,8 @@ void finalizeMutatedPart(
new_data_part->modification_time = time(nullptr);
/// Load rest projections which are hardlinked
new_data_part->loadProjections(false, false, true /* if_not_loaded */);
bool noop;
new_data_part->loadProjections(false, false, noop, true /* if_not_loaded */);
/// All information about sizes is stored in checksums.
/// It doesn't make sense to touch filesystem for sizes.
@ -1472,7 +1475,9 @@ private:
bool need_recalculate =
ctx->materialized_projections.contains(projection.name)
|| (!is_full_part_storage && ctx->source_part->hasProjection(projection.name));
|| (!is_full_part_storage
&& ctx->source_part->hasProjection(projection.name)
&& !ctx->source_part->hasBrokenProjection(projection.name));
if (need_recalculate)
{
@ -1596,8 +1601,9 @@ private:
void finalize()
{
bool noop;
ctx->new_data_part->minmax_idx = std::move(ctx->minmax_idx);
ctx->new_data_part->loadProjections(false, false, true /* if_not_loaded */);
ctx->new_data_part->loadProjections(false, false, noop, true /* if_not_loaded */);
ctx->mutating_executor.reset();
ctx->mutating_pipeline.reset();

View File

@ -63,7 +63,7 @@ void ReplicatedMergeTreePartCheckThread::enqueuePart(const String & name, time_t
if (parts_set.contains(name))
return;
LOG_TRACE(log, "Enqueueing {} for check after after {}s", name, delay_to_check_seconds);
LOG_TRACE(log, "Enqueueing {} for check after {}s", name, delay_to_check_seconds);
parts_queue.emplace_back(name, std::chrono::steady_clock::now() + std::chrono::seconds(delay_to_check_seconds));
parts_set.insert(name);
task->schedule();
@ -274,7 +274,7 @@ std::pair<bool, MergeTreeDataPartPtr> ReplicatedMergeTreePartCheckThread::findLo
return std::make_pair(exists_in_zookeeper, part);
}
ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const String & part_name)
ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const String & part_name, bool throw_on_broken_projection)
{
ReplicatedCheckResult result;
auto [exists_in_zookeeper, part] = findLocalPart(part_name);
@ -341,6 +341,7 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St
/// before the ReplicatedMergeTreePartHeader was introduced.
String part_path = storage.replica_path + "/parts/" + part_name;
String part_znode = zookeeper->get(part_path);
bool is_broken_projection = false;
try
{
@ -362,8 +363,10 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St
checkDataPart(
part,
true,
[this] { return need_stop.load(); });
/* require_checksums */true,
is_broken_projection,
[this] { return need_stop.load(); },
throw_on_broken_projection);
if (need_stop)
{
@ -382,14 +385,27 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St
if (isRetryableException(std::current_exception()))
throw;
tryLogCurrentException(log, __PRETTY_FUNCTION__);
PreformattedMessage message;
if (is_broken_projection)
{
WriteBufferFromOwnString wb;
message = PreformattedMessage::create(
"Part {} has a broken projections. It will be ignored. Broken projections info: {}",
part_name, getCurrentExceptionMessage(false));
LOG_DEBUG(log, message);
result.action = ReplicatedCheckResult::DoNothing;
}
else
{
tryLogCurrentException(log, __PRETTY_FUNCTION__);
auto message = PreformattedMessage::create("Part {} looks broken. Removing it and will try to fetch.", part_name);
LOG_ERROR(log, message);
message = PreformattedMessage::create("Part {} looks broken. Removing it and will try to fetch.", part_name);
LOG_ERROR(log, message);
result.action = ReplicatedCheckResult::TryFetchMissing;
}
/// Part is broken, let's try to find it and fetch.
result.status = {part_name, false, message};
result.action = ReplicatedCheckResult::TryFetchMissing;
return result;
}
@ -419,12 +435,12 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St
}
CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name, std::optional<time_t> * recheck_after)
CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name, std::optional<time_t> * recheck_after, bool throw_on_broken_projection)
{
LOG_INFO(log, "Checking part {}", part_name);
ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks);
ReplicatedCheckResult result = checkPartImpl(part_name);
ReplicatedCheckResult result = checkPartImpl(part_name, throw_on_broken_projection);
switch (result.action)
{
case ReplicatedCheckResult::None: UNREACHABLE();
@ -577,7 +593,7 @@ void ReplicatedMergeTreePartCheckThread::run()
}
std::optional<time_t> recheck_after;
checkPartAndFix(selected->name, &recheck_after);
checkPartAndFix(selected->name, &recheck_after, /* throw_on_broken_projection */false);
if (need_stop)
return;

View File

@ -65,9 +65,9 @@ public:
size_t size() const;
/// Check part by name
CheckResult checkPartAndFix(const String & part_name, std::optional<time_t> * recheck_after = nullptr);
CheckResult checkPartAndFix(const String & part_name, std::optional<time_t> * recheck_after = nullptr, bool throw_on_broken_projection = true);
ReplicatedCheckResult checkPartImpl(const String & part_name);
ReplicatedCheckResult checkPartImpl(const String & part_name, bool throw_on_broken_projection);
std::unique_lock<std::mutex> pausePartsCheck();

View File

@ -43,6 +43,7 @@ namespace ErrorCodes
extern const int NO_FILE_IN_DATA_PART;
extern const int NETWORK_ERROR;
extern const int SOCKET_TIMEOUT;
extern const int BROKEN_PROJECTION;
}
@ -117,7 +118,9 @@ static IMergeTreeDataPart::Checksums checkDataPart(
const NameSet & files_without_checksums,
const ReadSettings & read_settings,
bool require_checksums,
std::function<bool()> is_cancelled)
std::function<bool()> is_cancelled,
bool & is_broken_projection,
bool throw_on_broken_projection)
{
/** Responsibility:
* - read list of columns from columns.txt;
@ -126,6 +129,7 @@ static IMergeTreeDataPart::Checksums checkDataPart(
*/
CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedChecks};
Poco::Logger * log = &Poco::Logger::get("checkDataPart");
NamesAndTypesList columns_txt;
@ -275,17 +279,55 @@ static IMergeTreeDataPart::Checksums checkDataPart(
}
}
std::string broken_projections_message;
for (const auto & [name, projection] : data_part->getProjectionParts())
{
if (is_cancelled())
return {};
auto projection_file = name + ".proj";
auto projection_checksums = checkDataPart(
projection, *data_part_storage.getProjection(projection_file),
projection->getColumns(), projection->getType(),
projection->getFileNamesWithoutChecksums(),
read_settings, require_checksums, is_cancelled);
if (!throw_on_broken_projection && projection->is_broken)
{
projections_on_disk.erase(projection_file);
checksums_txt.remove(projection_file);
}
IMergeTreeDataPart::Checksums projection_checksums;
try
{
bool noop;
projection_checksums = checkDataPart(
projection, *data_part_storage.getProjection(projection_file),
projection->getColumns(), projection->getType(),
projection->getFileNamesWithoutChecksums(),
read_settings, require_checksums, is_cancelled, noop, /* throw_on_broken_projection */false);
}
catch (...)
{
if (isRetryableException(std::current_exception()))
throw;
if (!projection->is_broken)
{
LOG_TEST(log, "Marking projection {} as broken ({})", name, projection_file);
projection->setBrokenReason(getCurrentExceptionMessage(false), getCurrentExceptionCode());
}
is_broken_projection = true;
if (throw_on_broken_projection)
{
if (!broken_projections_message.empty())
broken_projections_message += "\n";
broken_projections_message += fmt::format(
"Part {} has a broken projection {} (error: {})",
data_part->name, name, getCurrentExceptionMessage(false));
continue;
}
projections_on_disk.erase(projection_file);
checksums_txt.remove(projection_file);
}
checksums_data.files[projection_file] = IMergeTreeDataPart::Checksums::Checksum(
projection_checksums.getTotalSizeOnDisk(),
@ -294,6 +336,11 @@ static IMergeTreeDataPart::Checksums checkDataPart(
projections_on_disk.erase(projection_file);
}
if (throw_on_broken_projection && !broken_projections_message.empty())
{
throw Exception(ErrorCodes::BROKEN_PROJECTION, "{}", broken_projections_message);
}
if (require_checksums && !projections_on_disk.empty())
{
throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART,
@ -321,7 +368,9 @@ IMergeTreeDataPart::Checksums checkDataPartInMemory(const DataPartInMemoryPtr &
IMergeTreeDataPart::Checksums checkDataPart(
MergeTreeData::DataPartPtr data_part,
bool require_checksums,
std::function<bool()> is_cancelled)
bool & is_broken_projection,
std::function<bool()> is_cancelled,
bool throw_on_broken_projection)
{
if (auto part_in_memory = asInMemoryPart(data_part))
return checkDataPartInMemory(part_in_memory);
@ -363,7 +412,9 @@ IMergeTreeDataPart::Checksums checkDataPart(
data_part->getFileNamesWithoutChecksums(),
read_settings,
require_checksums,
is_cancelled);
is_cancelled,
is_broken_projection,
throw_on_broken_projection);
};
try
@ -377,7 +428,9 @@ IMergeTreeDataPart::Checksums checkDataPart(
data_part->getFileNamesWithoutChecksums(),
read_settings,
require_checksums,
is_cancelled);
is_cancelled,
is_broken_projection,
throw_on_broken_projection);
}
catch (...)
{

Some files were not shown because too many files have changed in this diff Show More