Merge remote-tracking branch 'blessed/master' into parallel_replicas_cte_fix

Raúl Marín 2024-02-01 12:04:46 +01:00
commit 18386f7fb9
31 changed files with 652 additions and 150 deletions

View File

@ -16,29 +16,30 @@ message(STATUS "Checking Rust toolchain for current target")
# See https://doc.rust-lang.org/nightly/rustc/platform-support.html
if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le")
set(Rust_CARGO_TARGET "powerpc64le-unknown-linux-gnu")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl"))
set(Rust_CARGO_TARGET "x86_64-unknown-linux-musl")
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64")
set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl"))
set(Rust_CARGO_TARGET "aarch64-unknown-linux-musl")
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64")
set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64"))
set(Rust_CARGO_TARGET "x86_64-apple-darwin")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "aarch64"))
set(Rust_CARGO_TARGET "aarch64-apple-darwin")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64"))
set(Rust_CARGO_TARGET "x86_64-unknown-freebsd")
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-riscv64")
set(Rust_CARGO_TARGET "riscv64gc-unknown-linux-gnu")
else()
message(FATAL_ERROR "Unsupported rust target")
endif()
message(STATUS "Switched Rust target to ${Rust_CARGO_TARGET}")
if(DEFINED CMAKE_TOOLCHAIN_FILE)
if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le")
set(Rust_CARGO_TARGET "powerpc64le-unknown-linux-gnu")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl"))
set(Rust_CARGO_TARGET "x86_64-unknown-linux-musl")
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64")
set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl"))
set(Rust_CARGO_TARGET "aarch64-unknown-linux-musl")
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64")
set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64"))
set(Rust_CARGO_TARGET "x86_64-apple-darwin")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "aarch64"))
set(Rust_CARGO_TARGET "aarch64-apple-darwin")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64"))
set(Rust_CARGO_TARGET "x86_64-unknown-freebsd")
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-riscv64")
set(Rust_CARGO_TARGET "riscv64gc-unknown-linux-gnu")
else()
message(FATAL_ERROR "Unsupported rust target")
endif()
message(STATUS "Switched Rust target to ${Rust_CARGO_TARGET}")
endif ()
# FindRust.cmake
list(APPEND CMAKE_MODULE_PATH "${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake")

contrib/curl vendored

@ -1 +1 @@
Subproject commit d755a5f7c009dd63a61b2c745180d8ba937cbfeb
Subproject commit 7161cb17c01dcff1dc5bf89a18437d9d729f1ecd

contrib/libxml2 vendored

@ -1 +1 @@
Subproject commit 223cb03a5d27b1b2393b266a8657443d046139d6
Subproject commit 8292f361458fcffe0bff515a385be02e9d35582c

View File

@ -21,7 +21,7 @@ extern "C" {
* your library and includes mismatch
*/
#ifndef LIBXML2_COMPILING_MSCCDEF
XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
XMLPUBFUN void xmlCheckVersion(int version);
#endif /* LIBXML2_COMPILING_MSCCDEF */
/**
@ -29,28 +29,28 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
*
* the version string like "1.2.3"
*/
#define LIBXML_DOTTED_VERSION "2.10.3"
#define LIBXML_DOTTED_VERSION "2.12.4"
/**
* LIBXML_VERSION:
*
* the version number: 1.2.3 value is 10203
*/
#define LIBXML_VERSION 21003
#define LIBXML_VERSION 21204
/**
* LIBXML_VERSION_STRING:
*
* the version number string, 1.2.3 value is "10203"
*/
#define LIBXML_VERSION_STRING "21003"
#define LIBXML_VERSION_STRING "21204"
/**
* LIBXML_VERSION_EXTRA:
*
* extra version information, used to show a git commit description
*/
#define LIBXML_VERSION_EXTRA ""
#define LIBXML_VERSION_EXTRA "-GITv2.12.4"
/**
* LIBXML_TEST_VERSION:
@ -58,7 +58,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
* Macro to check that the libxml version in use is compatible with
* the version the software has been compiled against
*/
#define LIBXML_TEST_VERSION xmlCheckVersion(21003);
#define LIBXML_TEST_VERSION xmlCheckVersion(21204);
#ifndef VMS
#if 0
@ -270,7 +270,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
*
* Whether iconv support is available
*/
#if 0
#if 1
#define LIBXML_ICONV_ENABLED
#endif
@ -313,7 +313,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
/**
* LIBXML_DEBUG_RUNTIME:
*
* Whether the runtime debugging is configured in
* Removed
*/
#if 0
#define LIBXML_DEBUG_RUNTIME
@ -409,12 +409,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
#endif
#ifdef __GNUC__
/**
* ATTRIBUTE_UNUSED:
*
* Macro used to signal to GCC unused function parameters
*/
/** DOC_DISABLE */
#ifndef ATTRIBUTE_UNUSED
# if ((__GNUC__ > 2) || ((__GNUC__ == 2) && (__GNUC_MINOR__ >= 7)))
@ -424,12 +419,6 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
# endif
#endif
/**
* LIBXML_ATTR_ALLOC_SIZE:
*
* Macro used to indicate to GCC this is an allocator function
*/
#ifndef LIBXML_ATTR_ALLOC_SIZE
# if (!defined(__clang__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 3))))
# define LIBXML_ATTR_ALLOC_SIZE(x) __attribute__((alloc_size(x)))
@ -440,12 +429,6 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
# define LIBXML_ATTR_ALLOC_SIZE(x)
#endif
/**
* LIBXML_ATTR_FORMAT:
*
* Macro used to indicate to GCC the parameter are printf like
*/
#ifndef LIBXML_ATTR_FORMAT
# if ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)))
# define LIBXML_ATTR_FORMAT(fmt,args) __attribute__((__format__(__printf__,fmt,args)))
@ -457,44 +440,69 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
#endif
#ifndef XML_DEPRECATED
# ifdef IN_LIBXML
# if defined (IN_LIBXML) || (__GNUC__ * 100 + __GNUC_MINOR__ < 301)
# define XML_DEPRECATED
# else
/* Available since at least GCC 3.1 */
# else
# define XML_DEPRECATED __attribute__((deprecated))
# endif
#endif
#if defined(__clang__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 406)
#if defined(__clang__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 800)
#define XML_IGNORE_FPTR_CAST_WARNINGS \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wpedantic\"") \
_Pragma("GCC diagnostic ignored \"-Wcast-function-type\"")
#else
#define XML_IGNORE_FPTR_CAST_WARNINGS \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wpedantic\"")
#endif
#define XML_POP_WARNINGS \
_Pragma("GCC diagnostic pop")
#else
#define XML_IGNORE_FPTR_CAST_WARNINGS
#define XML_POP_WARNINGS
#endif
#else /* ! __GNUC__ */
/**
* ATTRIBUTE_UNUSED:
*
* Macro used to signal to GCC unused function parameters
*/
#define ATTRIBUTE_UNUSED
/**
* LIBXML_ATTR_ALLOC_SIZE:
*
* Macro used to indicate to GCC this is an allocator function
*/
#define LIBXML_ATTR_ALLOC_SIZE(x)
/**
* LIBXML_ATTR_FORMAT:
*
* Macro used to indicate to GCC the parameter are printf like
*/
#define LIBXML_ATTR_FORMAT(fmt,args)
/**
* XML_DEPRECATED:
*
* Macro used to indicate that a function, variable, type or struct member
* is deprecated.
*/
#ifndef XML_DEPRECATED
#define XML_DEPRECATED
# if defined (IN_LIBXML) || !defined (_MSC_VER)
# define XML_DEPRECATED
/* Available since Visual Studio 2005 */
# elif defined (_MSC_VER) && (_MSC_VER >= 1400)
# define XML_DEPRECATED __declspec(deprecated)
# endif
#endif
#if defined (_MSC_VER) && (_MSC_VER >= 1400)
# define XML_IGNORE_FPTR_CAST_WARNINGS __pragma(warning(push))
#else
# define XML_IGNORE_FPTR_CAST_WARNINGS
#endif
#ifndef XML_POP_WARNINGS
# if defined (_MSC_VER) && (_MSC_VER >= 1400)
# define XML_POP_WARNINGS __pragma(warning(pop))
# else
# define XML_POP_WARNINGS
# endif
#endif
#endif /* __GNUC__ */
#define XML_NO_ATTR
#ifdef LIBXML_THREAD_ENABLED
#define XML_DECLARE_GLOBAL(name, type, attrs) \
attrs XMLPUBFUN type *__##name(void);
#define XML_GLOBAL_MACRO(name) (*__##name())
#else
#define XML_DECLARE_GLOBAL(name, type, attrs) \
attrs XMLPUBVAR type name;
#endif
#ifdef __cplusplus
}
#endif /* __cplusplus */

View File

@ -293,10 +293,10 @@ if [ $failed_to_save_logs -ne 0 ]; then
# for files >64MB, we want these files to be compressed explicitly
for table in query_log zookeeper_log trace_log transactions_info_log metric_log
do
clickhouse-local "$data_path_config" --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||:
clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||:
fi
done
fi

View File

@ -0,0 +1,342 @@
---
slug: /en/getting-started/example-datasets/noaa
sidebar_label: NOAA Global Historical Climatology Network
sidebar_position: 1
description: 2.5 billion rows of climate data for the last 120 years
---
# NOAA Global Historical Climatology Network
This dataset contains weather measurements for the last 120 years. Each row is a measurement for a point in time and station.
More precisely and according to the [origin of this data](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn):
> GHCN-Daily is a dataset that contains daily observations over global land areas. It contains station-based measurements from land-based stations worldwide, about two-thirds of which are for precipitation measurements only (Menne et al., 2012). GHCN-Daily is a composite of climate records from numerous sources that were merged together and subjected to a common suite of quality assurance reviews (Durre et al., 2010). The archive includes the following meteorological elements:
- Daily maximum temperature
- Daily minimum temperature
- Temperature at the time of observation
- Precipitation (i.e., rain, melted snow)
- Snowfall
- Snow depth
- Other elements where available
## Downloading the data
- A [pre-prepared version](#pre-prepared-data) of the data for ClickHouse, which has been cleansed, re-structured, and enriched. This data covers the years 1900 to 2022.
- [Download the original data](#original-data) and convert to the format required by ClickHouse. Users wanting to add their own columns may wish to explore this approach.
### Pre-prepared data
More specifically, rows that failed any of NOAA's quality assurance checks have been removed. The data has also been restructured from a measurement per line to a row per station id and date, i.e.
```csv
"station_id","date","tempAvg","tempMax","tempMin","precipitation","snowfall","snowDepth","percentDailySun","averageWindSpeed","maxWindSpeed","weatherType"
"AEM00041194","2022-07-30",347,0,308,0,0,0,0,0,0,0
"AEM00041194","2022-07-31",371,413,329,0,0,0,0,0,0,0
"AEM00041194","2022-08-01",384,427,357,0,0,0,0,0,0,0
"AEM00041194","2022-08-02",381,424,352,0,0,0,0,0,0,0
```
This is simpler to query and ensures the resulting table is less sparse. Finally, the data has also been enriched with latitude and longitude.
This data is available in the following S3 location. Either download the data to your local filesystem (and insert using the ClickHouse client) or insert directly into ClickHouse (see [Inserting from S3](#inserting-from-s3)).
To download:
```bash
wget https://datasets-documentation.s3.eu-west-3.amazonaws.com/noaa/noaa_enriched.parquet
```
### Original data
The following details the steps to download and transform the original data in preparation for loading into ClickHouse.
#### Download
To download the original data:
```bash
for i in {1900..2023}; do wget https://noaa-ghcn-pds.s3.amazonaws.com/csv.gz/${i}.csv.gz; done
```
#### Sampling the data
```bash
$ clickhouse-local --query "SELECT * FROM '2021.csv.gz' LIMIT 10" --format PrettyCompact
┌─c1──────────┬───────c2─┬─c3───┬──c4─┬─c5───┬─c6───┬─c7─┬───c8─┐
│ AE000041196 │ 20210101 │ TMAX │ 278 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AE000041196 │ 20210101 │ PRCP │ 0 │ D │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AE000041196 │ 20210101 │ TAVG │ 214 │ H │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041194 │ 20210101 │ TMAX │ 266 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041194 │ 20210101 │ TMIN │ 178 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041194 │ 20210101 │ PRCP │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041194 │ 20210101 │ TAVG │ 217 │ H │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041217 │ 20210101 │ TMAX │ 262 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041217 │ 20210101 │ TMIN │ 155 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041217 │ 20210101 │ TAVG │ 202 │ H │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
└─────────────┴──────────┴──────┴─────┴──────┴──────┴────┴──────┘
```
Summarizing the [format documentation](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn) and the columns in order:
- An 11 character station identification code. This itself encodes some useful information, e.g. the first two characters identify the country (see the sketch after this list).
- YEAR/MONTH/DAY = 8 character date in YYYYMMDD format (e.g. 19860529 = May 29, 1986)
- ELEMENT = 4 character indicator of element type. Effectively the measurement type. While there are many measurements available, we select the following:
- PRCP - Precipitation (tenths of mm)
- SNOW - Snowfall (mm)
- SNWD - Snow depth (mm)
- TMAX - Maximum temperature (tenths of degrees C)
- TAVG - Average temperature (tenths of a degree C)
- TMIN - Minimum temperature (tenths of degrees C)
- PSUN - Daily percent of possible sunshine (percent)
- AWND - Average daily wind speed (tenths of meters per second)
- WSFG - Peak gust wind speed (tenths of meters per second)
- WT** = Weather Type where ** defines the weather type. The full list of weather types is in the format documentation linked above.
- DATA VALUE = 5 character data value for ELEMENT i.e. the value of the measurement.
- M-FLAG = 1 character Measurement Flag. This has 10 possible values. Some of these values indicate questionable data accuracy. We accept data where this is set to “P” - identified as missing presumed zero, as this is only relevant to the PRCP, SNOW and SNWD measurements.
- Q-FLAG is the measurement quality flag with 14 possible values. We are only interested in data with an empty value i.e. it did not fail any quality assurance checks.
- S-FLAG is the source flag for the observation. Not useful for our analysis and ignored.
- OBS-TIME = 4-character time of observation in hour-minute format (i.e. 0700 = 7:00 am). Typically not present in older data. We ignore this for our purposes.
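For example, the country prefix of a station can be extracted directly from the identifier. A minimal sketch using `clickhouse-local` against the sampled file from above (`c1` is the inferred name of the station id column):

```bash
clickhouse-local --query "SELECT substring(c1, 1, 2) AS country_code, count() AS measurements
FROM '2021.csv.gz'
GROUP BY country_code
ORDER BY measurements DESC
LIMIT 5" --format PrettyCompact
```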
A measurement per line would result in a sparse table structure in ClickHouse. We should transform to a row per time and station, with measurements as columns. First, we limit the dataset to those rows without issues i.e. where `qFlag` is equal to an empty string.
#### Clean the data
Using [ClickHouse local](https://clickhouse.com/blog/extracting-converting-querying-local-files-with-sql-clickhouse-local) we can filter rows that represent measurements of interest and pass our quality requirements:
```bash
clickhouse local --query "SELECT count()
FROM file('*.csv.gz', CSV, 'station_id String, date String, measurement String, value Int64, mFlag String, qFlag String, sFlag String, obsTime String') WHERE qFlag = '' AND (measurement IN ('PRCP', 'SNOW', 'SNWD', 'TMAX', 'TAVG', 'TMIN', 'PSUN', 'AWND', 'WSFG') OR startsWith(measurement, 'WT'))"
2679264563
```
With over 2.6 billion rows, this isn't a fast query since it involves parsing all the files. On our 8 core machine, this takes around 160 seconds.
### Pivot data
While the measurement per line structure can be used with ClickHouse, it will unnecessarily complicate future queries. Ideally, we need a row per station id and date, where each measurement type and associated value are a column i.e.
```csv
"station_id","date","tempAvg","tempMax","tempMin","precipitation","snowfall","snowDepth","percentDailySun","averageWindSpeed","maxWindSpeed","weatherType"
"AEM00041194","2022-07-30",347,0,308,0,0,0,0,0,0,0
"AEM00041194","2022-07-31",371,413,329,0,0,0,0,0,0,0
"AEM00041194","2022-08-01",384,427,357,0,0,0,0,0,0,0
"AEM00041194","2022-08-02",381,424,352,0,0,0,0,0,0,0
```
Using ClickHouse local and a simple `GROUP BY`, we can repivot our data to this structure. To limit memory overhead, we do this one file at a time.
```bash
for i in {1900..2022}
do
clickhouse-local --query "SELECT station_id,
toDate32(date) as date,
anyIf(value, measurement = 'TAVG') as tempAvg,
anyIf(value, measurement = 'TMAX') as tempMax,
anyIf(value, measurement = 'TMIN') as tempMin,
anyIf(value, measurement = 'PRCP') as precipitation,
anyIf(value, measurement = 'SNOW') as snowfall,
anyIf(value, measurement = 'SNWD') as snowDepth,
anyIf(value, measurement = 'PSUN') as percentDailySun,
anyIf(value, measurement = 'AWND') as averageWindSpeed,
anyIf(value, measurement = 'WSFG') as maxWindSpeed,
toUInt8OrZero(replaceOne(anyIf(measurement, startsWith(measurement, 'WT') AND value = 1), 'WT', '')) as weatherType
FROM file('$i.csv.gz', CSV, 'station_id String, date String, measurement String, value Int64, mFlag String, qFlag String, sFlag String, obsTime String')
WHERE qFlag = '' AND (measurement IN ('PRCP', 'SNOW', 'SNWD', 'TMAX', 'TAVG', 'TMIN', 'PSUN', 'AWND', 'WSFG') OR startsWith(measurement, 'WT'))
GROUP BY station_id, date
ORDER BY station_id, date FORMAT CSV" >> "noaa.csv";
done
```
This query produces a single 50GB file `noaa.csv`.
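As a quick sanity check (a sketch; the exact numbers depend on the years downloaded), the pivoted file can be queried in place with the same schema used in the enrichment step below:

```bash
clickhouse-local --query "SELECT count(), min(date), max(date)
FROM file('noaa.csv', CSV, 'station_id String, date Date32, tempAvg Int32, tempMax Int32, tempMin Int32, precipitation Int32, snowfall Int32, snowDepth Int32, percentDailySun Int8, averageWindSpeed Int32, maxWindSpeed Int32, weatherType UInt8')"
```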
### Enriching the data
The data has no indication of location aside from a station id, which includes a prefix country code. Ideally, each station would have a latitude and longitude associated with it. To achieve this, NOAA conveniently provides the details of each station as a separate [ghcnd-stations.txt](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn#format-of-ghcnd-stationstxt-file). This file has [several columns](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn#format-of-ghcnd-stationstxt-file), of which five are useful to our future analysis: id, latitude, longitude, elevation, and name.
```bash
wget http://noaa-ghcn-pds.s3.amazonaws.com/ghcnd-stations.txt
```
```bash
clickhouse local --query "WITH stations AS (SELECT id, lat, lon, elevation, splitByString(' GSN ',name)[1] as name FROM file('ghcnd-stations.txt', Regexp, 'id String, lat Float64, lon Float64, elevation Float32, name String'))
SELECT station_id,
date,
tempAvg,
tempMax,
tempMin,
precipitation,
snowfall,
snowDepth,
percentDailySun,
averageWindSpeed,
maxWindSpeed,
weatherType,
tuple(lon, lat) as location,
elevation,
name
FROM file('noaa.csv', CSV,
'station_id String, date Date32, tempAvg Int32, tempMax Int32, tempMin Int32, precipitation Int32, snowfall Int32, snowDepth Int32, percentDailySun Int8, averageWindSpeed Int32, maxWindSpeed Int32, weatherType UInt8') as noaa LEFT OUTER
JOIN stations ON noaa.station_id = stations.id INTO OUTFILE 'noaa_enriched.parquet' FORMAT Parquet SETTINGS format_regexp='^(.{11})\s+(\-?\d{1,2}\.\d{4})\s+(\-?\d{1,3}\.\d{1,4})\s+(\-?\d*\.\d*)\s+(.*)\s+(?:[\d]*)'"
```
This query takes a few minutes to run and produces a 6.4 GB file, `noaa_enriched.parquet`.
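Before loading, the schema of the enriched file can be inspected (a sketch; `DESCRIBE` over the `file()` function infers the Parquet schema):

```bash
clickhouse-local --query "DESCRIBE file('noaa_enriched.parquet')"
```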
## Create table
Create a MergeTree table in ClickHouse (from the ClickHouse client).
```sql
CREATE TABLE noaa
(
`station_id` LowCardinality(String),
`date` Date32,
`tempAvg` Int32 COMMENT 'Average temperature (tenths of a degrees C)',
`tempMax` Int32 COMMENT 'Maximum temperature (tenths of degrees C)',
`tempMin` Int32 COMMENT 'Minimum temperature (tenths of degrees C)',
`precipitation` UInt32 COMMENT 'Precipitation (tenths of mm)',
`snowfall` UInt32 COMMENT 'Snowfall (mm)',
`snowDepth` UInt32 COMMENT 'Snow depth (mm)',
`percentDailySun` UInt8 COMMENT 'Daily percent of possible sunshine (percent)',
`averageWindSpeed` UInt32 COMMENT 'Average daily wind speed (tenths of meters per second)',
`maxWindSpeed` UInt32 COMMENT 'Peak gust wind speed (tenths of meters per second)',
`weatherType` Enum8('Normal' = 0, 'Fog' = 1, 'Heavy Fog' = 2, 'Thunder' = 3, 'Small Hail' = 4, 'Hail' = 5, 'Glaze' = 6, 'Dust/Ash' = 7, 'Smoke/Haze' = 8, 'Blowing/Drifting Snow' = 9, 'Tornado' = 10, 'High Winds' = 11, 'Blowing Spray' = 12, 'Mist' = 13, 'Drizzle' = 14, 'Freezing Drizzle' = 15, 'Rain' = 16, 'Freezing Rain' = 17, 'Snow' = 18, 'Unknown Precipitation' = 19, 'Ground Fog' = 21, 'Freezing Fog' = 22),
`location` Point,
`elevation` Float32,
`name` LowCardinality(String)
) ENGINE = MergeTree() ORDER BY (station_id, date);
```
## Inserting into ClickHouse
### Inserting from local file
Data can be inserted from a local file as follows (from the ClickHouse client):
```sql
INSERT INTO noaa FROM INFILE '<path>/noaa_enriched.parquet'
```
where `<path>` represents the full path to the local file on disk.
See [here](https://clickhouse.com/blog/real-world-data-noaa-climate-data#load-the-data) for how to speed this load up.
### Inserting from S3
```sql
INSERT INTO noaa SELECT *
FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/noaa/noaa_enriched.parquet')
```
For how to speed this up, see our blog post on [tuning large data loads](https://clickhouse.com/blog/supercharge-your-clickhouse-data-loads-part2).
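For example, raising `max_insert_threads` parallelizes the insert pipeline (a sketch; the best value depends on the available cores and memory):

```sql
INSERT INTO noaa SELECT *
FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/noaa/noaa_enriched.parquet')
SETTINGS max_insert_threads = 16
```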
## Sample queries
### Highest temperature ever
```sql
SELECT
tempMax / 10 AS maxTemp,
location,
name,
date
FROM noaa
WHERE tempMax > 500
ORDER BY
tempMax DESC,
date ASC
LIMIT 5
┌─maxTemp─┬─location──────────┬─name───────────────────────────────────────────┬───────date─┐
│ 56.7 │ (-116.8667,36.45) │ CA GREENLAND RCH │ 1913-07-10 │
│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1949-08-20 │
│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1949-09-18 │
│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1952-07-17 │
│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1952-09-04 │
└─────────┴───────────────────┴────────────────────────────────────────────────┴────────────┘
5 rows in set. Elapsed: 0.514 sec. Processed 1.06 billion rows, 4.27 GB (2.06 billion rows/s., 8.29 GB/s.)
```
Reassuringly consistent with the [documented record](https://en.wikipedia.org/wiki/List_of_weather_records#Highest_temperatures_ever_recorded) at [Furnace Creek](https://www.google.com/maps/place/36%C2%B027'00.0%22N+116%C2%B052'00.1%22W/@36.1329666,-116.1104099,8.95z/data=!4m5!3m4!1s0x0:0xf2ed901b860f4446!8m2!3d36.45!4d-116.8667) as of 2023.
### Best ski resorts
Using a [list of ski resorts](https://gist.githubusercontent.com/gingerwizard/dd022f754fd128fdaf270e58fa052e35/raw/622e03c37460f17ef72907afe554cb1c07f91f23/ski_resort_stats.csv) in the United States and their respective locations, we join these against the top 1000 weather stations with the most snow in any month in the last 5 years. Sorting this join by [geoDistance](https://clickhouse.com/docs/en/sql-reference/functions/geo/coordinates/#geodistance) and restricting the results to those where the distance is less than 20km, we select the top result per resort and sort this by total snow. Note we also restrict stations to those above 1800m, as a broad indicator of good skiing conditions.
```sql
SELECT
resort_name,
total_snow / 1000 AS total_snow_m,
resort_location,
month_year
FROM
(
WITH resorts AS
(
SELECT
resort_name,
state,
(lon, lat) AS resort_location,
'US' AS code
FROM url('https://gist.githubusercontent.com/gingerwizard/dd022f754fd128fdaf270e58fa052e35/raw/622e03c37460f17ef72907afe554cb1c07f91f23/ski_resort_stats.csv', CSVWithNames)
)
SELECT
resort_name,
highest_snow.station_id,
geoDistance(resort_location.1, resort_location.2, station_location.1, station_location.2) / 1000 AS distance_km,
highest_snow.total_snow,
resort_location,
station_location,
month_year
FROM
(
SELECT
sum(snowfall) AS total_snow,
station_id,
any(location) AS station_location,
month_year,
substring(station_id, 1, 2) AS code
FROM noaa
WHERE (date > '2017-01-01') AND (code = 'US') AND (elevation > 1800)
GROUP BY
station_id,
toYYYYMM(date) AS month_year
ORDER BY total_snow DESC
LIMIT 1000
) AS highest_snow
INNER JOIN resorts ON highest_snow.code = resorts.code
WHERE distance_km < 20
ORDER BY
resort_name ASC,
total_snow DESC
LIMIT 1 BY
resort_name,
station_id
)
ORDER BY total_snow DESC
LIMIT 5
┌─resort_name──────────┬─total_snow_m─┬─resort_location─┬─month_year─┐
│ Sugar Bowl, CA │ 7.799 │ (-120.3,39.27) │ 201902 │
│ Donner Ski Ranch, CA │ 7.799 │ (-120.34,39.31) │ 201902 │
│ Boreal, CA │ 7.799 │ (-120.35,39.33) │ 201902 │
│ Homewood, CA │ 4.926 │ (-120.17,39.08) │ 201902 │
│ Alpine Meadows, CA │ 4.926 │ (-120.22,39.17) │ 201902 │
└──────────────────────┴──────────────┴─────────────────┴────────────┘
5 rows in set. Elapsed: 0.750 sec. Processed 689.10 million rows, 3.20 GB (918.20 million rows/s., 4.26 GB/s.)
Peak memory usage: 67.66 MiB.
```
## Credits
We would like to acknowledge the efforts of the Global Historical Climatology Network for preparing, cleansing, and distributing this data. We appreciate their efforts.
Menne, M.J., I. Durre, B. Korzeniewski, S. McNeal, K. Thomas, X. Yin, S. Anthony, R. Ray, R.S. Vose, B.E. Gleason, and T.G. Houston, 2012: Global Historical Climatology Network - Daily (GHCN-Daily), Version 3. [indicate subset used following decimal, e.g. Version 3.25]. NOAA National Centers for Environmental Information. http://doi.org/10.7289/V5D21VHZ [17/08/2020]

View File

@ -197,6 +197,29 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
Instead of `--host`, `--port`, `--user` and `--password` options, ClickHouse client also supports connection strings (see next section).
## Aliases {#cli_aliases}
- `\l` - SHOW DATABASES
- `\d` - SHOW TABLES
- `\c <DATABASE>` - USE DATABASE
- `.` - repeat the last query
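A hypothetical interactive session illustrating the aliases (the prompt and database name are illustrative):

```
:) \l          -- runs SHOW DATABASES
:) \c system   -- runs USE system
:) \d          -- runs SHOW TABLES
:) .           -- repeats the last query (SHOW TABLES)
```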
## Shortkeys {#shortkeys_aliases}
- `Alt (Option) + Shift + e` - open the editor with the current query. The editor can be set with the `EDITOR` environment variable; by default, vim is used.
- `Alt (Option) + #` - comment the current line.
- `Ctrl + r` - fuzzy history search.
:::tip
To make the meta key (Option) work correctly on macOS with iTerm2: go to Preferences -> Profiles -> Keys -> Left Option key and select Esc+.
:::
The full list of available shortkeys is in [replxx](https://github.com/AmokHuginnsson/replxx/blob/1f149bf/src/replxx_impl.cxx#L262).
## Connection string {#connection_string}
Alternatively, clickhouse-client supports connecting to a ClickHouse server using a connection string, similar to [MongoDB](https://www.mongodb.com/docs/manual/reference/connection-string/), [PostgreSQL](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING), and [MySQL](https://dev.mysql.com/doc/refman/8.0/en/connecting-using-uri-or-key-value-pairs.html#connecting-using-uri). It has the following syntax:

View File

@ -45,11 +45,11 @@ clickhouse-benchmark [keys] < queries_file;
- `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1.
- `-d N`, `--delay=N` — Interval in seconds between intermediate reports (to disable reports set 0). Default value: 1.
- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys.
- `-p N`, `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys.
- `-i N`, `--iterations=N` — Total number of queries. Default value: 0 (repeat forever).
- `-r`, `--randomize` — Random order of queries execution if there is more than one input query.
- `-s`, `--secure` — Using `TLS` connection.
- `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` stops sending queries when the specified time limit is reached. Default value: 0 (time limit disabled).
- `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `--port` keys.
- `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) to determine whether the two distributions aren't different with the selected level of confidence.
- `--cumulative` — Printing cumulative data instead of data per interval.
- `--database=DATABASE_NAME` — ClickHouse database name. Default value: `default`.
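For example, the keys above can be combined as follows (a sketch; `remote_host` is a placeholder for a second server to compare against):

```bash
echo "SELECT sum(number) FROM numbers(1000000)" \
  | clickhouse-benchmark -i 1000 -r -d 5 -h localhost -h remote_host --confidence=2
```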

View File

@ -2,6 +2,7 @@ use prql_compiler::sql::Dialect;
use prql_compiler::{Options, Target};
use std::ffi::{c_char, CString};
use std::slice;
use std::panic;
fn set_output(result: String, out: *mut *mut u8, out_size: *mut u64) {
assert!(!out_size.is_null());
@ -13,8 +14,7 @@ fn set_output(result: String, out: *mut *mut u8, out_size: *mut u64) {
*out_ptr = CString::new(result).unwrap().into_raw() as *mut u8;
}
#[no_mangle]
pub unsafe extern "C" fn prql_to_sql(
pub unsafe extern "C" fn prql_to_sql_impl(
query: *const u8,
size: u64,
out: *mut *mut u8,
@ -50,6 +50,23 @@ pub unsafe extern "C" fn prql_to_sql(
}
}
#[no_mangle]
pub unsafe extern "C" fn prql_to_sql(
query: *const u8,
size: u64,
out: *mut *mut u8,
out_size: *mut u64,
) -> i64 {
let ret = panic::catch_unwind(|| {
return prql_to_sql_impl(query, size, out, out_size);
});
return match ret {
// NOTE: with cxxbridge we could return a proper Result<> type.
Err(_err) => 1,
Ok(res) => res,
}
}
#[no_mangle]
pub unsafe extern "C" fn prql_free_pointer(ptr_to_free: *mut u8) {
std::mem::drop(CString::from_raw(ptr_to_free as *mut c_char));

View File

@ -1,6 +1,7 @@
use skim::prelude::*;
use term::terminfo::TermInfo;
use cxx::{CxxString, CxxVector};
use std::panic;
#[cxx::bridge]
mod ffi {
@ -36,7 +37,7 @@ impl SkimItem for Item {
}
}
fn skim(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String, String> {
fn skim_impl(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String, String> {
// Check if a terminal is available, to avoid a panic.
if let Err(err) = TermInfo::from_env() {
return Err(format!("{}", err));
@ -89,3 +90,22 @@ fn skim(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String, Stri
}
return Ok(output.selected_items[0].output().to_string());
}
fn skim(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String, String> {
let ret = panic::catch_unwind(|| {
return skim_impl(prefix, words);
});
return match ret {
Err(err) => {
let e = if let Some(s) = err.downcast_ref::<String>() {
format!("{}", s)
} else if let Some(s) = err.downcast_ref::<&str>() {
format!("{}", s)
} else {
format!("Unknown panic type: {:?}", err.type_id())
};
Err(format!("Rust panic: {:?}", e))
},
Ok(res) => res,
}
}

View File

@ -17,6 +17,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int INCORRECT_DATA;
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
}
@ -30,12 +31,12 @@ class ApproxSampler
public:
struct Stats
{
T value; // the sampled value
Int64 g; // the minimum rank jump from the previous value's minimum rank
Int64 delta; // the maximum span of the rank
T value; // The sampled value
Int64 g; // The minimum rank jump from the previous value's minimum rank
Int64 delta; // The maximum span of the rank
Stats() = default;
Stats(T value_, Int64 g_, Int64 delta_) : value(value_), g(g_), delta(delta_) {}
Stats(T value_, Int64 g_, Int64 delta_) : value(value_), g(g_), delta(delta_) { }
};
struct QueryResult
@ -49,20 +50,20 @@ public:
ApproxSampler() = default;
explicit ApproxSampler(
double relative_error_,
size_t compress_threshold_ = default_compress_threshold,
size_t count_ = 0,
bool compressed_ = false)
: relative_error(relative_error_)
, compress_threshold(compress_threshold_)
, count(count_)
, compressed(compressed_)
ApproxSampler(const ApproxSampler & other)
: relative_error(other.relative_error)
, compress_threshold(other.compress_threshold)
, count(other.count)
, compressed(other.compressed)
, sampled(other.sampled.begin(), other.sampled.end())
, backup_sampled(other.backup_sampled.begin(), other.backup_sampled.end())
, head_sampled(other.head_sampled.begin(), other.head_sampled.end())
{
sampled.reserve(compress_threshold);
backup_sampled.reserve(compress_threshold);
}
head_sampled.reserve(default_head_size);
explicit ApproxSampler(double relative_error_)
: relative_error(relative_error_), compress_threshold(default_compress_threshold), count(0), compressed(false)
{
}
bool isCompressed() const { return compressed; }
@ -95,9 +96,9 @@ public:
Int64 current_max = std::numeric_limits<Int64>::min();
for (const auto & stats : sampled)
current_max = std::max(stats.delta + stats.g, current_max);
Int64 target_error = current_max/2;
Int64 target_error = current_max / 2;
size_t index= 0;
size_t index = 0;
auto min_rank = sampled[0].g;
for (size_t i = 0; i < size; ++i)
{
@ -118,7 +119,6 @@ public:
result[indices[i]] = res.value;
}
}
}
void compress()
@ -256,16 +256,27 @@ public:
void read(ReadBuffer & buf)
{
readBinaryLittleEndian(compress_threshold, buf);
if (compress_threshold != default_compress_threshold)
throw Exception(
ErrorCodes::INCORRECT_DATA,
"The compress threshold {} isn't the expected one {}",
compress_threshold,
default_compress_threshold);
readBinaryLittleEndian(relative_error, buf);
readBinaryLittleEndian(count, buf);
size_t sampled_len = 0;
readBinaryLittleEndian(sampled_len, buf);
if (sampled_len > compress_threshold)
throw Exception(
ErrorCodes::INCORRECT_DATA, "The number of elements {} for quantileGK exceeds {}", sampled_len, compress_threshold);
sampled.resize(sampled_len);
for (size_t i = 0; i < sampled_len; ++i)
{
auto stats = sampled[i];
auto & stats = sampled[i];
readBinaryLittleEndian(stats.value, buf);
readBinaryLittleEndian(stats.g, buf);
readBinaryLittleEndian(stats.delta, buf);
@ -291,7 +302,7 @@ private:
min_rank += curr_sample.g;
}
}
return {sampled.size()-1, 0, sampled.back().value};
return {sampled.size() - 1, 0, sampled.back().value};
}
void withHeadBufferInserted()
@ -389,12 +400,11 @@ private:
double relative_error;
size_t compress_threshold;
size_t count = 0;
size_t count;
bool compressed;
PaddedPODArray<Stats> sampled;
PaddedPODArray<Stats> backup_sampled;
PaddedPODArray<T> head_sampled;
static constexpr size_t default_compress_threshold = 10000;
@ -406,17 +416,14 @@ class QuantileGK
{
private:
using Data = ApproxSampler<Value>;
mutable Data data;
Data data;
public:
QuantileGK() = default;
explicit QuantileGK(size_t accuracy) : data(1.0 / static_cast<double>(accuracy)) { }
void add(const Value & x)
{
data.insert(x);
}
void add(const Value & x) { data.insert(x); }
template <typename Weight>
void add(const Value &, const Weight &)
@ -429,22 +436,34 @@ public:
if (!data.isCompressed())
data.compress();
data.merge(rhs.data);
if (rhs.data.isCompressed())
data.merge(rhs.data);
else
{
/// We can't modify rhs, so copy it and compress
Data rhs_data_copy(rhs.data);
rhs_data_copy.compress();
data.merge(rhs_data_copy);
}
}
void serialize(WriteBuffer & buf) const
{
/// Always compress before serialization
if (!data.isCompressed())
data.compress();
data.write(buf);
if (data.isCompressed())
data.write(buf);
else
{
/// We can't modify data in a const method, so copy it and compress
Data data_copy(data);
data_copy.compress();
data_copy.write(buf);
}
}
void deserialize(ReadBuffer & buf)
{
data.read(buf);
/// Serialized data is always compressed
data.setCompressed();
}
@ -481,7 +500,6 @@ public:
}
};
template <typename Value, bool _> using FuncQuantileGK = AggregateFunctionQuantile<Value, QuantileGK<Value>, NameQuantileGK, false, void, false, true>;
template <typename Value, bool _> using FuncQuantilesGK = AggregateFunctionQuantile<Value, QuantileGK<Value>, NameQuantilesGK, false, void, true, true>;

View File

@ -136,12 +136,12 @@ namespace
{
void assertDigest(
const KeeperStorage::Digest & first,
const KeeperStorage::Digest & second,
const KeeperStorage::Digest & expected,
const KeeperStorage::Digest & actual,
const Coordination::ZooKeeperRequest & request,
bool committing)
{
if (!KeeperStorage::checkDigest(first, second))
if (!KeeperStorage::checkDigest(expected, actual))
{
LOG_FATAL(
getLogger("KeeperStateMachine"),
@ -149,9 +149,9 @@ void assertDigest(
"{}). Keeper will terminate to avoid inconsistencies.\nExtra information about the request:\n{}",
committing ? "committing" : "preprocessing",
request.getOpNum(),
first.value,
second.value,
first.version,
expected.value,
actual.value,
expected.version,
request.toString());
std::terminate();
}

View File

@ -174,7 +174,6 @@ uint64_t calculateDigest(std::string_view path, std::string_view data, const Kee
hash.update(data);
hash.update(stat.czxid);
hash.update(stat.czxid);
hash.update(stat.mzxid);
hash.update(stat.ctime);
@ -183,7 +182,6 @@ uint64_t calculateDigest(std::string_view path, std::string_view data, const Kee
hash.update(stat.cversion);
hash.update(stat.aversion);
hash.update(stat.ephemeralOwner);
hash.update(data.length());
hash.update(stat.numChildren);
hash.update(stat.pzxid);
@ -2531,6 +2529,17 @@ void KeeperStorage::recalculateStats()
container.recalculateDataSize();
}
bool KeeperStorage::checkDigest(const Digest & first, const Digest & second)
{
if (first.version != second.version)
return true;
if (first.version == DigestVersion::NO_DIGEST)
return true;
return first.value == second.value;
}
String KeeperStorage::generateDigest(const String & userdata)
{
std::vector<String> user_password;

View File

@ -95,10 +95,11 @@ public:
{
NO_DIGEST = 0,
V1 = 1,
V2 = 2 // added system nodes that modify the digest on startup so digest from V0 is invalid
V2 = 2, // added system nodes that modify the digest on startup so digest from V0 is invalid
V3 = 3 // fixed bug with casting, removed duplicate czxid usage
};
static constexpr auto CURRENT_DIGEST_VERSION = DigestVersion::V2;
static constexpr auto CURRENT_DIGEST_VERSION = DigestVersion::V3;
struct ResponseForSession
{
@ -113,16 +114,7 @@ public:
uint64_t value{0};
};
static bool checkDigest(const Digest & first, const Digest & second)
{
if (first.version != second.version)
return true;
if (first.version == DigestVersion::NO_DIGEST)
return true;
return first.value == second.value;
}
static bool checkDigest(const Digest & first, const Digest & second);
static String generateDigest(const String & userdata);

View File

@ -6,6 +6,7 @@
#include <Formats/FormatSettings.h>
#include <IO/WriteBufferFromString.h>
#include <IO/BufferWithOwnMemory.h>
#include <IO/PeekableReadBuffer.h>
#include <IO/readFloatText.h>
#include <IO/Operators.h>
#include <base/find_symbols.h>

View File

@ -38,7 +38,6 @@
#include <IO/CompressionMethod.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadBufferFromMemory.h>
#include <IO/PeekableReadBuffer.h>
#include <IO/VarInt.h>
#include <pcg_random.hpp>
@ -51,6 +50,7 @@ namespace DB
template <typename Allocator>
struct Memory;
class PeekableReadBuffer;
namespace ErrorCodes
{

View File

@ -30,6 +30,7 @@ static const std::unordered_map<String, String> quantile_fuse_name_mapping =
{"quantileTDigestWeighted", "quantilesTDigestWeighted"},
{"quantileTiming", "quantilesTiming"},
{"quantileTimingWeighted", "quantilesTimingWeighted"},
{"quantileGK", "quantilesGK"},
};
String GatherFunctionQuantileData::toFusedNameOrSelf(const String & func_name)

View File

@ -7,6 +7,7 @@
#include <Processors/Formats/RowInputFormatWithNamesAndTypes.h>
#include <Processors/Formats/ISchemaReader.h>
#include <Formats/FormatSettings.h>
#include <IO/PeekableReadBuffer.h>
namespace DB

View File

@ -2,6 +2,7 @@
#include <Formats/JSONUtils.h>
#include <Formats/FormatFactory.h>
#include <Formats/EscapingRuleUtils.h>
#include <IO/PeekableReadBuffer.h>
#include <IO/ReadHelpers.h>
namespace DB

View File

@ -4,6 +4,7 @@
#include <Formats/FormatSettings.h>
#include <Processors/Formats/RowInputFormatWithNamesAndTypes.h>
#include <Processors/Formats/ISchemaReader.h>
#include <IO/PeekableReadBuffer.h>
namespace DB

View File

@ -7,6 +7,7 @@
#include <IO/ReadHelpers.h>
#include <IO/Operators.h>
#include <IO/ReadBufferFromString.h>
#include <IO/PeekableReadBuffer.h>
#include <Formats/EscapingRuleUtils.h>

View File

@ -25,6 +25,7 @@
#include <IO/WriteHelpers.h>
#include <IO/Archives/createArchiveReader.h>
#include <IO/Archives/IArchiveReader.h>
#include <IO/PeekableReadBuffer.h>
#include <Formats/FormatFactory.h>
#include <Formats/ReadSchemaUtils.h>

View File

@ -1,5 +1,6 @@
#include <algorithm>
#include <memory>
#include <stack>
#include <Core/NamesAndTypes.h>
#include <Core/TypeId.h>

View File

@ -1,11 +1,14 @@
#!/usr/bin/env python3
import atexit
import sys
import logging
import sys
from typing import Tuple
# isort: off
from github import Github
# isort: on
from commit_status_helper import (
CI_STATUS_NAME,
create_ci_report,
@ -18,12 +21,12 @@ from commit_status_helper import (
)
from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL
from get_robot_token import get_best_robot_token
from pr_info import FORCE_TESTS_LABEL, PRInfo
from lambda_shared_package.lambda_shared.pr import (
CATEGORY_TO_LABEL,
TRUSTED_CONTRIBUTORS,
check_pr_description,
)
from pr_info import FORCE_TESTS_LABEL, PRInfo
from report import FAILURE
TRUSTED_ORG_IDS = {
@ -146,7 +149,7 @@ def main():
)
post_commit_status(
commit,
"failure",
FAILURE,
url,
format_description(description_error),
PR_CHECK,
@ -170,6 +173,14 @@ def main():
# allow the workflow to continue
if not can_run:
post_commit_status(
commit,
FAILURE,
"",
description,
PR_CHECK,
pr_info,
)
print("::notice ::Cannot run")
sys.exit(1)

View File

@ -4,6 +4,7 @@
<server_id>1</server_id>
<create_snapshot_on_exit>1</create_snapshot_on_exit>
<digest_enabled>1</digest_enabled>
<coordination_settings>
<operation_timeout_ms>10000</operation_timeout_ms>

View File

@ -1087,9 +1087,11 @@ def test_stop_other_host_during_backup(kill):
status = node1.query(f"SELECT status FROM system.backups WHERE id='{id}'").strip()
if kill:
assert status in ["BACKUP_CREATED", "BACKUP_FAILED"]
expected_statuses = ["BACKUP_CREATED", "BACKUP_FAILED"]
else:
assert status == "BACKUP_CREATED"
expected_statuses = ["BACKUP_CREATED", "BACKUP_CANCELLED"]
assert status in expected_statuses
node2.start_clickhouse()

View File

@ -19,6 +19,20 @@ select quantilesGK(1000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(numbe
[99,199,249,313,776]
select quantilesGK(10000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000);
[100,200,250,314,777]
SELECT quantileGKMerge(100, 0.5)(x)
FROM
(
SELECT quantileGKState(100, 0.5)(number + 1) AS x
FROM numbers(49999)
);
24902
SELECT quantilesGKMerge(100, 0.5, 0.9, 0.99)(x)
FROM
(
SELECT quantilesGKState(100, 0.5, 0.9, 0.99)(number + 1) AS x
FROM numbers(49999)
);
[24902,44518,49999]
select medianGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS }
select medianGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
select quantileGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS }

View File

@ -15,6 +15,19 @@ select quantilesGK(100, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number
select quantilesGK(1000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000);
select quantilesGK(10000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000);
SELECT quantileGKMerge(100, 0.5)(x)
FROM
(
SELECT quantileGKState(100, 0.5)(number + 1) AS x
FROM numbers(49999)
);
SELECT quantilesGKMerge(100, 0.5, 0.9, 0.99)(x)
FROM
(
SELECT quantilesGKState(100, 0.5, 0.9, 0.99)(number + 1) AS x
FROM numbers(49999)
);
select medianGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS }
select medianGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }

View File

@ -1 +1 @@
2024-01-01 Hello World
1

View File

@ -1,6 +1,6 @@
CREATE table if not exists table_with_dot_column (date Date, regular_column String, `other_column.2` String) ENGINE = MergeTree() ORDER BY date;
INSERT INTO table_with_dot_column select '2020-01-01', 'Hello', 'World';
INSERT INTO table_with_dot_column select '2024-01-01', 'Hello', 'World';
CREATE TABLE IF NOT EXISTS table_with_dot_column (date Date, regular_column String, `other_column.2` String) ENGINE = MergeTree() ORDER BY date;
INSERT INTO table_with_dot_column SELECT '2020-01-01', 'Hello', 'World';
INSERT INTO table_with_dot_column SELECT toDate(now() + 48*3600), 'Hello', 'World';
CREATE ROW POLICY IF NOT EXISTS row_policy ON table_with_dot_column USING toDate(date) >= today() - 30 TO ALL;
SELECT * FROM table_with_dot_column;
SELECT count(*) FROM table_with_dot_column;
DROP TABLE table_with_dot_column;

View File

@ -1,4 +1,4 @@
personal_ws-1.1 en 2657
personal_ws-1.1 en 2724
AArch
ACLs
ALTERs
@ -12,6 +12,7 @@ ARMv
ASLR
ASOF
ASan
AWND
AWST
Actian
ActionsMenu
@ -238,6 +239,7 @@ DistributedSend
DockerHub
DoubleDelta
Doxygen
Durre
ECMA
Ecto
EdgeAngle
@ -289,6 +291,7 @@ ForEach
FreeBSD
Fuzzer
Fuzzers
GHCN
GTID
GTest
Gb
@ -444,6 +447,7 @@ Khanna
KittenHouse
Klickhouse
Kolmogorov
Korzeniewski
Kubernetes
LDAP
LGPL
@ -503,6 +507,7 @@ MaxMind
MaxPartCountForPartition
MaxPushedDDLEntryID
Mbps
McNeal
Memcheck
MemoryCode
MemoryDataAndStack
@ -512,6 +517,7 @@ MemorySanitizer
MemoryShared
MemoryTracking
MemoryVirtual
Menne
MergeJoin
MergeState
MergeTree
@ -556,6 +562,7 @@ NEWDATE
NEWDECIMAL
NFKC
NFKD
NOAA
NULLIF
NVME
NVMe
@ -576,6 +583,7 @@ NetworkSendBytes
NetworkSendDrop
NetworkSendErrors
NetworkSendPackets
Noaa
NodeJs
NuRaft
NumHexagons
@ -656,8 +664,10 @@ OrZero
OvercommitTracker
PAAMAYIM
PCRE
PRCP
PREWHERE
PROCESSLIST
PSUN
PagerDuty
ParallelFormattingOutputFormatThreads
ParallelFormattingOutputFormatThreadsActive
@ -802,6 +812,7 @@ SIMD
SLES
SLRU
SMALLINT
SNWD
SPNEGO
SQEs
SQLAlchemy
@ -874,11 +885,14 @@ SupersetDocker
SystemReplicasThreads
SystemReplicasThreadsActive
TABLUM
TAVG
TCPConnection
TCPThreads
TDigest
TINYINT
TLSv
TMAX
TMIN
TPCH
TSDB
TSVRaw
@ -980,7 +994,9 @@ VersionedCollapsingMergeTree
VideoContainer
ViewAllLink
VirtualBox
Vose
WALs
WSFG
Welch's
Werror
Wether
@ -999,6 +1015,7 @@ Xeon
YAML
YAMLRegExpTree
YYYY
YYYYMMDD
YYYYMMDDToDate
YYYYMMDDhhmmssToDateTime
Yandex
@ -1570,6 +1587,7 @@ getSetting
getSizeOfEnumType
getblockinfo
getevents
ghcnd
github
glibc
globalIn
@ -1954,6 +1972,7 @@ ngramSimHashCaseInsensitiveUTF
ngramSimHashUTF
ngrambf
ngrams
noaa
nonNegativeDerivative
noop
normalizeQuery
@ -2210,6 +2229,7 @@ reinterpretAsString
reinterpretAsUInt
reinterpretAsUUID
remoteSecure
repivot
replaceAll
replaceOne
replaceRegexpAll
@ -2706,3 +2726,6 @@ znode
znodes
zookeeperSessionUptime
zstd
iTerm
shortkeys
Shortkeys