Merge branch 'master' into fix-keeper-apply-snapshot

This commit is contained in:
Antonio Andelic 2023-07-20 14:37:32 +02:00 committed by GitHub
commit 53c46e2dff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
77 changed files with 1343 additions and 253 deletions

View File

@ -473,6 +473,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`.
- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`.
## CSVWithNames {#csvwithnames}

View File

@ -989,6 +989,28 @@ Result
a b
```
### input_format_csv_use_default_on_bad_values {#input_format_csv_use_default_on_bad_values}
Allow to set default value to column when CSV field deserialization failed on bad value
Default value: `false`.
**Examples**
Query
```bash
./clickhouse local -q "create table test_tbl (x String, y UInt32, z Date) engine=MergeTree order by x"
echo 'a,b,c' | ./clickhouse local -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV"
./clickhouse local -q "select * from test_tbl"
```
Result
```text
a 0 1971-01-01
```
## Values format settings {#values-format-settings}
### input_format_values_interpret_expressions {#input_format_values_interpret_expressions}

View File

@ -1138,6 +1138,8 @@ Result:
Returns the current date and time at the moment of query analysis. The function is a constant expression.
Alias: `current_timestamp`.
**Syntax**
``` sql
@ -1268,6 +1270,8 @@ Result:
Accepts zero arguments and returns the current date at one of the moments of query analysis.
The same as toDate(now()).
Aliases: `curdate`, `current_date`.
## yesterday
Accepts zero arguments and returns yesterdays date at one of the moments of query analysis.

View File

@ -88,3 +88,4 @@ endfunction()
add_rust_subdirectory (BLAKE3)
add_rust_subdirectory (skim)
add_rust_subdirectory (prql)

3
rust/prql/CMakeLists.txt Normal file
View File

@ -0,0 +1,3 @@
clickhouse_import_crate(MANIFEST_PATH Cargo.toml)
target_include_directories(_ch_rust_prql INTERFACE include)
add_library(ch_rust::prql ALIAS _ch_rust_prql)

569
rust/prql/Cargo.lock generated Normal file
View File

@ -0,0 +1,569 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "_ch_rust_prql"
version = "0.1.0"
dependencies = [
"prql-compiler",
"serde_json",
]
[[package]]
name = "addr2line"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3"
dependencies = [
"gimli",
]
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "ahash"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
dependencies = [
"getrandom",
"once_cell",
"version_check",
]
[[package]]
name = "aho-corasick"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
dependencies = [
"memchr",
]
[[package]]
name = "anyhow"
version = "1.0.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"
dependencies = [
"backtrace",
]
[[package]]
name = "ariadne"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "367fd0ad87307588d087544707bc5fbf4805ded96c7db922b70d368fa1cb5702"
dependencies = [
"unicode-width",
"yansi",
]
[[package]]
name = "backtrace"
version = "0.3.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12"
dependencies = [
"addr2line",
"cc",
"cfg-if",
"libc",
"miniz_oxide",
"object",
"rustc-demangle",
]
[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chumsky"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d"
dependencies = [
"hashbrown 0.12.3",
"stacker",
]
[[package]]
name = "csv"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086"
dependencies = [
"csv-core",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
dependencies = [
"memchr",
]
[[package]]
name = "either"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
[[package]]
name = "enum-as-inner"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "equivalent"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88bffebc5d80432c9b140ee17875ff173a8ab62faad5b257da912bd2f6c1c0a1"
[[package]]
name = "getrandom"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "gimli"
version = "0.27.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
dependencies = [
"ahash",
]
[[package]]
name = "hashbrown"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "indexmap"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d"
dependencies = [
"equivalent",
"hashbrown 0.14.0",
]
[[package]]
name = "itertools"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.147"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
[[package]]
name = "log"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4"
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
dependencies = [
"adler",
]
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "object"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1"
dependencies = [
"memchr",
]
[[package]]
name = "once_cell"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "proc-macro2"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb"
dependencies = [
"unicode-ident",
]
[[package]]
name = "prql-compiler"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c99b52154002ac7f286dd2293c2f8d4e30526c1d396b14deef5ada1deef3c9ff"
dependencies = [
"anyhow",
"ariadne",
"chumsky",
"csv",
"enum-as-inner",
"itertools",
"lazy_static",
"log",
"once_cell",
"regex",
"semver",
"serde",
"serde_json",
"serde_yaml",
"sqlformat",
"sqlparser",
"strum",
"strum_macros",
]
[[package]]
name = "psm"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
dependencies = [
"cc",
]
[[package]]
name = "quote"
version = "1.0.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89089e897c013b3deb627116ae56a6955a72b8bed395c9526af31c9fe528b484"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa250384981ea14565685dea16a9ccc4d1c541a13f82b9c168572264d1df8c56"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846"
[[package]]
name = "rustc-demangle"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "rustversion"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc31bd9b61a32c31f9650d18add92aa83a49ba979c143eefd27fe7177b05bd5f"
[[package]]
name = "ryu"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9"
[[package]]
name = "semver"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed"
dependencies = [
"serde",
]
[[package]]
name = "serde"
version = "1.0.166"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d01b7404f9d441d3ad40e6a636a7782c377d2abdbe4fa2440e2edcc2f4f10db8"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.166"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5dd83d6dde2b6b2d466e14d9d1acce8816dedee94f735eac6395808b3483c6d6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.23",
]
[[package]]
name = "serde_json"
version = "1.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f1e14e89be7aa4c4b78bdbdc9eb5bf8517829a600ae8eaa39a6e1d960b5185c"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "serde_yaml"
version = "0.9.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "452e67b9c20c37fa79df53201dc03839651086ed9bbe92b3ca585ca9fdaa7d85"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",
"unsafe-libyaml",
]
[[package]]
name = "sqlformat"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e"
dependencies = [
"itertools",
"nom",
"unicode_categories",
]
[[package]]
name = "sqlparser"
version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a"
dependencies = [
"log",
"serde",
]
[[package]]
name = "stacker"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
dependencies = [
"cc",
"cfg-if",
"libc",
"psm",
"winapi",
]
[[package]]
name = "strum"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
dependencies = [
"strum_macros",
]
[[package]]
name = "strum_macros"
version = "0.24.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn 1.0.109",
]
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73"
[[package]]
name = "unicode-width"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]]
name = "unicode_categories"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
[[package]]
name = "unsafe-libyaml"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1865806a559042e51ab5414598446a5871b561d21b6764f2eabb0dd481d880a6"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "yansi"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"

20
rust/prql/Cargo.toml Normal file
View File

@ -0,0 +1,20 @@
[package]
name = "_ch_rust_prql"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
prql-compiler = "0.8.1"
serde_json = "1.0"
[lib]
crate-type = ["staticlib"]
[profile.release]
debug = true
[profile.release-thinlto]
inherits = "release"
lto = true

18
rust/prql/include/prql.h Normal file
View File

@ -0,0 +1,18 @@
#pragma once
#include <cstdint>
extern "C" {
/// Converts a PRQL query to an SQL query.
/// @param query is a pointer to the beginning of the PRQL query.
/// @param size is the size of the PRQL query.
/// @param out is a pointer to a uint8_t pointer which will be set to the beginning of the null terminated SQL query or the error message.
/// @param out_size is the size of the string pointed by `out`.
/// @returns zero in case of success, non-zero in case of failure.
int64_t prql_to_sql(const uint8_t * query, uint64_t size, uint8_t ** out, uint64_t * out_size);
/// Frees the passed in pointer which's memory was allocated by Rust allocators previously.
void prql_free_pointer(uint8_t * ptr_to_free);
} // extern "C"

56
rust/prql/src/lib.rs Normal file
View File

@ -0,0 +1,56 @@
use prql_compiler::sql::Dialect;
use prql_compiler::{Options, Target};
use std::ffi::{c_char, CString};
use std::slice;
fn set_output(result: String, out: *mut *mut u8, out_size: *mut u64) {
assert!(!out_size.is_null());
let out_size_ptr = unsafe { &mut *out_size };
*out_size_ptr = (result.len() + 1).try_into().unwrap();
assert!(!out.is_null());
let out_ptr = unsafe { &mut *out };
*out_ptr = CString::new(result).unwrap().into_raw() as *mut u8;
}
#[no_mangle]
pub unsafe extern "C" fn prql_to_sql(
query: *const u8,
size: u64,
out: *mut *mut u8,
out_size: *mut u64,
) -> i64 {
let query_vec = unsafe { slice::from_raw_parts(query, size.try_into().unwrap()) }.to_vec();
let maybe_prql_query = String::from_utf8(query_vec);
if maybe_prql_query.is_err() {
set_output(
String::from("The PRQL query must be UTF-8 encoded!"),
out,
out_size,
);
return 1;
}
let prql_query = maybe_prql_query.unwrap();
let opts = &Options {
format: true,
target: Target::Sql(Some(Dialect::ClickHouse)),
signature_comment: false,
color: false,
};
let (is_err, res) = match prql_compiler::compile(&prql_query, &opts) {
Ok(sql_str) => (false, sql_str),
Err(err) => (true, err.to_string()),
};
set_output(res, out, out_size);
match is_err {
true => 1,
false => 0,
}
}
#[no_mangle]
pub unsafe extern "C" fn prql_free_pointer(ptr_to_free: *mut u8) {
std::mem::drop(CString::from_raw(ptr_to_free as *mut c_char));
}

View File

@ -46,6 +46,7 @@
#include <Parsers/ASTColumnDeclaration.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <Parsers/PRQL/ParserPRQLQuery.h>
#include <Processors/Formats/Impl/NullFormat.h>
#include <Processors/Formats/IInputFormat.h>
@ -72,6 +73,7 @@
#include <iostream>
#include <filesystem>
#include <map>
#include <memory>
#include <unordered_map>
#include "config_version.h"
@ -338,6 +340,8 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu
if (dialect == Dialect::kusto)
parser = std::make_unique<ParserKQLStatement>(end, global_context->getSettings().allow_settings_after_format_in_insert);
else if (dialect == Dialect::prql)
parser = std::make_unique<ParserPRQLQuery>(max_length, settings.max_parser_depth);
else
parser = std::make_unique<ParserQuery>(end, global_context->getSettings().allow_settings_after_format_in_insert);

View File

@ -105,6 +105,8 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
for (auto it = addresses.begin(); it != addresses.end();)
{
have_more_addresses_to_connect = it != std::prev(addresses.end());
if (connected)
disconnect();

View File

@ -159,6 +159,8 @@ public:
out->setAsyncCallback(async_callback);
}
bool haveMoreAddressesToConnect() const { return have_more_addresses_to_connect; }
private:
String host;
UInt16 port;
@ -227,6 +229,8 @@ private:
std::shared_ptr<WriteBuffer> maybe_compressed_out;
std::unique_ptr<NativeWriter> block_out;
bool have_more_addresses_to_connect = false;
/// Logger is created lazily, for avoid to run DNS request in constructor.
class LoggerWrapper
{

View File

@ -179,7 +179,7 @@ bool ConnectionEstablisherAsync::checkTimeout()
is_timeout_alarmed = true;
}
if (is_timeout_alarmed && !is_socket_ready)
if (is_timeout_alarmed && !is_socket_ready && !haveMoreAddressesToConnect())
{
/// In not async case timeout exception would be thrown and caught in ConnectionEstablisher::run,
/// but in async case we process timeout outside and cannot throw exception. So, we just save fail message.
@ -225,6 +225,11 @@ void ConnectionEstablisherAsync::resetResult()
}
}
bool ConnectionEstablisherAsync::haveMoreAddressesToConnect()
{
return !result.entry.isNull() && result.entry->haveMoreAddressesToConnect();
}
#endif
}

View File

@ -104,6 +104,8 @@ private:
void resetResult();
bool haveMoreAddressesToConnect();
ConnectionEstablisher connection_establisher;
TryResult result;
std::string fail_message;

View File

@ -1,26 +1,4 @@
#include "Allocator.h"
/** Keep definition of this constant in cpp file; otherwise its value
* is inlined into allocator code making it impossible to override it
* in third-party code.
*
* Note: extern may seem redundant, but is actually needed due to bug in GCC.
* See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html
*/
#ifdef NDEBUG
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 128 * (1ULL << 20);
#else
/**
* In debug build, use small mmap threshold to reproduce more memory
* stomping bugs. Along with ASLR it will hopefully detect more issues than
* ASan. The program may fail due to the limit on number of memory mappings.
*
* Not too small to avoid too quick exhaust of memory mappings.
*/
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 16384;
#endif
template class Allocator<false, false>;
template class Allocator<true, false>;
template class Allocator<false, true>;
template class Allocator<true, true>;
template class Allocator<false>;
template class Allocator<true>;

View File

@ -36,51 +36,26 @@
#include <Common/Allocator_fwd.h>
/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
/**
* Many modern allocators (for example, tcmalloc) do not do a mremap for
* realloc, even in case of large enough chunks of memory. Although this allows
* you to increase performance and reduce memory consumption during realloc.
* To fix this, we do mremap manually if the chunk of memory is large enough.
* The threshold (64 MB) is chosen quite large, since changing the address
* space is very slow, especially in the case of a large number of threads. We
* expect that the set of operations mmap/something to do/mremap can only be
* performed about 1000 times per second.
*
* P.S. This is also required, because tcmalloc can not allocate a chunk of
* memory greater than 16 GB.
*
* P.P.S. Note that MMAP_THRESHOLD symbol is intentionally made weak. It allows
* to override it during linkage when using ClickHouse as a library in
* third-party applications which may already use own allocator doing mmaps
* in the implementation of alloc/realloc.
*/
extern const size_t MMAP_THRESHOLD;
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
namespace CurrentMetrics
{
extern const Metric MMappedAllocs;
extern const Metric MMappedAllocBytes;
}
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int CANNOT_MUNMAP;
extern const int CANNOT_MREMAP;
extern const int LOGICAL_ERROR;
}
}
/** Previously there was a code which tried to use manual mmap and mremap (clickhouse_mremap.h) for large allocations/reallocations (64MB+).
* Most modern allocators (including jemalloc) don't use mremap, so the idea was to take advantage from mremap system call for large reallocs.
* Actually jemalloc had support for mremap, but it was intentionally removed from codebase https://github.com/jemalloc/jemalloc/commit/e2deab7a751c8080c2b2cdcfd7b11887332be1bb.
* Our performance tests also shows that without manual mmap/mremap/munmap clickhouse is overall faster for about 1-2% and up to 5-7x for some types of queries.
* That is why we don't do manuall mmap/mremap/munmap here and completely rely on jemalloc for allocations of any size.
*/
/** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
* Also used in hash tables.
* The interface is different from std::allocator
@ -88,10 +63,8 @@ namespace ErrorCodes
* - passing the size into the `free` method;
* - by the presence of the `alignment` argument;
* - the possibility of zeroing memory (used in hash tables);
* - random hint address for mmap
* - mmap_threshold for using mmap less or more
*/
template <bool clear_memory_, bool mmap_populate>
template <bool clear_memory_>
class Allocator
{
public:
@ -109,7 +82,7 @@ public:
try
{
checkSize(size);
freeNoTrack(buf, size);
freeNoTrack(buf);
CurrentMemoryTracker::free(size);
}
catch (...)
@ -132,49 +105,26 @@ public:
/// nothing to do.
/// BTW, it's not possible to change alignment while doing realloc.
}
else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD
&& alignment <= MALLOC_MIN_ALIGNMENT)
else if (alignment <= MALLOC_MIN_ALIGNMENT)
{
/// Resize malloc'd memory region with no special alignment requirement.
CurrentMemoryTracker::realloc(old_size, new_size);
void * new_buf = ::realloc(buf, new_size);
if (nullptr == new_buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
{
DB::throwFromErrno(
fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
}
buf = new_buf;
if constexpr (clear_memory)
if (new_size > old_size)
memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
}
else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
{
/// Resize mmap'd memory region.
CurrentMemoryTracker::realloc(old_size, new_size);
// On apple and freebsd self-implemented mremap used (common/mremap.h)
buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE,
PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
if (MAP_FAILED == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.",
ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP);
/// No need for zero-fill, because mmap guarantees it.
}
else if (new_size < MMAP_THRESHOLD)
{
/// Small allocs that requires a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once.
CurrentMemoryTracker::realloc(old_size, new_size);
void * new_buf = allocNoTrack(new_size, alignment);
memcpy(new_buf, buf, std::min(old_size, new_size));
freeNoTrack(buf, old_size);
buf = new_buf;
}
else
{
/// Big allocs that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods.
void * new_buf = alloc(new_size, alignment);
memcpy(new_buf, buf, std::min(old_size, new_size));
free(buf, old_size);
@ -192,83 +142,38 @@ protected:
static constexpr bool clear_memory = clear_memory_;
// Freshly mmapped pages are copy-on-write references to a global zero page.
// On the first write, a page fault occurs, and an actual writable page is
// allocated. If we are going to use this memory soon, such as when resizing
// hash tables, it makes sense to pre-fault the pages by passing
// MAP_POPULATE to mmap(). This takes some time, but should be faster
// overall than having a hot loop interrupted by page faults.
// It is only supported on Linux.
static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS
#if defined(OS_LINUX)
| (mmap_populate ? MAP_POPULATE : 0)
#endif
;
private:
void * allocNoTrack(size_t size, size_t alignment)
{
void * buf;
size_t mmap_min_alignment = ::getPageSize();
if (size >= MMAP_THRESHOLD)
if (alignment <= MALLOC_MIN_ALIGNMENT)
{
if (alignment > mmap_min_alignment)
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS,
"Too large alignment {}: more than page size when allocating {}.",
ReadableSize(alignment), ReadableSize(size));
if constexpr (clear_memory)
buf = ::calloc(size, 1);
else
buf = ::malloc(size);
buf = mmap(getMmapHint(), size, PROT_READ | PROT_WRITE,
mmap_flags, -1, 0);
if (MAP_FAILED == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot mmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
/// No need for zero-fill, because mmap guarantees it.
CurrentMetrics::add(CurrentMetrics::MMappedAllocs);
CurrentMetrics::add(CurrentMetrics::MMappedAllocBytes, size);
if (nullptr == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
}
else
{
if (alignment <= MALLOC_MIN_ALIGNMENT)
{
if constexpr (clear_memory)
buf = ::calloc(size, 1);
else
buf = ::malloc(size);
buf = nullptr;
int res = posix_memalign(&buf, alignment, size);
if (nullptr == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
}
else
{
buf = nullptr;
int res = posix_memalign(&buf, alignment, size);
if (0 != res)
DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)),
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
if (0 != res)
DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)),
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
if constexpr (clear_memory)
memset(buf, 0, size);
}
if constexpr (clear_memory)
memset(buf, 0, size);
}
return buf;
}
void freeNoTrack(void * buf, size_t size)
void freeNoTrack(void * buf)
{
if (size >= MMAP_THRESHOLD)
{
if (0 != munmap(buf, size))
DB::throwFromErrno(fmt::format("Allocator: Cannot munmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_MUNMAP);
CurrentMetrics::sub(CurrentMetrics::MMappedAllocs);
CurrentMetrics::sub(CurrentMetrics::MMappedAllocBytes, size);
}
else
{
::free(buf);
}
::free(buf);
}
void checkSize(size_t size)
@ -277,21 +182,6 @@ private:
if (size >= 0x8000000000000000ULL)
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to allocator. It indicates an error.", size);
}
#ifndef NDEBUG
/// In debug builds, request mmap() at random addresses (a kind of ASLR), to
/// reproduce more memory stomping bugs. Note that Linux doesn't do it by
/// default. This may lead to worse TLB performance.
void * getMmapHint()
{
return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(thread_local_rng));
}
#else
void * getMmapHint()
{
return nullptr;
}
#endif
};
@ -367,7 +257,5 @@ constexpr size_t allocatorInitialBytes<AllocatorWithStackMemory<
/// Prevent implicit template instantiation of Allocator
extern template class Allocator<false, false>;
extern template class Allocator<true, false>;
extern template class Allocator<false, true>;
extern template class Allocator<true, true>;
extern template class Allocator<false>;
extern template class Allocator<true>;

View File

@ -3,7 +3,7 @@
* This file provides forward declarations for Allocator.
*/
template <bool clear_memory_, bool mmap_populate = false>
template <bool clear_memory_>
class Allocator;
template <typename Base, size_t N = 64, size_t Alignment = 1>

View File

@ -173,8 +173,6 @@
M(PartsInMemory, "In-memory parts.") \
M(MMappedFiles, "Total number of mmapped files.") \
M(MMappedFileBytes, "Sum size of mmapped file regions.") \
M(MMappedAllocs, "Total number of mmapped allocations") \
M(MMappedAllocBytes, "Sum bytes of mmapped allocations") \
M(AsynchronousReadWait, "Number of threads waiting for asynchronous read.") \
M(PendingAsyncInsert, "Number of asynchronous inserts that are waiting for flush.") \
M(KafkaConsumers, "Number of active Kafka consumers") \

View File

@ -8,7 +8,7 @@
* table, so it makes sense to pre-fault the pages so that page faults don't
* interrupt the resize loop. Set the allocator parameter accordingly.
*/
using HashTableAllocator = Allocator<true /* clear_memory */, true /* mmap_populate */>;
using HashTableAllocator = Allocator<true /* clear_memory */>;
template <size_t initial_bytes = 64>
using HashTableAllocatorWithStackMemory = AllocatorWithStackMemory<HashTableAllocator, initial_bytes>;

View File

@ -3,8 +3,10 @@
#include "KeeperException.h"
#include "TestKeeper.h"
#include <functional>
#include <filesystem>
#include <functional>
#include <ranges>
#include <vector>
#include <Common/ZooKeeper/Types.h>
#include <Common/ZooKeeper/ZooKeeperCommon.h>
@ -350,15 +352,35 @@ void ZooKeeper::createIfNotExists(const std::string & path, const std::string &
void ZooKeeper::createAncestors(const std::string & path)
{
size_t pos = 1;
std::string data;
std::string path_created; // Ignored
std::vector<std::string> pending_nodes;
size_t last_pos = path.rfind('/');
if (last_pos == std::string::npos || last_pos == 0)
return;
std::string current_node = path.substr(0, last_pos);
while (true)
{
pos = path.find('/', pos);
if (pos == std::string::npos)
Coordination::Error code = createImpl(current_node, data, CreateMode::Persistent, path_created);
if (code == Coordination::Error::ZNONODE)
{
/// The parent node doesn't exist. Save the current node and try with the parent
last_pos = current_node.rfind('/');
if (last_pos == std::string::npos || last_pos == 0)
throw KeeperException(code, path);
pending_nodes.emplace_back(std::move(current_node));
current_node = path.substr(0, last_pos);
}
else if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS)
break;
createIfNotExists(path.substr(0, pos), "");
++pos;
else
throw KeeperException(code, path);
}
for (const std::string & pending : pending_nodes | std::views::reverse)
createIfNotExists(pending, data);
}
void ZooKeeper::checkExistsAndGetCreateAncestorsOps(const std::string & path, Coordination::Requests & requests)

View File

@ -54,6 +54,7 @@
#cmakedefine01 USE_BORINGSSL
#cmakedefine01 USE_BLAKE3
#cmakedefine01 USE_SKIM
#cmakedefine01 USE_PRQL
#cmakedefine01 USE_OPENSSL_INTREE
#cmakedefine01 USE_ULID
#cmakedefine01 FIU_ENABLE

View File

@ -874,6 +874,7 @@ class IColumn;
M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \
M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \
M(Bool, input_format_csv_trim_whitespaces, true, "Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings", 0) \
M(Bool, input_format_csv_use_default_on_bad_values, false, "Allow to set default value to column when CSV field deserialization failed on bad value", 0) \
M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \
M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \
M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \

View File

@ -138,7 +138,9 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation, ErrorCodes::BAD_ARGUMENTS,
IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS,
{{"clickhouse", Dialect::clickhouse},
{"kusto", Dialect::kusto}})
{"kusto", Dialect::kusto},
{"kusto", Dialect::kusto},
{"prql", Dialect::prql}})
// FIXME: do not add 'kusto_auto' to the list. Maybe remove it from code completely?
IMPLEMENT_SETTING_ENUM(ParallelReplicasCustomKeyFilterType, ErrorCodes::BAD_ARGUMENTS,

View File

@ -207,6 +207,7 @@ enum class Dialect
clickhouse,
kusto,
kusto_auto,
prql,
};
DECLARE_SETTING_ENUM(Dialect)

View File

@ -73,6 +73,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces;
format_settings.csv.allow_whitespace_or_tab_as_delimiter = settings.input_format_csv_allow_whitespace_or_tab_as_delimiter;
format_settings.csv.allow_variable_number_of_columns = settings.input_format_csv_allow_variable_number_of_columns;
format_settings.csv.use_default_on_bad_values = settings.input_format_csv_use_default_on_bad_values;
format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter;
format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter;
format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter;

View File

@ -152,6 +152,7 @@ struct FormatSettings
bool trim_whitespaces = true;
bool allow_whitespace_or_tab_as_delimiter = false;
bool allow_variable_number_of_columns = false;
bool use_default_on_bad_values = false;
} csv;
struct HiveText

View File

@ -20,6 +20,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
extern const int BAD_ARGUMENTS;
}
namespace
@ -108,6 +109,12 @@ public:
/// S2 acceptes point as (latitude, longitude)
S2LatLng lat_lng = S2LatLng::FromDegrees(lat, lon);
if (!lat_lng.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Point is invalid. For valid point the latitude is between -90 and 90 degrees inclusive"
"and the longitude is between -180 and 180 degrees inclusive.");
S2CellId id(lat_lng);
dst_data[row] = id.id();

View File

@ -138,6 +138,7 @@ private:
REGISTER_FUNCTION(Now)
{
factory.registerFunction<NowOverloadResolver>({}, FunctionFactory::CaseInsensitive);
factory.registerAlias("current_timestamp", NowOverloadResolver::name, FunctionFactory::CaseInsensitive);
}
}

View File

@ -114,13 +114,18 @@ public:
const auto hi = S2CellId(data_hi[row]);
const auto point = S2CellId(data_point[row]);
if (!lo.is_valid() || !hi.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Rectangle is not valid");
S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng());
if (!point.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid");
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Point is invalid. For valid point the latitude is between -90 and 90 degrees inclusive "
"and the longitude is between -180 and 180 degrees inclusive.");
S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng());
if (!rect.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Rectangle is invalid. For valid rectangles the latitude bounds do not exceed "
"Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. "
"Also, if either the latitude or longitude bound is empty then both must be. ");
rect.AddPoint(point.ToPoint());

View File

@ -107,13 +107,18 @@ public:
const auto hi = S2CellId(data_hi[row]);
const auto point = S2CellId(data_point[row]);
if (!lo.is_valid() || !hi.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Rectangle is not valid");
S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng());
if (!point.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid");
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Point is invalid. For valid point the latitude is between -90 and 90 degrees inclusive "
"and the longitude is between -180 and 180 degrees inclusive.");
S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng());
if (!rect.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Rectangle is invalid. For valid rectangles the latitude bounds do not exceed "
"Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. "
"Also, if either the latitude or longitude bound is empty then both must be. ");
dst_data.emplace_back(rect.Contains(point.ToLatLng()));
}

View File

@ -128,15 +128,15 @@ public:
const auto lo2 = S2CellId(data_lo2[row]);
const auto hi2 = S2CellId(data_hi2[row]);
if (!lo1.is_valid() || !hi1.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "First rectangle is not valid");
if (!lo2.is_valid() || !hi2.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second rectangle is not valid");
S2LatLngRect rect1(lo1.ToLatLng(), hi1.ToLatLng());
S2LatLngRect rect2(lo2.ToLatLng(), hi2.ToLatLng());
if (!rect1.is_valid() || !rect2.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Rectangle is invalid. For valid rectangles the latitude bounds do not exceed "
"Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. "
"Also, if either the latitude or longitude bound is empty then both must be.");
S2LatLngRect rect_intersection = rect1.Intersection(rect2);
vec_res_first.emplace_back(S2CellId(rect_intersection.lo()).id());

View File

@ -126,15 +126,15 @@ public:
const auto lo2 = S2CellId(data_lo2[row]);
const auto hi2 = S2CellId(data_hi2[row]);
if (!lo1.is_valid() || !hi1.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "First rectangle is not valid");
if (!lo2.is_valid() || !hi2.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second rectangle is not valid");
S2LatLngRect rect1(lo1.ToLatLng(), hi1.ToLatLng());
S2LatLngRect rect2(lo2.ToLatLng(), hi2.ToLatLng());
if (!rect1.is_valid() || !rect2.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Rectangle is invalid. For valid rectangles the latitude bounds do not exceed "
"Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. "
"Also, if either the latitude or longitude bound is empty then both must be. ");
S2LatLngRect rect_union = rect1.Union(rect2);
vec_res_first.emplace_back(S2CellId(rect_union.lo()).id());

View File

@ -97,7 +97,7 @@ public:
const auto id = S2CellId(data_id[row]);
if (!id.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid");
throw Exception(ErrorCodes::BAD_ARGUMENTS, "CellId is invalid.");
S2Point point = id.ToPoint();
S2LatLng ll(point);

View File

@ -86,6 +86,8 @@ public:
REGISTER_FUNCTION(Today)
{
factory.registerFunction<TodayOverloadResolver>();
factory.registerAlias("current_date", TodayOverloadResolver::name, FunctionFactory::CaseInsensitive);
factory.registerAlias("curdate", TodayOverloadResolver::name, FunctionFactory::CaseInsensitive);
}
}

View File

@ -337,7 +337,7 @@ public:
{
{
std::lock_guard lock(mutex);
queue.emplace(file_segment->key(), file_segment->offset(), file_segment);
queue.push(DownloadInfo{file_segment->key(), file_segment->offset(), file_segment});
}
CurrentMetrics::add(CurrentMetrics::FilesystemCacheDownloadQueueElements);

View File

@ -6,6 +6,7 @@
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/IAST.h>
#include <Parsers/formatAST.h>
#include <Common/logger_useful.h>
#include <Common/ProfileEvents.h>
#include <Common/SipHash.h>
@ -115,6 +116,13 @@ ASTPtr removeQueryCacheSettings(ASTPtr ast)
return transformed_ast;
}
String queryStringFromAST(ASTPtr ast)
{
WriteBufferFromOwnString buf;
formatAST(*ast, buf, /*hilite*/ false, /*one_line*/ true, /*show_secrets*/ false);
return buf.str();
}
}
QueryCache::Key::Key(
@ -129,6 +137,7 @@ QueryCache::Key::Key(
, is_shared(is_shared_)
, expires_at(expires_at_)
, is_compressed(is_compressed_)
, query_string(queryStringFromAST(ast_))
{
}
@ -142,15 +151,6 @@ bool QueryCache::Key::operator==(const Key & other) const
return ast->getTreeHash() == other.ast->getTreeHash();
}
String QueryCache::Key::queryStringFromAst() const
{
WriteBufferFromOwnString buf;
IAST::FormatSettings format_settings(buf, /*one_line*/ true);
format_settings.show_secrets = false;
ast->format(format_settings);
return buf.str();
}
size_t QueryCache::KeyHasher::operator()(const Key & key) const
{
SipHash hash;
@ -191,7 +191,7 @@ QueryCache::Writer::Writer(
if (auto entry = cache.getWithKey(key); entry.has_value() && !IsStale()(entry->key))
{
skip_insert = true; /// Key already contained in cache and did not expire yet --> don't replace it
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.queryStringFromAst());
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.query_string);
}
}
@ -263,14 +263,14 @@ void QueryCache::Writer::finalizeWrite()
if (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - query_start_time) < min_query_runtime)
{
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query not expensive enough), query: {}", key.queryStringFromAst());
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query not expensive enough), query: {}", key.query_string);
return;
}
if (auto entry = cache.getWithKey(key); entry.has_value() && !IsStale()(entry->key))
{
/// Same check as in ctor because a parallel Writer could have inserted the current key in the meantime
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.queryStringFromAst());
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.query_string);
return;
}
@ -353,7 +353,7 @@ void QueryCache::Writer::finalizeWrite()
if ((new_entry_size_in_bytes > max_entry_size_in_bytes) || (new_entry_size_in_rows > max_entry_size_in_rows))
{
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query result too big), new_entry_size_in_bytes: {} ({}), new_entry_size_in_rows: {} ({}), query: {}", new_entry_size_in_bytes, max_entry_size_in_bytes, new_entry_size_in_rows, max_entry_size_in_rows, key.queryStringFromAst());
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query result too big), new_entry_size_in_bytes: {} ({}), new_entry_size_in_rows: {} ({}), query: {}", new_entry_size_in_bytes, max_entry_size_in_bytes, new_entry_size_in_rows, max_entry_size_in_rows, key.query_string);
return;
}
@ -388,7 +388,7 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar
if (!entry.has_value())
{
LOG_TRACE(&Poco::Logger::get("QueryCache"), "No entry found for query {}", key.queryStringFromAst());
LOG_TRACE(&Poco::Logger::get("QueryCache"), "No entry found for query {}", key.query_string);
return;
}
@ -397,13 +397,13 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar
if (!entry_key.is_shared && entry_key.user_name != key.user_name)
{
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Inaccessible entry found for query {}", key.queryStringFromAst());
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Inaccessible entry found for query {}", key.query_string);
return;
}
if (IsStale()(entry_key))
{
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Stale entry found for query {}", key.queryStringFromAst());
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Stale entry found for query {}", key.query_string);
return;
}
@ -441,7 +441,7 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar
buildSourceFromChunks(entry_key.header, std::move(decompressed_chunks), entry_mapped->totals, entry_mapped->extremes);
}
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Entry found for query {}", key.queryStringFromAst());
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Entry found for query {}", key.query_string);
}
bool QueryCache::Reader::hasCacheEntryForKey() const

View File

@ -30,7 +30,7 @@ public:
/// ----------------------------------------------------
/// The actual key (data which gets hashed):
/// Unlike the query string, the AST is agnostic to lower/upper case (SELECT vs. select)
/// Unlike the query string, the AST is agnostic to lower/upper case (SELECT vs. select).
const ASTPtr ast;
/// Note: For a transactionally consistent cache, we would need to include the system settings in the cache key or invalidate the
@ -58,6 +58,11 @@ public:
/// (we could theoretically apply compression also to the totals and extremes but it's an obscure use case)
const bool is_compressed;
/// The SELECT query as plain string, displayed in SYSTEM.QUERY_CACHE. Stored explicitly, i.e. not constructed from the AST, for the
/// sole reason that QueryCache-related SETTINGS are pruned from the AST (see removeQueryCacheSettings()) which will look ugly in
/// the SYSTEM.QUERY_CACHE.
const String query_string;
/// Ctor to construct a Key for writing into query cache.
Key(ASTPtr ast_,
Block header_,
@ -69,7 +74,6 @@ public:
Key(ASTPtr ast_, const String & user_name_);
bool operator==(const Key & other) const;
String queryStringFromAst() const;
};
struct Entry

View File

@ -697,6 +697,7 @@ DatabaseCatalog::DatabaseCatalog(ContextMutablePtr global_context_)
, loading_dependencies{"LoadingDeps"}
, view_dependencies{"ViewDeps"}
, log(&Poco::Logger::get("DatabaseCatalog"))
, first_async_drop_in_queue(tables_marked_dropped.end())
{
}
@ -959,9 +960,17 @@ void DatabaseCatalog::enqueueDroppedTableCleanup(StorageID table_id, StoragePtr
std::lock_guard lock(tables_marked_dropped_mutex);
if (ignore_delay)
tables_marked_dropped.push_front({table_id, table, dropped_metadata_path, drop_time});
{
/// Insert it before first_async_drop_in_queue, so sync drop queries will have priority over async ones,
/// but the queue will remain fair for multiple sync drop queries.
tables_marked_dropped.emplace(first_async_drop_in_queue, TableMarkedAsDropped{table_id, table, dropped_metadata_path, drop_time});
}
else
{
tables_marked_dropped.push_back({table_id, table, dropped_metadata_path, drop_time + drop_delay_sec});
if (first_async_drop_in_queue == tables_marked_dropped.end())
--first_async_drop_in_queue;
}
tables_marked_dropped_ids.insert(table_id.uuid);
CurrentMetrics::add(CurrentMetrics::TablesToDropQueueSize, 1);
@ -1012,6 +1021,8 @@ void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id)
/// This maybe throw exception.
renameNoReplace(latest_metadata_dropped_path, table_metadata_path);
if (first_async_drop_in_queue == it_dropped_table)
++first_async_drop_in_queue;
tables_marked_dropped.erase(it_dropped_table);
[[maybe_unused]] auto removed = tables_marked_dropped_ids.erase(dropped_table.table_id.uuid);
assert(removed);
@ -1074,6 +1085,8 @@ void DatabaseCatalog::dropTableDataTask()
table = std::move(*it);
LOG_INFO(log, "Have {} tables in drop queue ({} of them are in use), will try drop {}",
tables_marked_dropped.size(), tables_in_use_count, table.table_id.getNameForLogs());
if (first_async_drop_in_queue == it)
++first_async_drop_in_queue;
tables_marked_dropped.erase(it);
/// Schedule the task as soon as possible, while there are suitable tables to drop.
schedule_after_ms = 0;
@ -1110,6 +1123,8 @@ void DatabaseCatalog::dropTableDataTask()
table.drop_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) + drop_error_cooldown_sec;
std::lock_guard lock(tables_marked_dropped_mutex);
tables_marked_dropped.emplace_back(std::move(table));
if (first_async_drop_in_queue == tables_marked_dropped.end())
--first_async_drop_in_queue;
/// If list of dropped tables was empty, schedule a task to retry deletion.
if (tables_marked_dropped.size() == 1)
{

View File

@ -323,6 +323,7 @@ private:
mutable std::mutex ddl_guards_mutex;
TablesMarkedAsDropped tables_marked_dropped TSA_GUARDED_BY(tables_marked_dropped_mutex);
TablesMarkedAsDropped::iterator first_async_drop_in_queue TSA_GUARDED_BY(tables_marked_dropped_mutex);
std::unordered_set<UUID> tables_marked_dropped_ids TSA_GUARDED_BY(tables_marked_dropped_mutex);
mutable std::mutex tables_marked_dropped_mutex;

View File

@ -75,6 +75,7 @@
#include <random>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <Parsers/PRQL/ParserPRQLQuery.h>
namespace ProfileEvents
{
@ -702,10 +703,14 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
/// TODO: parser should fail early when max_query_size limit is reached.
ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth);
}
else if (settings.dialect == Dialect::prql && !internal)
{
ParserPRQLQuery parser(max_query_size, settings.max_parser_depth);
ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth);
}
else
{
ParserQuery parser(end, settings.allow_settings_after_format_in_insert);
/// TODO: parser should fail early when max_query_size limit is reached.
ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth);
}

View File

@ -4,8 +4,12 @@ add_headers_and_sources(clickhouse_parsers .)
add_headers_and_sources(clickhouse_parsers ./Access)
add_headers_and_sources(clickhouse_parsers ./MySQL)
add_headers_and_sources(clickhouse_parsers ./Kusto)
add_headers_and_sources(clickhouse_parsers ./PRQL)
add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources})
target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils)
if (TARGET ch_rust::prql)
target_link_libraries(clickhouse_parsers PRIVATE ch_rust::prql)
endif ()
if (USE_DEBUG_HELPERS)
# CMake generator expression will do insane quoting when it encounters special character like quotes, spaces, etc.

View File

@ -0,0 +1,86 @@
#include <string>
#include <Parsers/PRQL/ParserPRQLQuery.h>
#include "Parsers/Lexer.h"
#include "config.h"
#if USE_PRQL
# include <prql.h>
#endif
#include <Parsers/ParserQuery.h>
#include <Parsers/ParserSetQuery.h>
#include <Parsers/parseQuery.h>
#include <base/scope_guard.h>
namespace DB
{
namespace ErrorCodes
{
extern const int SYNTAX_ERROR;
extern const int SUPPORT_IS_DISABLED;
}
bool ParserPRQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ParserSetQuery set_p;
if (set_p.parse(pos, node, expected))
return true;
#if !USE_PRQL
throw Exception(
ErrorCodes::SUPPORT_IS_DISABLED, "PRQL is not available. Rust code or PRQL itself may be disabled. Use another dialect!");
#else
const auto * begin = pos->begin;
// The same parsers are used in the client and the server, so the parser have to detect the end of a single query in case of multiquery queries
while (!pos->isEnd() && pos->type != TokenType::Semicolon)
++pos;
const auto * end = pos->begin;
uint8_t * sql_query_ptr{nullptr};
uint64_t sql_query_size{0};
const auto res
= prql_to_sql(reinterpret_cast<const uint8_t *>(begin), static_cast<uint64_t>(end - begin), &sql_query_ptr, &sql_query_size);
SCOPE_EXIT({ prql_free_pointer(sql_query_ptr); });
const auto * sql_query_char_ptr = reinterpret_cast<char *>(sql_query_ptr);
const auto * const original_sql_query_ptr = sql_query_char_ptr;
if (res != 0)
{
throw Exception(ErrorCodes::SYNTAX_ERROR, "PRQL syntax error: '{}'", sql_query_char_ptr);
}
chassert(sql_query_size > 0);
ParserQuery query_p(end, false);
String error_message;
node = tryParseQuery(
query_p,
sql_query_char_ptr,
sql_query_char_ptr + sql_query_size - 1,
error_message,
false,
"",
false,
max_query_size,
max_parser_depth);
if (!node)
throw Exception(
ErrorCodes::SYNTAX_ERROR,
"Error while parsing the SQL query generated from PRQL query :'{}'.\nPRQL Query:'{}'\nSQL query: '{}'",
error_message,
std::string_view{begin, end},
std::string_view(original_sql_query_ptr, original_sql_query_ptr + sql_query_size));
return true;
#endif
}
}

View File

@ -0,0 +1,27 @@
#pragma once
#include <Parsers/IParserBase.h>
namespace DB
{
// Even when PRQL is disabled, it is not possible to exclude this parser because changing the dialect via `SET dialect = '...'` queries should succeed.
// Another solution would be disabling setting the dialect to PRQL, but it requires a lot of finicky conditional compiling around the Dialect setting enum.
// Therefore the decision, for now, is to use this parser even when PRQL is disabled to enable users to switch to another dialect.
class ParserPRQLQuery final : public IParserBase
{
private:
// These fields are not used when PRQL is disabled at build time.
[[maybe_unused]] size_t max_query_size;
[[maybe_unused]] size_t max_parser_depth;
public:
ParserPRQLQuery(size_t max_query_size_, size_t max_parser_depth_) : max_query_size{max_query_size_}, max_parser_depth{max_parser_depth_}
{
}
const char * getName() const override { return "PRQL Statement"; }
protected:
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
}

View File

@ -4,9 +4,9 @@
namespace DB
{
void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite, bool one_line)
void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite, bool one_line, bool show_secrets)
{
IAST::FormatSettings settings(buf, one_line);
IAST::FormatSettings settings(buf, one_line, show_secrets);
settings.hilite = hilite;
ast.format(settings);

View File

@ -11,7 +11,7 @@ class WriteBuffer;
/** Takes a syntax tree and turns it back into text.
* In case of INSERT query, the data will be missing.
*/
void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite = true, bool one_line = false);
void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite = true, bool one_line = false, bool show_secrets = true);
String serializeAST(const IAST & ast, bool one_line = true);

View File

@ -14,6 +14,7 @@
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/PRQL/ParserPRQLQuery.h>
#include <string_view>
#include <regex>
#include <gtest/gtest.h>
@ -476,3 +477,22 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest,
"SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')"
}
})));
static constexpr size_t kDummyMaxQuerySize = 256 * 1024;
static constexpr size_t kDummyMaxParserDepth = 256;
INSTANTIATE_TEST_SUITE_P(
ParserPRQL,
ParserTest,
::testing::Combine(
::testing::Values(std::make_shared<ParserPRQLQuery>(kDummyMaxQuerySize, kDummyMaxParserDepth)),
::testing::ValuesIn(std::initializer_list<ParserTestCase>{
{
"from albums\ngroup [author_id] (\n aggregate [first_pushlied = min published]\n)\njoin a=author side:left [==author_id]\njoin p=purchases side:right [==author_id]\ngroup [a.id, p.purchase_id] (\n aggregate [avg_sell = min first_pushlied]\n)",
"WITH table_1 AS\n (\n SELECT\n MIN(published) AS _expr_0,\n author_id\n FROM albums\n GROUP BY author_id\n )\nSELECT\n a.id,\n p.purchase_id,\n MIN(table_0._expr_0) AS avg_sell\nFROM table_1 AS table_0\nLEFT JOIN author AS a ON table_0.author_id = a.author_id\nRIGHT JOIN purchases AS p ON table_0.author_id = p.author_id\nGROUP BY\n a.id,\n p.purchase_id",
},
{
"from matches\nfilter start_date > @2023-05-30 # Some comment here\nderive [\n some_derived_value_1 = a + (b ?? 0), # And there\n some_derived_value_2 = c + some_derived_value\n]\nfilter some_derived_value_2 > 0\ngroup [country, city] (\n aggregate [\n average some_derived_value_2,\n aggr = max some_derived_value_2,\n ]\n)\nderive place = f\"{city} in {country}\"\nderive country_code = s\"LEFT(country, 2)\"\nsort [aggr, -country]\ntake 1..20",
"WITH\n table_3 AS\n (\n SELECT\n country,\n city,\n c + some_derived_value AS _expr_1\n FROM matches\n WHERE start_date > toDate('2023-05-30')\n ),\n table_1 AS\n (\n SELECT\n country,\n city,\n AVG(_expr_1) AS _expr_0,\n MAX(_expr_1) AS aggr\n FROM table_3 AS table_2\n WHERE _expr_1 > 0\n GROUP BY\n country,\n city\n )\nSELECT\n country,\n city,\n _expr_0,\n aggr,\n CONCAT(city, ' in ', country) AS place,\n LEFT(country, 2) AS country_code\nFROM table_1 AS table_0\nORDER BY\n aggr ASC,\n country DESC\nLIMIT 20",
},
})));

View File

@ -1,4 +1,5 @@
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <IO/BufferWithOwnMemory.h>
#include <IO/Operators.h>
@ -315,17 +316,54 @@ bool CSVFormatReader::readField(
return false;
}
if (format_settings.csv.use_default_on_bad_values)
return readFieldOrDefault(column, type, serialization);
return readFieldImpl(*buf, column, type, serialization);
}
bool CSVFormatReader::readFieldImpl(ReadBuffer & istr, DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization)
{
if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type))
{
/// If value is null but type is not nullable then use default value instead.
return SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization);
return SerializationNullable::deserializeTextCSVImpl(column, istr, format_settings, serialization);
}
/// Read the column normally.
serialization->deserializeTextCSV(column, *buf, format_settings);
serialization->deserializeTextCSV(column, istr, format_settings);
return true;
}
bool CSVFormatReader::readFieldOrDefault(DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization)
{
String field;
readCSVField(field, *buf, format_settings.csv);
ReadBufferFromString tmp_buf(field);
bool is_bad_value = false;
bool res = false;
size_t col_size = column.size();
try
{
res = readFieldImpl(tmp_buf, column, type, serialization);
/// Check if we parsed the whole field successfully.
if (!field.empty() && !tmp_buf.eof())
is_bad_value = true;
}
catch (const Exception &)
{
is_bad_value = true;
}
if (!is_bad_value)
return res;
if (column.size() == col_size + 1)
column.popBack(1);
column.insertDefault();
return false;
}
void CSVFormatReader::skipPrefixBeforeHeader()
{
for (size_t i = 0; i != format_settings.csv.skip_first_lines; ++i)

View File

@ -89,6 +89,8 @@ public:
void setReadBuffer(ReadBuffer & in_) override;
FormatSettings::EscapingRule getEscapingRule() const override { return FormatSettings::EscapingRule::CSV; }
bool readFieldImpl(ReadBuffer & istr, DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization);
bool readFieldOrDefault(DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization);
protected:
PeekableReadBuffer * buf;

View File

@ -269,7 +269,7 @@ void MergeTreeBackgroundExecutor<Queue>::routine(TaskRuntimeDataPtr item)
try
{
ALLOW_ALLOCATIONS_IN_SCOPE;
item->task->getQueryId();
query_id = item->task->getQueryId();
need_execute_again = item->task->executeStep();
}
catch (...)

View File

@ -72,7 +72,7 @@ public:
if (!Coordination::isHardwareError(e.code))
throw;
setKeeperError(e.code, e.message());
setKeeperError(std::current_exception(), e.code, e.message());
}
catch (...)
{
@ -91,16 +91,16 @@ public:
}
catch (const zkutil::KeeperException & e)
{
setKeeperError(e.code, e.message());
setKeeperError(std::current_exception(), e.code, e.message());
}
catch (const Exception & e)
{
setUserError(e.code(), e.what());
setUserError(std::current_exception(), e.code(), e.what());
}
return false;
}
void setUserError(int code, std::string message)
void setUserError(std::exception_ptr exception, int code, std::string message)
{
if (retries_info.logger)
LOG_TRACE(
@ -113,16 +113,28 @@ public:
iteration_succeeded = false;
user_error.code = code;
user_error.message = std::move(message);
user_error.exception = exception;
keeper_error = KeeperError{};
}
template <typename... Args>
void setUserError(std::exception_ptr exception, int code, fmt::format_string<Args...> fmt, Args &&... args)
{
setUserError(exception, code, fmt::format(fmt, std::forward<Args>(args)...));
}
void setUserError(int code, std::string message)
{
setUserError(std::make_exception_ptr(Exception::createDeprecated(message, code)), code, message);
}
template <typename... Args>
void setUserError(int code, fmt::format_string<Args...> fmt, Args &&... args)
{
setUserError(code, fmt::format(fmt, std::forward<Args>(args)...));
}
void setKeeperError(Coordination::Error code, std::string message)
void setKeeperError(std::exception_ptr exception, Coordination::Error code, std::string message)
{
if (retries_info.logger)
LOG_TRACE(
@ -135,9 +147,21 @@ public:
iteration_succeeded = false;
keeper_error.code = code;
keeper_error.message = std::move(message);
keeper_error.exception = exception;
user_error = UserError{};
}
template <typename... Args>
void setKeeperError(std::exception_ptr exception, Coordination::Error code, fmt::format_string<Args...> fmt, Args &&... args)
{
setKeeperError(exception, code, fmt::format(fmt, std::forward<Args>(args)...));
}
void setKeeperError(Coordination::Error code, std::string message)
{
setKeeperError(std::make_exception_ptr(zkutil::KeeperException(message, code)), code, message);
}
template <typename... Args>
void setKeeperError(Coordination::Error code, fmt::format_string<Args...> fmt, Args &&... args)
{
@ -163,12 +187,14 @@ private:
using Code = Coordination::Error;
Code code = Code::ZOK;
std::string message;
std::exception_ptr exception;
};
struct UserError
{
int code = ErrorCodes::OK;
std::string message;
std::exception_ptr exception;
};
bool canTry()
@ -232,11 +258,11 @@ private:
void throwIfError() const
{
if (user_error.code != ErrorCodes::OK)
throw Exception::createDeprecated(user_error.message, user_error.code);
if (user_error.exception)
std::rethrow_exception(user_error.exception);
if (keeper_error.code != KeeperError::Code::ZOK)
throw zkutil::KeeperException(keeper_error.message, keeper_error.code);
if (keeper_error.exception)
std::rethrow_exception(keeper_error.exception);
}
void logLastError(std::string_view header)

View File

@ -9178,8 +9178,7 @@ std::optional<ZeroCopyLock> StorageReplicatedMergeTree::tryCreateZeroCopyExclusi
String zc_zookeeper_path = *getZeroCopyPartPath(part_name, disk);
/// Just recursively create ancestors for lock
zookeeper->createAncestors(zc_zookeeper_path);
zookeeper->createIfNotExists(zc_zookeeper_path, "");
zookeeper->createAncestors(zc_zookeeper_path + "/");
/// Create actual lock
ZeroCopyLock lock(zookeeper, zc_zookeeper_path, replica_name);

View File

@ -5,6 +5,8 @@
#include <Interpreters/formatWithPossiblyHidingSecrets.h>
#include <Access/ContextAccess.h>
#include <Storages/System/StorageSystemDatabases.h>
#include <Storages/SelectQueryInfo.h>
#include <Storages/VirtualColumnUtils.h>
#include <Parsers/ASTCreateQuery.h>
#include <Common/logger_useful.h>
@ -69,20 +71,52 @@ static String getEngineFull(const ContextPtr & ctx, const DatabasePtr & database
return engine_full;
}
void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const
static ColumnPtr getFilteredDatabases(const Databases & databases, const SelectQueryInfo & query_info, ContextPtr context)
{
MutableColumnPtr name_column = ColumnString::create();
MutableColumnPtr engine_column = ColumnString::create();
MutableColumnPtr uuid_column = ColumnUUID::create();
for (const auto & [database_name, database] : databases)
{
if (database_name == DatabaseCatalog::TEMPORARY_DATABASE)
continue; /// We don't want to show the internal database for temporary tables in system.tables
name_column->insert(database_name);
engine_column->insert(database->getEngineName());
uuid_column->insert(database->getUUID());
}
Block block
{
ColumnWithTypeAndName(std::move(name_column), std::make_shared<DataTypeString>(), "name"),
ColumnWithTypeAndName(std::move(engine_column), std::make_shared<DataTypeString>(), "engine"),
ColumnWithTypeAndName(std::move(uuid_column), std::make_shared<DataTypeUUID>(), "uuid")
};
VirtualColumnUtils::filterBlockWithQuery(query_info.query, block, context);
return block.getByPosition(0).column;
}
void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const
{
const auto access = context->getAccess();
const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_DATABASES);
const auto databases = DatabaseCatalog::instance().getDatabases();
for (const auto & [database_name, database] : databases)
ColumnPtr filtered_databases_column = getFilteredDatabases(databases, query_info, context);
for (size_t i = 0; i < filtered_databases_column->size(); ++i)
{
auto database_name = filtered_databases_column->getDataAt(i).toString();
if (check_access_for_databases && !access->isGranted(AccessType::SHOW_DATABASES, database_name))
continue;
if (database_name == DatabaseCatalog::TEMPORARY_DATABASE)
continue; /// filter out the internal database for temporary tables in system.databases, asynchronous metric "NumberOfDatabases" behaves the same way
const auto & database = databases.at(database_name);
res_columns[0]->insert(database_name);
res_columns[1]->insert(database->getEngineName());
res_columns[2]->insert(context->getPath() + database->getDataPath());

View File

@ -44,7 +44,7 @@ void StorageSystemQueryCache::fillData(MutableColumns & res_columns, ContextPtr
if (!key.is_shared && key.user_name != user_name)
continue;
res_columns[0]->insert(key.queryStringFromAst()); /// approximates the original query string
res_columns[0]->insert(key.query_string); /// approximates the original query string
res_columns[1]->insert(QueryCache::QueryCacheEntryWeight()(*query_result));
res_columns[2]->insert(key.expires_at < std::chrono::system_clock::now());
res_columns[3]->insert(key.is_shared);

View File

@ -25,6 +25,9 @@ endif()
if (TARGET ch_rust::skim)
set(USE_SKIM 1)
endif()
if (TARGET ch_rust::prql)
set(USE_PRQL 1)
endif()
if (TARGET OpenSSL::SSL)
set(USE_SSL 1)
endif()

View File

@ -0,0 +1,7 @@
<clickhouse>
<profiles>
<default>
<use_hedged_requests>1</use_hedged_requests>
</default>
</profiles>
</clickhouse>

View File

@ -0,0 +1,4 @@
<clickhouse>
<listen_host>::</listen_host>
</clickhouse>

View File

@ -0,0 +1,72 @@
import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
@pytest.fixture(scope="module")
def cluster_without_dns_cache_update():
try:
cluster.start()
yield cluster
except Exception as ex:
print(ex)
finally:
cluster.shutdown()
pass
node1 = cluster.add_instance(
"node1",
main_configs=["configs/listen_host.xml"],
user_configs=["configs/enable_hedged.xml"],
with_zookeeper=True,
ipv4_address="10.5.95.11",
)
node2 = cluster.add_instance(
"node2",
main_configs=["configs/listen_host.xml"],
user_configs=["configs/enable_hedged.xml"],
with_zookeeper=True,
ipv4_address="10.5.95.12",
)
# node1 - source with table, have invalid ipv6
# node2 - destination, doing remote query
def test(cluster_without_dns_cache_update):
node1.query(
"CREATE TABLE test(t Date, label UInt8) ENGINE = MergeTree PARTITION BY t ORDER BY label;"
)
node1.query("INSERT INTO test SELECT toDate('2022-12-28'), 1;")
assert node1.query("SELECT count(*) FROM test") == "1\n"
wrong_ip = "2001:3984:3989::1:1118"
node2.exec_in_container(
(["bash", "-c", "echo '{} {}' >> /etc/hosts".format(wrong_ip, node1.name)])
)
node2.exec_in_container(
(
[
"bash",
"-c",
"echo '{} {}' >> /etc/hosts".format(node1.ipv4_address, node1.name),
]
)
)
assert node1.query("SELECT count(*) from test") == "1\n"
node2.query("SYSTEM DROP DNS CACHE")
node1.query("SYSTEM DROP DNS CACHE")
assert (
node2.query(
f"SELECT count(*) FROM remote('{node1.name}', default.test) limit 1;"
)
== "1\n"
)

View File

@ -2,7 +2,5 @@ CreatedReadBufferMMap
CreatedReadBufferMMapFailed
MMappedFileCacheHits
MMappedFileCacheMisses
MMappedAllocBytes
MMappedAllocs
MMappedFileBytes
MMappedFiles

View File

@ -39,4 +39,3 @@ Checking s2 index generation.
(74.0061,-68.32124) (74.0061,-68.32124) ok
(10.61077,-64.1841) (10.61077,-64.1841) ok
(-89.81096,-57.01398) (-89.81096,-57.01398) ok
4864204703484167331

View File

@ -44,7 +44,7 @@ SELECT first, second, result FROM (
SELECT s2ToGeo(toUInt64(-1)); -- { serverError 36 }
SELECT s2ToGeo(nan); -- { serverError 43 }
SELECT geoToS2(toFloat64(toUInt64(-1)), toFloat64(toUInt64(-1)));
SELECT geoToS2(toFloat64(toUInt64(-1)), toFloat64(toUInt64(-1))); -- { serverError BAD_ARGUMENTS }
SELECT geoToS2(nan, nan); -- { serverError 43 }
SELECT geoToS2(-inf, 1.1754943508222875e-38); -- { serverError 43 }

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: long, no-tsan, no-debug, no-asan, no-msan, no-ubsan
# Tags: long, no-tsan, no-debug, no-asan, no-msan, no-ubsan, no-parallel
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@ -16,4 +16,4 @@
(5179062030687166815,5177056748191934217)
(5179062030687166815,5177057445452335297)
(5178914411069187297,5177056748191934217)
(6304347505408739331,8070450532247928833)
(5178914411069187297,5177912432982045463)

View File

@ -9,4 +9,4 @@ SELECT s2CapUnion(3814912406305146967, toFloat64(1), 1157347770437378819, toFloa
SELECT s2RectAdd(5178914411069187297, 5177056748191934217, arrayJoin([5179056748191934217,5177914411069187297]));
SELECT s2RectContains(5179062030687166815, 5177056748191934217, arrayJoin([5177914411069187297, 5177914411069187297]));
SELECT s2RectUnion(5178914411069187297, 5177056748191934217, 5179062030687166815, arrayJoin([5177056748191934217, 5177914411069187297]));
SELECT s2RectIntersection(5178914411069187297, 5177056748191934217, 5179062030687166815, arrayJoin([5177056748191934217,1157347770437378819]));
SELECT s2RectIntersection(5178914411069187297, 5177056748191934217, 5179062030687166815, arrayJoin([5177056748191934217,5177914411069187297]));

View File

@ -1,2 +1,2 @@
A2193552DCF8A9F99AC35F86BC4D2FFD
SELECT hex(encrypt(\'aes-128-ecb\', \'[HIDDEN]\')) SETTINGS
SELECT hex(encrypt(\'aes-128-ecb\', \'[HIDDEN]\')) SETTINGS use_query_cache = 1

View File

@ -0,0 +1,19 @@
101 Hello, ClickHouse! 2 He
101 Granules are the smallest chunks of data read 2 Gr
102 Insert a lot of rows per batch 2 In
102 Sort your data based on your commonly-used queries 2 So
103 This is an awesome message 2 Th
103 42
102 4.132209897041321
---
101 Hello, ClickHouse! 2019-01-01 00:00:00.000 -1
101 Granules are the smallest chunks of data read 2019-05-01 00:00:00.000 3.14159
102 Insert a lot of rows per batch 2019-02-01 00:00:00.000 1.41421
102 Sort your data based on your commonly-used queries 2019-03-01 00:00:00.000 2.718
103 This is an awesome message 2019-04-01 00:00:00.000 42
---
101 Hello, ClickHouse! 2019-01-01 00:00:00.000 -1
101 Granules are the smallest chunks of data read 2019-05-01 00:00:00.000 3.14159
102 Insert a lot of rows per batch 2019-02-01 00:00:00.000 1.41421
102 Sort your data based on your commonly-used queries 2019-03-01 00:00:00.000 2.718
103 This is an awesome message 2019-04-01 00:00:00.000 42

View File

@ -0,0 +1,58 @@
#!/usr/bin/env bash
# Tags: no-fasttest, no-random-settings
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
$CLICKHOUSE_CLIENT -n -q "
CREATE TEMPORARY TABLE IF NOT EXISTS aboba
(
user_id UInt32,
message String,
creation_date DateTime64,
metric Float32
)
ENGINE = MergeTree
ORDER BY user_id;
INSERT INTO aboba (user_id, message, creation_date, metric) VALUES (101, 'Hello, ClickHouse!', toDateTime('2019-01-01 00:00:00', 3, 'Europe/Amsterdam'), -1.0), (102, 'Insert a lot of rows per batch', toDateTime('2019-02-01 00:00:00', 3, 'Europe/Amsterdam'), 1.41421 ), (102, 'Sort your data based on your commonly-used queries', toDateTime('2019-03-01 00:00:00', 3, 'Europe/Amsterdam'), 2.718), (101, 'Granules are the smallest chunks of data read', toDateTime('2019-05-01 00:00:00', 3, 'Europe/Amsterdam'), 3.14159), (103, 'This is an awesome message', toDateTime('2019-04-01 00:00:00', 3, 'Europe/Amsterdam'), 42);
SET dialect = 'prql';
from aboba
derive [
a = 2,
b = s\"LEFT(message, 2)\"
]
select [ user_id, message, a, b ];
from aboba
filter user_id > 101
group user_id (
aggregate [
metrics = sum metric
]
);
SET dialect = 'clickhouse';
SELECT '---';
SELECT
user_id,
message,
toTimeZone(creation_date, 'Europe/Amsterdam') as creation_date,
metric
FROM aboba;
SELECT '---';
SET dialect = 'prql';
from aboba
select [ user_id, message, metric ]
derive creation_date = s\"toTimeZone(creation_date, 'Europe/Amsterdam')\"
select [ user_id, message, creation_date, metric];
from s\"SELECT * FROM system.users\" | select non_existent_column; # {serverError UNKNOWN_IDENTIFIER}
from non_existent_table; # {serverError UNKNOWN_TABLE}
"

View File

@ -0,0 +1,5 @@
0 111 1970-01-01 false
1 abc 2023-03-14 true
2 c 1970-01-01 false
4 888 2023-03-14 false
5 bks 1970-01-01 false

View File

@ -0,0 +1,13 @@
#!/usr/bin/env bash
# NOTE: this sh wrapper is required because of shell_config
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_CLIENT -q "drop table if exists test_tbl"
$CLICKHOUSE_CLIENT -q "create table test_tbl (a Int32, b String, c Date, e Boolean) engine=MergeTree order by a"
cat $CURDIR/data_csv/csv_with_bad_field_values.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV"
$CLICKHOUSE_CLIENT -q "select * from test_tbl"
$CLICKHOUSE_CLIENT -q "drop table test_tbl"

View File

@ -0,0 +1,3 @@
1
1
1

View File

@ -0,0 +1,3 @@
SELECT now() = current_timestamp();
SELECT now() = CURRENT_TIMESTAMP();
SELECT now() = current_TIMESTAMP();

View File

@ -0,0 +1,6 @@
1
1
1
1
1
1

View File

@ -0,0 +1,6 @@
SELECT today() = current_date();
SELECT today() = CURRENT_DATE();
SELECT today() = current_DATE();
SELECT today() = curdate();
SELECT today() = CURDATE();
SELECT today() = curDATE();

View File

@ -0,0 +1,3 @@
-- Tags: no-fasttest
SELECT geoToS2(toFloat64(toUInt64(-1)), toFloat64(toUInt64(-1))); -- { serverError BAD_ARGUMENTS }

View File

@ -0,0 +1,5 @@
1,abc,2023-03-14,true
2,c,ab,false
bc,111,ab,ban
4,888,2023-03-14,false
5,bks,2023-03,abdd
1 1 abc 2023-03-14 true
2 2 c ab false
3 bc 111 ab ban
4 4 888 2023-03-14 false
5 5 bks 2023-03 abdd

View File

@ -2548,3 +2548,4 @@ znode
znodes
zookeeperSessionUptime
zstd
curdate