Merge branch 'master' into vdimir/join_on_null_fix

This commit is contained in:
vdimir 2024-07-09 12:56:50 +02:00 committed by GitHub
commit 306cc02af6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
57 changed files with 1230 additions and 667 deletions

View File

@ -79,7 +79,7 @@ namespace Net
/// Returns the value of the first name-value pair with the given name.
/// If no value with the given name has been found, the defaultValue is returned.
const std::vector<std::reference_wrapper<const std::string>> getAll(const std::string & name) const;
std::vector<std::string> getAll(const std::string & name) const;
/// Returns all values of all name-value pairs with the given name.
///
/// Returns an empty vector if there are no name-value pairs with the given name.

View File

@ -102,9 +102,9 @@ const std::string& NameValueCollection::get(const std::string& name, const std::
return defaultValue;
}
const std::vector<std::reference_wrapper<const std::string>> NameValueCollection::getAll(const std::string& name) const
std::vector<std::string> NameValueCollection::getAll(const std::string& name) const
{
std::vector<std::reference_wrapper<const std::string>> values;
std::vector<std::string> values;
for (ConstIterator it = _map.find(name); it != _map.end(); it++)
if (it->first == name)
values.push_back(it->second);

2
contrib/avro vendored

@ -1 +1 @@
Subproject commit d43acc84d3d455b016f847d6666fbc3cd27f16a9
Subproject commit 545e7002683cbc2198164d93088ac8e4955b4628

View File

@ -54,7 +54,6 @@ set(SRCS
"${LIBPQ_SOURCE_DIR}/port/pgstrcasecmp.c"
"${LIBPQ_SOURCE_DIR}/port/thread.c"
"${LIBPQ_SOURCE_DIR}/port/path.c"
"${LIBPQ_SOURCE_DIR}/port/explicit_bzero.c"
)
add_library(_libpq ${SRCS})

View File

@ -14,14 +14,13 @@ MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 10800 : MAX_RUN_TIME))
# NOTE: that clickhouse-test will randomize session_timezone by itself as well
# (it will choose between default server timezone and something specific).
TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)"
echo "Choosen random timezone $TZ"
echo "Chosen random timezone $TZ"
ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
dpkg -i package_folder/clickhouse-common-static_*.deb
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
# Accept failure in the next two commands until 24.4 is released (for compatibility and Bugfix validation run)
dpkg -i package_folder/clickhouse-odbc-bridge_*.deb || true
dpkg -i package_folder/clickhouse-library-bridge_*.deb || true
dpkg -i package_folder/clickhouse-odbc-bridge_*.deb
dpkg -i package_folder/clickhouse-library-bridge_*.deb
dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb
@ -58,12 +57,6 @@ if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; th
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
#todo: remove these after 24.3 released.
sudo sed -i "s|<object_storage_type>azure<|<object_storage_type>azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml
#todo: remove these after 24.3 released.
sudo sed -i "s|<object_storage_type>local<|<object_storage_type>local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml
function remove_keeper_config()
{
sudo sed -i "/<$1>$2<\/$1>/d" /etc/clickhouse-server/config.d/keeper_port.xml

View File

@ -1170,6 +1170,10 @@ Data in the VALUES clause of INSERT queries is processed by a separate stream pa
Default value: 262144 (= 256 KiB).
:::note
`max_query_size` cannot be set within an SQL query (e.g., `SELECT now() SETTINGS max_query_size=10000`) because ClickHouse needs to allocate a buffer to parse the query, and this buffer size is determined by the `max_query_size` setting, which must be configured before the query is executed.
:::
## max_parser_depth {#max_parser_depth}
Limits maximum recursion depth in the recursive descent parser. Allows controlling the stack size.

View File

@ -5,23 +5,45 @@ sidebar_position: 165
# maxMap
Syntax: `maxMap(key, value)` or `maxMap(Tuple(key, value))`
Calculates the maximum from `value` array according to the keys specified in the `key` array.
Passing a tuple of keys and value arrays is identical to passing two arrays of keys and values.
**Syntax**
The number of elements in `key` and `value` must be the same for each row that is totaled.
```sql
maxMap(key, value)
```
or
```sql
maxMap(Tuple(key, value))
```
Returns a tuple of two arrays: keys and values calculated for the corresponding keys.
Alias: `maxMappedArrays`
Example:
:::note
- Passing a tuple of keys and value arrays is identical to passing two arrays of keys and values.
- The number of elements in `key` and `value` must be the same for each row that is totaled.
:::
**Parameters**
- `key` — Array of keys. [Array](../../data-types/array.md).
- `value` — Array of values. [Array](../../data-types/array.md).
**Returned value**
- Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys. [Tuple](../../data-types/tuple.md)([Array](../../data-types/array.md), [Array](../../data-types/array.md)).
**Example**
Query:
``` sql
SELECT maxMap(a, b)
FROM values('a Array(Char), b Array(Int64)', (['x', 'y'], [2, 2]), (['y', 'z'], [3, 1]))
```
Result:
``` text
┌─maxMap(a, b)───────────┐
│ (['x','y','z'],[2,3,1])│

View File

@ -5,23 +5,45 @@ sidebar_position: 169
# minMap
Syntax: `minMap(key, value)` or `minMap(Tuple(key, value))`
Calculates the minimum from `value` array according to the keys specified in the `key` array.
Passing a tuple of keys and value arrays is identical to passing two arrays of keys and values.
**Syntax**
The number of elements in `key` and `value` must be the same for each row that is totaled.
```sql
minMap(key, value)
```
or
```sql
minMap(Tuple(key, value))
```
Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys.
Alias: `minMappedArrays`
Example:
:::note
- Passing a tuple of keys and value arrays is identical to passing an array of keys and an array of values.
- The number of elements in `key` and `value` must be the same for each row that is totaled.
:::
**Parameters**
- `key` — Array of keys. [Array](../../data-types/array.md).
- `value` — Array of values. [Array](../../data-types/array.md).
**Returned value**
- Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys. [Tuple](../../data-types/tuple.md)([Array](../../data-types/array.md), [Array](../../data-types/array.md)).
**Example**
Query:
``` sql
SELECT minMap(a, b)
FROM values('a Array(Int32), b Array(Int64)', ([1, 2], [2, 2]), ([2, 3], [1, 1]))
```
Result:
``` text
┌─minMap(a, b)──────┐
│ ([1,2,3],[2,1,1]) │

View File

@ -12,9 +12,7 @@ Functions for [searching](string-search-functions.md) in strings and for [replac
## empty
Checks whether the input string is empty.
A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte.
Checks whether the input string is empty. A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte.
The function is also available for [arrays](array-functions.md#function-empty) and [UUIDs](uuid-functions.md#empty).
@ -48,9 +46,7 @@ Result:
## notEmpty
Checks whether the input string is non-empty.
A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte.
Checks whether the input string is non-empty. A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte.
The function is also available for [arrays](array-functions.md#function-notempty) and [UUIDs](uuid-functions.md#notempty).
@ -96,7 +92,7 @@ length(s)
**Parameters**
- `s`: An input string or array. [String](../data-types/string)/[Array](../data-types/array).
- `s` — An input string or array. [String](../data-types/string)/[Array](../data-types/array).
**Returned value**
@ -149,7 +145,7 @@ lengthUTF8(s)
**Parameters**
- `s`: String containing valid UTF-8 encoded text. [String](../data-types/string).
- `s` — String containing valid UTF-8 encoded text. [String](../data-types/string).
**Returned value**
@ -183,8 +179,8 @@ left(s, offset)
**Parameters**
- `s`: The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint).
- `s` — The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `offset` — The number of bytes of the offset. [UInt*](../data-types/int-uint).
**Returned value**
@ -230,8 +226,8 @@ leftUTF8(s, offset)
**Parameters**
- `s`: The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint).
- `s` — The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `offset` — The number of bytes of the offset. [UInt*](../data-types/int-uint).
**Returned value**
@ -347,8 +343,8 @@ right(s, offset)
**Parameters**
- `s`: The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint).
- `s` — The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `offset` — The number of bytes of the offset. [UInt*](../data-types/int-uint).
**Returned value**
@ -394,8 +390,8 @@ rightUTF8(s, offset)
**Parameters**
- `s`: The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint).
- `s` — The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `offset` — The number of bytes of the offset. [UInt*](../data-types/int-uint).
**Returned value**
@ -547,7 +543,7 @@ Alias: `ucase`
**Parameters**
- `input`: A string type [String](../data-types/string.md).
- `input` — A string type [String](../data-types/string.md).
**Returned value**
@ -571,16 +567,47 @@ SELECT upper('clickhouse');
Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
:::note
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). If the length of the UTF-8 byte sequence is different for upper and lower case of a code point (such as `ẞ` and `ß`), the result may be incorrect for this code point.
:::
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
**Syntax**
```sql
lowerUTF8(input)
```
**Parameters**
- `input` — A string type [String](../data-types/string.md).
**Returned value**
- A [String](../data-types/string.md) data type value.
**Example**
Query:
``` sql
SELECT lowerUTF8('MÜNCHEN') as Lowerutf8;
```
Result:
``` response
┌─Lowerutf8─┐
│ münchen │
└───────────┘
```
## upperUTF8
Converts a string to uppercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
:::note
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). If the length of the UTF-8 byte sequence is different for upper and lower case of a code point (such as `ẞ` and `ß`), the result may be incorrect for this code point.
:::
**Syntax**
@ -590,7 +617,7 @@ upperUTF8(input)
**Parameters**
- `input`: A string type [String](../data-types/string.md).
- `input` — A string type [String](../data-types/string.md).
**Returned value**
@ -604,6 +631,8 @@ Query:
SELECT upperUTF8('München') as Upperutf8;
```
Result:
``` response
┌─Upperutf8─┐
│ MÜNCHEN │
@ -614,6 +643,34 @@ SELECT upperUTF8('München') as Upperutf8;
Returns 1, if the set of bytes constitutes valid UTF-8-encoded text, otherwise 0.
**Syntax**
``` sql
isValidUTF8(input)
```
**Parameters**
- `input` — A string type [String](../data-types/string.md).
**Returned value**
- Returns `1`, if the set of bytes constitutes valid UTF-8-encoded text, otherwise `0`.
Query:
``` sql
SELECT isValidUTF8('\xc3\xb1') AS valid, isValidUTF8('\xc3\x28') AS invalid;
```
Result:
``` response
┌─valid─┬─invalid─┐
│ 1 │ 0 │
└───────┴─────────┘
```
## toValidUTF8
Replaces invalid UTF-8 characters by the `�` (U+FFFD) character. Consecutive invalid characters are collapsed into a single replacement character.
@ -883,7 +940,7 @@ Returns the substring of a string `s` which starts at the specified byte index `
substring(s, offset[, length])
```
Alias:
Aliases:
- `substr`
- `mid`
- `byteSlice`
@ -926,9 +983,9 @@ substringUTF8(s, offset[, length])
**Arguments**
- `s`: The string to calculate a substring from. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md) or [Enum](../data-types/enum.md)
- `offset`: The starting position of the substring in `s` . [(U)Int*](../data-types/int-uint.md).
- `length`: The maximum length of the substring. [(U)Int*](../data-types/int-uint.md). Optional.
- `s` — The string to calculate a substring from. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md) or [Enum](../data-types/enum.md)
- `offset` — The starting position of the substring in `s`. [(U)Int*](../data-types/int-uint.md).
- `length` — The maximum length of the substring. [(U)Int*](../data-types/int-uint.md). Optional.
**Returned value**
@ -964,9 +1021,9 @@ Alias: `SUBSTRING_INDEX`
**Arguments**
- s: The string to extract substring from. [String](../data-types/string.md).
- delim: The character to split. [String](../data-types/string.md).
- count: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md)
- `s` — The string to extract substring from. [String](../data-types/string.md).
- `delim` — The character to split. [String](../data-types/string.md).
- `count` — The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md)
**Example**
@ -995,9 +1052,9 @@ substringIndexUTF8(s, delim, count)
**Arguments**
- `s`: The string to extract substring from. [String](../data-types/string.md).
- `delim`: The character to split. [String](../data-types/string.md).
- `count`: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md)
- `s` — The string to extract substring from. [String](../data-types/string.md).
- `delim` — The character to split. [String](../data-types/string.md).
- `count` — The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md)
**Returned value**
@ -1277,7 +1334,7 @@ tryBase64Decode(encoded)
**Arguments**
- `encoded`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value, returns an empty string.
- `encoded` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value, returns an empty string.
**Returned value**
@ -1309,7 +1366,7 @@ tryBase64URLDecode(encodedUrl)
**Parameters**
- `encodedURL`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string.
- `encodedURL` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string.
**Returned value**
@ -2011,7 +2068,7 @@ soundex(val)
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
- `val` — Input value. [String](../data-types/string.md)
**Returned value**
@ -2044,7 +2101,7 @@ punycodeEncode(val)
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
- `val` — Input value. [String](../data-types/string.md)
**Returned value**
@ -2077,7 +2134,7 @@ punycodeEncode(val)
**Arguments**
- `val` - Punycode-encoded string. [String](../data-types/string.md)
- `val` — Punycode-encoded string. [String](../data-types/string.md)
**Returned value**
@ -2103,7 +2160,7 @@ Like `punycodeDecode` but returns an empty string if no valid Punycode-encoded s
## idnaEncode
Returns the the ASCII representation (ToASCII algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism.
Returns the ASCII representation (ToASCII algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism.
The input string must be UTF-encoded and translatable to an ASCII string, otherwise an exception is thrown.
Note: No percent decoding or trimming of tabs, spaces or control characters is performed.
@ -2115,7 +2172,7 @@ idnaEncode(val)
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
- `val` — Input value. [String](../data-types/string.md)
**Returned value**
@ -2141,7 +2198,7 @@ Like `idnaEncode` but returns an empty string in case of an error instead of thr
## idnaDecode
Returns the the Unicode (UTF-8) representation (ToUnicode algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism.
Returns the Unicode (UTF-8) representation (ToUnicode algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism.
In case of an error (e.g. because the input is invalid), the input string is returned.
Note that repeated application of `idnaEncode()` and `idnaDecode()` does not necessarily return the original string due to case normalization.
@ -2153,7 +2210,7 @@ idnaDecode(val)
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
- `val` — Input value. [String](../data-types/string.md)
**Returned value**
@ -2197,7 +2254,7 @@ Result:
└───────────────────────────────────────────┘
```
Alias: mismatches
Alias: `mismatches`
## stringJaccardIndex
@ -2251,7 +2308,7 @@ Result:
└─────────────────────────────────────┘
```
Alias: levenshteinDistance
Alias: `levenshteinDistance`
## editDistanceUTF8
@ -2277,7 +2334,7 @@ Result:
└─────────────────────────────────────┘
```
Alias: levenshteinDistanceUTF8
Alias: `levenshteinDistanceUTF8`
## damerauLevenshteinDistance
@ -2355,13 +2412,93 @@ Result:
Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.
:::note
Because `initCap` converts only the first letter of each word to upper case you may observe unexpected behaviour for words containing apostrophes or capital letters. For example:
```sql
SELECT initCap('mother''s daughter'), initCap('joe McAdam');
```
will return
```response
┌─initCap('mother\'s daughter')─┬─initCap('joe McAdam')─┐
│ Mother'S Daughter │ Joe Mcadam │
└───────────────────────────────┴───────────────────────┘
```
This is a known behaviour, with no plans currently to fix it.
:::
**Syntax**
```sql
initcap(val)
```
**Arguments**
- `val` — Input value. [String](../data-types/string.md).
**Returned value**
- `val` with the first letter of each word converted to upper case. [String](../data-types/string.md).
**Example**
Query:
```sql
SELECT initcap('building for fast');
```
Result:
```text
┌─initcap('building for fast')─┐
│ Building For Fast │
└──────────────────────────────┘
```
## initcapUTF8
Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
Like [initcap](#initcap), `initcapUTF8` converts the first letter of each word to upper case and the rest to lower case. Assumes that the string contains valid UTF-8 encoded text.
If this assumption is violated, no exception is thrown and the result is undefined.
:::note
This function does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
:::
**Syntax**
```sql
initcapUTF8(val)
```
**Arguments**
- `val` — Input value. [String](../data-types/string.md).
**Returned value**
- `val` with the first letter of each word converted to upper case. [String](../data-types/string.md).
**Example**
Query:
```sql
SELECT initcapUTF8('не тормозит');
```
Result:
```text
┌─initcapUTF8('не тормозит')─┐
│ Не Тормозит │
└────────────────────────────┘
```
## firstLine
@ -2375,7 +2512,7 @@ firstLine(val)
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
- `val` — Input value. [String](../data-types/string.md)
**Returned value**

View File

@ -34,7 +34,7 @@ Alias: `replace`.
Replaces the first occurrence of the substring matching the regular expression `pattern` (in [re2 syntax](https://github.com/google/re2/wiki/Syntax)) in `haystack` by the `replacement` string.
`replacement` can containing substitutions `\0-\9`.
`replacement` can contain substitutions `\0-\9`.
Substitutions `\1-\9` correspond to the 1st to 9th capturing group (submatch), substitution `\0` corresponds to the entire match.
To use a verbatim `\` character in the `pattern` or `replacement` strings, escape it using `\`.

View File

@ -31,8 +31,10 @@ target_link_libraries (arena_with_free_lists PRIVATE dbms)
clickhouse_add_executable (lru_hash_map_perf lru_hash_map_perf.cpp)
target_link_libraries (lru_hash_map_perf PRIVATE dbms)
clickhouse_add_executable (thread_creation_latency thread_creation_latency.cpp)
target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io)
if (OS_LINUX)
clickhouse_add_executable (thread_creation_latency thread_creation_latency.cpp)
target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io)
endif()
clickhouse_add_executable (array_cache array_cache.cpp)
target_link_libraries (array_cache PRIVATE clickhouse_common_io)

View File

@ -1062,6 +1062,7 @@ class IColumn;
M(Bool, input_format_orc_allow_missing_columns, true, "Allow missing columns while reading ORC input formats", 0) \
M(Bool, input_format_orc_use_fast_decoder, true, "Use a faster ORC decoder implementation.", 0) \
M(Bool, input_format_orc_filter_push_down, true, "When reading ORC files, skip whole stripes or row groups based on the WHERE/PREWHERE expressions, min/max statistics or bloom filter in the ORC metadata.", 0) \
M(Bool, input_format_orc_read_use_writer_time_zone, false, "Whether use the writer's time zone in ORC stripe for ORC row reader, the default ORC row reader's time zone is GMT.", 0) \
M(Bool, input_format_parquet_allow_missing_columns, true, "Allow missing columns while reading Parquet input formats", 0) \
M(UInt64, input_format_parquet_local_file_min_bytes_for_seek, 8192, "Min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format", 0) \
M(Bool, input_format_arrow_allow_missing_columns, true, "Allow missing columns while reading Arrow input formats", 0) \

View File

@ -61,6 +61,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"optimize_functions_to_subcolumns", false, true, "Enable optimization by default"},
{"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."},
{"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."},
{"input_format_orc_read_use_writer_time_zone", false, false, "Whether use the writer's time zone in ORC stripe for ORC row reader, the default ORC row reader's time zone is GMT."},
{"lightweight_mutation_projection_mode", "throw", "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete."},
{"database_replicated_allow_heavy_create", true, false, "Long-running DDL queries (CREATE AS SELECT and POPULATE) for Replicated database engine was forbidden"},
{"query_plan_merge_filters", false, false, "Allow to merge filters in the query plan"},

View File

@ -716,6 +716,16 @@ static void writeFieldsToColumn(
null_map_column->insertValue(0);
}
else
{
// Column is not null but field is null. It's possible due to overrides
if (field.isNull())
{
column_to.insertDefault();
return false;
}
}
return true;
};
@ -791,7 +801,7 @@ static void writeFieldsToColumn(
if (write_data_to_null_map(value, index))
{
const String & data = value.get<const String &>();
const String & data = value.safeGet<const String &>();
casted_string_column->insertData(data.data(), data.size());
}
}

View File

@ -243,6 +243,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.orc.output_row_index_stride = settings.output_format_orc_row_index_stride;
format_settings.orc.use_fast_decoder = settings.input_format_orc_use_fast_decoder;
format_settings.orc.filter_push_down = settings.input_format_orc_filter_push_down;
format_settings.orc.read_use_writer_time_zone = settings.input_format_orc_read_use_writer_time_zone;
format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields;
format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
format_settings.capn_proto.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference;

View File

@ -403,6 +403,7 @@ struct FormatSettings
bool use_fast_decoder = true;
bool filter_push_down = true;
UInt64 output_row_index_stride = 10'000;
bool read_use_writer_time_zone = false;
} orc{};
/// For capnProto format we should determine how to

View File

@ -1,9 +1,12 @@
#pragma once
#include <base/types.h>
#include <Columns/ColumnString.h>
#include <Common/OptimizedRegularExpression.h>
#include <Common/re2.h>
#include <Functions/Regexps.h>
#include <Functions/ReplaceStringImpl.h>
#include <IO/WriteHelpers.h>
#include <base/types.h>
namespace DB
{
@ -48,45 +51,75 @@ struct ReplaceRegexpImpl
static constexpr int max_captures = 10;
static Instructions createInstructions(std::string_view replacement, int num_captures)
/// The replacement string references must not contain non-existing capturing groups.
static void checkSubstitutions(std::string_view replacement, int num_captures)
{
Instructions instructions;
String literals;
for (size_t i = 0; i < replacement.size(); ++i)
{
if (replacement[i] == '\\' && i + 1 < replacement.size())
{
if (isNumericASCII(replacement[i + 1])) /// Substitution
if (isNumericASCII(replacement[i + 1])) /// substitution
{
int substitution_num = replacement[i + 1] - '0';
if (substitution_num >= num_captures)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Substitution '\\{}' in replacement argument is invalid, regexp has only {} capturing groups", substitution_num, num_captures - 1);
}
}
}
}
static Instructions createInstructions(std::string_view replacement, int num_captures)
{
checkSubstitutions(replacement, num_captures);
Instructions instructions;
String literals;
literals.reserve(replacement.size());
for (size_t i = 0; i < replacement.size(); ++i)
{
if (replacement[i] == '\\' && i + 1 < replacement.size())
{
if (isNumericASCII(replacement[i + 1])) /// substitution
{
if (!literals.empty())
{
instructions.emplace_back(literals);
literals = "";
}
instructions.emplace_back(replacement[i + 1] - '0');
int substitution_num = replacement[i + 1] - '0';
instructions.emplace_back(substitution_num);
}
else
literals += replacement[i + 1]; /// Escaping
literals += replacement[i + 1]; /// escaping
++i;
}
else
literals += replacement[i]; /// Plain character
literals += replacement[i]; /// plain character
}
if (!literals.empty())
instructions.emplace_back(literals);
for (const auto & instr : instructions)
if (instr.substitution_num >= num_captures)
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Id {} in replacement string is an invalid substitution, regexp has only {} capturing groups",
instr.substitution_num, num_captures - 1);
return instructions;
}
/// Decides whether a regexp replacement can be executed as a plain (non-regexp) string replacement.
///
/// Returns true only if all of the following hold:
///  - the compiled pattern `searcher` has no capturing groups,
///  - `replacement` contains no backslash, i.e. neither a substitution (`\0`) nor an escape sequence (`\\`),
///  - pattern analysis reports that `needle` is a trivial pattern which is identical to its own required prefix substring,
///    i.e. the needle is a literal string.
///
/// Throws BAD_ARGUMENTS (via checkSubstitutions) if `replacement` references a capturing group which does not exist.
static bool canFallbackToStringReplacement(const String & needle, const String & replacement, const re2::RE2 & searcher, int num_captures)
{
    if (searcher.NumberOfCapturingGroups())
        return false;

    /// Validate first so that invalid substitutions are reported no matter which code path is taken afterwards.
    checkSubstitutions(replacement, num_captures);

    /// The regexp path interprets backslashes in the replacement: '\0' expands to the whole match and '\x' is an escaped 'x'.
    /// The string replacement fallback receives the replacement as-is and would presumably insert such sequences verbatim,
    /// producing a different result. Stay on the regexp path in this case.
    if (replacement.find('\\') != String::npos)
        return false;

    String required_substring;
    bool is_trivial;
    bool required_substring_is_prefix;
    std::vector<String> alternatives;
    OptimizedRegularExpression::analyze(needle, required_substring, is_trivial, required_substring_is_prefix, alternatives);
    return is_trivial && required_substring_is_prefix && required_substring == needle;
}
static void processString(
const char * haystack_data,
size_t haystack_length,
@ -124,7 +157,7 @@ struct ReplaceRegexpImpl
{
std::string_view replacement;
if (instr.substitution_num >= 0)
replacement = std::string_view(matches[instr.substitution_num].data(), matches[instr.substitution_num].size());
replacement = {matches[instr.substitution_num].data(), matches[instr.substitution_num].size()};
else
replacement = instr.literal;
res_data.resize(res_data.size() + replacement.size());
@ -179,19 +212,32 @@ struct ReplaceRegexpImpl
res_offsets.resize(haystack_size);
re2::RE2::Options regexp_options;
/// Don't write error messages to stderr.
regexp_options.set_log_errors(false);
regexp_options.set_log_errors(false); /// don't write error messages to stderr
re2::RE2 searcher(needle, regexp_options);
if (!searcher.ok())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
/// Try to use non-regexp string replacement. This shortcut is implemented only for const-needles + const-replacement as
/// pattern analysis incurs some cost too.
if (canFallbackToStringReplacement(needle, replacement, searcher, num_captures))
{
auto convertTrait = [](ReplaceRegexpTraits::Replace first_or_all)
{
switch (first_or_all)
{
case ReplaceRegexpTraits::Replace::First: return ReplaceStringTraits::Replace::First;
case ReplaceRegexpTraits::Replace::All: return ReplaceStringTraits::Replace::All;
}
};
ReplaceStringImpl<Name, convertTrait(replace)>::vectorConstantConstant(haystack_data, haystack_offsets, needle, replacement, res_data, res_offsets);
return;
}
Instructions instructions = createInstructions(replacement, num_captures);
/// Cannot perform search for whole columns. Will process each string separately.
for (size_t i = 0; i < haystack_size; ++i)
{
size_t from = i > 0 ? haystack_offsets[i - 1] : 0;
@ -221,10 +267,8 @@ struct ReplaceRegexpImpl
res_offsets.resize(haystack_size);
re2::RE2::Options regexp_options;
/// Don't write error messages to stderr.
regexp_options.set_log_errors(false);
regexp_options.set_log_errors(false); /// don't write error messages to stderr
/// Cannot perform search for whole columns. Will process each string separately.
for (size_t i = 0; i < haystack_size; ++i)
{
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
@ -242,6 +286,7 @@ struct ReplaceRegexpImpl
re2::RE2 searcher(needle, regexp_options);
if (!searcher.ok())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
Instructions instructions = createInstructions(replacement, num_captures);
@ -270,17 +315,14 @@ struct ReplaceRegexpImpl
res_offsets.resize(haystack_size);
re2::RE2::Options regexp_options;
/// Don't write error messages to stderr.
regexp_options.set_log_errors(false);
regexp_options.set_log_errors(false); /// don't write error messages to stderr
re2::RE2 searcher(needle, regexp_options);
if (!searcher.ok())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
/// Cannot perform search for whole columns. Will process each string separately.
for (size_t i = 0; i < haystack_size; ++i)
{
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
@ -290,8 +332,9 @@ struct ReplaceRegexpImpl
size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0;
const char * repl_data = reinterpret_cast<const char *>(replacement_data.data() + repl_from);
const size_t repl_length = static_cast<unsigned>(replacement_offsets[i] - repl_from - 1);
std::string_view replacement(repl_data, repl_length);
Instructions instructions = createInstructions(std::string_view(repl_data, repl_length), num_captures);
Instructions instructions = createInstructions(replacement, num_captures);
processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions);
res_offsets[i] = res_offset;
@ -317,10 +360,8 @@ struct ReplaceRegexpImpl
res_offsets.resize(haystack_size);
re2::RE2::Options regexp_options;
/// Don't write error messages to stderr.
regexp_options.set_log_errors(false);
regexp_options.set_log_errors(false); /// don't write error messages to stderr
/// Cannot perform search for whole columns. Will process each string separately.
for (size_t i = 0; i < haystack_size; ++i)
{
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
@ -338,12 +379,14 @@ struct ReplaceRegexpImpl
size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0;
const char * repl_data = reinterpret_cast<const char *>(replacement_data.data() + repl_from);
const size_t repl_length = static_cast<unsigned>(replacement_offsets[i] - repl_from - 1);
std::string_view replacement(repl_data, repl_length);
re2::RE2 searcher(needle, regexp_options);
if (!searcher.ok())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
Instructions instructions = createInstructions(std::string_view(repl_data, repl_length), num_captures);
Instructions instructions = createInstructions(replacement, num_captures);
processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions);
res_offsets[i] = res_offset;
@ -367,16 +410,13 @@ struct ReplaceRegexpImpl
res_offsets.resize(haystack_size);
re2::RE2::Options regexp_options;
/// Don't write error messages to stderr.
regexp_options.set_log_errors(false);
regexp_options.set_log_errors(false); /// don't write error messages to stderr
re2::RE2 searcher(needle, regexp_options);
if (!searcher.ok())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
Instructions instructions = createInstructions(replacement, num_captures);
for (size_t i = 0; i < haystack_size; ++i)

View File

@ -713,8 +713,12 @@ ReadWriteBufferFromHTTP::HTTPFileInfo ReadWriteBufferFromHTTP::getFileInfo()
/// fall back to slow whole-file reads when HEAD is actually supported; that sounds
/// like a nightmare to debug.)
if (e.getHTTPStatus() >= 400 && e.getHTTPStatus() <= 499 &&
e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_TOO_MANY_REQUESTS)
e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_TOO_MANY_REQUESTS &&
e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_REQUEST_TIMEOUT &&
e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_MISDIRECTED_REQUEST)
{
return HTTPFileInfo{};
}
throw;
}

View File

@ -59,10 +59,10 @@ clickhouse_add_executable (parse_date_time_best_effort parse_date_time_best_effo
target_link_libraries (parse_date_time_best_effort PRIVATE clickhouse_common_io)
clickhouse_add_executable (zlib_ng_bug zlib_ng_bug.cpp)
target_link_libraries (zlib_ng_bug PRIVATE ch_contrib::zlib)
target_link_libraries (zlib_ng_bug PRIVATE ch_contrib::zlib clickhouse_common_io)
clickhouse_add_executable (dragonbox_test dragonbox_test.cpp)
target_link_libraries (dragonbox_test PRIVATE ch_contrib::dragonbox_to_chars)
target_link_libraries (dragonbox_test PRIVATE ch_contrib::dragonbox_to_chars clickhouse_common_io)
clickhouse_add_executable (zstd_buffers zstd_buffers.cpp)
target_link_libraries (zstd_buffers PRIVATE clickhouse_common_io)

View File

@ -29,33 +29,12 @@ BlockIO InterpreterSetRoleQuery::execute()
/// Executes `SET ROLE ...` for the session context.
/// `SET ROLE DEFAULT` restores the user's default roles. Any other form hands the parsed role
/// set to Context::setCurrentRoles(), which resolves the roles and checks that they are granted.
/// The roles are applied exactly once: `query.roles` is only dereferenced when the query is not
/// `SET ROLE DEFAULT`.
void InterpreterSetRoleQuery::setRole(const ASTSetRoleQuery & query)
{
    auto session_context = getContext()->getSessionContext();

    if (query.kind == ASTSetRoleQuery::Kind::SET_ROLE_DEFAULT)
    {
        session_context->setCurrentRolesDefault();
        return;
    }

    /// Grant checking is performed inside Context::setCurrentRoles (check_grants defaults to true).
    session_context->setCurrentRoles(RolesOrUsersSet{*query.roles, session_context->getAccessControl()});
}

View File

@ -726,7 +726,10 @@ try
/// Access rights must be checked for the user who executed the initial INSERT query.
if (key.user_id)
insert_context->setUser(*key.user_id, key.current_roles);
{
insert_context->setUser(*key.user_id);
insert_context->setCurrentRoles(key.current_roles);
}
insert_context->setSettings(key.settings);

View File

@ -58,6 +58,7 @@
#include <Access/EnabledRowPolicies.h>
#include <Access/QuotaUsage.h>
#include <Access/User.h>
#include <Access/Role.h>
#include <Access/SettingsProfile.h>
#include <Access/SettingsProfilesInfo.h>
#include <Access/SettingsConstraintsAndProfileIDs.h>
@ -190,6 +191,7 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
extern const int CLUSTER_DOESNT_EXIST;
extern const int SET_NON_GRANTED_ROLE;
}
#define SHUTDOWN(log, desc, ptr, method) do \
@ -1303,7 +1305,7 @@ ConfigurationPtr Context::getUsersConfig()
return shared->users_config;
}
void Context::setUser(const UUID & user_id_, const std::optional<const std::vector<UUID>> & current_roles_)
void Context::setUser(const UUID & user_id_)
{
/// Prepare lists of user's profiles, constraints, settings, roles.
/// NOTE: AccessControl::read<User>() and other AccessControl's functions may require some IO work,
@ -1312,8 +1314,8 @@ void Context::setUser(const UUID & user_id_, const std::optional<const std::vect
auto & access_control = getAccessControl();
auto user = access_control.read<User>(user_id_);
auto new_current_roles = current_roles_ ? user->granted_roles.findGranted(*current_roles_) : user->granted_roles.findGranted(user->default_roles);
auto enabled_roles = access_control.getEnabledRolesInfo(new_current_roles, {});
auto default_roles = user->granted_roles.findGranted(user->default_roles);
auto enabled_roles = access_control.getEnabledRolesInfo(default_roles, {});
auto enabled_profiles = access_control.getEnabledSettingsInfo(user_id_, user->settings, enabled_roles->enabled_roles, enabled_roles->settings_from_enabled_roles);
const auto & database = user->default_database;
@ -1327,7 +1329,7 @@ void Context::setUser(const UUID & user_id_, const std::optional<const std::vect
/// so we shouldn't check constraints here.
setCurrentProfilesWithLock(*enabled_profiles, /* check_constraints= */ false, lock);
setCurrentRolesWithLock(new_current_roles, lock);
setCurrentRolesWithLock(default_roles, lock);
/// It's optional to specify the DEFAULT DATABASE in the user's definition.
if (!database.empty())
@ -1362,25 +1364,66 @@ std::optional<UUID> Context::getUserID() const
return user_id;
}
void Context::setCurrentRolesWithLock(const std::vector<UUID> & current_roles_, const std::lock_guard<ContextSharedMutex> &)
void Context::setCurrentRolesWithLock(const std::vector<UUID> & new_current_roles, const std::lock_guard<ContextSharedMutex> &)
{
if (current_roles_.empty())
if (new_current_roles.empty())
current_roles = nullptr;
else
current_roles = std::make_shared<std::vector<UUID>>(current_roles_);
current_roles = std::make_shared<std::vector<UUID>>(new_current_roles);
need_recalculate_access = true;
}
void Context::setCurrentRoles(const std::vector<UUID> & current_roles_)
void Context::setCurrentRolesImpl(const std::vector<UUID> & new_current_roles, bool throw_if_not_granted, bool skip_if_not_granted, const std::shared_ptr<const User> & user)
{
std::lock_guard lock(mutex);
setCurrentRolesWithLock(current_roles_, lock);
if (skip_if_not_granted)
{
auto filtered_role_ids = user->granted_roles.findGranted(new_current_roles);
std::lock_guard lock{mutex};
setCurrentRolesWithLock(filtered_role_ids, lock);
return;
}
if (throw_if_not_granted)
{
for (const auto & role_id : new_current_roles)
{
if (!user->granted_roles.isGranted(role_id))
{
auto role_name = getAccessControl().tryReadName(role_id);
throw Exception(ErrorCodes::SET_NON_GRANTED_ROLE, "Role {} should be granted to set as a current", role_name.value_or(toString(role_id)));
}
}
}
std::lock_guard lock2{mutex};
setCurrentRolesWithLock(new_current_roles, lock2);
}
/// Sets the current roles from a list of role IDs.
/// With check_grants a role that is not granted to the current user causes an exception;
/// without it such roles are skipped.
void Context::setCurrentRoles(const std::vector<UUID> & new_current_roles, bool check_grants)
{
    const bool throw_if_not_granted = check_grants;
    const bool skip_if_not_granted = !check_grants;
    setCurrentRolesImpl(new_current_roles, throw_if_not_granted, skip_if_not_granted, getUser());
}
/// Sets the current roles from a RolesOrUsersSet.
/// An explicit set is resolved to IDs and forwarded to the ID-based overload together with
/// check_grants. A set marked `all` is narrowed to the roles granted to the current user,
/// so no further grant check or filtering is requested for it.
void Context::setCurrentRoles(const RolesOrUsersSet & new_current_roles, bool check_grants)
{
    if (!new_current_roles.all)
    {
        setCurrentRoles(new_current_roles.getMatchingIDs(), check_grants);
        return;
    }

    auto user = getUser();
    auto granted_role_ids = user->granted_roles.findGranted(new_current_roles);
    setCurrentRolesImpl(granted_role_ids, /* throw_if_not_granted= */ false, /* skip_if_not_granted= */ false, user);
}
/// Sets the current roles from role names: the names are resolved to role IDs through the
/// access control and then passed to the ID-based overload.
void Context::setCurrentRoles(const Strings & new_current_roles, bool check_grants)
{
    auto role_ids = getAccessControl().getIDs<Role>(new_current_roles);
    setCurrentRoles(role_ids, check_grants);
}
/// Resets the current roles to the user's default roles, restricted to those that are
/// actually granted. The list is already filtered by findGranted(), so it is applied
/// once, without an additional grant check.
void Context::setCurrentRolesDefault()
{
    auto user = getUser();
    setCurrentRolesImpl(user->granted_roles.findGranted(user->default_roles), /* throw_if_not_granted= */ false, /* skip_if_not_granted= */ false, user);
}
std::vector<UUID> Context::getCurrentRoles() const

View File

@ -61,6 +61,7 @@ class AccessFlags;
struct AccessRightsElement;
class AccessRightsElements;
enum class RowPolicyFilterType : uint8_t;
struct RolesOrUsersSet;
class EmbeddedDictionaries;
class ExternalDictionariesLoader;
class ExternalUserDefinedExecutableFunctionsLoader;
@ -607,13 +608,15 @@ public:
/// Sets the current user assuming that he/she is already authenticated.
/// WARNING: This function doesn't check password!
void setUser(const UUID & user_id_, const std::optional<const std::vector<UUID>> & current_roles_ = {});
void setUser(const UUID & user_id_);
UserPtr getUser() const;
std::optional<UUID> getUserID() const;
String getUserName() const;
void setCurrentRoles(const std::vector<UUID> & current_roles_);
void setCurrentRoles(const Strings & new_current_roles, bool check_grants = true);
void setCurrentRoles(const std::vector<UUID> & new_current_roles, bool check_grants = true);
void setCurrentRoles(const RolesOrUsersSet & new_current_roles, bool check_grants = true);
void setCurrentRolesDefault();
std::vector<UUID> getCurrentRoles() const;
std::vector<UUID> getEnabledRoles() const;
@ -1347,7 +1350,7 @@ private:
void setCurrentProfilesWithLock(const SettingsProfilesInfo & profiles_info, bool check_constraints, const std::lock_guard<ContextSharedMutex> & lock);
void setCurrentRolesWithLock(const std::vector<UUID> & current_roles_, const std::lock_guard<ContextSharedMutex> & lock);
void setCurrentRolesWithLock(const std::vector<UUID> & new_current_roles, const std::lock_guard<ContextSharedMutex> & lock);
void setSettingWithLock(std::string_view name, const String & value, const std::lock_guard<ContextSharedMutex> & lock);
@ -1380,6 +1383,7 @@ private:
void initGlobal();
void setUserID(const UUID & user_id_);
void setCurrentRolesImpl(const std::vector<UUID> & new_current_roles, bool throw_if_not_granted, bool skip_if_not_granted, const std::shared_ptr<const User> & user);
template <typename... Args>
void checkAccessImpl(const Args &... args) const;

View File

@ -900,6 +900,11 @@ bool NativeORCBlockInputFormat::prepareStripeReader()
orc::RowReaderOptions row_reader_options;
row_reader_options.includeTypes(include_indices);
if (format_settings.orc.read_use_writer_time_zone)
{
String writer_time_zone = current_stripe_info->getWriterTimezone();
row_reader_options.setTimezoneName(writer_time_zone);
}
row_reader_options.range(current_stripe_info->getOffset(), current_stripe_info->getLength());
if (format_settings.orc.filter_push_down && sarg)
{

View File

@ -0,0 +1,265 @@
#include <Server/HTTP/authenticateUserByHTTP.h>
#include <Access/Authentication.h>
#include <Access/Common/SSLCertificateSubjects.h>
#include <Access/Credentials.h>
#include <Access/ExternalAuthenticators.h>
#include <Common/Base64.h>
#include <Server/HTTP/HTTPServerRequest.h>
#include <Server/HTTP/HTMLForm.h>
#include <Server/HTTP/HTTPServerResponse.h>
#include <Interpreters/Context.h>
#include <Interpreters/Session.h>
#include <Poco/Net/HTTPBasicCredentials.h>
#if USE_SSL
#include <Poco/Net/X509Certificate.h>
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int AUTHENTICATION_FAILED;
extern const int BAD_ARGUMENTS;
extern const int SUPPORT_IS_DISABLED;
}
namespace
{
/// Reports that two authorization schemes were supplied in the same request.
/// Always throws AUTHENTICATION_FAILED; `method1` and `method2` are inserted into the message.
[[noreturn]] void throwMultipleAuthenticationMethods(std::string_view method1, std::string_view method2)
{
    throw Exception(
        ErrorCodes::AUTHENTICATION_FAILED,
        "Invalid authentication: it is not allowed to use {} and {} simultaneously",
        method1,
        method2);
}

/// Rejects an empty user name with AUTHENTICATION_FAILED.
/// `method` names the source of the user name and is inserted into the message.
void checkUserNameNotEmpty(const String & user_name, std::string_view method)
{
    if (!user_name.empty())
        return;

    throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Got an empty user name from {}", method);
}
}
/// Authenticates the user of an HTTP request and configures `session` for it.
/// Exactly one credential source is accepted per request, chosen in this order:
/// SSL certificate (enabled by a header), X-ClickHouse-* headers, the Authorization header
/// ('Basic' or 'Negotiate'), and finally the `user` / `password` query parameters.
/// Returns true once session.authenticate() has succeeded; `request_credentials` is then left empty.
/// Returns false after a 401 response with a "WWW-Authenticate" header has been sent; the
/// credentials object is then stored back into `request_credentials` for the next call.
/// Throws AUTHENTICATION_FAILED when schemes are mixed or the input is malformed, BAD_ARGUMENTS
/// when the quota key is passed twice, SUPPORT_IS_DISABLED for certificate auth in a build without SSL.
bool authenticateUserByHTTP(
const HTTPServerRequest & request,
const HTMLForm & params,
HTTPServerResponse & response,
Session & session,
std::unique_ptr<Credentials> & request_credentials,
ContextPtr global_context,
LoggerPtr log)
{
/// Get the credentials created by the previous call of authenticateUserByHTTP() while handling the previous HTTP request.
/// After this move `request_credentials` is empty; it is refilled only on the paths that return false.
auto current_credentials = std::move(request_credentials);
/// The user and password can be passed by headers (similar to X-Auth-*),
/// which is used by load balancers to pass authentication information.
std::string user = request.get("X-ClickHouse-User", "");
std::string password = request.get("X-ClickHouse-Key", "");
std::string quota_key = request.get("X-ClickHouse-Quota", "");
bool has_auth_headers = !user.empty() || !password.empty();
/// The header 'X-ClickHouse-SSL-Certificate-Auth: on' enables checking the common name
/// extracted from the SSL certificate used for this connection instead of checking password.
bool has_ssl_certificate_auth = (request.get("X-ClickHouse-SSL-Certificate-Auth", "") == "on");
/// User name and password can be passed using HTTP Basic auth or query parameters
/// (both methods are insecure).
bool has_http_credentials = request.hasCredentials();
bool has_credentials_in_query_params = params.has("user") || params.has("password");
std::string spnego_challenge;
SSLCertificateSubjects certificate_subjects;
/// Step 1: pick the credential source and extract user / password / challenge from it.
/// Each branch rejects the request if a lower-priority source is present as well.
if (has_ssl_certificate_auth)
{
#if USE_SSL
/// For SSL certificate authentication we extract the user name from the "X-ClickHouse-User" HTTP header.
checkUserNameNotEmpty(user, "X-ClickHouse HTTP headers");
/// It is prohibited to mix different authorization schemes.
if (!password.empty())
throwMultipleAuthenticationMethods("SSL certificate authentication", "authentication via password");
if (has_http_credentials)
throwMultipleAuthenticationMethods("SSL certificate authentication", "Authorization HTTP header");
if (has_credentials_in_query_params)
throwMultipleAuthenticationMethods("SSL certificate authentication", "authentication via parameters");
if (request.havePeerCertificate())
certificate_subjects = extractSSLCertificateSubjects(request.peerCertificate());
/// No peer certificate, or a certificate without usable subjects, is an authentication failure.
if (certificate_subjects.empty())
throw Exception(ErrorCodes::AUTHENTICATION_FAILED,
"Invalid authentication: SSL certificate authentication requires nonempty certificate's Common Name or Subject Alternative Name");
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
"SSL certificate authentication disabled because ClickHouse was built without SSL library");
#endif
}
else if (has_auth_headers)
{
checkUserNameNotEmpty(user, "X-ClickHouse HTTP headers");
/// It is prohibited to mix different authorization schemes.
if (has_http_credentials)
throwMultipleAuthenticationMethods("X-ClickHouse HTTP headers", "Authorization HTTP header");
if (has_credentials_in_query_params)
throwMultipleAuthenticationMethods("X-ClickHouse HTTP headers", "authentication via parameters");
}
else if (has_http_credentials)
{
/// It is prohibited to mix different authorization schemes.
if (has_credentials_in_query_params)
throwMultipleAuthenticationMethods("Authorization HTTP header", "authentication via parameters");
std::string scheme;
std::string auth_info;
request.getCredentials(scheme, auth_info);
/// Scheme names are compared case-insensitively.
if (Poco::icompare(scheme, "Basic") == 0)
{
Poco::Net::HTTPBasicCredentials credentials(auth_info);
user = credentials.getUsername();
password = credentials.getPassword();
checkUserNameNotEmpty(user, "Authorization HTTP header");
}
else if (Poco::icompare(scheme, "Negotiate") == 0)
{
spnego_challenge = auth_info;
if (spnego_challenge.empty())
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: SPNEGO challenge is empty");
}
else
{
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: '{}' HTTP Authorization scheme is not supported", scheme);
}
}
else
{
/// If the user name is not set we assume it's the 'default' user.
user = params.get("user", "default");
password = params.get("password", "");
checkUserNameNotEmpty(user, "authentication via parameters");
}
/// Step 2: create the credentials object for the chosen scheme, or reuse the one kept from the
/// previous call. The dynamic_cast in each branch rejects a kept object of a different scheme.
if (!certificate_subjects.empty())
{
chassert(!user.empty());
if (!current_credentials)
current_credentials = std::make_unique<SSLCertificateCredentials>(user, std::move(certificate_subjects));
auto * certificate_credentials = dynamic_cast<SSLCertificateCredentials *>(current_credentials.get());
if (!certificate_credentials)
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected SSL certificate authorization scheme");
}
else if (!spnego_challenge.empty())
{
if (!current_credentials)
current_credentials = global_context->makeGSSAcceptorContext();
auto * gss_acceptor_context = dynamic_cast<GSSAcceptorContext *>(current_credentials.get());
/// NOTE(review): the message below reads "unexpected ... expected" - the wording looks garbled;
/// confirm that nothing matches on the text before rewording it.
if (!gss_acceptor_context)
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: unexpected 'Negotiate' HTTP Authorization scheme expected");
/// NOTE(review): the pragma presumably silences an unreachable-code warning in builds where
/// processToken() is stubbed out - confirm.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunreachable-code"
const auto spnego_response = base64Encode(gss_acceptor_context->processToken(base64Decode(spnego_challenge), log));
#pragma clang diagnostic pop
if (!spnego_response.empty())
response.set("WWW-Authenticate", "Negotiate " + spnego_response);
/// The negotiation has neither failed nor completed: another round trip is needed.
if (!gss_acceptor_context->isFailed() && !gss_acceptor_context->isReady())
{
/// Without a token to send back the client has nothing to continue with.
if (spnego_response.empty())
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: 'Negotiate' HTTP Authorization failure");
response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED);
response.send();
/// Keep the credentials for next HTTP request. A client can handle HTTP_UNAUTHORIZED and send us more credentials with the next HTTP request.
request_credentials = std::move(current_credentials);
return false;
}
}
else // I.e., now using user name and password strings ("Basic").
{
if (!current_credentials)
current_credentials = std::make_unique<BasicCredentials>();
auto * basic_credentials = dynamic_cast<BasicCredentials *>(current_credentials.get());
if (!basic_credentials)
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected 'Basic' HTTP Authorization scheme");
chassert(!user.empty());
basic_credentials->setUserName(user);
basic_credentials->setPassword(password);
}
/// The quota key may come from the "X-ClickHouse-Quota" header or the `quota_key` parameter, but not both.
if (params.has("quota_key"))
{
if (!quota_key.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Invalid authentication: it is not allowed "
"to use quota key as HTTP header and as parameter simultaneously");
quota_key = params.get("quota_key");
}
/// Set client info. It will be used for quota accounting parameters in 'setUser' method.
session.setHTTPClientInfo(request);
session.setQuotaClientKey(quota_key);
/// Extract the last entry from comma separated list of forwarded_for addresses.
/// Only the last proxy can be trusted (if any).
String forwarded_address = session.getClientInfo().getLastForwardedFor();
/// Step 3: authenticate. The forwarded address replaces the peer address only when the
/// `auth_use_forwarded_address` config flag is set (default false); the port is always the peer's.
try
{
if (!forwarded_address.empty() && global_context->getConfigRef().getBool("auth_use_forwarded_address", false))
session.authenticate(*current_credentials, Poco::Net::SocketAddress(forwarded_address, request.clientAddress().port()));
else
session.authenticate(*current_credentials, request.clientAddress());
}
catch (const Authentication::Require<BasicCredentials> & required_credentials)
{
/// Basic credentials were requested instead: start over with a fresh BasicCredentials
/// object and challenge the client.
current_credentials = std::make_unique<BasicCredentials>();
if (required_credentials.getRealm().empty())
response.set("WWW-Authenticate", "Basic");
else
response.set("WWW-Authenticate", "Basic realm=\"" + required_credentials.getRealm() + "\"");
response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED);
response.send();
/// Keep the credentials for next HTTP request. A client can handle HTTP_UNAUTHORIZED and send us more credentials with the next HTTP request.
request_credentials = std::move(current_credentials);
return false;
}
catch (const Authentication::Require<GSSAcceptorContext> & required_credentials)
{
/// 'Negotiate' was requested instead: start over with a fresh GSS acceptor context
/// and challenge the client.
current_credentials = global_context->makeGSSAcceptorContext();
if (required_credentials.getRealm().empty())
response.set("WWW-Authenticate", "Negotiate");
else
response.set("WWW-Authenticate", "Negotiate realm=\"" + required_credentials.getRealm() + "\"");
response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED);
response.send();
/// Keep the credentials for next HTTP request. A client can handle HTTP_UNAUTHORIZED and send us more credentials with the next HTTP request.
request_credentials = std::move(current_credentials);
return false;
}
/// Success: `current_credentials` is destroyed here and `request_credentials` stays empty.
return true;
}
}

View File

@ -0,0 +1,31 @@
#pragma once
#include <Common/logger_useful.h>
#include <Interpreters/Context_fwd.h>
namespace DB
{
class HTTPServerRequest;
class HTMLForm;
class HTTPServerResponse;
class Session;
class Credentials;
/// Authenticates a user via HTTP protocol and initializes a session.
/// Usually retrieves the name and the password for that user from either the request's headers or from the query parameters.
/// Returns true when the user successfully authenticated,
/// the session instance will be configured accordingly, and the request_credentials instance will be dropped.
/// Returns false when the user is not authenticated yet, and the HTTP_UNAUTHORIZED response is sent with the "WWW-Authenticate" header,
/// in this case the `request_credentials` instance must be preserved until the next request or until any exception.
/// Throws an exception if authentication failed.
///
/// Parameters:
///   request             - the incoming HTTP request (headers, Authorization, peer certificate, client address);
///   params              - parsed request parameters (`user`, `password`, `quota_key`);
///   response            - receives the "WWW-Authenticate" header and the 401 status when false is returned;
///   session             - the session to set client info on and to authenticate;
///   request_credentials - in/out: credentials kept from the previous call, refilled when false is returned;
///   global_context      - used to read the config and to create the GSS acceptor context;
///   log                 - logger passed to the 'Negotiate' token processing.
bool authenticateUserByHTTP(
const HTTPServerRequest & request,
const HTMLForm & params,
HTTPServerResponse & response,
Session & session,
std::unique_ptr<Credentials> & request_credentials,
ContextPtr global_context,
LoggerPtr log);
}

View File

@ -0,0 +1,158 @@
#include <Server/HTTP/exceptionCodeToHTTPStatus.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int CANNOT_PARSE_TEXT;
extern const int CANNOT_PARSE_ESCAPE_SEQUENCE;
extern const int CANNOT_PARSE_QUOTED_STRING;
extern const int CANNOT_PARSE_DATE;
extern const int CANNOT_PARSE_DATETIME;
extern const int CANNOT_PARSE_NUMBER;
extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING;
extern const int CANNOT_PARSE_IPV4;
extern const int CANNOT_PARSE_IPV6;
extern const int CANNOT_PARSE_UUID;
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
extern const int CANNOT_SCHEDULE_TASK;
extern const int CANNOT_OPEN_FILE;
extern const int CANNOT_COMPILE_REGEXP;
extern const int DUPLICATE_COLUMN;
extern const int ILLEGAL_COLUMN;
extern const int THERE_IS_NO_COLUMN;
extern const int UNKNOWN_ELEMENT_IN_AST;
extern const int UNKNOWN_TYPE_OF_AST_NODE;
extern const int TOO_DEEP_AST;
extern const int TOO_BIG_AST;
extern const int UNEXPECTED_AST_STRUCTURE;
extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE;
extern const int SYNTAX_ERROR;
extern const int INCORRECT_DATA;
extern const int TYPE_MISMATCH;
extern const int UNKNOWN_TABLE;
extern const int UNKNOWN_FUNCTION;
extern const int UNKNOWN_IDENTIFIER;
extern const int UNKNOWN_TYPE;
extern const int UNKNOWN_STORAGE;
extern const int UNKNOWN_DATABASE;
extern const int UNKNOWN_SETTING;
extern const int UNKNOWN_DIRECTION_OF_SORTING;
extern const int UNKNOWN_AGGREGATE_FUNCTION;
extern const int UNKNOWN_FORMAT;
extern const int UNKNOWN_DATABASE_ENGINE;
extern const int UNKNOWN_TYPE_OF_QUERY;
extern const int UNKNOWN_ROLE;
extern const int QUERY_IS_TOO_LARGE;
extern const int NOT_IMPLEMENTED;
extern const int SOCKET_TIMEOUT;
extern const int UNKNOWN_USER;
extern const int WRONG_PASSWORD;
extern const int REQUIRED_PASSWORD;
extern const int AUTHENTICATION_FAILED;
extern const int SET_NON_GRANTED_ROLE;
extern const int HTTP_LENGTH_REQUIRED;
extern const int TIMEOUT_EXCEEDED;
}
/// Maps an exception code to the HTTP status reported to the client.
/// The groups are tested one after another; a code that belongs to none of them
/// yields HTTP_INTERNAL_SERVER_ERROR.
Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int exception_code)
{
    using HTTPResponse = Poco::Net::HTTPResponse;
    const int code = exception_code;

    /// A missing password is the only case answered with 401.
    if (code == ErrorCodes::REQUIRED_PASSWORD)
        return HTTPResponse::HTTP_UNAUTHORIZED;

    /// Authentication and role problems: 403.
    if (code == ErrorCodes::UNKNOWN_USER
        || code == ErrorCodes::WRONG_PASSWORD
        || code == ErrorCodes::AUTHENTICATION_FAILED
        || code == ErrorCodes::SET_NON_GRANTED_ROLE)
        return HTTPResponse::HTTP_FORBIDDEN;

    /// Malformed queries or data: 400.
    if (code == ErrorCodes::BAD_ARGUMENTS
        || code == ErrorCodes::CANNOT_COMPILE_REGEXP
        || code == ErrorCodes::CANNOT_PARSE_TEXT
        || code == ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE
        || code == ErrorCodes::CANNOT_PARSE_QUOTED_STRING
        || code == ErrorCodes::CANNOT_PARSE_DATE
        || code == ErrorCodes::CANNOT_PARSE_DATETIME
        || code == ErrorCodes::CANNOT_PARSE_NUMBER
        || code == ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING
        || code == ErrorCodes::CANNOT_PARSE_IPV4
        || code == ErrorCodes::CANNOT_PARSE_IPV6
        || code == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED
        || code == ErrorCodes::CANNOT_PARSE_UUID
        || code == ErrorCodes::DUPLICATE_COLUMN
        || code == ErrorCodes::ILLEGAL_COLUMN
        || code == ErrorCodes::UNKNOWN_ELEMENT_IN_AST
        || code == ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE
        || code == ErrorCodes::THERE_IS_NO_COLUMN
        || code == ErrorCodes::TOO_DEEP_AST
        || code == ErrorCodes::TOO_BIG_AST
        || code == ErrorCodes::UNEXPECTED_AST_STRUCTURE
        || code == ErrorCodes::SYNTAX_ERROR
        || code == ErrorCodes::INCORRECT_DATA
        || code == ErrorCodes::TYPE_MISMATCH
        || code == ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE)
        return HTTPResponse::HTTP_BAD_REQUEST;

    /// References to unknown entities: 404.
    if (code == ErrorCodes::UNKNOWN_TABLE
        || code == ErrorCodes::UNKNOWN_FUNCTION
        || code == ErrorCodes::UNKNOWN_IDENTIFIER
        || code == ErrorCodes::UNKNOWN_TYPE
        || code == ErrorCodes::UNKNOWN_STORAGE
        || code == ErrorCodes::UNKNOWN_DATABASE
        || code == ErrorCodes::UNKNOWN_SETTING
        || code == ErrorCodes::UNKNOWN_DIRECTION_OF_SORTING
        || code == ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION
        || code == ErrorCodes::UNKNOWN_FORMAT
        || code == ErrorCodes::UNKNOWN_DATABASE_ENGINE
        || code == ErrorCodes::UNKNOWN_TYPE_OF_QUERY
        || code == ErrorCodes::UNKNOWN_ROLE)
        return HTTPResponse::HTTP_NOT_FOUND;

    if (code == ErrorCodes::QUERY_IS_TOO_LARGE)
        return HTTPResponse::HTTP_REQUESTENTITYTOOLARGE;

    if (code == ErrorCodes::NOT_IMPLEMENTED)
        return HTTPResponse::HTTP_NOT_IMPLEMENTED;

    if (code == ErrorCodes::SOCKET_TIMEOUT || code == ErrorCodes::CANNOT_OPEN_FILE)
        return HTTPResponse::HTTP_SERVICE_UNAVAILABLE;

    if (code == ErrorCodes::HTTP_LENGTH_REQUIRED)
        return HTTPResponse::HTTP_LENGTH_REQUIRED;

    if (code == ErrorCodes::TIMEOUT_EXCEEDED)
        return HTTPResponse::HTTP_REQUEST_TIMEOUT;

    if (code == ErrorCodes::CANNOT_SCHEDULE_TASK)
        return HTTPResponse::HTTP_SERVICE_UNAVAILABLE;

    return HTTPResponse::HTTP_INTERNAL_SERVER_ERROR;
}
}

View File

@ -0,0 +1,11 @@
#pragma once
#include <Poco/Net/HTTPResponse.h>
namespace DB
{
/// Converts Exception code to HTTP status code.
/// Codes that have no dedicated mapping are reported as HTTP_INTERNAL_SERVER_ERROR.
Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int exception_code);
}

View File

@ -0,0 +1,80 @@
#include <Server/HTTP/sendExceptionToHTTPClient.h>
#include <Server/HTTP/HTTPServerRequest.h>
#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
#include <Server/HTTP/exceptionCodeToHTTPStatus.h>
namespace DB
{
namespace ErrorCodes
{
extern const int HTTP_LENGTH_REQUIRED;
extern const int REQUIRED_PASSWORD;
}
void sendExceptionToHTTPClient(
const String & exception_message,
int exception_code,
HTTPServerRequest & request,
HTTPServerResponse & response,
WriteBufferFromHTTPServerResponse * out,
LoggerPtr log)
{
setHTTPResponseStatusAndHeadersForException(exception_code, request, response, out, log);
if (!out)
{
/// If nothing was sent yet.
WriteBufferFromHTTPServerResponse out_for_message{response, request.getMethod() == HTTPRequest::HTTP_HEAD, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT};
out_for_message.writeln(exception_message);
out_for_message.finalize();
}
else
{
/// If buffer has data, and that data wasn't sent yet, then no need to send that data
bool data_sent = (out->count() != out->offset());
if (!data_sent)
out->position() = out->buffer().begin();
out->writeln(exception_message);
out->finalize();
}
}
/// Prepares `response` for reporting an exception: records the exception code, tries to consume
/// the rest of the request body when the connection is going to be reused, and sets the HTTP status.
void setHTTPResponseStatusAndHeadersForException(
    int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, WriteBufferFromHTTPServerResponse * out, LoggerPtr log)
{
    /// Without an output buffer the code goes straight into a response header,
    /// otherwise the buffer is informed about it.
    if (!out)
        response.set("X-ClickHouse-Exception-Code", toString<int>(exception_code));
    else
        out->setExceptionCode(exception_code);

    /// If HTTP method is POST and Keep-Alive is turned on, we should try to read the whole request body
    /// to avoid reading part of the current request body in the next request.
    const bool should_read_remaining_body = request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST
        && response.getKeepAlive()
        && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED;

    if (should_read_remaining_body)
    {
        try
        {
            if (!request.getStream().eof())
                request.getStream().ignoreAll();
        }
        catch (...)
        {
            /// The body could not be consumed, so the connection must not be kept alive.
            tryLogCurrentException(log, "Cannot read remaining request body during exception handling");
            response.setKeepAlive(false);
        }
    }

    if (exception_code != ErrorCodes::REQUIRED_PASSWORD)
    {
        response.setStatusAndReason(exceptionCodeToHTTPStatus(exception_code));
        return;
    }

    /// A missing password is reported by asking the client to authenticate.
    response.requireAuthentication("ClickHouse server HTTP API");
}
}

View File

@ -0,0 +1,27 @@
#pragma once
#include <Common/logger_useful.h>
#include <base/types.h>
namespace DB
{
class HTTPServerRequest;
class HTTPServerResponse;
class WriteBufferFromHTTPServerResponse;
/// Sends an exception to the HTTP client. This function doesn't handle its own exceptions, so it needs to be wrapped in try-catch.
/// `exception_message` is the text written to the response; `exception_code` selects the HTTP status and
/// is reported in the "X-ClickHouse-Exception-Code" header.
/// Argument `out` may be either created from `response` or be nullptr (if it wasn't created before the exception).
/// `log` is used to log a failure to read the remaining request body.
void sendExceptionToHTTPClient(
const String & exception_message,
int exception_code,
HTTPServerRequest & request,
HTTPServerResponse & response,
WriteBufferFromHTTPServerResponse * out,
LoggerPtr log);
/// Sets the "X-ClickHouse-Exception-Code" header and the corresponding HTTP status in the response for an exception.
/// This is a part of what sendExceptionToHTTPClient() does.
/// Argument `out` has the same meaning as in sendExceptionToHTTPClient() and may be nullptr.
void setHTTPResponseStatusAndHeadersForException(
int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, WriteBufferFromHTTPServerResponse * out, LoggerPtr log);
}

View File

@ -0,0 +1,24 @@
#include <Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.h>
#include <Interpreters/Context.h>
#include <Server/HTTP/HTTPServerRequest.h>
namespace DB
{
/// Restricts the context to read-only queries for every HTTP method other than POST,
/// unless a 'readonly' value has already been chosen.
void setReadOnlyIfHTTPMethodIdempotent(ContextMutablePtr context, const String & http_method)
{
    /// Only requests other than HTTP POST are limited to readonly queries.
    if (http_method == HTTPServerRequest::HTTP_POST)
        return;

    /// 'readonly' setting values mean:
    /// readonly = 0 - any query is allowed, client can change any setting.
    /// readonly = 1 - only readonly queries are allowed, client can't change settings.
    /// readonly = 2 - only readonly queries are allowed, client can change any setting except 'readonly'.
    if (context->getSettingsRef().readonly == 0)
    {
        context->setSetting("readonly", 2);
    }
}
}

View File

@ -0,0 +1,12 @@
#pragma once
#include <Interpreters/Context_fwd.h>
namespace DB
{
/// Sets readonly = 2 if the current HTTP method is not HTTP POST and if readonly is not set already
/// (i.e. the 'readonly' setting in `context` is still 0).
/// `http_method` is the method name of the request being processed.
void setReadOnlyIfHTTPMethodIdempotent(ContextMutablePtr context, const String & http_method);
}

View File

@ -1,11 +1,6 @@
#include <Server/HTTPHandler.h>
#include <Access/Authentication.h>
#include <Access/Credentials.h>
#include <Access/AccessControl.h>
#include <Access/ExternalAuthenticators.h>
#include <Access/Role.h>
#include <Access/User.h>
#include <Compression/CompressedReadBuffer.h>
#include <Compression/CompressedWriteBuffer.h>
#include <Core/ExternalTable.h>
@ -37,20 +32,14 @@
#include <base/getFQDNOrHostName.h>
#include <base/scope_guard.h>
#include <Server/HTTP/HTTPResponse.h>
#include <Server/HTTP/authenticateUserByHTTP.h>
#include <Server/HTTP/sendExceptionToHTTPClient.h>
#include <Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.h>
#include <boost/container/flat_set.hpp>
#include <Access/Common/SSLCertificateSubjects.h>
#include "config.h"
#include <Poco/Base64Decoder.h>
#include <Poco/Base64Encoder.h>
#include <Poco/Net/HTTPBasicCredentials.h>
#include <Poco/Net/HTTPMessage.h>
#include <Poco/Net/HTTPStream.h>
#include <Poco/MemoryStream.h>
#include <Poco/StreamCopier.h>
#include <Poco/String.h>
#include <Poco/Net/SocketAddress.h>
#include "config.h"
#include <algorithm>
#include <chrono>
@ -60,78 +49,19 @@
#include <unordered_map>
#include <utility>
#if USE_SSL
#include <Poco/Net/X509Certificate.h>
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
extern const int CANNOT_COMPILE_REGEXP;
extern const int CANNOT_OPEN_FILE;
extern const int CANNOT_PARSE_TEXT;
extern const int CANNOT_PARSE_ESCAPE_SEQUENCE;
extern const int CANNOT_PARSE_QUOTED_STRING;
extern const int CANNOT_PARSE_DATE;
extern const int CANNOT_PARSE_DATETIME;
extern const int CANNOT_PARSE_NUMBER;
extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING;
extern const int CANNOT_PARSE_IPV4;
extern const int CANNOT_PARSE_IPV6;
extern const int CANNOT_PARSE_UUID;
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
extern const int CANNOT_SCHEDULE_TASK;
extern const int DUPLICATE_COLUMN;
extern const int ILLEGAL_COLUMN;
extern const int THERE_IS_NO_COLUMN;
extern const int UNKNOWN_ELEMENT_IN_AST;
extern const int UNKNOWN_TYPE_OF_AST_NODE;
extern const int TOO_DEEP_AST;
extern const int TOO_BIG_AST;
extern const int UNEXPECTED_AST_STRUCTURE;
extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE;
extern const int SYNTAX_ERROR;
extern const int INCORRECT_DATA;
extern const int TYPE_MISMATCH;
extern const int UNKNOWN_TABLE;
extern const int UNKNOWN_FUNCTION;
extern const int UNKNOWN_IDENTIFIER;
extern const int UNKNOWN_TYPE;
extern const int UNKNOWN_STORAGE;
extern const int UNKNOWN_DATABASE;
extern const int UNKNOWN_SETTING;
extern const int UNKNOWN_DIRECTION_OF_SORTING;
extern const int UNKNOWN_AGGREGATE_FUNCTION;
extern const int UNKNOWN_FORMAT;
extern const int UNKNOWN_DATABASE_ENGINE;
extern const int UNKNOWN_TYPE_OF_QUERY;
extern const int UNKNOWN_ROLE;
extern const int NO_ELEMENTS_IN_CONFIG;
extern const int QUERY_IS_TOO_LARGE;
extern const int NOT_IMPLEMENTED;
extern const int SOCKET_TIMEOUT;
extern const int UNKNOWN_USER;
extern const int WRONG_PASSWORD;
extern const int REQUIRED_PASSWORD;
extern const int AUTHENTICATION_FAILED;
extern const int SET_NON_GRANTED_ROLE;
extern const int INVALID_SESSION_TIMEOUT;
extern const int HTTP_LENGTH_REQUIRED;
extern const int SUPPORT_IS_DISABLED;
extern const int TIMEOUT_EXCEEDED;
}
namespace
@ -173,115 +103,6 @@ void processOptionsRequest(HTTPServerResponse & response, const Poco::Util::Laye
}
}
/// Decodes a Base64 string by streaming it through Poco::Base64Decoder and returns the decoded bytes.
static String base64Decode(const String & encoded)
{
    Poco::MemoryInputStream encoded_stream(encoded.data(), encoded.size());
    Poco::Base64Decoder base64_stream(encoded_stream);

    String result;
    Poco::StreamCopier::copyToString(base64_stream, result);
    return result;
}
/// Encodes a string as Base64 using Poco::Base64Encoder and returns the encoded text.
static String base64Encode(const String & decoded)
{
    std::ostringstream encoded_stream; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
    encoded_stream.exceptions(std::ios::failbit);

    Poco::Base64Encoder base64_stream(encoded_stream);
    /// The line length is set to 0 before any data is written.
    base64_stream.rdbuf()->setLineLength(0);
    base64_stream << decoded;
    base64_stream.close();

    return encoded_stream.str();
}
/// Maps an exception code to the HTTP status reported to the client.
/// The groups are checked in a fixed order; a code that matches none of them
/// results in HTTP_INTERNAL_SERVER_ERROR.
static Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int exception_code)
{
    using namespace Poco::Net;

    /// The error codes are not compile-time constants here, so plain comparisons are used.
    const auto is = [exception_code](int code) { return exception_code == code; };

    /// Password is required but was not provided.
    if (is(ErrorCodes::REQUIRED_PASSWORD))
        return HTTPResponse::HTTP_UNAUTHORIZED;

    /// Authentication / authorization failures.
    if (is(ErrorCodes::UNKNOWN_USER)
        || is(ErrorCodes::WRONG_PASSWORD)
        || is(ErrorCodes::AUTHENTICATION_FAILED)
        || is(ErrorCodes::SET_NON_GRANTED_ROLE))
        return HTTPResponse::HTTP_FORBIDDEN;

    /// Malformed requests: bad arguments, parse errors, invalid query structure or data.
    if (is(ErrorCodes::BAD_ARGUMENTS)
        || is(ErrorCodes::CANNOT_COMPILE_REGEXP)
        || is(ErrorCodes::CANNOT_PARSE_TEXT)
        || is(ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE)
        || is(ErrorCodes::CANNOT_PARSE_QUOTED_STRING)
        || is(ErrorCodes::CANNOT_PARSE_DATE)
        || is(ErrorCodes::CANNOT_PARSE_DATETIME)
        || is(ErrorCodes::CANNOT_PARSE_NUMBER)
        || is(ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING)
        || is(ErrorCodes::CANNOT_PARSE_IPV4)
        || is(ErrorCodes::CANNOT_PARSE_IPV6)
        || is(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED)
        || is(ErrorCodes::CANNOT_PARSE_UUID)
        || is(ErrorCodes::DUPLICATE_COLUMN)
        || is(ErrorCodes::ILLEGAL_COLUMN)
        || is(ErrorCodes::UNKNOWN_ELEMENT_IN_AST)
        || is(ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE)
        || is(ErrorCodes::THERE_IS_NO_COLUMN)
        || is(ErrorCodes::TOO_DEEP_AST)
        || is(ErrorCodes::TOO_BIG_AST)
        || is(ErrorCodes::UNEXPECTED_AST_STRUCTURE)
        || is(ErrorCodes::SYNTAX_ERROR)
        || is(ErrorCodes::INCORRECT_DATA)
        || is(ErrorCodes::TYPE_MISMATCH)
        || is(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE))
        return HTTPResponse::HTTP_BAD_REQUEST;

    /// The request refers to something that is not known.
    if (is(ErrorCodes::UNKNOWN_TABLE)
        || is(ErrorCodes::UNKNOWN_FUNCTION)
        || is(ErrorCodes::UNKNOWN_IDENTIFIER)
        || is(ErrorCodes::UNKNOWN_TYPE)
        || is(ErrorCodes::UNKNOWN_STORAGE)
        || is(ErrorCodes::UNKNOWN_DATABASE)
        || is(ErrorCodes::UNKNOWN_SETTING)
        || is(ErrorCodes::UNKNOWN_DIRECTION_OF_SORTING)
        || is(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION)
        || is(ErrorCodes::UNKNOWN_FORMAT)
        || is(ErrorCodes::UNKNOWN_DATABASE_ENGINE)
        || is(ErrorCodes::UNKNOWN_TYPE_OF_QUERY)
        || is(ErrorCodes::UNKNOWN_ROLE))
        return HTTPResponse::HTTP_NOT_FOUND;

    if (is(ErrorCodes::QUERY_IS_TOO_LARGE))
        return HTTPResponse::HTTP_REQUESTENTITYTOOLARGE;

    if (is(ErrorCodes::NOT_IMPLEMENTED))
        return HTTPResponse::HTTP_NOT_IMPLEMENTED;

    if (is(ErrorCodes::SOCKET_TIMEOUT) || is(ErrorCodes::CANNOT_OPEN_FILE))
        return HTTPResponse::HTTP_SERVICE_UNAVAILABLE;

    if (is(ErrorCodes::HTTP_LENGTH_REQUIRED))
        return HTTPResponse::HTTP_LENGTH_REQUIRED;

    if (is(ErrorCodes::TIMEOUT_EXCEEDED))
        return HTTPResponse::HTTP_REQUEST_TIMEOUT;

    if (is(ErrorCodes::CANNOT_SCHEDULE_TASK))
        return HTTPResponse::HTTP_SERVICE_UNAVAILABLE;

    /// Everything else is reported as an internal error.
    return HTTPResponse::HTTP_INTERNAL_SERVER_ERROR;
}
static std::chrono::steady_clock::duration parseSessionTimeout(
const Poco::Util::AbstractConfiguration & config,
const HTMLForm & params)
@ -358,204 +179,9 @@ HTTPHandler::HTTPHandler(IServer & server_, const std::string & name, const HTTP
HTTPHandler::~HTTPHandler() = default;
bool HTTPHandler::authenticateUser(
HTTPServerRequest & request,
HTMLForm & params,
HTTPServerResponse & response)
bool HTTPHandler::authenticateUser(HTTPServerRequest & request, HTMLForm & params, HTTPServerResponse & response)
{
using namespace Poco::Net;
/// The user and password can be passed by headers (similar to X-Auth-*),
/// which is used by load balancers to pass authentication information.
std::string user = request.get("X-ClickHouse-User", "");
std::string password = request.get("X-ClickHouse-Key", "");
std::string quota_key = request.get("X-ClickHouse-Quota", "");
/// The header 'X-ClickHouse-SSL-Certificate-Auth: on' enables checking the common name
/// extracted from the SSL certificate used for this connection instead of checking password.
bool has_ssl_certificate_auth = (request.get("X-ClickHouse-SSL-Certificate-Auth", "") == "on");
bool has_auth_headers = !user.empty() || !password.empty() || has_ssl_certificate_auth;
/// User name and password can be passed using HTTP Basic auth or query parameters
/// (both methods are insecure).
bool has_http_credentials = request.hasCredentials();
bool has_credentials_in_query_params = params.has("user") || params.has("password");
std::string spnego_challenge;
SSLCertificateSubjects certificate_subjects;
if (has_auth_headers)
{
/// It is prohibited to mix different authorization schemes.
if (has_http_credentials)
throw Exception(ErrorCodes::AUTHENTICATION_FAILED,
"Invalid authentication: it is not allowed "
"to use SSL certificate authentication and Authorization HTTP header simultaneously");
if (has_credentials_in_query_params)
throw Exception(ErrorCodes::AUTHENTICATION_FAILED,
"Invalid authentication: it is not allowed "
"to use SSL certificate authentication and authentication via parameters simultaneously simultaneously");
if (has_ssl_certificate_auth)
{
#if USE_SSL
if (!password.empty())
throw Exception(ErrorCodes::AUTHENTICATION_FAILED,
"Invalid authentication: it is not allowed "
"to use SSL certificate authentication and authentication via password simultaneously");
if (request.havePeerCertificate())
certificate_subjects = extractSSLCertificateSubjects(request.peerCertificate());
if (certificate_subjects.empty())
throw Exception(ErrorCodes::AUTHENTICATION_FAILED,
"Invalid authentication: SSL certificate authentication requires nonempty certificate's Common Name or Subject Alternative Name");
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
"SSL certificate authentication disabled because ClickHouse was built without SSL library");
#endif
}
}
else if (has_http_credentials)
{
/// It is prohibited to mix different authorization schemes.
if (has_credentials_in_query_params)
throw Exception(ErrorCodes::AUTHENTICATION_FAILED,
"Invalid authentication: it is not allowed "
"to use Authorization HTTP header and authentication via parameters simultaneously");
std::string scheme;
std::string auth_info;
request.getCredentials(scheme, auth_info);
if (Poco::icompare(scheme, "Basic") == 0)
{
HTTPBasicCredentials credentials(auth_info);
user = credentials.getUsername();
password = credentials.getPassword();
}
else if (Poco::icompare(scheme, "Negotiate") == 0)
{
spnego_challenge = auth_info;
if (spnego_challenge.empty())
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: SPNEGO challenge is empty");
}
else
{
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: '{}' HTTP Authorization scheme is not supported", scheme);
}
}
else
{
/// If the user name is not set we assume it's the 'default' user.
user = params.get("user", "default");
password = params.get("password", "");
}
if (!certificate_subjects.empty())
{
if (!request_credentials)
request_credentials = std::make_unique<SSLCertificateCredentials>(user, std::move(certificate_subjects));
auto * certificate_credentials = dynamic_cast<SSLCertificateCredentials *>(request_credentials.get());
if (!certificate_credentials)
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected SSL certificate authorization scheme");
}
else if (!spnego_challenge.empty())
{
if (!request_credentials)
request_credentials = server.context()->makeGSSAcceptorContext();
auto * gss_acceptor_context = dynamic_cast<GSSAcceptorContext *>(request_credentials.get());
if (!gss_acceptor_context)
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: unexpected 'Negotiate' HTTP Authorization scheme expected");
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunreachable-code"
const auto spnego_response = base64Encode(gss_acceptor_context->processToken(base64Decode(spnego_challenge), log));
#pragma clang diagnostic pop
if (!spnego_response.empty())
response.set("WWW-Authenticate", "Negotiate " + spnego_response);
if (!gss_acceptor_context->isFailed() && !gss_acceptor_context->isReady())
{
if (spnego_response.empty())
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: 'Negotiate' HTTP Authorization failure");
response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED);
response.send();
return false;
}
}
else // I.e., now using user name and password strings ("Basic").
{
if (!request_credentials)
request_credentials = std::make_unique<BasicCredentials>();
auto * basic_credentials = dynamic_cast<BasicCredentials *>(request_credentials.get());
if (!basic_credentials)
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected 'Basic' HTTP Authorization scheme");
basic_credentials->setUserName(user);
basic_credentials->setPassword(password);
}
if (params.has("quota_key"))
{
if (!quota_key.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Invalid authentication: it is not allowed "
"to use quota key as HTTP header and as parameter simultaneously");
quota_key = params.get("quota_key");
}
/// Set client info. It will be used for quota accounting parameters in 'setUser' method.
session->setHTTPClientInfo(request);
session->setQuotaClientKey(quota_key);
/// Extract the last entry from comma separated list of forwarded_for addresses.
/// Only the last proxy can be trusted (if any).
String forwarded_address = session->getClientInfo().getLastForwardedFor();
try
{
if (!forwarded_address.empty() && server.config().getBool("auth_use_forwarded_address", false))
session->authenticate(*request_credentials, Poco::Net::SocketAddress(forwarded_address, request.clientAddress().port()));
else
session->authenticate(*request_credentials, request.clientAddress());
}
catch (const Authentication::Require<BasicCredentials> & required_credentials)
{
request_credentials = std::make_unique<BasicCredentials>();
if (required_credentials.getRealm().empty())
response.set("WWW-Authenticate", "Basic");
else
response.set("WWW-Authenticate", "Basic realm=\"" + required_credentials.getRealm() + "\"");
response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED);
response.send();
return false;
}
catch (const Authentication::Require<GSSAcceptorContext> & required_credentials)
{
request_credentials = server.context()->makeGSSAcceptorContext();
if (required_credentials.getRealm().empty())
response.set("WWW-Authenticate", "Negotiate");
else
response.set("WWW-Authenticate", "Negotiate realm=\"" + required_credentials.getRealm() + "\"");
response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED);
response.send();
return false;
}
request_credentials.reset();
return true;
return authenticateUserByHTTP(request, params, response, *session, request_credentials, server.context(), log);
}
@ -727,10 +353,22 @@ void HTTPHandler::processQuery(
std::unique_ptr<ReadBuffer> in;
static const NameSet reserved_param_names{"compress", "decompress", "user", "password", "quota_key", "query_id", "stacktrace", "role",
"buffer_size", "wait_end_of_query", "session_id", "session_timeout", "session_check", "client_protocol_version", "close_session"};
auto roles = params.getAll("role");
if (!roles.empty())
context->setCurrentRoles(roles);
Names reserved_param_suffixes;
std::string database = request.get("X-ClickHouse-Database", params.get("database", ""));
if (!database.empty())
context->setCurrentDatabase(database);
std::string default_format = request.get("X-ClickHouse-Format", params.get("default_format", ""));
if (!default_format.empty())
context->setDefaultFormat(default_format);
/// Anything else beside HTTP POST should be readonly queries.
setReadOnlyIfHTTPMethodIdempotent(context, request.getMethod());
bool has_external_data = startsWith(request.getContentType(), "multipart/form-data");
auto param_could_be_skipped = [&] (const String & name)
{
@ -738,87 +376,36 @@ void HTTPHandler::processQuery(
if (name.empty())
return true;
/// Some parameters (database, default_format, everything used in the code above) do not
/// belong to the Settings class.
static const NameSet reserved_param_names{"compress", "decompress", "user", "password", "quota_key", "query_id", "stacktrace", "role",
"buffer_size", "wait_end_of_query", "session_id", "session_timeout", "session_check", "client_protocol_version", "close_session",
"database", "default_format"};
if (reserved_param_names.contains(name))
return true;
for (const String & suffix : reserved_param_suffixes)
/// For external data we also want settings.
if (has_external_data)
{
if (endsWith(name, suffix))
return true;
/// Skip unneeded parameters to avoid confusing them later with context settings or query parameters.
/// It is a bug and ambiguity with `date_time_input_format` and `low_cardinality_allow_in_native_format` formats/settings.
static const Names reserved_param_suffixes = {"_format", "_types", "_structure"};
for (const String & suffix : reserved_param_suffixes)
{
if (endsWith(name, suffix))
return true;
}
}
return false;
};
auto roles = params.getAll("role");
if (!roles.empty())
{
const auto & access_control = context->getAccessControl();
const auto & user = context->getUser();
std::vector<UUID> roles_ids(roles.size());
for (size_t i = 0; i < roles.size(); i++)
{
auto role_id = access_control.getID<Role>(roles[i]);
if (user->granted_roles.isGranted(role_id))
roles_ids[i] = role_id;
else
throw Exception(ErrorCodes::SET_NON_GRANTED_ROLE, "Role {} should be granted to set as a current", roles[i].get());
}
context->setCurrentRoles(roles_ids);
}
/// Settings can be overridden in the query.
/// Some parameters (database, default_format, everything used in the code above) do not
/// belong to the Settings class.
/// 'readonly' setting values mean:
/// readonly = 0 - any query is allowed, client can change any setting.
/// readonly = 1 - only readonly queries are allowed, client can't change settings.
/// readonly = 2 - only readonly queries are allowed, client can change any setting except 'readonly'.
/// In theory if initially readonly = 0, the client can change any setting and then set readonly
/// to some other value.
const auto & settings = context->getSettingsRef();
/// Anything else beside HTTP POST should be readonly queries.
if (request.getMethod() != HTTPServerRequest::HTTP_POST)
{
if (settings.readonly == 0)
context->setSetting("readonly", 2);
}
bool has_external_data = startsWith(request.getContentType(), "multipart/form-data");
if (has_external_data)
{
/// Skip unneeded parameters to avoid confusing them later with context settings or query parameters.
reserved_param_suffixes.reserve(3);
/// It is a bug and ambiguity with `date_time_input_format` and `low_cardinality_allow_in_native_format` formats/settings.
reserved_param_suffixes.emplace_back("_format");
reserved_param_suffixes.emplace_back("_types");
reserved_param_suffixes.emplace_back("_structure");
}
std::string database = request.get("X-ClickHouse-Database", "");
std::string default_format = request.get("X-ClickHouse-Format", "");
SettingsChanges settings_changes;
for (const auto & [key, value] : params)
{
if (key == "database")
{
if (database.empty())
database = value;
}
else if (key == "default_format")
{
if (default_format.empty())
default_format = value;
}
else if (param_could_be_skipped(key))
{
}
else
if (!param_could_be_skipped(key))
{
/// Other than query parameters are treated as settings.
if (!customizeQueryParam(context, key, value))
@ -826,15 +413,9 @@ void HTTPHandler::processQuery(
}
}
if (!database.empty())
context->setCurrentDatabase(database);
if (!default_format.empty())
context->setDefaultFormat(default_format);
/// For external data we also want settings
context->checkSettingsConstraints(settings_changes, SettingSource::QUERY);
context->applySettingsChanges(settings_changes);
const auto & settings = context->getSettingsRef();
/// Set the query id supplied by the user, if any, and also update the OpenTelemetry fields.
context->setCurrentQueryId(params.get("query_id", request.get("X-ClickHouse-Query-Id", "")));
@ -936,7 +517,7 @@ void HTTPHandler::processQuery(
{
bool with_stacktrace = (params.getParsed<bool>("stacktrace", false) && server.config().getBool("enable_http_stacktrace", true));
ExecutionStatus status = ExecutionStatus::fromCurrentException("", with_stacktrace);
formatExceptionForClient(status.code, request, response, used_output);
setHTTPResponseStatusAndHeadersForException(status.code, request, response, used_output.out_holder.get(), log);
current_output_format.setException(status.message);
current_output_format.finalize();
used_output.exception_is_written = true;
@ -970,7 +551,7 @@ void HTTPHandler::trySendExceptionToClient(
const std::string & s, int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output)
try
{
formatExceptionForClient(exception_code, request, response, used_output);
setHTTPResponseStatusAndHeadersForException(exception_code, request, response, used_output.out_holder.get(), log);
if (!used_output.out_holder && !used_output.exception_is_written)
{
@ -1032,38 +613,6 @@ catch (...)
used_output.cancel();
}
/// Prepares `response` for reporting an exception: records the exception code, tries to consume
/// the rest of the request body when the connection is going to be reused, and sets the HTTP status.
void HTTPHandler::formatExceptionForClient(int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output)
{
    /// Without an output buffer the code goes straight into a response header,
    /// otherwise the buffer is informed about it.
    if (!used_output.out_holder)
        response.set("X-ClickHouse-Exception-Code", toString<int>(exception_code));
    else
        used_output.out_holder->setExceptionCode(exception_code);

    /// FIXME: make sure that no one else is reading from the same stream at the moment.

    /// If HTTP method is POST and Keep-Alive is turned on, we should try to read the whole request body
    /// to avoid reading part of the current request body in the next request.
    const bool should_read_remaining_body = request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST
        && response.getKeepAlive()
        && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED;

    if (should_read_remaining_body)
    {
        try
        {
            if (!request.getStream().eof())
                request.getStream().ignoreAll();
        }
        catch (...)
        {
            /// The body could not be consumed, so the connection must not be kept alive.
            tryLogCurrentException(log, "Cannot read remaining request body during exception handling");
            response.setKeepAlive(false);
        }
    }

    if (exception_code != ErrorCodes::REQUIRED_PASSWORD)
    {
        response.setStatusAndReason(exceptionCodeToHTTPStatus(exception_code));
        return;
    }

    /// A missing password is reported by asking the client to authenticate.
    response.requireAuthentication("ClickHouse server HTTP API");
}
void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event)
{
setThreadName("HTTPHandler");

View File

@ -173,12 +173,6 @@ private:
HTTPServerResponse & response,
Output & used_output);
void formatExceptionForClient(
int exception_code,
HTTPServerRequest & request,
HTTPServerResponse & response,
Output & used_output);
static void pushDelayedResults(Output & used_output);
};

View File

@ -93,6 +93,7 @@ struct MergedBlockOutputStream::Finalizer::Impl
/// Runs the pending finalization, if there is one.
/// The implementation is detached from `impl` before it is invoked, so `impl` is already empty
/// while the finalization runs and stays empty afterwards, even if it throws.
void MergedBlockOutputStream::Finalizer::finish()
{
    std::unique_ptr<Impl> detached_impl = std::move(impl);
    impl.reset();

    if (!detached_impl)
        return;

    detached_impl->finish();
}
@ -130,7 +131,19 @@ MergedBlockOutputStream::Finalizer::Finalizer(Finalizer &&) noexcept = default;
MergedBlockOutputStream::Finalizer & MergedBlockOutputStream::Finalizer::operator=(Finalizer &&) noexcept = default;
MergedBlockOutputStream::Finalizer::Finalizer(std::unique_ptr<Impl> impl_) : impl(std::move(impl_)) {}
MergedBlockOutputStream::Finalizer::~Finalizer() = default;
/// If the finalizer still owns an implementation, finish it on destruction.
/// Exceptions are logged and never leave the destructor.
MergedBlockOutputStream::Finalizer::~Finalizer()
{
    /// Nothing is pending: either finish() has been called already or there was nothing to finish.
    if (!impl)
        return;

    try
    {
        finish();
    }
    catch (...)
    {
        tryLogCurrentException(__PRETTY_FUNCTION__);
    }
}
void MergedBlockOutputStream::finalizePart(
const MergeTreeMutableDataPartPtr & new_part,

View File

@ -1577,6 +1577,12 @@ bool StorageMergeTree::optimize(
{
assertNotReadonly();
if (deduplicate && getInMemoryMetadataPtr()->hasProjections())
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"OPTIMIZE DEDUPLICATE query is not supported for table {} as it has projections. "
"User should drop all the projections manually before running the query",
getStorageID().getTableName());
if (deduplicate)
{
if (deduplicate_by_columns.empty())

View File

@ -5778,6 +5778,12 @@ bool StorageReplicatedMergeTree::optimize(
if (!is_leader)
throw Exception(ErrorCodes::NOT_A_LEADER, "OPTIMIZE cannot be done on this replica because it is not a leader");
if (deduplicate && getInMemoryMetadataPtr()->hasProjections())
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"OPTIMIZE DEDUPLICATE query is not supported for table {} as it has projections. "
"User should drop all the projections manually before running the query",
getStorageID().getTableName());
if (cleanup)
{
if (!getSettings()->allow_experimental_replacing_merge_with_cleanup)

View File

@ -13,26 +13,28 @@ from get_previous_release_tag import (
PACKAGES_DIR = Path("previous_release_package_folder")
def download_packages(release: ReleaseInfo, dest_path: Path = PACKAGES_DIR) -> None:
def download_packages(
release: ReleaseInfo, dest_path: Path = PACKAGES_DIR, debug: bool = False
) -> None:
dest_path.mkdir(parents=True, exist_ok=True)
logging.info("Will download %s", release)
for pkg, url in release.assets.items():
if not pkg.endswith("_amd64.deb") or "-dbg_" in pkg:
if not pkg.endswith("_amd64.deb") or (not debug and "-dbg_" in pkg):
continue
pkg_name = dest_path / pkg
download_build_with_progress(url, pkg_name)
def download_last_release(dest_path: Path) -> None:
def download_last_release(dest_path: Path, debug: bool = False) -> None:
current_release = get_previous_release(None)
if current_release is None:
raise DownloadException("The current release is not found")
download_packages(current_release, dest_path=dest_path)
download_packages(current_release, dest_path=dest_path, debug=debug)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
release = get_release_by_tag(input())
download_packages(release)
download_packages(release, debug=True)

View File

@ -253,7 +253,7 @@ def main():
packages_path.mkdir(parents=True, exist_ok=True)
if validate_bugfix_check:
download_last_release(packages_path)
download_last_release(packages_path, debug=True)
else:
download_all_deb_packages(check_name, reports_path, packages_path)

View File

@ -185,7 +185,7 @@ def main():
build_path.mkdir(parents=True, exist_ok=True)
if validate_bugfix_check:
download_last_release(build_path)
download_last_release(build_path, debug=True)
else:
download_all_deb_packages(check_name, reports_path, build_path)

View File

@ -0,0 +1,12 @@
<!-- Tests functions replaceRegexpAll and replaceRegexpOne with trivial patterns. These trigger internally a fallback to simple string replacement -->
<!-- _materialize_ because the shortcut is only implemented for non-const haystack + const needle + const replacement strings -->
<test>
<!-- trivial pattern -->
<query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), ' ', '\n') AS w FROM numbers(5000000) FORMAT Null</query>
<query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), ' ', '\n') AS w FROM numbers(5000000) FORMAT Null</query>
<!-- non-trivial patterns -->
<!-- deliberately testing with fewer rows to keep runtimes reasonable -->
<query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), '\s+', '\\0\n') AS w FROM numbers(500000) FORMAT Null</query>
<query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), '\s+', '\\0\n') AS w FROM numbers(500000) FORMAT Null</query>
</test>

View File

@ -13,4 +13,4 @@ FROM
SELECT
arrayJoin([[10, 4, 3], [7, 5, 6], [8, 8, 2]]) AS num,
arrayJoin([[1, 2, 4]]) AS id
)
);

View File

@ -1,5 +1,8 @@
set allow_deprecated_syntax_for_merge_tree=1;
set max_threads = 1;
set max_insert_threads = 1;
drop table if exists test_ins_arr;
create table test_ins_arr (date Date, val Array(UInt64)) engine = MergeTree(date, (date), 8192);
insert into test_ins_arr select toDate('2017-10-02'), [number, 42] from system.numbers limit 10000;

View File

@ -1,4 +1,4 @@
-- Tags: no-fasttest
-- This test depends on internet access, but that does not matter, because it only has to check that there is no abort due to a bug in the Apache Arrow library.
INSERT INTO TABLE FUNCTION url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/hits.parquet') SELECT * FROM url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/hits.parquet'); -- { serverError CANNOT_WRITE_TO_OSTREAM, RECEIVED_ERROR_FROM_REMOTE_IO_SERVER, POCO_EXCEPTION }
INSERT INTO TABLE FUNCTION url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/athena_partitioned/hits_9.parquet') SELECT * FROM url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/athena_partitioned/hits_9.parquet'); -- { serverError CANNOT_WRITE_TO_OSTREAM, RECEIVED_ERROR_FROM_REMOTE_IO_SERVER, POCO_EXCEPTION }

View File

@ -0,0 +1 @@
Hello l x Hexlo Hexxo

View File

@ -0,0 +1,11 @@
-- Tests the functions replaceRegexpAll and replaceRegexpOne with trivial patterns. Internally, these trigger a fallback to simple string replacement.
-- The haystack is wrapped in materialize() because the shortcut is only implemented for non-const haystack + const needle + const replacement strings.
-- Positive test: the same haystack, needle and replacement are passed through both functions.
SELECT 'Hello' AS haystack, 'l' AS needle, 'x' AS replacement, replaceRegexpOne(materialize(haystack), needle, replacement), replaceRegexpAll(materialize(haystack), needle, replacement);
-- Negative tests
-- Even if the fallback is used, invalid substitutions must throw an exception.
-- NOTE(review): '\\1' presumably refers to capture group 1, which the pattern 'l' does not define - confirm.
SELECT 'Hello' AS haystack, 'l' AS needle, '\\1' AS replacement, replaceRegexpOne(materialize(haystack), needle, replacement); -- { serverError BAD_ARGUMENTS }
SELECT 'Hello' AS haystack, 'l' AS needle, '\\1' AS replacement, replaceRegexpAll(materialize(haystack), needle, replacement); -- { serverError BAD_ARGUMENTS }

View File

@ -0,0 +1,3 @@
1 one
1 one
1 one

View File

@ -0,0 +1,30 @@
-- https://github.com/ClickHouse/ClickHouse/issues/65548
-- Checks OPTIMIZE ... DEDUPLICATE on a MergeTree table that defines a projection:
-- the statement is expected to fail with NOT_IMPLEMENTED while the projection exists
-- and to run without error once the projection has been dropped.
DROP TABLE IF EXISTS test_projection_deduplicate;
CREATE TABLE test_projection_deduplicate
(
`id` Int32,
`string` String,
PROJECTION test_projection
(
SELECT id
GROUP BY id
)
)
ENGINE = MergeTree
PRIMARY KEY id;
-- The same row is inserted twice, using two separate INSERT statements.
INSERT INTO test_projection_deduplicate VALUES (1, 'one');
INSERT INTO test_projection_deduplicate VALUES (1, 'one');
-- With the projection in place, DEDUPLICATE must be rejected.
OPTIMIZE TABLE test_projection_deduplicate DEDUPLICATE; -- { serverError NOT_IMPLEMENTED }
-- Table contents after the rejected OPTIMIZE.
SELECT * FROM test_projection_deduplicate;
-- After the projection is dropped, DEDUPLICATE carries no error hint, i.e. it is expected to succeed.
ALTER TABLE test_projection_deduplicate DROP PROJECTION test_projection;
OPTIMIZE TABLE test_projection_deduplicate DEDUPLICATE;
-- Table contents after the successful DEDUPLICATE.
SELECT * FROM test_projection_deduplicate;
DROP TABLE test_projection_deduplicate;

View File

@ -0,0 +1 @@
1 2024-06-30 20:00:00.000

View File

@ -0,0 +1,12 @@
#!/usr/bin/env bash
# Tags: no-fasttest
# Loads an ORC file into a DateTime64 column with
# input_format_orc_read_use_writer_time_zone enabled, then prints the rows
# with the session time zone set to Asia/Shanghai.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
source "${CURDIR}/../shell_config.sh"
orc_file="${CURDIR}/data_orc/test_reader_time_zone.snappy.orc"
# Runs a single statement through the configured client.
# $CLICKHOUSE_CLIENT is left unquoted on purpose, exactly as in a direct call.
run_sql() {
    $CLICKHOUSE_CLIENT -q "$1"
}
run_sql "drop table if exists test"
run_sql "create table test(id UInt64, t DateTime64) Engine=MergeTree order by id"
run_sql "insert into test from infile '${orc_file}' SETTINGS input_format_orc_read_use_writer_time_zone=true FORMAT ORC"
run_sql "select * from test SETTINGS session_timezone='Asia/Shanghai'"
run_sql "drop table test"

View File

@ -0,0 +1,13 @@
-- Counts the rows of a LEFT JOIN between ten generated numbers and a MergeTree table
-- filled from numbers(10000), keeping only rows where the right-hand `id` is null.
-- The query runs with the analyzer, optimize_functions_to_subcolumns and join_use_nulls all enabled.
-- NOTE(review): presumably guards against `id is null` being turned into a subcolumn read
-- that ignores the nullability introduced by the join - confirm against the related fix.
DROP TABLE IF EXISTS t_subcolumns_join;
CREATE TABLE t_subcolumns_join (id UInt64) ENGINE=MergeTree ORDER BY tuple();
INSERT INTO t_subcolumns_join SELECT number as number FROM numbers(10000);
SELECT
count()
FROM (SELECT number FROM numbers(10)) as tbl LEFT JOIN t_subcolumns_join ON number = id
WHERE id is null
SETTINGS allow_experimental_analyzer = 1, optimize_functions_to_subcolumns = 1, join_use_nulls = 1;
DROP TABLE t_subcolumns_join;

View File

@ -0,0 +1,11 @@
str_array Array(String)
1318
5779
1715
6422
5875
1887
3763
4245
4270
758

View File

@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Tags: no-parallel, no-fasttest
# Reads an Avro file via clickhouse-local: first describes its schema, then
# sums the lengths of all strings in the str_array column.
set -e
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
source "${CUR_DIR}/../shell_config.sh"
DATA_DIR="${CUR_DIR}/data_avro"
avro_file="${DATA_DIR}/negative_block_size_arrays.avro"
# See https://github.com/ClickHouse/ClickHouse/issues/60438
for sql in \
    "DESC file('${avro_file}')" \
    "SELECT arraySum(arrayMap(x -> length(x), str_array)) AS res FROM file('${avro_file}')"
do
    # $CLICKHOUSE_LOCAL is left unquoted on purpose, exactly as in a direct call.
    $CLICKHOUSE_LOCAL -q "${sql}"
done

View File

@ -1,4 +1,4 @@
personal_ws-1.1 en 2758
personal_ws-1.1 en 2942
AArch
ACLs
ALTERs
@ -1658,9 +1658,9 @@ fsync
func
fuzzBits
fuzzJSON
fuzzQuery
fuzzer
fuzzers
fuzzQuery
gRPC
gccMurmurHash
gcem
@ -2000,6 +2000,7 @@ maxMap
maxintersections
maxintersectionsposition
maxmap
minMappedArrays
maxmind
mdadm
meanZTest
@ -2017,6 +2018,7 @@ metrica
metroHash
mfedotov
minMap
minMappedArrays
minSampleSizeContinuous
minSampleSizeConversion
mindsdb