Merge branch 'master' into schema-inference-uint

This commit is contained in:
Kruglov Pavel 2022-12-30 18:16:00 +01:00 committed by GitHub
commit 69b9842bc6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
286 changed files with 11503 additions and 4879 deletions

View File

@ -10,7 +10,7 @@
#include <base/MoveOrCopyIfThrow.h>
/** Pool for limited size objects that cannot be used from different threads simultaneously.
* The main use case is to have fixed size of objects that can be reused in difference threads during their lifetime
* The main use case is to have fixed size of objects that can be reused in different threads during their lifetime
* and have to be initialized on demand.
* Two main properties of pool are allocated objects size and borrowed objects size.
* Allocated objects size is size of objects that are currently allocated by the pool.

View File

@ -8,16 +8,13 @@ set (SRCS
getPageSize.cpp
getThreadId.cpp
JSON.cpp
LineReader.cpp
mremap.cpp
phdr_cache.cpp
preciseExp10.cpp
setTerminalEcho.cpp
shift10.cpp
sleep.cpp
terminalColors.cpp
errnoToString.cpp
ReplxxLineReader.cpp
StringRef.cpp
safeExit.cpp
throwError.cpp
@ -40,11 +37,6 @@ else ()
target_compile_definitions(common PUBLIC WITH_COVERAGE=0)
endif ()
# FIXME: move libraries for line reading out from base
if (TARGET ch_rust::skim)
target_link_libraries(common PUBLIC ch_rust::skim)
endif()
target_include_directories(common PUBLIC .. "${CMAKE_CURRENT_BINARY_DIR}/..")
if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES)

View File

@ -1,28 +0,0 @@
#include <base/setTerminalEcho.h>
#include <base/errnoToString.h>
#include <stdexcept>
#include <cstring>
#include <string>
#include <termios.h>
#include <unistd.h>
/// Switch echoing of typed characters on or off for the terminal attached to stdin.
/// Throws std::runtime_error if the terminal attributes cannot be read or written back.
void setTerminalEcho(bool enable)
{
    struct termios attrs{};

    /// Fetch the current attributes first so that only the ECHO bit is modified.
    if (tcgetattr(STDIN_FILENO, &attrs) != 0)
        throw std::runtime_error(std::string("setTerminalEcho failed get: ") + errnoToString());

    attrs.c_lflag = enable ? (attrs.c_lflag | ECHO) : (attrs.c_lflag & ~ECHO);

    /// TCSANOW requests that the updated attributes take effect immediately.
    if (tcsetattr(STDIN_FILENO, TCSANOW, &attrs) != 0)
        throw std::runtime_error(std::string("setTerminalEcho failed set: ") + errnoToString());
}

View File

@ -1,4 +0,0 @@
#pragma once
/// Enable or disable echoing of typed characters. Throws std::runtime_error on error.
void setTerminalEcho(bool enable);

View File

@ -80,7 +80,7 @@ do
done
# if clickhouse user is defined - create it (user "default" already exists out of box)
if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CLICKHOUSE_PASSWORD" ]; then
if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CLICKHOUSE_PASSWORD" ] || [ "$CLICKHOUSE_ACCESS_MANAGEMENT" != "0" ]; then
echo "$0: create new user '$CLICKHOUSE_USER' instead 'default'"
cat <<EOT > /etc/clickhouse-server/users.d/default-user.xml
<clickhouse>

View File

@ -51,7 +51,6 @@ function clone
)
ls -lath ||:
}
function wget_with_retry
@ -385,7 +384,7 @@ th { cursor: pointer; }
<tr>
<td>AST Fuzzer</td>
<td>$(cat status.txt)</td>
<td style="white-space: pre;">$(clickhouse-local --input-format RawBLOB --output-format RawBLOB --query "SELECT encodeXMLComponent(*) FROM table" < description.txt)</td>
<td style="white-space: pre;">$(clickhouse-local --input-format RawBLOB --output-format RawBLOB --query "SELECT encodeXMLComponent(*) FROM table" < description.txt || cat description.txt)</td>
</tr>
</table>
</body>

View File

@ -447,7 +447,13 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|| echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv
rm -rf tmp_stress_output
clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables"
# We experienced deadlocks in this command in very rare cases. Let's debug it:
timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" ||
(
echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log
timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
clickhouse stop --force
)
stop 1
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log
@ -455,7 +461,8 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
# Start new server
mv package_folder/clickhouse /usr/bin/
mv package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug
export ZOOKEEPER_FAULT_INJECTION=1
# Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables).
export ZOOKEEPER_FAULT_INJECTION=0
configure
start 500
clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \
@ -468,7 +475,7 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
sleep 60
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.clean.log
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.dirty.log
# Error messages (we should ignore some errors)
# FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.")
@ -516,7 +523,7 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
-e "MutateFromLogEntryTask" \
-e "No connection to ZooKeeper, cannot get shared table ID" \
-e "Session expired" \
/var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
/var/log/clickhouse-server/clickhouse-server.backward.dirty.log | zgrep -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
&& echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -3625,7 +3625,7 @@ z IPv4
Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
If the setting is enabled, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference.
Default value: `false`.
Default value: `true`.
## input_format_try_infer_integers {#input_format_try_infer_integers}

View File

@ -0,0 +1,70 @@
---
slug: /en/operations/system-tables/schema_inference_cache
---
# Schema inference cache
Contains information about all cached file schemas.
Columns:
- `storage` ([String](/docs/en/sql-reference/data-types/string.md)) — Storage name: File, URL, S3 or HDFS.
- `source` ([String](/docs/en/sql-reference/data-types/string.md)) — File source.
- `format` ([String](/docs/en/sql-reference/data-types/string.md)) — Format name.
- `additional_format_info` ([String](/docs/en/sql-reference/data-types/string.md)) — Additional information required to identify the schema. For example, format-specific settings.
- `registration_time` ([DateTime](/docs/en/sql-reference/data-types/datetime.md)) — Timestamp when the schema was added to the cache.
- `schema` ([String](/docs/en/sql-reference/data-types/string.md)) — Cached schema.
**Example**
Let's say we have a file `data.jsonl` with this content:
```json
{"id" : 1, "age" : 25, "name" : "Josh", "hobbies" : ["football", "cooking", "music"]}
{"id" : 2, "age" : 19, "name" : "Alan", "hobbies" : ["tennis", "art"]}
{"id" : 3, "age" : 32, "name" : "Lana", "hobbies" : ["fitness", "reading", "shopping"]}
{"id" : 4, "age" : 47, "name" : "Brayan", "hobbies" : ["movies", "skydiving"]}
```
:::tip
Place `data.jsonl` in the `user_files_path` directory. You can find this by looking
in your ClickHouse configuration files. The default is:
```
<user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
```
:::
Open `clickhouse-client` and run the `DESCRIBE` query:
```sql
DESCRIBE file('data.jsonl') SETTINGS input_format_try_infer_integers=0;
```
```response
┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ id │ Nullable(Float64) │ │ │ │ │ │
│ age │ Nullable(Float64) │ │ │ │ │ │
│ name │ Nullable(String) │ │ │ │ │ │
│ hobbies │ Array(Nullable(String)) │ │ │ │ │ │
└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
Let's see the content of the `system.schema_inference_cache` table:
```sql
SELECT *
FROM system.schema_inference_cache
FORMAT Vertical
```
```response
Row 1:
──────
storage: File
source: /home/droscigno/user_files/data.jsonl
format: JSONEachRow
additional_format_info: schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, schema_inference_make_columns_nullable=true, try_infer_integers=false, try_infer_dates=true, try_infer_datetimes=true, try_infer_numbers_from_strings=true, read_bools_as_numbers=true, try_infer_objects=false
registration_time: 2022-12-29 17:49:52
schema: id Nullable(Float64), age Nullable(Float64), name Nullable(String), hobbies Array(Nullable(String))
```
**See also**
- [Automatic schema inference from input data](/docs/en/interfaces/schema-inference.md)

View File

@ -14,7 +14,7 @@ ClickHouse has the [same behavior as C++ programs](https://en.cppreference.com/w
## toInt(8\|16\|32\|64\|128\|256)
Converts an input value to the [Int](../../sql-reference/data-types/int-uint.md) data type. This function family includes:
Converts an input value to the [Int](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes:
- `toInt8(expr)` — Results in the `Int8` data type.
- `toInt16(expr)` — Results in the `Int16` data type.
@ -25,7 +25,7 @@ Converts an input value to the [Int](../../sql-reference/data-types/int-uint.md)
**Arguments**
- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
**Returned value**
@ -33,7 +33,7 @@ Integer value in the `Int8`, `Int16`, `Int32`, `Int64`, `Int128` or `Int256` dat
Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers.
The behavior of functions for the [NaN and Inf](../../sql-reference/data-types/float.md#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions.
The behavior of functions for the [NaN and Inf](/docs/en/sql-reference/data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions.
**Example**
@ -114,7 +114,7 @@ Result:
## toUInt(8\|16\|32\|64\|256)
Converts an input value to the [UInt](../../sql-reference/data-types/int-uint.md) data type. This function family includes:
Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes:
- `toUInt8(expr)` — Results in the `UInt8` data type.
- `toUInt16(expr)` — Results in the `UInt16` data type.
@ -124,7 +124,7 @@ Converts an input value to the [UInt](../../sql-reference/data-types/int-uint.md
**Arguments**
- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
**Returned value**
@ -132,7 +132,7 @@ Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data typ
Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers.
The behavior of functions for negative arguments and for the [NaN and Inf](../../sql-reference/data-types/float.md#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions.
The behavior of functions for negative arguments and for the [NaN and Inf](/docs/en/sql-reference/data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions.
**Example**
@ -166,7 +166,111 @@ Result:
## toDate
Alias: `DATE`.
Converts the argument to `Date` data type.
If the argument is `DateTime` or `DateTime64`, it truncates it, leaving the date component of the DateTime:
```sql
SELECT
now() AS x,
toDate(x)
```
```response
┌───────────────────x─┬─toDate(now())─┐
│ 2022-12-30 13:44:17 │ 2022-12-30 │
└─────────────────────┴───────────────┘
```
If the argument is a string, it is parsed as Date or DateTime. If it was parsed as DateTime, the date component is used:
```sql
SELECT
toDate('2022-12-30') AS x,
toTypeName(x)
```
```response
┌──────────x─┬─toTypeName(toDate('2022-12-30'))─┐
│ 2022-12-30 │ Date │
└────────────┴──────────────────────────────────┘
1 row in set. Elapsed: 0.001 sec.
```
```sql
SELECT
toDate('2022-12-30 01:02:03') AS x,
toTypeName(x)
```
```response
┌──────────x─┬─toTypeName(toDate('2022-12-30 01:02:03'))─┐
│ 2022-12-30 │ Date │
└────────────┴───────────────────────────────────────────┘
```
If the argument is a number and it looks like a UNIX timestamp (is greater than 65535), it is interpreted as a DateTime, then truncated to Date in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to Date depends on the timezone:
```sql
SELECT
now() AS current_time,
toUnixTimestamp(current_time) AS ts,
toDateTime(ts) AS time_Amsterdam,
toDateTime(ts, 'Pacific/Apia') AS time_Samoa,
toDate(time_Amsterdam) AS date_Amsterdam,
toDate(time_Samoa) AS date_Samoa,
toDate(ts) AS date_Amsterdam_2,
toDate(ts, 'Pacific/Apia') AS date_Samoa_2
```
```response
Row 1:
──────
current_time: 2022-12-30 13:51:54
ts: 1672404714
time_Amsterdam: 2022-12-30 13:51:54
time_Samoa: 2022-12-31 01:51:54
date_Amsterdam: 2022-12-30
date_Samoa: 2022-12-31
date_Amsterdam_2: 2022-12-30
date_Samoa_2: 2022-12-31
```
The example above demonstrates how the same UNIX timestamp can be interpreted as different dates in different time zones.
If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (a UNIX day) and converted to Date. It corresponds to the internal numeric representation of the `Date` data type. Example:
```sql
SELECT toDate(12345)
```
```response
┌─toDate(12345)─┐
│ 2003-10-20 │
└───────────────┘
```
This conversion does not depend on timezones.
If the argument does not fit in the range of the Date type, the behavior is implementation-defined and can either saturate to the maximum supported date or overflow:
```sql
SELECT toDate(10000000000.)
```
```response
┌─toDate(10000000000.)─┐
│ 2106-02-07 │
└──────────────────────┘
```
The function `toDate` can also be written in alternative forms:
```sql
SELECT
now() AS time,
toDate(time),
DATE(time),
CAST(time, 'Date')
```
```response
┌────────────────time─┬─toDate(now())─┬─DATE(now())─┬─CAST(now(), 'Date')─┐
│ 2022-12-30 13:54:58 │ 2022-12-30 │ 2022-12-30 │ 2022-12-30 │
└─────────────────────┴───────────────┴─────────────┴─────────────────────┘
```
Have a nice day working with dates and times.
## toDateOrZero
@ -184,7 +288,7 @@ Alias: `DATE`.
## toDate32
Converts the argument to the [Date32](../../sql-reference/data-types/date32.md) data type. If the value is outside the range returns the border values supported by `Date32`. If the argument has [Date](../../sql-reference/data-types/date.md) type, borders of `Date` are taken into account.
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by `Date32`. If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, borders of `Date` are taken into account.
**Syntax**
@ -194,13 +298,13 @@ toDate32(expr)
**Arguments**
- `expr` — The value. [String](../../sql-reference/data-types/string.md), [UInt32](../../sql-reference/data-types/int-uint.md) or [Date](../../sql-reference/data-types/date.md).
- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md) or [Date](/docs/en/sql-reference/data-types/date.md).
**Returned value**
- A calendar date.
Type: [Date32](../../sql-reference/data-types/date32.md).
Type: [Date32](/docs/en/sql-reference/data-types/date32.md).
**Example**
@ -242,7 +346,7 @@ SELECT toDate32(toDate('1899-01-01')) AS value, toTypeName(value);
## toDate32OrZero
The same as [toDate32](#todate32) but returns the min value of [Date32](../../sql-reference/data-types/date32.md) if invalid argument is received.
The same as [toDate32](#todate32) but returns the min value of [Date32](/docs/en/sql-reference/data-types/date32.md) if an invalid argument is received.
**Example**
@ -262,7 +366,7 @@ Result:
## toDate32OrNull
The same as [toDate32](#todate32) but returns `NULL` if invalid argument is received.
The same as [toDate32](#todate32) but returns `NULL` if an invalid argument is received.
**Example**
@ -282,7 +386,7 @@ Result:
## toDate32OrDefault
Converts the argument to the [Date32](../../sql-reference/data-types/date32.md) data type. If the value is outside the range returns the lower border value supported by `Date32`. If the argument has [Date](../../sql-reference/data-types/date.md) type, borders of `Date` are taken into account. Returns default value if invalid argument is received.
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by `Date32`. If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, borders of `Date` are taken into account. Returns default value if an invalid argument is received.
**Example**
@ -304,7 +408,7 @@ Result:
## toDateTime64
Converts the argument to the [DateTime64](../../sql-reference/data-types/datetime64.md) data type.
Converts the argument to the [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) data type.
**Syntax**
@ -314,7 +418,7 @@ toDateTime64(expr, scale, [timezone])
**Arguments**
- `expr` — The value. [String](../../sql-reference/data-types/string.md), [UInt32](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [DateTime](../../sql-reference/data-types/datetime.md).
- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md).
- `scale` - Tick size (precision): 10<sup>-precision</sup> seconds. Valid range: [ 0 : 9 ].
- `timezone` - Time zone of the specified datetime64 object.
@ -322,7 +426,7 @@ toDateTime64(expr, scale, [timezone])
- A calendar date and time of day, with sub-second precision.
Type: [DateTime64](../../sql-reference/data-types/datetime64.md).
Type: [DateTime64](/docs/en/sql-reference/data-types/datetime64.md).
**Example**
@ -378,7 +482,7 @@ SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul') AS value, toTypeN
## toDecimal(32\|64\|128\|256)
Converts `value` to the [Decimal](../../sql-reference/data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places.
Converts `value` to the [Decimal](/docs/en/sql-reference/data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places.
- `toDecimal32(value, S)`
- `toDecimal64(value, S)`
@ -387,7 +491,7 @@ Converts `value` to the [Decimal](../../sql-reference/data-types/decimal.md) dat
## toDecimal(32\|64\|128\|256)OrNull
Converts an input string to a [Nullable(Decimal(P,S))](../../sql-reference/data-types/decimal.md) data type value. This family of functions include:
Converts an input string to a [Nullable(Decimal(P,S))](/docs/en/sql-reference/data-types/decimal.md) data type value. This family of functions includes:
- `toDecimal32OrNull(expr, S)` — Results in `Nullable(Decimal32(S))` data type.
- `toDecimal64OrNull(expr, S)` — Results in `Nullable(Decimal64(S))` data type.
@ -398,7 +502,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe
**Arguments**
- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`.
- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`.
- `S` — Scale, the number of decimal places in the resulting value.
**Returned value**
@ -441,7 +545,7 @@ Result:
## toDecimal(32\|64\|128\|256)OrDefault
Converts an input string to a [Decimal(P,S)](../../sql-reference/data-types/decimal.md) data type value. This family of functions include:
Converts an input string to a [Decimal(P,S)](/docs/en/sql-reference/data-types/decimal.md) data type value. This family of functions includes:
- `toDecimal32OrDefault(expr, S)` — Results in `Decimal32(S)` data type.
- `toDecimal64OrDefault(expr, S)` — Results in `Decimal64(S)` data type.
@ -452,7 +556,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe
**Arguments**
- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`.
- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`.
- `S` — Scale, the number of decimal places in the resulting value.
**Returned value**
@ -494,7 +598,7 @@ Result:
## toDecimal(32\|64\|128\|256)OrZero
Converts an input value to the [Decimal(P,S)](../../sql-reference/data-types/decimal.md) data type. This family of functions include:
Converts an input value to the [Decimal(P,S)](/docs/en/sql-reference/data-types/decimal.md) data type. This family of functions includes:
- `toDecimal32OrZero( expr, S)` — Results in `Decimal32(S)` data type.
- `toDecimal64OrZero( expr, S)` — Results in `Decimal64(S)` data type.
@ -505,7 +609,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe
**Arguments**
- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`.
- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`.
- `S` — Scale, the number of decimal places in the resulting value.
**Returned value**
@ -564,7 +668,7 @@ YYYY-MM-DD hh:mm:ss
As an exception, if converting from UInt32, Int32, UInt64, or Int64 numeric types to Date, and if the number is greater than or equal to 65536, the number is interpreted as a Unix timestamp (and not as the number of days) and is rounded to the date. This allows support for the common occurrence of writing toDate(unix_timestamp), which otherwise would be an error and would require writing the more cumbersome toDate(toDateTime(unix_timestamp)).
Conversion between a date and date with time is performed the natural way: by adding a null time or dropping the time.
Conversion between a date and a date with time is performed the natural way: by adding a null time or dropping the time.
Conversion between numeric types uses the same rules as assignments between different numeric types in C++.
@ -643,15 +747,15 @@ These functions accept a string and interpret the bytes placed at the beginning
## reinterpretAsString
This function accepts a number or date or date with time, and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long.
This function accepts a number or date or date with time and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long.
## reinterpretAsFixedString
This function accepts a number or date or date with time, and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long.
This function accepts a number or date or date with time and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long.
## reinterpretAsUUID
Accepts 16 bytes string and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string longer than 16 bytes, the extra bytes at the end are ignored.
Accepts a 16-byte string and returns a UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored.
**Syntax**
@ -661,11 +765,11 @@ reinterpretAsUUID(fixed_string)
**Arguments**
- `fixed_string` — Big-endian byte string. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring).
- `fixed_string` — Big-endian byte string. [FixedString](/docs/en/sql-reference/data-types/fixedstring.md/#fixedstring).
**Returned value**
- The UUID type value. [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type).
- The UUID type value. [UUID](/docs/en/sql-reference/data-types/uuid.md/#uuid-data-type).
**Examples**
@ -718,7 +822,7 @@ reinterpret(x, type)
**Arguments**
- `x` — Any type.
- `type` — Destination type. [String](../../sql-reference/data-types/string.md).
- `type` — Destination type. [String](/docs/en/sql-reference/data-types/string.md).
**Returned value**
@ -757,7 +861,7 @@ x::t
**Arguments**
- `x` — A value to convert. May be of any type.
- `T` — The name of the target data type. [String](../../sql-reference/data-types/string.md).
- `T` — The name of the target data type. [String](/docs/en/sql-reference/data-types/string.md).
- `t` — The target data type.
**Returned value**
@ -806,9 +910,9 @@ Result:
└─────────────────────┴─────────────────────┴────────────┴─────────────────────┴───────────────────────────┘
```
Conversion to FixedString(N) only works for arguments of type [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
Conversion to FixedString(N) only works for arguments of type [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md).
Type conversion to [Nullable](../../sql-reference/data-types/nullable.md) and back is supported.
Type conversion to [Nullable](/docs/en/sql-reference/data-types/nullable.md) and back is supported.
**Example**
@ -844,7 +948,7 @@ Result:
**See also**
- [cast_keep_nullable](../../operations/settings/settings.md#cast_keep_nullable) setting
- [cast_keep_nullable](/docs/en/operations/settings/settings.md/#cast_keep_nullable) setting
## accurateCast(x, T)
@ -882,7 +986,7 @@ Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in c
## accurateCastOrNull(x, T)
Converts input value `x` to the specified data type `T`. Always returns [Nullable](../../sql-reference/data-types/nullable.md) type and returns [NULL](../../sql-reference/syntax.md#null-literal) if the casted value is not representable in the target type.
Converts input value `x` to the specified data type `T`. Always returns [Nullable](/docs/en/sql-reference/data-types/nullable.md) type and returns [NULL](/docs/en/sql-reference/syntax.md/#null-literal) if the casted value is not representable in the target type.
**Syntax**
@ -991,7 +1095,7 @@ Result:
## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second)
Converts a Number type argument to an [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type.
Converts a Number type argument to an [Interval](/docs/en/sql-reference/data-types/special-data-types/interval.md) data type.
**Syntax**
@ -1039,7 +1143,7 @@ Result:
## parseDateTimeBestEffort
## parseDateTime32BestEffort
Converts a date and time in the [String](../../sql-reference/data-types/string.md) representation to [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) data type.
Converts a date and time in the [String](/docs/en/sql-reference/data-types/string.md) representation to [DateTime](/docs/en/sql-reference/data-types/datetime.md/#data_type-datetime) data type.
The function parses [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 1123 - 5.2.14 RFC-822 Date and Time Specification](https://tools.ietf.org/html/rfc1123#page-55), ClickHouse's and some other date and time formats.
@ -1051,8 +1155,8 @@ parseDateTimeBestEffort(time_string [, time_zone])
**Arguments**
- `time_string` — String containing a date and time to convert. [String](../../sql-reference/data-types/string.md).
- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../../sql-reference/data-types/string.md).
- `time_string` — String containing a date and time to convert. [String](/docs/en/sql-reference/data-types/string.md).
- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](/docs/en/sql-reference/data-types/string.md).
**Supported non-standard formats**
@ -1175,7 +1279,7 @@ Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except
## parseDateTime64BestEffort
Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also parse milliseconds and microseconds and returns [DateTime](../../sql-reference/functions/type-conversion-functions.md#data_type-datetime) data type.
Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also parses milliseconds and microseconds and returns [DateTime](/docs/en/sql-reference/functions/type-conversion-functions.md/#data_type-datetime) data type.
**Syntax**
@ -1185,13 +1289,13 @@ parseDateTime64BestEffort(time_string [, precision [, time_zone]])
**Parameters**
- `time_string` — String containing a date or date with time to convert. [String](../../sql-reference/data-types/string.md).
- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](../../sql-reference/data-types/int-uint.md).
- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).
- `time_string` — String containing a date or date with time to convert. [String](/docs/en/sql-reference/data-types/string.md).
- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](/docs/en/sql-reference/data-types/int-uint.md).
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md).
**Returned value**
- `time_string` converted to the [DateTime](../../sql-reference/data-types/datetime.md) data type.
- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type.
**Examples**
@ -1242,7 +1346,7 @@ Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that
## toLowCardinality
Converts input parameter to the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) version of same data type.
Converts input parameter to the [LowCardinality](/docs/en/sql-reference/data-types/lowcardinality.md) version of same data type.
To convert data from the `LowCardinality` data type use the [CAST](#type_conversion_function-cast) function. For example, `CAST(x as String)`.
@ -1254,7 +1358,7 @@ toLowCardinality(expr)
**Arguments**
- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../sql-reference/data-types/index.md#data_types).
- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) resulting in one of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types).
**Returned values**
@ -1388,7 +1492,7 @@ formatRow(format, x, y, ...)
**Arguments**
- `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated).
- `format` — Text format. For example, [CSV](/docs/en/interfaces/formats.md/#csv), [TSV](/docs/en/interfaces/formats.md/#tabseparated).
- `x`,`y`, ... — Expressions.
**Returned value**
@ -1429,7 +1533,7 @@ formatRowNoNewline(format, x, y, ...)
**Arguments**
- `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated).
- `format` — Text format. For example, [CSV](/docs/en/interfaces/formats.md/#csv), [TSV](/docs/en/interfaces/formats.md/#tabseparated).
- `x`,`y`, ... — Expressions.
**Returned value**
@ -1457,7 +1561,7 @@ Result:
## snowflakeToDateTime
Extracts time from [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as [DateTime](../data-types/datetime.md) format.
Extracts time from [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as [DateTime](/docs/en/sql-reference/data-types/datetime.md) format.
**Syntax**
@ -1467,12 +1571,12 @@ snowflakeToDateTime(value [, time_zone])
**Parameters**
- `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).
- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md).
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md).
**Returned value**
- Input value converted to the [DateTime](../data-types/datetime.md) data type.
- Input value converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type.
**Example**
@ -1493,7 +1597,7 @@ Result:
## snowflakeToDateTime64
Extracts time from [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as [DateTime64](../data-types/datetime64.md) format.
Extracts time from [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) format.
**Syntax**
@ -1503,12 +1607,12 @@ snowflakeToDateTime64(value [, time_zone])
**Parameters**
- `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).
- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md).
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md).
**Returned value**
- Input value converted to the [DateTime64](../data-types/datetime64.md) data type.
- Input value converted to the [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) data type.
**Example**
@ -1529,7 +1633,7 @@ Result:
## dateTimeToSnowflake
Converts [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time.
Converts [DateTime](/docs/en/sql-reference/data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time.
**Syntax**
@ -1539,11 +1643,11 @@ dateTimeToSnowflake(value)
**Parameters**
- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md).
- `value` — Date and time. [DateTime](/docs/en/sql-reference/data-types/datetime.md).
**Returned value**
- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time.
- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time.
**Example**
@ -1563,7 +1667,7 @@ Result:
## dateTime64ToSnowflake
Converts [DateTime64](../data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time.
Converts [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time.
**Syntax**
@ -1573,11 +1677,11 @@ dateTime64ToSnowflake(value)
**Parameters**
- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).
- `value` — Date and time. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md).
**Returned value**
- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time.
- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time.
**Example**

View File

@ -11,7 +11,7 @@ sidebar_position: 29
这系列的引擎有:
- [StripeLog](stripelog.md)
- [日志](log.md)
- [Log](log.md)
- [TinyLog](tinylog.md)
## 共同属性 {#table_engines-log-engine-family-common-properties}

View File

@ -13,6 +13,10 @@ set (CLICKHOUSE_CLIENT_LINK
string_utils
)
if (TARGET ch_rust::skim)
list(APPEND CLICKHOUSE_CLIENT_LINK PRIVATE ch_rust::skim)
endif()
# Always use internal readpassphrase
list(APPEND CLICKHOUSE_CLIENT_LINK PRIVATE readpassphrase)

View File

@ -30,9 +30,10 @@
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/WriteBufferFromOStream.h>
#include <IO/UseSSL.h>
#include <IO/WriteBufferFromOStream.h>
#include <IO/WriteHelpers.h>
#include <IO/copyData.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTDropQuery.h>
@ -41,6 +42,8 @@
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Processors/Transforms/getSourceFromASTInsertQuery.h>
#include <Interpreters/InterpreterSetQuery.h>
#include <Functions/registerFunctions.h>
@ -827,6 +830,20 @@ bool Client::processWithFuzzing(const String & full_query)
WriteBufferFromOStream ast_buf(std::cout, 4096);
formatAST(*query, ast_buf, false /*highlight*/);
ast_buf.next();
if (const auto * insert = query->as<ASTInsertQuery>())
{
/// For inserts with data it's really useful to have the data itself available in the logs, as formatAST doesn't print it
if (insert->hasInlinedData())
{
String bytes;
{
auto read_buf = getReadBufferFromASTInsertQuery(query);
WriteBufferFromString write_buf(bytes);
copyData(*read_buf, write_buf);
}
std::cout << std::endl << bytes;
}
}
std::cout << std::endl << std::endl;
try

View File

@ -18,6 +18,10 @@ if(NOT CLICKHOUSE_ONE_SHARED)
target_link_libraries(clickhouse-local-lib PRIVATE clickhouse-server-lib)
endif()
if (TARGET ch_rust::skim)
target_link_libraries(clickhouse-local-lib PRIVATE ch_rust::skim)
endif()
# Always use internal readpassphrase
target_link_libraries(clickhouse-local-lib PRIVATE readpassphrase)

View File

@ -1,647 +0,0 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <Common/HashTable/HashMap.h>
#include <Common/SymbolIndex.h>
#include <Common/ArenaAllocator.h>
#include <Core/Settings.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
#include <filesystem>
namespace DB
{
namespace ErrorCodes
{
extern const int FUNCTION_NOT_ALLOWED;
extern const int NOT_IMPLEMENTED;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
struct AggregateFunctionFlameGraphTree
{
struct ListNode;
struct TreeNode
{
TreeNode * parent = nullptr;
ListNode * children = nullptr;
UInt64 ptr = 0;
size_t allocated = 0;
};
struct ListNode
{
ListNode * next = nullptr;
TreeNode * child = nullptr;
};
TreeNode root;
static ListNode * createChild(TreeNode * parent, UInt64 ptr, Arena * arena)
{
ListNode * list_node = reinterpret_cast<ListNode *>(arena->alloc(sizeof(ListNode)));
TreeNode * tree_node = reinterpret_cast<TreeNode *>(arena->alloc(sizeof(TreeNode)));
list_node->child = tree_node;
list_node->next = nullptr;
tree_node->parent =parent;
tree_node->children = nullptr;
tree_node->ptr = ptr;
tree_node->allocated = 0;
return list_node;
}
TreeNode * find(const UInt64 * stack, size_t stack_size, Arena * arena)
{
TreeNode * node = &root;
for (size_t i = 0; i < stack_size; ++i)
{
UInt64 ptr = stack[i];
if (ptr == 0)
break;
if (!node->children)
{
node->children = createChild(node, ptr, arena);
node = node->children->child;
}
else
{
ListNode * list = node->children;
while (list->child->ptr != ptr && list->next)
list = list->next;
if (list->child->ptr != ptr)
{
list->next = createChild(node, ptr, arena);
list = list->next;
}
node = list->child;
}
}
return node;
}
static void append(DB::PaddedPODArray<UInt64> & values, DB::PaddedPODArray<UInt64> & offsets, std::vector<UInt64> & frame)
{
UInt64 prev = offsets.empty() ? 0 : offsets.back();
offsets.push_back(prev + frame.size());
for (UInt64 val : frame)
values.push_back(val);
}
struct Trace
{
using Frames = std::vector<UInt64>;
Frames frames;
/// The total number of bytes allocated for traces with the same prefix.
size_t allocated_total = 0;
/// This counter is relevant in case we want to filter some traces with small amount of bytes.
/// It shows the total number of bytes for *filtered* traces with the same prefix.
/// This is the value which is used in flamegraph.
size_t allocated_self = 0;
};
using Traces = std::vector<Trace>;
Traces dump(size_t max_depth, size_t min_bytes) const
{
Traces traces;
Trace::Frames frames;
std::vector<size_t> allocated_total;
std::vector<size_t> allocated_self;
std::vector<ListNode *> nodes;
nodes.push_back(root.children);
allocated_total.push_back(root.allocated);
allocated_self.push_back(root.allocated);
while (!nodes.empty())
{
if (nodes.back() == nullptr)
{
traces.push_back({frames, allocated_total.back(), allocated_self.back()});
nodes.pop_back();
allocated_total.pop_back();
allocated_self.pop_back();
/// We don't have root's frame so framers are empty in the end.
if (!frames.empty())
frames.pop_back();
continue;
}
TreeNode * current = nodes.back()->child;
nodes.back() = nodes.back()->next;
bool enough_bytes = current->allocated >= min_bytes;
bool enough_depth = max_depth == 0 || nodes.size() < max_depth;
if (enough_bytes)
{
frames.push_back(current->ptr);
allocated_self.back() -= current->allocated;
if (enough_depth)
{
allocated_total.push_back(current->allocated);
allocated_self.push_back(current->allocated);
nodes.push_back(current->children);
}
else
{
traces.push_back({frames, current->allocated, current->allocated});
frames.pop_back();
}
}
}
return traces;
}
};
/// Append one zero-terminated string of `length` bytes starting at `pos` to the (chars, offsets) pair.
static void insertData(DB::PaddedPODArray<UInt8> & chars, DB::PaddedPODArray<UInt64> & offsets, const char * pos, size_t length)
{
    const size_t begin = chars.size();
    /// One extra byte for the terminating zero.
    const size_t end_with_terminator = begin + length + 1;

    chars.resize(end_with_terminator);

    if (length != 0)
        memcpy(chars.data() + begin, pos, length);

    chars[end_with_terminator - 1] = 0;
    offsets.push_back(end_with_terminator);
}
/// Split `str` by line feed and write every line as a separate row.
/// Empty lines between two line feeds are kept; a trailing line feed does not produce an extra empty row.
static void fillColumn(DB::PaddedPODArray<UInt8> & chars, DB::PaddedPODArray<UInt64> & offsets, const std::string & str)
{
    const size_t size = str.size();
    size_t line_begin = 0;

    while (line_begin < size)
    {
        size_t line_end = str.find('\n', line_begin);
        if (line_end == std::string::npos)
            line_end = size; /// The last line has no terminating line feed.

        insertData(chars, offsets, str.data() + line_begin, line_end - line_begin);
        line_begin = line_end + 1;
    }
}
/// Render traces as text, one line per trace: frames separated by ';', then a space and the self byte count.
/// Traces with zero self bytes are skipped. Every resulting line is appended to (chars, offsets) as a separate row.
/// On ELF platforms (except FreeBSD) frame addresses are replaced with demangled symbol names when a symbol is found;
/// otherwise the address is written in hex.
void dumpFlameGraph(
    const AggregateFunctionFlameGraphTree::Traces & traces,
    DB::PaddedPODArray<UInt8> & chars,
    DB::PaddedPODArray<UInt64> & offsets)
{
    DB::WriteBufferFromOwnString out;

#if defined(__ELF__) && !defined(OS_FREEBSD)
    auto symbol_index_ptr = DB::SymbolIndex::instance();
    const DB::SymbolIndex & symbol_index = *symbol_index_ptr;
#endif

    for (const auto & trace : traces)
    {
        if (trace.allocated_self == 0)
            continue;

        for (size_t i = 0; i < trace.frames.size(); ++i)
        {
            if (i)
                out << ";";

            const void * ptr = reinterpret_cast<const void *>(trace.frames[i]);

#if defined(__ELF__) && !defined(OS_FREEBSD)
            if (const auto * symbol = symbol_index.findSymbol(ptr))
                writeString(demangle(symbol->name), out);
            else
                DB::writePointerHex(ptr, out);
#else
            DB::writePointerHex(ptr, out);
#endif
        }

        out << ' ' << trace.allocated_self << "\n";
    }

    fillColumn(chars, offsets, out.str());
}
struct AggregateFunctionFlameGraphData
{
struct Entry
{
AggregateFunctionFlameGraphTree::TreeNode * trace;
UInt64 size;
Entry * next = nullptr;
};
struct Pair
{
Entry * allocation = nullptr;
Entry * deallocation = nullptr;
};
using Entries = HashMap<UInt64, Pair>;
AggregateFunctionFlameGraphTree tree;
Entries entries;
Entry * free_list = nullptr;
Entry * alloc(Arena * arena)
{
if (free_list)
{
auto * res = free_list;
free_list = free_list->next;
return res;
}
return reinterpret_cast<Entry *>(arena->alloc(sizeof(Entry)));
}
void release(Entry * entry)
{
entry->next = free_list;
free_list = entry;
}
static void track(Entry * allocation)
{
auto * node = allocation->trace;
while (node)
{
node->allocated += allocation->size;
node = node->parent;
}
}
static void untrack(Entry * allocation)
{
auto * node = allocation->trace;
while (node)
{
node->allocated -= allocation->size;
node = node->parent;
}
}
static Entry * tryFindMatchAndRemove(Entry *& list, UInt64 size)
{
if (!list)
return nullptr;
if (list->size == size)
{
Entry * entry = list;
list = list->next;
return entry;
}
else
{
Entry * parent = list;
while (parent->next && parent->next->size != size)
parent = parent->next;
if (parent->next && parent->next->size == size)
{
Entry * entry = parent->next;
parent->next = entry->next;
return entry;
}
return nullptr;
}
}
void add(UInt64 ptr, Int64 size, const UInt64 * stack, size_t stack_size, Arena * arena)
{
/// In case if argument is nullptr, only track allocations.
if (ptr == 0)
{
if (size > 0)
{
auto * node = tree.find(stack, stack_size, arena);
Entry entry{.trace = node, .size = UInt64(size)};
track(&entry);
}
return;
}
auto & place = entries[ptr];
if (size > 0)
{
if (auto * deallocation = tryFindMatchAndRemove(place.deallocation, size))
{
release(deallocation);
}
else
{
auto * node = tree.find(stack, stack_size, arena);
auto * allocation = alloc(arena);
allocation->size = UInt64(size);
allocation->trace = node;
track(allocation);
allocation->next = place.allocation;
place.allocation = allocation;
}
}
else if (size < 0)
{
UInt64 abs_size = -size;
if (auto * allocation = tryFindMatchAndRemove(place.allocation, abs_size))
{
untrack(allocation);
release(allocation);
}
else
{
auto * deallocation = alloc(arena);
deallocation->size = abs_size;
deallocation->next = place.deallocation;
place.deallocation = deallocation;
}
}
}
void merge(const AggregateFunctionFlameGraphTree & other_tree, Arena * arena)
{
AggregateFunctionFlameGraphTree::Trace::Frames frames;
std::vector<AggregateFunctionFlameGraphTree::ListNode *> nodes;
nodes.push_back(other_tree.root.children);
while (!nodes.empty())
{
if (nodes.back() == nullptr)
{
nodes.pop_back();
/// We don't have root's frame so framers are empty in the end.
if (!frames.empty())
frames.pop_back();
continue;
}
AggregateFunctionFlameGraphTree::TreeNode * current = nodes.back()->child;
nodes.back() = nodes.back()->next;
frames.push_back(current->ptr);
if (current->children)
nodes.push_back(current->children);
else
{
if (current->allocated)
add(0, current->allocated, frames.data(), frames.size(), arena);
frames.pop_back();
}
}
}
void merge(const AggregateFunctionFlameGraphData & other, Arena * arena)
{
AggregateFunctionFlameGraphTree::Trace::Frames frames;
for (const auto & entry : other.entries)
{
for (auto * allocation = entry.value.second.allocation; allocation; allocation = allocation->next)
{
frames.clear();
const auto * node = allocation->trace;
while (node->ptr)
{
frames.push_back(node->ptr);
node = node->parent;
}
std::reverse(frames.begin(), frames.end());
add(entry.value.first, allocation->size, frames.data(), frames.size(), arena);
untrack(allocation);
}
for (auto * deallocation = entry.value.second.deallocation; deallocation; deallocation = deallocation->next)
{
add(entry.value.first, -Int64(deallocation->size), nullptr, 0, arena);
}
}
merge(other.tree, arena);
}
void dumpFlameGraph(
DB::PaddedPODArray<UInt8> & chars,
DB::PaddedPODArray<UInt64> & offsets,
size_t max_depth, size_t min_bytes) const
{
DB::dumpFlameGraph(tree.dump(max_depth, min_bytes), chars, offsets);
}
};
/// Aggregate function which builds a flamegraph using the list of stacktraces.
/// The output is an array of strings which can be used by flamegraph.pl util.
/// See https://github.com/brendangregg/FlameGraph
///
/// Syntax: flameGraph(traces, [size = 1], [ptr = 0])
/// - trace : Array(UInt64), a stacktrace
/// - size : Int64, an allocation size (for memory profiling)
/// - ptr : UInt64, an allocation address
/// In case if ptr != 0, a flameGraph will map allocations (size > 0) and deallocations (size < 0) with the same size and ptr.
/// Only allocations which were not freed are shown. Not mapped deallocations are ignored.
///
/// Usage:
///
/// * Build a flamegraph based on CPU query profiler
/// set query_profiler_cpu_time_period_ns=10000000;
/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
/// clickhouse client --allow_introspection_functions=1
/// -q "select arrayJoin(flameGraph(arrayReverse(trace))) from system.trace_log where trace_type = 'CPU' and query_id = 'xxx'"
/// | ~/dev/FlameGraph/flamegraph.pl > flame_cpu.svg
///
/// * Build a flamegraph based on memory query profiler, showing all allocations
/// set memory_profiler_sample_probability=1, max_untracked_memory=1;
/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
/// clickhouse client --allow_introspection_functions=1
/// -q "select arrayJoin(flameGraph(trace, size)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'"
/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem.svg
///
/// * Build a flamegraph based on memory query profiler, showing allocations which were not deallocated in query context
/// set memory_profiler_sample_probability=1, max_untracked_memory=1, use_uncompressed_cache=1, merge_tree_max_rows_to_use_cache=100000000000, merge_tree_max_bytes_to_use_cache=1000000000000;
/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
/// clickhouse client --allow_introspection_functions=1
/// -q "select arrayJoin(flameGraph(trace, size, ptr)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'"
/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_untracked.svg
///
/// * Build a flamegraph based on memory query profiler, showing active allocations at the fixed point of time
/// set memory_profiler_sample_probability=1, max_untracked_memory=1;
/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
/// 1. Memory usage per second
/// select event_time, m, formatReadableSize(max(s) as m) from (select event_time, sum(size) over (order by event_time) as s from system.trace_log where query_id = 'xxx' and trace_type = 'MemorySample') group by event_time order by event_time;
/// 2. Find a time point with maximal memory usage
/// select argMax(event_time, s), max(s) from (select event_time, sum(size) over (order by event_time) as s from system.trace_log where query_id = 'xxx' and trace_type = 'MemorySample');
/// 3. Fix active allocations at fixed point of time
/// clickhouse client --allow_introspection_functions=1
/// -q "select arrayJoin(flameGraph(trace, size, ptr)) from (select * from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx' and event_time <= 'yyy' order by event_time)"
/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_pos.svg
/// 4. Find deallocations at fixed point of time
/// clickhouse client --allow_introspection_functions=1
/// -q "select arrayJoin(flameGraph(trace, -size, ptr)) from (select * from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx' and event_time > 'yyy' order by event_time desc)"
/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_neg.svg
class AggregateFunctionFlameGraph final : public IAggregateFunctionDataHelper<AggregateFunctionFlameGraphData, AggregateFunctionFlameGraph>
{
public:
    explicit AggregateFunctionFlameGraph(const DataTypes & argument_types_)
        : IAggregateFunctionDataHelper<AggregateFunctionFlameGraphData, AggregateFunctionFlameGraph>(argument_types_, {}, createResultType())
    {}

    String getName() const override { return "flameGraph"; }

    /// The result is Array(String): one string per line of the flamegraph input.
    static DataTypePtr createResultType()
    {
        return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
    }

    bool allocatesMemoryInArena() const override { return true; }

    /// Arguments: columns[0] - trace (Array(UInt64)), optional columns[1] - size (Int64, 1 if absent),
    /// optional columns[2] - ptr (UInt64, 0 if absent). Argument types are validated when the function is created,
    /// so assert_cast is used here (as in insertResultInto) instead of dereferencing typeid_cast results unchecked.
    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        const auto & trace = assert_cast<const ColumnArray &>(*columns[0]);

        const auto & trace_offsets = trace.getOffsets();
        const auto & trace_values = assert_cast<const ColumnUInt64 &>(trace.getData()).getData();

        /// Offsets are cumulative: the row occupies [prev_offset, trace_offsets[row_num]) in the values.
        UInt64 prev_offset = 0;
        if (row_num)
            prev_offset = trace_offsets[row_num - 1];
        UInt64 trace_size = trace_offsets[row_num] - prev_offset;

        Int64 allocated = 1;
        if (argument_types.size() >= 2)
        {
            const auto & sizes = assert_cast<const ColumnInt64 &>(*columns[1]).getData();
            allocated = sizes[row_num];
        }

        UInt64 ptr = 0;
        if (argument_types.size() >= 3)
        {
            const auto & ptrs = assert_cast<const ColumnUInt64 &>(*columns[2]).getData();
            ptr = ptrs[row_num];
        }

        this->data(place).add(ptr, allocated, trace_values.data() + prev_offset, trace_size, arena);
    }

    /// Intentionally does nothing: default rows are not accounted.
    void addManyDefaults(
        AggregateDataPtr __restrict /*place*/,
        const IColumn ** /*columns*/,
        size_t /*length*/,
        Arena * /*arena*/) const override
    {
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        this->data(place).merge(this->data(rhs), arena);
    }

    void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional<size_t> /* version */) const override
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Serialization for function flameGraph is not implemented.");
    }

    void deserialize(AggregateDataPtr __restrict, ReadBuffer &, std::optional<size_t> /* version */, Arena *) const override
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Deserialization for function flameGraph is not implemented.");
    }

    /// Append the dumped lines to the nested string column and close the array with a new offset.
    /// The dump is requested without depth limit and without filtering by size.
    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        auto & array = assert_cast<ColumnArray &>(to);
        auto & str = assert_cast<ColumnString &>(array.getData());

        this->data(place).dumpFlameGraph(str.getChars(), str.getOffsets(), 0, 0);

        array.getOffsets().push_back(str.size());
    }
};
/// Validate parameters and argument types of flameGraph: no parameters are allowed and
/// arguments must be (Array(UInt64) [, Int64 [, UInt64]]). Throws on the first violation found.
static void check(const std::string & name, const DataTypes & argument_types, const Array & params)
{
    assertNoParameters(name, params);

    const size_t num_arguments = argument_types.size();
    if (num_arguments < 1 || num_arguments > 3)
        throw Exception(
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
            "Aggregate function {} requires 1 to 3 arguments : trace, [size = 1], [ptr = 0]",
            name);

    /// Expected types, in the order of arguments.
    const auto expected_ptr = std::make_shared<DataTypeUInt64>();
    const auto expected_trace = std::make_shared<DataTypeArray>(expected_ptr);
    const auto expected_size = std::make_shared<DataTypeInt64>();

    if (!argument_types[0]->equals(*expected_trace))
        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
            "First argument (trace) for function {} must be Array(UInt64), but it has type {}",
            name, argument_types[0]->getName());

    if (num_arguments >= 2)
    {
        if (!argument_types[1]->equals(*expected_size))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Second argument (size) for function {} must be Int64, but it has type {}",
                name, argument_types[1]->getName());
    }

    if (num_arguments >= 3)
    {
        if (!argument_types[2]->equals(*expected_ptr))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Third argument (ptr) for function {} must be UInt64, but it has type {}",
                name, argument_types[2]->getName());
    }
}
/// Creator for the flameGraph aggregate function.
/// Throws FUNCTION_NOT_ALLOWED unless the setting `allow_introspection_functions` is enabled;
/// throws from check() if parameters or argument types are not acceptable.
AggregateFunctionPtr createAggregateFunctionFlameGraph(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings * settings)
{
    /// `settings` is a pointer and may be null (presumably when there is no query context — confirm against the factory).
    /// Without settings we cannot tell that introspection is allowed, so treat it as disabled instead of dereferencing null.
    if (!settings || !settings->allow_introspection_functions)
        throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED,
        "Introspection functions are disabled, because setting 'allow_introspection_functions' is set to 0");

    check(name, argument_types, params);
    return std::make_shared<AggregateFunctionFlameGraph>(argument_types);
}
/// Register flameGraph in the factory. The function returns a default value when all arguments are NULL
/// and its result depends on the order of input rows.
void registerAggregateFunctionFlameGraph(AggregateFunctionFactory & factory)
{
    AggregateFunctionProperties properties;
    properties.returns_default_when_only_null = true;
    properties.is_order_dependent = true;

    factory.registerFunction("flameGraph", { createAggregateFunctionFlameGraph, properties });
}
}

View File

@ -73,7 +73,6 @@ void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory
void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &);
void registerAggregateFunctionFlameGraph(AggregateFunctionFactory &);
class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@ -159,7 +158,6 @@ void registerAggregateFunctions()
registerAggregateFunctionExponentialMovingAverage(factory);
registerAggregateFunctionSparkbar(factory);
registerAggregateFunctionAnalysisOfVariance(factory);
registerAggregateFunctionFlameGraph(factory);
registerWindowFunctions(factory);
}

View File

@ -4307,6 +4307,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
}
}
const auto & settings = scope.context->getSettingsRef();
if (function_node.isWindowFunction())
{
if (!AggregateFunctionFactory::instance().isAggregateFunctionName(function_name))
@ -4324,8 +4326,12 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
"Window function '{}' does not support lambda arguments",
function_name);
bool need_add_or_null = settings.aggregate_functions_null_for_empty && !function_name.ends_with("OrNull");
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties);
auto aggregate_function = need_add_or_null
? AggregateFunctionFactory::instance().get(function_name + "OrNull", argument_types, parameters, properties)
: AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties);
function_node.resolveAsWindowFunction(aggregate_function);
@ -4384,8 +4390,12 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
"Aggregate function '{}' does not support lambda arguments",
function_name);
bool need_add_or_null = settings.aggregate_functions_null_for_empty && !function_name.ends_with("OrNull");
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties);
auto aggregate_function = need_add_or_null
? AggregateFunctionFactory::instance().get(function_name + "OrNull", argument_types, parameters, properties)
: AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties);
function_node.resolveAsAggregateFunction(aggregate_function);
return result_projection_names;
}

View File

@ -61,14 +61,8 @@ namespace
/// Build a socket address from (host, port) with makeSocketAddress, bind `socket` to it with address reuse enabled
/// and start listening with a backlog of 64. Returns the address the socket was bound to.
Poco::Net::SocketAddress socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, Poco::Logger * log)
{
auto address = makeSocketAddress(host, port, log);
/// For Poco versions below 0x01080000 bind() is called without the `reusePort` argument.
#if POCO_VERSION < 0x01080000
socket.bind(address, /* reuseAddress = */ true);
#else
socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ false);
#endif
socket.listen(/* backlog = */ 64);
return address;
}
}

View File

@ -2,11 +2,10 @@
#include <IO/ReadWriteBufferFromHTTP.h>
#include <IO/ReadHelpers.h>
#include <Poco/Net/HTTPRequest.h>
#include <Poco/URI.h>
#include <filesystem>
#include <thread>
namespace fs = std::filesystem;
namespace DB
@ -97,9 +96,13 @@ std::unique_ptr<ShellCommand> IBridgeHelper::startBridgeCommand()
LOG_TRACE(getLog(), "Starting {}", serviceAlias());
/// We will terminate it with the KILL signal instead of the TERM signal,
/// because it's more reliable for arbitrary third-party ODBC drivers.
/// The drivers can spawn threads, install their own signal handlers... we don't care.
ShellCommand::Config command_config(path.string());
command_config.arguments = cmd_args;
command_config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy(true);
command_config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy(true, SIGKILL);
return ShellCommand::executeDirect(command_config);
}

View File

@ -332,6 +332,12 @@ macro (dbms_target_link_libraries)
endforeach ()
endmacro ()
# Calls target_include_directories() with the given arguments (${ARGN})
# for every module listed in ${all_modules}.
macro (dbms_target_include_directories)
foreach (module ${all_modules})
target_include_directories (${module} ${ARGN})
endforeach ()
endmacro ()
dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src")
target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src")
@ -390,6 +396,7 @@ if (TARGET ch_contrib::cpuid)
endif()
dbms_target_link_libraries(PUBLIC ch_contrib::abseil_swiss_tables)
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::abseil_swiss_tables)
# Make dbms depend on roaring instead of clickhouse_common_io so that roaring itself can depend on clickhouse_common_io
# That way we can redirect malloc/free functions while avoiding circular dependencies
@ -589,6 +596,11 @@ if (TARGET ch_contrib::annoy)
dbms_target_link_libraries(PUBLIC ch_contrib::annoy)
endif()
if (TARGET ch_rust::skim)
# Add only -I, library is needed only for clickhouse-client/clickhouse-local
dbms_target_include_directories(PRIVATE $<TARGET_PROPERTY:ch_rust::skim,INTERFACE_INCLUDE_DIRECTORIES>)
endif()
include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake")
if (ENABLE_TESTS)

View File

@ -1,3 +1,3 @@
if (ENABLE_EXAMPLES)
add_subdirectory(examples)
endif()
endif()

View File

@ -1,42 +1,30 @@
#include <Client/ClientBase.h>
#include <Client/LineReader.h>
#include <Client/ClientBaseHelpers.h>
#include <Client/TestHint.h>
#include <Client/InternalTextLogs.h>
#include <Client/TestTags.h>
#include <iostream>
#include <filesystem>
#include <map>
#include <unordered_map>
#include "config.h"
#include <base/argsToConfig.h>
#include <base/safeExit.h>
#include <Core/Block.h>
#include <Core/Protocol.h>
#include <Common/DateLUT.h>
#include <Common/MemoryTracker.h>
#include <base/argsToConfig.h>
#include <base/LineReader.h>
#include <Common/scope_guard_safe.h>
#include <base/safeExit.h>
#include <Common/Exception.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/tests/gtest_global_context.h>
#include <Common/typeid_cast.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <Core/Block.h>
#include <Core/Protocol.h>
#include <Formats/FormatFactory.h>
#include <Access/AccessControl.h>
#include "config_version.h"
#include <Common/UTF8Helpers.h>
#include <Common/TerminalSize.h>
#include <Common/clearPasswordFromCommandLine.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/filesystemHelpers.h>
#include <Common/NetException.h>
#include <Storages/ColumnsDescription.h>
#include <Client/ClientBaseHelpers.h>
#include <Client/TestHint.h>
#include "TestTags.h"
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <Formats/FormatFactory.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserQuery.h>
@ -53,26 +41,36 @@
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTColumnDeclaration.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <Processors/Formats/Impl/NullFormat.h>
#include <Processors/Formats/IInputFormat.h>
#include <Processors/Formats/IOutputFormat.h>
#include <QueryPipeline/QueryPipeline.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <QueryPipeline/QueryPipeline.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <Interpreters/ReplaceQueryParameterVisitor.h>
#include <Interpreters/ProfileEventsExt.h>
#include <IO/WriteBufferFromOStream.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/CompressionMethod.h>
#include <Client/InternalTextLogs.h>
#include <IO/ForkWriteBuffer.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <Access/AccessControl.h>
#include <Storages/ColumnsDescription.h>
#include <boost/algorithm/string/case_conv.hpp>
#include <iostream>
#include <filesystem>
#include <map>
#include <unordered_map>
#include "config_version.h"
#include "config.h"
namespace fs = std::filesystem;
@ -1036,7 +1034,13 @@ void ClientBase::onEndOfStream()
progress_indication.clearProgressOutput(*tty_buf);
if (output_format)
{
/// Do our best to estimate the start of the query so the output format matches the one reported by the server
bool is_running = false;
output_format->setStartTime(
clock_gettime_ns(CLOCK_MONOTONIC) - static_cast<UInt64>(progress_indication.elapsedSeconds() * 1000000000), is_running);
output_format->finalize();
}
resetOutput();

View File

@ -4,7 +4,7 @@
#include "config.h"
#if USE_REPLXX
# include <base/ReplxxLineReader.h>
# include <Client/ReplxxLineReader.h>
#endif

View File

@ -9,7 +9,6 @@
#include <Common/Exception.h>
#include <Common/isLocalAddress.h>
#include <Common/DNSResolver.h>
#include <base/setTerminalEcho.h>
#include <base/scope_guard.h>
#include <readpassphrase/readpassphrase.h>

View File

@ -1,4 +1,4 @@
#include <base/LineReader.h>
#include <Client/LineReader.h>
#include <iostream>
#include <string_view>
@ -65,6 +65,9 @@ void addNewWords(Words & to, const Words & from, Compare comp)
}
namespace DB
{
replxx::Replxx::completions_t LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length)
{
std::string_view last_word;
@ -202,3 +205,5 @@ LineReader::InputStatus LineReader::readOneLine(const String & prompt)
trim(input);
return INPUT_LINE;
}
}

View File

@ -9,6 +9,9 @@
#include <base/types.h>
#include <base/defines.h>
namespace DB
{
class LineReader
{
public:
@ -68,3 +71,5 @@ protected:
virtual InputStatus readOneLine(const String & prompt);
virtual void addToHistory(const String &) {}
};
}

View File

@ -327,9 +327,7 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast)
// Add element
if (fuzz_rand() % 50 == 0)
{
auto pos = list->children.empty()
? list->children.begin()
: list->children.begin() + fuzz_rand() % list->children.size();
auto * pos = list->children.empty() ? list->children.begin() : list->children.begin() + fuzz_rand() % list->children.size();
auto col = getRandomColumnLike();
if (col)
{
@ -373,9 +371,7 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast)
// Add element
if (fuzz_rand() % 50 == 0)
{
auto pos = impl->children.empty()
? impl->children.begin()
: impl->children.begin() + fuzz_rand() % impl->children.size();
auto * pos = impl->children.empty() ? impl->children.begin() : impl->children.begin() + fuzz_rand() % impl->children.size();
auto col = getRandomColumnLike();
if (col)
impl->children.insert(pos, col);

View File

@ -1,6 +1,10 @@
#include <base/ReplxxLineReader.h>
#include <Client/ReplxxLineReader.h>
#include <base/errnoToString.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteBufferFromString.h>
#include <IO/copyData.h>
#include <stdexcept>
#include <chrono>
#include <cerrno>
@ -108,13 +112,11 @@ void writeRetry(int fd, const std::string & data)
}
/// Reads the whole file at `path` and returns its contents as a string.
/// NOTE(review): the body below contains two complete implementations back to back
/// (std::ifstream first, then DB::ReadBufferFromFile + DB::copyData). As written, everything
/// after the first `return str;` is unreachable — confirm which variant is intended and drop the other.
std::string readFile(const std::string & path)
{
std::ifstream t(path);
std::string str;
/// Seek to the end to learn the size and reserve it up front, then rewind and read everything.
t.seekg(0, std::ios::end);
str.reserve(t.tellg());
t.seekg(0, std::ios::beg);
str.assign((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
return str;
/// Second variant: copy the file into `out` through a write buffer attached to the string.
std::string out;
DB::WriteBufferFromString out_buffer(out);
DB::ReadBufferFromFile in_buffer(path);
DB::copyData(in_buffer, out_buffer);
return out;
}
/// Simple wrapper for temporary files.
@ -269,6 +271,9 @@ void convertHistoryFile(const std::string & path, replxx::Replxx & rx)
}
namespace DB
{
static bool replxx_last_is_delimiter = false;
void ReplxxLineReader::setLastIsDelimiter(bool flag)
{
@ -508,3 +513,5 @@ void ReplxxLineReader::enableBracketedPaste()
bracketed_paste_enabled = true;
rx.enable_bracketed_paste();
}
}

View File

@ -1,9 +1,11 @@
#pragma once
#include "LineReader.h"
#include <replxx.hxx>
namespace DB
{
class ReplxxLineReader : public LineReader
{
public:
@ -36,3 +38,5 @@ private:
std::string editor;
};
}

View File

@ -5,8 +5,8 @@
#include <Client/Connection.h>
#include <Client/IServerConnection.h>
#include <Client/LocalConnection.h>
#include <Client/LineReader.h>
#include <IO/ConnectionTimeouts.h>
#include <base/LineReader.h>
#include <thread>

View File

@ -1,16 +0,0 @@
#pragma once
#include <cstddef>
/// This is a structure which is returned by MemoryTracker.
/// Methods onAlloc/onFree should be called after the actual memory allocation, if it succeeds.
/// For now, it will only collect allocation traces with sample_probability.
struct AllocationTrace
{
AllocationTrace() = default;
explicit AllocationTrace(double sample_probability_);
/// Reports an allocation of `size` bytes located at `ptr`.
void onAlloc(void * ptr, size_t size) const;
/// Reports a deallocation of `size` bytes located at `ptr`.
void onFree(void * ptr, size_t size) const;
/// Sampling probability; defaults to 0.
double sample_probability = 0;
};

View File

@ -92,10 +92,8 @@ public:
void * alloc(size_t size, size_t alignment = 0)
{
checkSize(size);
auto trace = CurrentMemoryTracker::alloc(size);
void * ptr = allocNoTrack(size, alignment);
trace.onAlloc(ptr, size);
return ptr;
CurrentMemoryTracker::alloc(size);
return allocNoTrack(size, alignment);
}
/// Free memory range.
@ -105,8 +103,7 @@ public:
{
checkSize(size);
freeNoTrack(buf, size);
auto trace = CurrentMemoryTracker::free(size);
trace.onFree(buf, size);
CurrentMemoryTracker::free(size);
}
catch (...)
{
@ -132,16 +129,13 @@ public:
&& alignment <= MALLOC_MIN_ALIGNMENT)
{
/// Resize malloc'd memory region with no special alignment requirement.
auto trace = CurrentMemoryTracker::realloc(old_size, new_size);
trace.onFree(buf, old_size);
CurrentMemoryTracker::realloc(old_size, new_size);
void * new_buf = ::realloc(buf, new_size);
if (nullptr == new_buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
buf = new_buf;
trace.onAlloc(buf, new_size);
if constexpr (clear_memory)
if (new_size > old_size)
memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
@ -149,8 +143,7 @@ public:
else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
{
/// Resize mmap'd memory region.
auto trace = CurrentMemoryTracker::realloc(old_size, new_size);
trace.onFree(buf, old_size);
CurrentMemoryTracker::realloc(old_size, new_size);
// On apple and freebsd self-implemented mremap used (common/mremap.h)
buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE,
@ -159,17 +152,14 @@ public:
DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.",
ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP);
trace.onAlloc(buf, new_size);
/// No need for zero-fill, because mmap guarantees it.
}
else if (new_size < MMAP_THRESHOLD)
{
/// Small allocs that requires a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once.
auto trace = CurrentMemoryTracker::realloc(old_size, new_size);
trace.onFree(buf, old_size);
CurrentMemoryTracker::realloc(old_size, new_size);
void * new_buf = allocNoTrack(new_size, alignment);
trace.onAlloc(new_buf, new_size);
memcpy(new_buf, buf, std::min(old_size, new_size));
freeNoTrack(buf, old_size);
buf = new_buf;

View File

@ -30,24 +30,21 @@ struct AllocatorWithMemoryTracking
throw std::bad_alloc();
size_t bytes = n * sizeof(T);
auto trace = CurrentMemoryTracker::alloc(bytes);
CurrentMemoryTracker::alloc(bytes);
T * p = static_cast<T *>(malloc(bytes));
if (!p)
throw std::bad_alloc();
trace.onAlloc(p, bytes);
return p;
}
void deallocate(T * p, size_t n) noexcept
{
size_t bytes = n * sizeof(T);
free(p);
auto trace = CurrentMemoryTracker::free(bytes);
trace.onFree(p, bytes);
size_t bytes = n * sizeof(T);
CurrentMemoryTracker::free(bytes);
}
};

View File

@ -37,7 +37,7 @@ MemoryTracker * getMemoryTracker()
using DB::current_thread;
AllocationTrace CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded)
void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded)
{
#ifdef MEMORY_TRACKER_DEBUG_CHECKS
if (unlikely(memory_tracker_always_throw_logical_error_on_allocation))
@ -55,9 +55,8 @@ AllocationTrace CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory
if (will_be > current_thread->untracked_memory_limit)
{
auto res = memory_tracker->allocImpl(will_be, throw_if_memory_exceeded);
memory_tracker->allocImpl(will_be, throw_if_memory_exceeded);
current_thread->untracked_memory = 0;
return res;
}
else
{
@ -69,40 +68,36 @@ AllocationTrace CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory
/// total_memory_tracker only, ignore untracked_memory
else
{
return memory_tracker->allocImpl(size, throw_if_memory_exceeded);
memory_tracker->allocImpl(size, throw_if_memory_exceeded);
}
return AllocationTrace(memory_tracker->getSampleProbability());
}
return AllocationTrace(0);
}
void CurrentMemoryTracker::check()
{
if (auto * memory_tracker = getMemoryTracker())
std::ignore = memory_tracker->allocImpl(0, true);
memory_tracker->allocImpl(0, true);
}
AllocationTrace CurrentMemoryTracker::alloc(Int64 size)
void CurrentMemoryTracker::alloc(Int64 size)
{
bool throw_if_memory_exceeded = true;
return allocImpl(size, throw_if_memory_exceeded);
allocImpl(size, throw_if_memory_exceeded);
}
AllocationTrace CurrentMemoryTracker::allocNoThrow(Int64 size)
void CurrentMemoryTracker::allocNoThrow(Int64 size)
{
bool throw_if_memory_exceeded = false;
return allocImpl(size, throw_if_memory_exceeded);
allocImpl(size, throw_if_memory_exceeded);
}
AllocationTrace CurrentMemoryTracker::realloc(Int64 old_size, Int64 new_size)
void CurrentMemoryTracker::realloc(Int64 old_size, Int64 new_size)
{
Int64 addition = new_size - old_size;
return addition > 0 ? alloc(addition) : free(-addition);
addition > 0 ? alloc(addition) : free(-addition);
}
AllocationTrace CurrentMemoryTracker::free(Int64 size)
void CurrentMemoryTracker::free(Int64 size)
{
if (auto * memory_tracker = getMemoryTracker())
{
@ -111,20 +106,15 @@ AllocationTrace CurrentMemoryTracker::free(Int64 size)
current_thread->untracked_memory -= size;
if (current_thread->untracked_memory < -current_thread->untracked_memory_limit)
{
Int64 untracked_memory = current_thread->untracked_memory;
memory_tracker->free(-current_thread->untracked_memory);
current_thread->untracked_memory = 0;
return memory_tracker->free(-untracked_memory);
}
}
/// total_memory_tracker only, ignore untracked_memory
else
{
return memory_tracker->free(size);
memory_tracker->free(size);
}
return AllocationTrace(memory_tracker->getSampleProbability());
}
return AllocationTrace(0);
}

View File

@ -1,20 +1,19 @@
#pragma once
#include <base/types.h>
#include <Common/AllocationTrace.h>
/// Convenience methods that use the current thread's memory_tracker if it is available.
struct CurrentMemoryTracker
{
/// Call the following functions before performing the corresponding operations with memory allocators.
/// alloc() forwards to allocImpl() with throw_if_memory_exceeded = true, allocNoThrow() with false.
/// realloc() accounts only the difference new_size - old_size: alloc() if it is positive, free() otherwise.
[[nodiscard]] static AllocationTrace alloc(Int64 size);
[[nodiscard]] static AllocationTrace allocNoThrow(Int64 size);
[[nodiscard]] static AllocationTrace realloc(Int64 old_size, Int64 new_size);
static void alloc(Int64 size);
static void allocNoThrow(Int64 size);
static void realloc(Int64 old_size, Int64 new_size);
/// This function should be called after memory deallocation.
[[nodiscard]] static AllocationTrace free(Int64 size);
static void free(Int64 size);
/// Calls allocImpl(0, true) on the current memory tracker, if there is one.
static void check();
private:
/// Shared implementation behind alloc() and allocNoThrow().
[[nodiscard]] static AllocationTrace allocImpl(Int64 size, bool throw_if_memory_exceeded);
static void allocImpl(Int64 size, bool throw_if_memory_exceeded);
};

View File

@ -57,8 +57,7 @@ public:
}
/// Do not count guard page in memory usage.
auto trace = CurrentMemoryTracker::alloc(num_pages * page_size);
trace.onAlloc(vp, num_pages * page_size);
CurrentMemoryTracker::alloc(num_pages * page_size);
boost::context::stack_context sctx;
sctx.size = num_bytes;
@ -78,7 +77,6 @@ public:
::munmap(vp, sctx.size);
/// Do not count guard page in memory usage.
auto trace = CurrentMemoryTracker::free(sctx.size - page_size);
trace.onFree(vp, sctx.size - page_size);
CurrentMemoryTracker::free(sctx.size - page_size);
}
};

View File

@ -1,7 +1,6 @@
#include "MemoryTracker.h"
#include <IO/WriteHelpers.h>
#include <Common/SipHash.h>
#include <Common/VariableContext.h>
#include <Common/TraceSender.h>
#include <Common/Exception.h>
@ -83,53 +82,6 @@ inline std::string_view toDescription(OvercommitResult result)
}
}
/// Decides whether the allocation at `ptr` is sampled: the hash of the pointer value
/// must fall below `probability` scaled to the full 64-bit range.
bool shouldTrackAllocation(DB::Float64 probability, void * ptr)
{
    const auto pointer_hash = sipHash64(uintptr_t(ptr));
    const auto threshold = std::numeric_limits<uint64_t>::max() * probability;
    return pointer_hash < threshold;
}
/// Returns `trace` unchanged unless `sample_probability` holds a value;
/// in that case a new trace built from that probability replaces it.
AllocationTrace updateAllocationTrace(AllocationTrace trace, const std::optional<double> & sample_probability)
{
    /// The common case is hinted to be "no probability set".
    if (likely(!sample_probability))
        return trace;

    return AllocationTrace(*sample_probability);
}
/// Builds a trace from `sample_probability` when it holds a value, otherwise from probability 0.
AllocationTrace getAllocationTrace(std::optional<double> & sample_probability)
{
    /// Having a probability set is hinted as the uncommon case.
    const double probability = unlikely(sample_probability) ? *sample_probability : 0;
    return AllocationTrace(probability);
}
}
/// Stores the sampling probability consulted by onAlloc()/onFree().
AllocationTrace::AllocationTrace(double sample_probability_) : sample_probability(sample_probability_) {}
/// Sends a MemorySample trace with a positive size for the allocation at `ptr`, subject to sampling.
void AllocationTrace::onAlloc(void * ptr, size_t size) const
{
    /// Probability 0 is hinted as the common case: nothing is reported.
    const bool sampling_disabled = (sample_probability == 0);
    if (likely(sampling_disabled))
        return;

    /// Below probability 1 the pointer hash decides whether this allocation is sampled.
    const bool partial_sampling = sample_probability < 1;
    if (partial_sampling && !shouldTrackAllocation(sample_probability, ptr))
        return;

    /// Presumably suppresses memory tracking in this thread while the sample is sent — TODO confirm.
    MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
    DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = Int64(size), .ptr = ptr});
}
/// Sends a MemorySample trace with a negated size for the deallocation at `ptr`, subject to sampling.
void AllocationTrace::onFree(void * ptr, size_t size) const
{
    /// Probability 0 is hinted as the common case: nothing is reported.
    const bool sampling_disabled = (sample_probability == 0);
    if (likely(sampling_disabled))
        return;

    /// Below probability 1 the pointer hash decides whether this deallocation is sampled.
    const bool partial_sampling = sample_probability < 1;
    if (partial_sampling && !shouldTrackAllocation(sample_probability, ptr))
        return;

    /// Presumably suppresses memory tracking in this thread while the sample is sent — TODO confirm.
    MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
    DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -Int64(size), .ptr = ptr});
}
namespace ProfileEvents
@ -183,7 +135,7 @@ void MemoryTracker::logMemoryUsage(Int64 current) const
}
AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker)
void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker)
{
if (size < 0)
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Negative size ({}) is passed to MemoryTracker. It is a bug.", size);
@ -202,14 +154,9 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed
/// Since the MemoryTrackerBlockerInThread should respect the level, we should go to the next parent.
if (auto * loaded_next = parent.load(std::memory_order_relaxed))
{
MemoryTracker * tracker = level == VariableContext::Process ? this : query_tracker;
return updateAllocationTrace(
loaded_next->allocImpl(size, throw_if_memory_exceeded, tracker),
sample_probability);
}
return getAllocationTrace(sample_probability);
loaded_next->allocImpl(size, throw_if_memory_exceeded,
level == VariableContext::Process ? this : query_tracker);
return;
}
/** Using memory_order_relaxed means that if allocations are done simultaneously,
@ -236,6 +183,14 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed
allocation_traced = true;
}
std::bernoulli_distribution sample(sample_probability);
if (unlikely(sample_probability > 0.0 && sample(thread_local_rng)))
{
MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = size});
allocation_traced = true;
}
std::bernoulli_distribution fault(fault_probability);
if (unlikely(fault_probability > 0.0 && fault(thread_local_rng)))
{
@ -354,22 +309,16 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed
}
if (auto * loaded_next = parent.load(std::memory_order_relaxed))
{
MemoryTracker * tracker = level == VariableContext::Process ? this : query_tracker;
return updateAllocationTrace(
loaded_next->allocImpl(size, throw_if_memory_exceeded, tracker),
sample_probability);
}
return getAllocationTrace(sample_probability);
loaded_next->allocImpl(size, throw_if_memory_exceeded,
level == VariableContext::Process ? this : query_tracker);
}
void MemoryTracker::adjustWithUntrackedMemory(Int64 untracked_memory)
{
if (untracked_memory > 0)
std::ignore = allocImpl(untracked_memory, /*throw_if_memory_exceeded*/ false);
allocImpl(untracked_memory, /*throw_if_memory_exceeded*/ false);
else
std::ignore = free(-untracked_memory);
free(-untracked_memory);
}
bool MemoryTracker::updatePeak(Int64 will_be, bool log_memory_usage)
@ -388,7 +337,8 @@ bool MemoryTracker::updatePeak(Int64 will_be, bool log_memory_usage)
return false;
}
AllocationTrace MemoryTracker::free(Int64 size)
void MemoryTracker::free(Int64 size)
{
if (MemoryTrackerBlockerInThread::isBlocked(level))
{
@ -403,9 +353,15 @@ AllocationTrace MemoryTracker::free(Int64 size)
/// Since the MemoryTrackerBlockerInThread should respect the level, we should go to the next parent.
if (auto * loaded_next = parent.load(std::memory_order_relaxed))
return updateAllocationTrace(loaded_next->free(size), sample_probability);
loaded_next->free(size);
return;
}
return getAllocationTrace(sample_probability);
std::bernoulli_distribution sample(sample_probability);
if (unlikely(sample_probability > 0.0 && sample(thread_local_rng)))
{
MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -size});
}
Int64 accounted_size = size;
@ -433,15 +389,12 @@ AllocationTrace MemoryTracker::free(Int64 size)
if (auto * overcommit_tracker_ptr = overcommit_tracker.load(std::memory_order_relaxed))
overcommit_tracker_ptr->tryContinueQueryExecutionAfterFree(accounted_size);
AllocationTrace res = getAllocationTrace(sample_probability);
if (auto * loaded_next = parent.load(std::memory_order_relaxed))
res = updateAllocationTrace(loaded_next->free(size), sample_probability);
loaded_next->free(size);
auto metric_loaded = metric.load(std::memory_order_relaxed);
if (metric_loaded != CurrentMetrics::end())
CurrentMetrics::sub(metric_loaded, accounted_size);
return res;
}
@ -525,14 +478,3 @@ void MemoryTracker::setOrRaiseProfilerLimit(Int64 value)
while ((value == 0 || old_value < value) && !profiler_limit.compare_exchange_weak(old_value, value))
;
}
double MemoryTracker::getSampleProbability()
{
if (sample_probability)
return *sample_probability;
if (auto * loaded_next = parent.load(std::memory_order_relaxed))
return loaded_next->getSampleProbability();
return 0;
}

View File

@ -2,11 +2,9 @@
#include <atomic>
#include <chrono>
#include <optional>
#include <base/types.h>
#include <Common/CurrentMetrics.h>
#include <Common/VariableContext.h>
#include <Common/AllocationTrace.h>
#if !defined(NDEBUG)
#define MEMORY_TRACKER_DEBUG_CHECKS
@ -67,7 +65,7 @@ private:
double fault_probability = 0;
/// To randomly sample allocations and deallocations in trace_log.
std::optional<double> sample_probability;
double sample_probability = 0;
/// Singly-linked list. All information will be passed to subsequent memory trackers also (it allows to implement trackers hierarchy).
/// In terms of tree nodes it is the list of parents. Lifetime of these trackers should "include" lifetime of current tracker.
@ -92,8 +90,8 @@ private:
/// allocImpl(...) and free(...) should not be used directly
friend struct CurrentMemoryTracker;
[[nodiscard]] AllocationTrace allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr);
[[nodiscard]] AllocationTrace free(Int64 size);
void allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr);
void free(Int64 size);
public:
static constexpr auto USAGE_EVENT_NAME = "MemoryTrackerUsage";
@ -148,8 +146,6 @@ public:
sample_probability = value;
}
double getSampleProbability();
void setProfilerStep(Int64 value)
{
profiler_step = value;

View File

@ -28,5 +28,4 @@ public:
}
friend class MemoryTracker;
friend struct AllocationTrace;
};

View File

@ -72,11 +72,11 @@ ShellCommand::~ShellCommand()
if (process_terminated_normally)
return;
LOG_TRACE(getLogger(), "Will kill shell command pid {} with SIGTERM", pid);
LOG_TRACE(getLogger(), "Will kill shell command pid {} with signal {}", pid, config.terminate_in_destructor_strategy.termination_signal);
int retcode = kill(pid, SIGTERM);
int retcode = kill(pid, config.terminate_in_destructor_strategy.termination_signal);
if (retcode != 0)
LOG_WARNING(getLogger(), "Cannot kill shell command pid {} errno '{}'", pid, errnoToString());
LOG_WARNING(getLogger(), "Cannot kill shell command pid {}, error: '{}'", pid, errnoToString());
}
else
{

View File

@ -27,18 +27,18 @@ namespace DB
class ShellCommand final
{
public:
~ShellCommand();
struct DestructorStrategy final
{
explicit DestructorStrategy(bool terminate_in_destructor_, size_t wait_for_normal_exit_before_termination_seconds_ = 0)
: terminate_in_destructor(terminate_in_destructor_)
explicit DestructorStrategy(bool terminate_in_destructor_, int termination_signal_, size_t wait_for_normal_exit_before_termination_seconds_ = 0)
: terminate_in_destructor(terminate_in_destructor_), termination_signal(termination_signal_)
, wait_for_normal_exit_before_termination_seconds(wait_for_normal_exit_before_termination_seconds_)
{
}
bool terminate_in_destructor;
int termination_signal;
/// If terminate_in_destructor is true, the command waits this many seconds for a normal exit before sending the termination signal to the created process
size_t wait_for_normal_exit_before_termination_seconds = 0;
@ -64,7 +64,7 @@ public:
bool pipe_stdin_only = false;
DestructorStrategy terminate_in_destructor_strategy = DestructorStrategy(false);
DestructorStrategy terminate_in_destructor_strategy = DestructorStrategy(false, 0);
};
/// Run the command using /bin/sh -c.

View File

@ -40,6 +40,10 @@ public:
* Pass CLOCK_MONOTONIC_COARSE, if you need better performance with acceptable cost of several milliseconds of inaccuracy.
*/
explicit Stopwatch(clockid_t clock_type_ = CLOCK_MONOTONIC) : clock_type(clock_type_) { start(); }
/// Creates a stopwatch from an explicitly supplied start timestamp (in nanoseconds) and running flag.
/// Unlike the single-argument constructor, it does not call start().
explicit Stopwatch(clockid_t clock_type_, UInt64 start_nanoseconds, bool is_running_)
: start_ns(start_nanoseconds), clock_type(clock_type_), is_running(is_running_)
{
}
void start() { start_ns = nanoseconds(); is_running = true; }
void stop() { stop_ns = nanoseconds(); is_running = false; }
@ -51,6 +55,8 @@ public:
UInt64 elapsedMilliseconds() const { return elapsedNanoseconds() / 1000000UL; }
double elapsedSeconds() const { return static_cast<double>(elapsedNanoseconds()) / 1000000000ULL; }
/// Returns the stored start timestamp (start_ns), in nanoseconds.
/// NOTE(review): does not modify the object, so it could be declared const like the elapsed*() accessors.
UInt64 getStart() { return start_ns; }
private:
UInt64 start_ns = 0;
UInt64 stop_ns = 0;

View File

@ -188,13 +188,10 @@ void ThreadStatus::updatePerformanceCounters()
}
}
void ThreadStatus::assertState(const std::initializer_list<int> & permitted_states, const char * description) const
void ThreadStatus::assertState(ThreadState permitted_state, const char * description) const
{
for (auto permitted_state : permitted_states)
{
if (getCurrentState() == permitted_state)
return;
}
if (getCurrentState() == permitted_state)
return;
if (description)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected thread state {}: {}", getCurrentState(), description);

View File

@ -87,10 +87,6 @@ public:
LogsLevel client_logs_level = LogsLevel::none;
String query;
/// Query without new lines (see toOneLineQuery())
/// Used to print in case of fatal error
/// (to avoid calling extra code in the fatal error handler)
String one_line_query;
UInt64 normalized_query_hash = 0;
std::vector<ProfileEventsCountersAndMemory> finished_threads_counters_memory;
@ -296,7 +292,7 @@ protected:
void logToQueryThreadLog(QueryThreadLog & thread_log, const String & current_database, std::chrono::time_point<std::chrono::system_clock> now);
void assertState(const std::initializer_list<int> & permitted_states, const char * description = nullptr) const;
void assertState(ThreadState permitted_state, const char * description = nullptr) const;
private:

View File

@ -33,7 +33,6 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Ext
+ sizeof(TraceType) /// trace type
+ sizeof(UInt64) /// thread_id
+ sizeof(Int64) /// size
+ sizeof(void *) /// ptr
+ sizeof(ProfileEvents::Event) /// event
+ sizeof(ProfileEvents::Count); /// increment
@ -75,7 +74,6 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Ext
writePODBinary(trace_type, out);
writePODBinary(thread_id, out);
writePODBinary(extras.size, out);
writePODBinary(UInt64(extras.ptr), out);
writePODBinary(extras.event, out);
writePODBinary(extras.increment, out);

View File

@ -28,9 +28,8 @@ class TraceSender
public:
struct Extras
{
/// size, ptr - for memory tracing is the amount of memory allocated; for other trace types it is 0.
/// size - for memory tracing is the amount of memory allocated; for other trace types it is 0.
Int64 size{};
void * ptr = nullptr;
/// Event type and increment for 'ProfileEvent' trace type; for other trace types defaults.
ProfileEvents::Event event{ProfileEvents::end()};
ProfileEvents::Count increment{};

View File

@ -342,7 +342,6 @@ ZooKeeper::ZooKeeper(
default_acls.emplace_back(std::move(acl));
}
/// It makes sense (especially, for async requests) to inject a fault in two places:
/// pushRequest (before request is sent) and receiveEvent (after request was executed).
if (0 < args.send_fault_probability && args.send_fault_probability <= 1)

View File

@ -9,11 +9,7 @@ extern "C" void * clickhouse_malloc(size_t size)
{
void * res = malloc(size);
if (res)
{
AllocationTrace trace;
size_t actual_size = Memory::trackMemory(size, trace);
trace.onAlloc(res, actual_size);
}
Memory::trackMemory(size);
return res;
}
@ -21,29 +17,17 @@ extern "C" void * clickhouse_calloc(size_t number_of_members, size_t size)
{
void * res = calloc(number_of_members, size);
if (res)
{
AllocationTrace trace;
size_t actual_size = Memory::trackMemory(number_of_members * size, trace);
trace.onAlloc(res, actual_size);
}
Memory::trackMemory(number_of_members * size);
return res;
}
extern "C" void * clickhouse_realloc(void * ptr, size_t size)
{
if (ptr)
{
AllocationTrace trace;
size_t actual_size = Memory::untrackMemory(ptr, trace);
trace.onFree(ptr, actual_size);
}
Memory::untrackMemory(ptr);
void * res = realloc(ptr, size);
if (res)
{
AllocationTrace trace;
size_t actual_size = Memory::trackMemory(size, trace);
trace.onAlloc(res, actual_size);
}
Memory::trackMemory(size);
return res;
}
@ -58,9 +42,7 @@ extern "C" void * clickhouse_reallocarray(void * ptr, size_t number_of_members,
extern "C" void clickhouse_free(void * ptr)
{
AllocationTrace trace;
size_t actual_size = Memory::untrackMemory(ptr, trace);
trace.onFree(ptr, actual_size);
Memory::untrackMemory(ptr);
free(ptr);
}
@ -68,10 +50,6 @@ extern "C" int clickhouse_posix_memalign(void ** memptr, size_t alignment, size_
{
int res = posix_memalign(memptr, alignment, size);
if (res == 0)
{
AllocationTrace trace;
size_t actual_size = Memory::trackMemory(size, trace);
trace.onAlloc(*memptr, actual_size);
}
Memory::trackMemory(size);
return res;
}

View File

@ -112,19 +112,16 @@ inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size, TAlign... align
template <std::same_as<std::align_val_t>... TAlign>
requires DB::OptionalArgument<TAlign...>
inline ALWAYS_INLINE size_t trackMemory(std::size_t size, AllocationTrace & trace, TAlign... align)
inline ALWAYS_INLINE void trackMemory(std::size_t size, TAlign... align)
{
std::size_t actual_size = getActualAllocationSize(size, align...);
trace = CurrentMemoryTracker::allocNoThrow(actual_size);
return actual_size;
CurrentMemoryTracker::allocNoThrow(actual_size);
}
template <std::same_as<std::align_val_t>... TAlign>
requires DB::OptionalArgument<TAlign...>
inline ALWAYS_INLINE size_t untrackMemory(void * ptr [[maybe_unused]], AllocationTrace & trace, std::size_t size [[maybe_unused]] = 0, TAlign... align [[maybe_unused]]) noexcept
inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0, TAlign... align [[maybe_unused]]) noexcept
{
std::size_t actual_size = 0;
try
{
#if USE_JEMALLOC
@ -133,26 +130,23 @@ inline ALWAYS_INLINE size_t untrackMemory(void * ptr [[maybe_unused]], Allocatio
if (likely(ptr != nullptr))
{
if constexpr (sizeof...(TAlign) == 1)
actual_size = sallocx(ptr, MALLOCX_ALIGN(alignToSizeT(align...)));
CurrentMemoryTracker::free(sallocx(ptr, MALLOCX_ALIGN(alignToSizeT(align...))));
else
actual_size = sallocx(ptr, 0);
CurrentMemoryTracker::free(sallocx(ptr, 0));
}
#else
if (size)
actual_size = size;
CurrentMemoryTracker::free(size);
# if defined(_GNU_SOURCE)
/// This resource accounting is inaccurate for sanitizers: malloc_usable_size() result is greater than or equal to the allocated size.
else
actual_size = malloc_usable_size(ptr);
CurrentMemoryTracker::free(malloc_usable_size(ptr));
# endif
#endif
trace = CurrentMemoryTracker::free(actual_size);
}
catch (...)
{
}
return actual_size;
}
}

View File

@ -50,74 +50,50 @@ static struct InitializeJemallocZoneAllocatorForOSX
/// Replacement allocation functions (scalar/array, aligned, nothrow variants).
/// Each overload reports the requested size through Memory::trackMemory and obtains the storage from
/// Memory::newImpl (throwing forms) or Memory::newNoExept (nothrow forms).
/// NOTE(review): every body below contains statements after its first `return`, which cannot execute.
/// This looks like two revisions (with and without AllocationTrace) fused together - confirm which one is intended.

/// Scalar form.
void * operator new(std::size_t size)
{
AllocationTrace trace;
std::size_t actual_size = Memory::trackMemory(size, trace);
void * ptr = Memory::newImpl(size);
trace.onAlloc(ptr, actual_size);
return ptr;
Memory::trackMemory(size);
return Memory::newImpl(size);
}
/// Scalar form with explicit alignment.
void * operator new(std::size_t size, std::align_val_t align)
{
AllocationTrace trace;
std::size_t actual_size = Memory::trackMemory(size, trace, align);
void * ptr = Memory::newImpl(size, align);
trace.onAlloc(ptr, actual_size);
return ptr;
Memory::trackMemory(size, align);
return Memory::newImpl(size, align);
}
/// Array form.
void * operator new[](std::size_t size)
{
AllocationTrace trace;
std::size_t actual_size = Memory::trackMemory(size, trace);
void * ptr = Memory::newImpl(size);
trace.onAlloc(ptr, actual_size);
return ptr;
Memory::trackMemory(size);
return Memory::newImpl(size);
}
/// Array form with explicit alignment.
void * operator new[](std::size_t size, std::align_val_t align)
{
AllocationTrace trace;
std::size_t actual_size = Memory::trackMemory(size, trace, align);
void * ptr = Memory::newImpl(size, align);
trace.onAlloc(ptr, actual_size);
return ptr;
Memory::trackMemory(size, align);
return Memory::newImpl(size, align);
}
/// Nothrow scalar form.
void * operator new(std::size_t size, const std::nothrow_t &) noexcept
{
AllocationTrace trace;
std::size_t actual_size = Memory::trackMemory(size, trace);
void * ptr = Memory::newNoExept(size);
trace.onAlloc(ptr, actual_size);
return ptr;
Memory::trackMemory(size);
return Memory::newNoExept(size);
}
/// Nothrow array form.
void * operator new[](std::size_t size, const std::nothrow_t &) noexcept
{
AllocationTrace trace;
std::size_t actual_size = Memory::trackMemory(size, trace);
void * ptr = Memory::newNoExept(size);
trace.onAlloc(ptr, actual_size);
return ptr;
Memory::trackMemory(size);
return Memory::newNoExept(size);
}
/// Nothrow scalar form with explicit alignment.
void * operator new(std::size_t size, std::align_val_t align, const std::nothrow_t &) noexcept
{
AllocationTrace trace;
std::size_t actual_size = Memory::trackMemory(size, trace, align);
void * ptr = Memory::newNoExept(size, align);
trace.onAlloc(ptr, actual_size);
return ptr;
Memory::trackMemory(size, align);
return Memory::newNoExept(size, align);
}
/// Nothrow array form with explicit alignment.
void * operator new[](std::size_t size, std::align_val_t align, const std::nothrow_t &) noexcept
{
AllocationTrace trace;
std::size_t actual_size = Memory::trackMemory(size, trace, align);
void * ptr = Memory::newNoExept(size, align);
trace.onAlloc(ptr, actual_size);
return ptr;
Memory::trackMemory(size, align);
return Memory::newNoExept(size, align);
}
/// delete
@ -133,64 +109,48 @@ void * operator new[](std::size_t size, std::align_val_t align, const std::nothr
/// Replacement deallocation functions (scalar/array, aligned, sized variants).
/// Each overload untracks the memory through Memory::untrackMemory and then releases it with
/// Memory::deleteImpl (unsized forms) or Memory::deleteSized (sized forms).
/// NOTE(review): every body below calls Memory::untrackMemory twice (once with an AllocationTrace, once without).
/// This looks like two revisions fused together - confirm which one is intended.

/// Unsized scalar form.
void operator delete(void * ptr) noexcept
{
AllocationTrace trace;
std::size_t actual_size = Memory::untrackMemory(ptr, trace);
trace.onFree(ptr, actual_size);
Memory::untrackMemory(ptr);
Memory::deleteImpl(ptr);
}
/// Unsized scalar form with explicit alignment; a size of 0 is passed to untrackMemory.
void operator delete(void * ptr, std::align_val_t align) noexcept
{
AllocationTrace trace;
std::size_t actual_size = Memory::untrackMemory(ptr, trace, 0, align);
trace.onFree(ptr, actual_size);
Memory::untrackMemory(ptr, 0, align);
Memory::deleteImpl(ptr);
}
/// Unsized array form.
void operator delete[](void * ptr) noexcept
{
AllocationTrace trace;
std::size_t actual_size = Memory::untrackMemory(ptr, trace);
trace.onFree(ptr, actual_size);
Memory::untrackMemory(ptr);
Memory::deleteImpl(ptr);
}
/// Unsized array form with explicit alignment; a size of 0 is passed to untrackMemory.
void operator delete[](void * ptr, std::align_val_t align) noexcept
{
AllocationTrace trace;
std::size_t actual_size = Memory::untrackMemory(ptr, trace, 0, align);
trace.onFree(ptr, actual_size);
Memory::untrackMemory(ptr, 0, align);
Memory::deleteImpl(ptr);
}
/// Sized scalar form.
void operator delete(void * ptr, std::size_t size) noexcept
{
AllocationTrace trace;
std::size_t actual_size = Memory::untrackMemory(ptr, trace, size);
trace.onFree(ptr, actual_size);
Memory::untrackMemory(ptr, size);
Memory::deleteSized(ptr, size);
}
/// Sized scalar form with explicit alignment.
void operator delete(void * ptr, std::size_t size, std::align_val_t align) noexcept
{
AllocationTrace trace;
std::size_t actual_size = Memory::untrackMemory(ptr, trace, size, align);
trace.onFree(ptr, actual_size);
Memory::untrackMemory(ptr, size, align);
Memory::deleteSized(ptr, size, align);
}
/// Sized array form.
void operator delete[](void * ptr, std::size_t size) noexcept
{
AllocationTrace trace;
std::size_t actual_size = Memory::untrackMemory(ptr, trace, size);
trace.onFree(ptr, actual_size);
Memory::untrackMemory(ptr, size);
Memory::deleteSized(ptr, size);
}
/// Sized array form with explicit alignment.
void operator delete[](void * ptr, std::size_t size, std::align_val_t align) noexcept
{
AllocationTrace trace;
std::size_t actual_size = Memory::untrackMemory(ptr, trace, size, align);
trace.onFree(ptr, actual_size);
Memory::untrackMemory(ptr, size, align);
Memory::deleteSized(ptr, size, align);
}

View File

@ -148,31 +148,30 @@ std::vector<String> Settings::getAllRegisteredNames() const
/// Sets the setting `name` to `value` through BaseSettings::set.
/// Setting "compatibility" additionally triggers applyCompatibilitySetting; setting any other name that is
/// recorded in settings_changed_by_compatibility_setting removes it from that set.
/// NOTE(review): BaseSettings::set appears both first and last, and applyCompatibilitySetting appears both
/// with and without an argument. This looks like two revisions fused together - confirm which one is intended.
void Settings::set(std::string_view name, const Field & value)
{
BaseSettings::set(name, value);
if (name == "compatibility")
applyCompatibilitySetting();
applyCompatibilitySetting(value.get<String>());
/// If we change setting that was changed by compatibility setting before
/// we should remove it from settings_changed_by_compatibility_setting,
/// otherwise the next time we will change compatibility setting
/// this setting will be changed too (and we don't want it).
else if (settings_changed_by_compatibility_setting.contains(name))
settings_changed_by_compatibility_setting.erase(name);
BaseSettings::set(name, value);
}
void Settings::applyCompatibilitySetting()
void Settings::applyCompatibilitySetting(const String & compatibility_value)
{
/// First, revert all changes applied by previous compatibility setting
for (const auto & setting_name : settings_changed_by_compatibility_setting)
resetToDefault(setting_name);
settings_changed_by_compatibility_setting.clear();
String compatibility = getString("compatibility");
/// If setting value is empty, we don't need to change settings
if (compatibility.empty())
if (compatibility_value.empty())
return;
ClickHouseVersion version(compatibility);
ClickHouseVersion version(compatibility_value);
/// Iterate through ClickHouse versions in descending order and apply reversed
/// changes for each version that is higher than the version from the compatibility setting
for (auto it = settings_changes_history.rbegin(); it != settings_changes_history.rend(); ++it)

View File

@ -928,7 +928,7 @@ struct Settings : public BaseSettings<SettingsTraits>, public IHints<2, Settings
void setDefaultValue(const String & name) { resetToDefault(name); }
private:
void applyCompatibilitySetting();
void applyCompatibilitySetting(const String & compatibility);
std::unordered_set<std::string_view> settings_changed_by_compatibility_setting;
};

View File

@ -3,6 +3,7 @@
#include <Core/Field.h>
#include <Core/Settings.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <boost/algorithm/string.hpp>
#include <map>
@ -28,7 +29,8 @@ public:
for (const auto & split_element : split)
{
size_t component;
if (!tryParse(component, split_element))
ReadBufferFromString buf(split_element);
if (!tryReadIntText(component, buf) || !buf.eof())
throw Exception{ErrorCodes::BAD_ARGUMENTS, "Cannot parse ClickHouse version here: {}", version};
components.push_back(component);
}

View File

@ -4,6 +4,7 @@
#include <Daemon/BaseDaemon.h>
#include <Daemon/SentryWriter.h>
#include <Parsers/toOneLineQuery.h>
#include <base/errnoToString.h>
#include <sys/stat.h>
@ -303,7 +304,7 @@ private:
if (auto thread_group = thread_ptr->getThreadGroup())
{
query = thread_group->one_line_query;
query = DB::toOneLineQuery(thread_group->query);
}
if (auto logs_queue = thread_ptr->getInternalTextLogsQueue())

View File

@ -11,7 +11,7 @@ if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES)
target_link_libraries (daemon PUBLIC -Wl,-undefined,dynamic_lookup)
endif()
target_link_libraries (daemon PUBLIC loggers common PRIVATE clickhouse_common_io clickhouse_common_config)
target_link_libraries (daemon PUBLIC loggers common PRIVATE clickhouse_parsers clickhouse_common_io clickhouse_common_config)
if (TARGET ch_contrib::sentry)
target_link_libraries (daemon PRIVATE ch_contrib::sentry dbms)

View File

@ -8,12 +8,58 @@
namespace DB
{
void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_simple_types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_complex_types)
/// Builds the set of type ids (as returned by getTypeId()) of all the given types.
TypeIndexesSet getTypesIndexes(const DataTypes & types)
{
    TypeIndexesSet indexes;
    for (const auto & data_type : types)
    {
        indexes.insert(data_type->getTypeId());
    }
    return indexes;
}
void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_simple_types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_complex_types)
{
TypeIndexesSet type_indexes = getTypesIndexes(types);
/// Nullable
if (type_indexes.contains(TypeIndex::Nullable))
{
std::vector<UInt8> is_nullable;
is_nullable.reserve(types.size());
DataTypes nested_types;
nested_types.reserve(types.size());
for (const auto & type : types)
{
if (const DataTypeNullable * type_nullable = typeid_cast<const DataTypeNullable *>(type.get()))
{
is_nullable.push_back(1);
nested_types.push_back(type_nullable->getNestedType());
}
else
{
is_nullable.push_back(0);
nested_types.push_back(type);
}
}
transformTypesRecursively(nested_types, transform_simple_types, transform_complex_types);
for (size_t i = 0; i != types.size(); ++i)
{
/// Type could be changed so it cannot be inside Nullable anymore.
if (is_nullable[i] && nested_types[i]->canBeInsideNullable())
types[i] = makeNullable(nested_types[i]);
else
types[i] = nested_types[i];
}
if (transform_complex_types)
{
/// Some types could be changed.
type_indexes = getTypesIndexes(types);
transform_complex_types(types, type_indexes);
}
return;
}
/// Arrays
if (type_indexes.contains(TypeIndex::Array))
@ -114,42 +160,6 @@ void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &
return;
}
/// Nullable
if (type_indexes.contains(TypeIndex::Nullable))
{
std::vector<UInt8> is_nullable;
is_nullable.reserve(types.size());
DataTypes nested_types;
nested_types.reserve(types.size());
for (const auto & type : types)
{
if (const DataTypeNullable * type_nullable = typeid_cast<const DataTypeNullable *>(type.get()))
{
is_nullable.push_back(1);
nested_types.push_back(type_nullable->getNestedType());
}
else
{
is_nullable.push_back(0);
nested_types.push_back(type);
}
}
transformTypesRecursively(nested_types, transform_simple_types, transform_complex_types);
for (size_t i = 0; i != types.size(); ++i)
{
if (is_nullable[i])
types[i] = makeNullable(nested_types[i]);
else
types[i] = nested_types[i];
}
if (transform_complex_types)
transform_complex_types(types, type_indexes);
return;
}
transform_simple_types(types, type_indexes);
}

View File

@ -401,7 +401,7 @@ void DatabaseReplicated::createEmptyLogEntry(const ZooKeeperPtr & current_zookee
/// Delegates to the DDL worker's waitForReplicaToProcessAllEntries with the given timeout (in milliseconds).
/// Returns false without waiting when there is no DDL worker or when the database is marked as probably dropped.
bool DatabaseReplicated::waitForReplicaToProcessAllEntries(UInt64 timeout_ms)
{
    /// A single guard: previously a redundant outer `if (!ddl_worker)` wrapped this check,
    /// so the `is_probably_dropped` condition could never take effect while a worker existed.
    if (!ddl_worker || is_probably_dropped)
        return false;
    return ddl_worker->waitForReplicaToProcessAllEntries(timeout_ms);
}
@ -473,9 +473,10 @@ void DatabaseReplicated::startupTables(ThreadPool & thread_pool, LoadingStrictne
chassert(!TSA_SUPPRESS_WARNING_FOR_READ(tables_metadata_digest));
TSA_SUPPRESS_WARNING_FOR_WRITE(tables_metadata_digest) = digest;
ddl_worker = std::make_unique<DatabaseReplicatedDDLWorker>(this, getContext());
if (is_probably_dropped)
return;
ddl_worker = std::make_unique<DatabaseReplicatedDDLWorker>(this, getContext());
ddl_worker->startup();
}
@ -491,7 +492,7 @@ bool DatabaseReplicated::checkDigestValid(const ContextPtr & local_context, bool
LOG_TEST(log, "Current in-memory metadata digest: {}", tables_metadata_digest);
/// Database is probably being dropped
if (!local_context->getZooKeeperMetadataTransaction() && !ddl_worker->isCurrentlyActive())
if (!local_context->getZooKeeperMetadataTransaction() && (!ddl_worker || !ddl_worker->isCurrentlyActive()))
return true;
UInt64 local_digest = 0;
@ -1019,8 +1020,51 @@ ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node
return ast;
}
/// Removes the metadata of replica `full_replica_name` of the Replicated database located at
/// `database_zookeeper_path` in ZooKeeper.
///
/// `database` may be nullptr; when it is set, its zookeeper_path must equal `database_zookeeper_path`
/// and an empty log entry is created through it after the replica is marked as dropped.
///
/// Throws BAD_ARGUMENTS when the replica name contains '/', when the path does not hold the
/// Replicated database mark, when the replica node does not exist, or when the replica is active.
void DatabaseReplicated::dropReplica(
    DatabaseReplicated * database, const String & database_zookeeper_path, const String & full_replica_name)
{
    assert(!database || database_zookeeper_path == database->zookeeper_path);

    const bool name_contains_slash = full_replica_name.find('/') != std::string::npos;
    if (name_contains_slash)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid replica name: {}", full_replica_name);

    auto keeper = Context::getGlobalContextInstance()->getZooKeeper();

    /// The value stored at the database path must be the Replicated database mark.
    if (keeper->get(database_zookeeper_path) != REPLICATED_DATABASE_MARK)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} does not look like a path of Replicated database", database_zookeeper_path);

    const String replica_node = fs::path(database_zookeeper_path) / "replicas" / full_replica_name;

    if (!keeper->exists(replica_node))
    {
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica {} does not exist (database path: {})",
            full_replica_name, database_zookeeper_path);
    }

    if (keeper->exists(replica_node + "/active"))
    {
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica {} is active, cannot drop it (database path: {})",
            full_replica_name, database_zookeeper_path);
    }

    /// Mark the replica as dropped before removing anything.
    keeper->set(replica_node, DROPPED_MARK, -1);

    /// Notify other replicas that cluster configuration was changed (if we can)
    if (database)
        database->createEmptyLogEntry(keeper);

    keeper->tryRemoveRecursive(replica_node);

    /// Removing the "replicas" node succeeds only when no replicas are left under it.
    const bool was_last_replica = keeper->tryRemove(database_zookeeper_path + "/replicas") == Coordination::Error::ZOK;
    if (was_last_replica)
    {
        /// It was the last replica, remove all metadata
        keeper->tryRemoveRecursive(database_zookeeper_path);
    }
}
void DatabaseReplicated::drop(ContextPtr context_)
{
if (is_probably_dropped)
{
/// Don't need to drop anything from ZooKeeper
DatabaseAtomic::drop(context_);
return;
}
auto current_zookeeper = getZooKeeper();
current_zookeeper->set(replica_path, DROPPED_MARK, -1);
createEmptyLogEntry(current_zookeeper);
@ -1038,8 +1082,6 @@ void DatabaseReplicated::drop(ContextPtr context_)
/// Shuts down the DDL worker, if one exists.
/// The worker is shut down even when the database is marked as probably dropped: that flag can be set
/// after the worker was started, and a started worker still has to be stopped via shutdown().
void DatabaseReplicated::stopReplication()
{
    if (ddl_worker)
        ddl_worker->shutdown();
}
@ -1055,7 +1097,7 @@ void DatabaseReplicated::shutdown()
void DatabaseReplicated::dropTable(ContextPtr local_context, const String & table_name, bool sync)
{
auto txn = local_context->getZooKeeperMetadataTransaction();
assert(!ddl_worker->isCurrentlyActive() || txn || startsWith(table_name, ".inner_id."));
assert(!ddl_worker || !ddl_worker->isCurrentlyActive() || txn || startsWith(table_name, ".inner_id."));
if (txn && txn->isInitialQuery() && !txn->isCreateOrReplaceQuery())
{
String metadata_zk_path = zookeeper_path + "/metadata/" + escapeForFileName(table_name);

View File

@ -77,6 +77,8 @@ public:
bool shouldReplicateQuery(const ContextPtr & query_context, const ASTPtr & query_ptr) const override;
static void dropReplica(DatabaseReplicated * database, const String & database_zookeeper_path, const String & full_replica_name);
friend struct DatabaseReplicatedTask;
friend class DatabaseReplicatedDDLWorker;
private:

View File

@ -2,6 +2,7 @@
#include <Databases/DatabaseReplicated.h>
#include <Interpreters/DDLTask.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Core/ServerUUID.h>
#include <filesystem>
namespace fs = std::filesystem;
@ -36,6 +37,13 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread()
auto zookeeper = getAndSetZooKeeper();
if (database->is_readonly)
database->tryConnectToZooKeeperAndInitDatabase(LoadingStrictnessLevel::ATTACH);
if (database->is_probably_dropped)
{
/// The flag was set in tryConnectToZooKeeperAndInitDatabase
LOG_WARNING(log, "Exiting main thread, because the database was probably dropped");
/// NOTE It will not stop cleanup thread until DDLWorker::shutdown() call (cleanup thread will just do nothing)
break;
}
initializeReplication();
initialized = true;
return true;
@ -62,6 +70,16 @@ void DatabaseReplicatedDDLWorker::initializeReplication()
/// Invariant: replica is lost if its log_ptr value is less than max_log_ptr - logs_to_keep.
auto zookeeper = getAndSetZooKeeper();
/// Create "active" node (remove previous one if necessary)
String active_path = fs::path(database->replica_path) / "active";
String active_id = toString(ServerUUID::get());
zookeeper->handleEphemeralNodeExistence(active_path, active_id);
zookeeper->create(active_path, active_id, zkutil::CreateMode::Ephemeral);
active_node_holder.reset();
active_node_holder_zookeeper = zookeeper;
active_node_holder = zkutil::EphemeralNodeHolder::existing(active_path, *active_node_holder_zookeeper);
String log_ptr_str = zookeeper->get(database->replica_path + "/log_ptr");
UInt32 our_log_ptr = parse<UInt32>(log_ptr_str);
UInt32 max_log_ptr = parse<UInt32>(zookeeper->get(database->zookeeper_path + "/max_log_ptr"));

View File

@ -1,5 +1,6 @@
#pragma once
#include <Interpreters/DDLWorker.h>
#include <Common/ZooKeeper/ZooKeeper.h>
namespace DB
{
@ -49,6 +50,12 @@ private:
String current_task;
std::atomic<UInt32> logs_to_keep = std::numeric_limits<UInt32>::max();
/// EphemeralNodeHolder has reference to ZooKeeper, it may become dangling
ZooKeeperPtr active_node_holder_zookeeper;
/// It will remove "active" node when database is detached
zkutil::EphemeralNodeHolderPtr active_node_holder;
};
}

View File

@ -145,13 +145,9 @@ MongoDBDictionarySource::MongoDBDictionarySource(
connection->connect(host, port);
if (!user.empty())
{
#if POCO_VERSION >= 0x01070800
Poco::MongoDB::Database poco_db(db);
if (!poco_db.authenticate(*connection, user, password, method.empty() ? Poco::MongoDB::Database::AUTH_SCRAM_SHA1 : method))
throw Exception(ErrorCodes::MONGODB_CANNOT_AUTHENTICATE, "Cannot authenticate in MongoDB, incorrect user or password");
#else
authenticate(*connection, db, user, password);
#endif
}
}
}

View File

@ -344,11 +344,14 @@ void buildPrimaryKeyConfiguration(
auto identifier_name = key_names.front();
auto it = std::find_if(children.begin(), children.end(), [&](const ASTPtr & node)
{
const ASTDictionaryAttributeDeclaration * dict_attr = node->as<const ASTDictionaryAttributeDeclaration>();
return dict_attr->name == identifier_name;
});
const auto * it = std::find_if(
children.begin(),
children.end(),
[&](const ASTPtr & node)
{
const ASTDictionaryAttributeDeclaration * dict_attr = node->as<const ASTDictionaryAttributeDeclaration>();
return dict_attr->name == identifier_name;
});
if (it == children.end())
{

View File

@ -1,259 +0,0 @@
#include "DiskDecorator.h"
#include <IO/ReadBufferFromFileBase.h>
#include <IO/WriteBufferFromFileBase.h>
namespace DB
{

/// Every method of DiskDecorator defined in this file forwards its arguments unchanged to the wrapped `delegate` disk.

/// The IDisk base is given a placeholder name; getName() below reports the delegate's name.
DiskDecorator::DiskDecorator(const DiskPtr & delegate_) : IDisk(/* name_= */ "<decorator>"), delegate(delegate_) {}

/// Transactions, identity and space accounting.
DiskTransactionPtr DiskDecorator::createTransaction() { return delegate->createTransaction(); }
const String & DiskDecorator::getName() const { return delegate->getName(); }
ReservationPtr DiskDecorator::reserve(UInt64 bytes) { return delegate->reserve(bytes); }
const String & DiskDecorator::getPath() const { return delegate->getPath(); }
UInt64 DiskDecorator::getTotalSpace() const { return delegate->getTotalSpace(); }
UInt64 DiskDecorator::getAvailableSpace() const { return delegate->getAvailableSpace(); }
UInt64 DiskDecorator::getUnreservedSpace() const { return delegate->getUnreservedSpace(); }
UInt64 DiskDecorator::getKeepingFreeSpace() const { return delegate->getKeepingFreeSpace(); }

/// Path queries.
bool DiskDecorator::exists(const String & path) const { return delegate->exists(path); }
bool DiskDecorator::isFile(const String & path) const { return delegate->isFile(path); }
bool DiskDecorator::isDirectory(const String & path) const { return delegate->isDirectory(path); }
size_t DiskDecorator::getFileSize(const String & path) const { return delegate->getFileSize(path); }

/// Directory operations.
void DiskDecorator::createDirectory(const String & path) { delegate->createDirectory(path); }
void DiskDecorator::createDirectories(const String & path) { delegate->createDirectories(path); }
void DiskDecorator::clearDirectory(const String & path) { delegate->clearDirectory(path); }
void DiskDecorator::moveDirectory(const String & from_path, const String & to_path) { delegate->moveDirectory(from_path, to_path); }
DirectoryIteratorPtr DiskDecorator::iterateDirectory(const String & path) const { return delegate->iterateDirectory(path); }

/// File creation, moving, copying and listing.
void DiskDecorator::createFile(const String & path) { delegate->createFile(path); }
void DiskDecorator::moveFile(const String & from_path, const String & to_path) { delegate->moveFile(from_path, to_path); }
void DiskDecorator::replaceFile(const String & from_path, const String & to_path) { delegate->replaceFile(from_path, to_path); }

void DiskDecorator::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
{
    delegate->copy(from_path, to_disk, to_path);
}

void DiskDecorator::copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir)
{
    delegate->copyDirectoryContent(from_dir, to_disk, to_dir);
}

void DiskDecorator::listFiles(const String & path, std::vector<String> & file_names) const { delegate->listFiles(path, file_names); }

/// Reading and writing.
std::unique_ptr<ReadBufferFromFileBase> DiskDecorator::readFile(
    const String & path, const ReadSettings & settings, std::optional<size_t> read_hint, std::optional<size_t> file_size) const
{
    return delegate->readFile(path, settings, read_hint, file_size);
}

std::unique_ptr<WriteBufferFromFileBase> DiskDecorator::writeFile(
    const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings)
{
    return delegate->writeFile(path, buf_size, mode, settings);
}

/// Removal.
void DiskDecorator::removeFile(const String & path) { delegate->removeFile(path); }
void DiskDecorator::removeFileIfExists(const String & path) { delegate->removeFileIfExists(path); }
void DiskDecorator::removeDirectory(const String & path) { delegate->removeDirectory(path); }
void DiskDecorator::removeRecursive(const String & path) { delegate->removeRecursive(path); }
void DiskDecorator::removeSharedFile(const String & path, bool keep_s3) { delegate->removeSharedFile(path, keep_s3); }
void DiskDecorator::removeSharedFileIfExists(const String & path, bool keep_s3) { delegate->removeSharedFileIfExists(path, keep_s3); }

void DiskDecorator::removeSharedFiles(
    const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only)
{
    delegate->removeSharedFiles(files, keep_all_batch_data, file_names_remove_metadata_only);
}

void DiskDecorator::removeSharedRecursive(
    const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only)
{
    delegate->removeSharedRecursive(path, keep_all_batch_data, file_names_remove_metadata_only);
}

/// Timestamps, permissions, links and truncation.
void DiskDecorator::setLastModified(const String & path, const Poco::Timestamp & timestamp) { delegate->setLastModified(path, timestamp); }
Poco::Timestamp DiskDecorator::getLastModified(const String & path) const { return delegate->getLastModified(path); }
time_t DiskDecorator::getLastChanged(const String & path) const { return delegate->getLastChanged(path); }
void DiskDecorator::setReadOnly(const String & path) { delegate->setReadOnly(path); }
void DiskDecorator::createHardLink(const String & src_path, const String & dst_path) { delegate->createHardLink(src_path, dst_path); }
void DiskDecorator::truncateFile(const String & path, size_t size) { delegate->truncateFile(path, size); }

/// Executor, sync guard, freeze hook and lifecycle.
Executor & DiskDecorator::getExecutor() { return delegate->getExecutor(); }
SyncGuardPtr DiskDecorator::getDirectorySyncGuard(const String & path) const { return delegate->getDirectorySyncGuard(path); }
void DiskDecorator::onFreeze(const String & path) { delegate->onFreeze(path); }
void DiskDecorator::shutdown() { delegate->shutdown(); }
void DiskDecorator::startupImpl(ContextPtr context) { delegate->startupImpl(context); }

void DiskDecorator::applyNewSettings(
    const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map)
{
    delegate->applyNewSettings(config, context, config_prefix, map);
}

/// Object storage access.
DiskObjectStoragePtr DiskDecorator::createDiskObjectStorage() { return delegate->createDiskObjectStorage(); }
ObjectStoragePtr DiskDecorator::getObjectStorage() { return delegate->getObjectStorage(); }

/// Returns the delegate itself unless the delegate is a DiskDecorator too,
/// in which case the delegate is asked for its own nested disk.
DiskPtr DiskDecorator::getNestedDisk() const
{
    const auto * inner_decorator = dynamic_cast<const DiskDecorator *>(delegate.get());
    if (!inner_decorator)
        return delegate;
    return inner_decorator->getNestedDisk();
}

}

View File

@ -1,139 +0,0 @@
#pragma once
#include "Disks/IDisk.h"
namespace DB
{
/** Forwards all methods to another disk (the `delegate` member).
* Methods can be overridden by descendants.
* The methods defined inline below simply call the method of the same name on `delegate`.
*/
class DiskDecorator : public IDisk
{
public:
/// Wraps `delegate_`; the wrapped disk is stored in `delegate`.
explicit DiskDecorator(const DiskPtr & delegate_);
DiskTransactionPtr createTransaction() override;
const String & getName() const override;
ReservationPtr reserve(UInt64 bytes) override;
~DiskDecorator() override = default;
const String & getPath() const override;
/// Space accounting.
UInt64 getTotalSpace() const override;
UInt64 getAvailableSpace() const override;
UInt64 getUnreservedSpace() const override;
UInt64 getKeepingFreeSpace() const override;
/// Path queries.
bool exists(const String & path) const override;
bool isFile(const String & path) const override;
bool isDirectory(const String & path) const override;
size_t getFileSize(const String & path) const override;
/// Directory operations.
void createDirectory(const String & path) override;
void createDirectories(const String & path) override;
void clearDirectory(const String & path) override;
void moveDirectory(const String & from_path, const String & to_path) override;
DirectoryIteratorPtr iterateDirectory(const String & path) const override;
/// File creation, moving, copying and listing.
void createFile(const String & path) override;
void moveFile(const String & from_path, const String & to_path) override;
void replaceFile(const String & from_path, const String & to_path) override;
void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) override;
void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir) override;
void listFiles(const String & path, std::vector<String> & file_names) const override;
/// Reading and writing.
std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,
const ReadSettings & settings,
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile(
const String & path,
size_t buf_size,
WriteMode mode,
const WriteSettings & settings) override;
/// Removal.
void removeFile(const String & path) override;
void removeFileIfExists(const String & path) override;
void removeSharedFileIfExists(const String & path, bool keep_s3) override;
void removeDirectory(const String & path) override;
void removeRecursive(const String & path) override;
void removeSharedFile(const String & path, bool keep_s3) override;
void removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override;
void removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override;
/// Timestamps, permissions, links and truncation.
void setLastModified(const String & path, const Poco::Timestamp & timestamp) override;
time_t getLastChanged(const String & path) const override;
Poco::Timestamp getLastModified(const String & path) const override;
void setReadOnly(const String & path) override;
void createHardLink(const String & src_path, const String & dst_path) override;
void truncateFile(const String & path, size_t size) override;
/// NOTE(review): open/close/sync are not marked `override` and no definitions for them are visible
/// in DiskDecorator.cpp - confirm they are defined somewhere or are unused.
int open(const String & path, mode_t mode) const;
void close(int fd) const;
void sync(int fd) const;
String getUniqueId(const String & path) const override { return delegate->getUniqueId(path); }
bool checkUniqueId(const String & id) const override { return delegate->checkUniqueId(id); }
DataSourceDescription getDataSourceDescription() const override { return delegate->getDataSourceDescription(); }
/// Capability flags are taken from the delegate.
bool isRemote() const override { return delegate->isRemote(); }
bool isReadOnly() const override { return delegate->isReadOnly(); }
bool isWriteOnce() const override { return delegate->isWriteOnce(); }
bool supportZeroCopyReplication() const override { return delegate->supportZeroCopyReplication(); }
bool supportParallelWrite() const override { return delegate->supportParallelWrite(); }
void onFreeze(const String & path) override;
SyncGuardPtr getDirectorySyncGuard(const String & path) const override;
/// Lifecycle and configuration.
void shutdown() override;
void startupImpl(ContextPtr context) override;
void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) override;
/// Cache and object storage access.
bool supportsCache() const override { return delegate->supportsCache(); }
const String & getCacheBasePath() const override { return delegate->getCacheBasePath(); }
StoredObjects getStorageObjects(const String & path) const override { return delegate->getStorageObjects(path); }
void getRemotePathsRecursive(const String & path, std::vector<LocalPathWithObjectStoragePaths> & paths_map) override { return delegate->getRemotePathsRecursive(path, paths_map); }
DiskObjectStoragePtr createDiskObjectStorage() override;
ObjectStoragePtr getObjectStorage() override;
NameSet getCacheLayersNames() const override { return delegate->getCacheLayersNames(); }
/// Metadata and revisions.
MetadataStoragePtr getMetadataStorage() override { return delegate->getMetadataStorage(); }
std::unordered_map<String, String> getSerializedMetadata(const std::vector<String> & file_paths) const override { return delegate->getSerializedMetadata(file_paths); }
UInt32 getRefCount(const String & path) const override { return delegate->getRefCount(path); }
void syncRevision(UInt64 revision) override { delegate->syncRevision(revision); }
UInt64 getRevision() const override { return delegate->getRevision(); }
/// stat/chmod support.
bool supportsStat() const override { return delegate->supportsStat(); }
struct stat stat(const String & path) const override { return delegate->stat(path); }
bool supportsChmod() const override { return delegate->supportsChmod(); }
void chmod(const String & path, mode_t mode) override { delegate->chmod(path, mode); }
/// Returns the disk nested inside this decorator; virtual, so descendants may override it.
virtual DiskPtr getNestedDisk() const;
protected:
Executor & getExecutor() override;
/// The wrapped disk that all calls are forwarded to.
DiskPtr delegate;
};
/// TODO: Current reservation mechanism leaks IDisk abstraction details.
/// This hack is needed to return proper disk pointer (wrapper instead of implementation) from reservation object.
///
/// Size-related calls are forwarded to the wrapped reservation (`delegate`),
/// while getDisk()/getDisks() report the `wrapper` disk.
class ReservationDelegate : public IReservation
{
public:
    /// Both arguments are taken by value and moved into place, so no extra copy of either pointer is made.
    ReservationDelegate(ReservationPtr delegate_, DiskPtr wrapper_)
        : delegate(std::move(delegate_)), wrapper(std::move(wrapper_))
    {
    }

    UInt64 getSize() const override { return delegate->getSize(); }
    UInt64 getUnreservedSpace() const override { return delegate->getUnreservedSpace(); }

    /// The index argument is ignored: the wrapper is the only disk reported.
    DiskPtr getDisk(size_t) const override { return wrapper; }
    Disks getDisks() const override { return {wrapper}; }

    void update(UInt64 new_size) override { delegate->update(new_size); }

private:
    ReservationPtr delegate;
    DiskPtr wrapper;
};
}

View File

@ -209,7 +209,8 @@ DiskEncrypted::DiskEncrypted(
}
DiskEncrypted::DiskEncrypted(const String & name_, std::unique_ptr<const DiskEncryptedSettings> settings_)
: DiskDecorator(settings_->wrapped_disk)
: IDisk(name_)
, delegate(settings_->wrapped_disk)
, encrypted_name(name_)
, disk_path(settings_->disk_path)
, disk_absolute_path(settings_->wrapped_disk->getPath() + settings_->disk_path)

View File

@ -4,7 +4,6 @@
#if USE_SSL
#include <Disks/IDisk.h>
#include <Disks/DiskDecorator.h>
#include <Common/MultiVersion.h>
#include <Disks/FakeDiskTransaction.h>
@ -27,7 +26,7 @@ struct DiskEncryptedSettings
/// Encrypted disk ciphers all written files on the fly and writes the encrypted files to an underlying (normal) disk.
/// And when we read files from an encrypted disk it deciphers them automatically,
/// so we can work with an encrypted disk like it's a normal disk.
class DiskEncrypted : public DiskDecorator
class DiskEncrypted : public IDisk
{
public:
DiskEncrypted(const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_);
@ -252,6 +251,32 @@ public:
return std::make_shared<FakeDiskTransaction>(*this);
}
/// Total space as reported by the wrapped disk.
UInt64 getTotalSpace() const override { return delegate->getTotalSpace(); }
/// Available space as reported by the wrapped disk.
UInt64 getAvailableSpace() const override { return delegate->getAvailableSpace(); }
/// Unreserved space as reported by the wrapped disk.
UInt64 getUnreservedSpace() const override { return delegate->getUnreservedSpace(); }
/// Zero-copy replication is supported exactly when the wrapped disk supports it.
bool supportZeroCopyReplication() const override { return delegate->supportZeroCopyReplication(); }
/// Metadata storage of the wrapped disk.
MetadataStoragePtr getMetadataStorage() override { return delegate->getMetadataStorage(); }
private:
String wrappedPath(const String & path) const
{
@ -261,6 +286,7 @@ private:
return disk_path + path;
}
DiskPtr delegate;
const String encrypted_name;
const String disk_path;
const String disk_absolute_path;

View File

@ -18,7 +18,6 @@
#include <sys/stat.h>
#include <Disks/DiskFactory.h>
#include <Disks/DiskRestartProxy.h>
#include <Common/randomSeed.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromTemporaryFile.h>
@ -775,7 +774,7 @@ void registerDiskLocal(DiskFactory & factory, bool global_skip_access_check)
std::shared_ptr<IDisk> disk
= std::make_shared<DiskLocal>(name, path, keep_free_space_bytes, context, config.getUInt("local_disk_check_period_ms", 0));
disk->startup(context, skip_access_check);
return std::make_shared<DiskRestartProxy>(disk);
return disk;
};
factory.registerDiskType("local", creator);
}

View File

@ -1,378 +0,0 @@
#include "DiskRestartProxy.h"
#include <IO/ReadBufferFromFileDecorator.h>
#include <IO/WriteBufferFromFileDecorator.h>
namespace DB
{
namespace ErrorCodes
{
extern const int DEADLOCK_AVOIDED;
}
using Millis = std::chrono::milliseconds;
using Seconds = std::chrono::seconds;
/// Holds restart read lock till buffer destruction.
class RestartAwareReadBuffer : public ReadBufferFromFileDecorator
{
public:
RestartAwareReadBuffer(const DiskRestartProxy & disk, std::unique_ptr<ReadBufferFromFileBase> impl_)
: ReadBufferFromFileDecorator(std::move(impl_)), lock(disk.mutex) { }
void prefetch() override
{
swap(*impl);
impl->prefetch();
swap(*impl);
}
void setReadUntilPosition(size_t position) override
{
swap(*impl);
impl->setReadUntilPosition(position);
swap(*impl);
}
void setReadUntilEnd() override
{
swap(*impl);
impl->setReadUntilEnd();
swap(*impl);
}
String getInfoForLog() override { return impl->getInfoForLog(); }
private:
ReadLock lock;
};
/// Holds restart read lock till buffer finalize.
class RestartAwareWriteBuffer : public WriteBufferFromFileDecorator
{
public:
RestartAwareWriteBuffer(const DiskRestartProxy & disk, std::unique_ptr<WriteBuffer> impl_)
: WriteBufferFromFileDecorator(std::move(impl_)), lock(disk.mutex) { }
~RestartAwareWriteBuffer() override
{
try
{
finalize();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
void finalizeImpl() override
{
WriteBufferFromFileDecorator::finalizeImpl();
lock.unlock();
}
private:
ReadLock lock;
};
/// Wraps `delegate_`; the pointer is handed to the DiskDecorator base unchanged.
DiskRestartProxy::DiskRestartProxy(DiskPtr & delegate_)
    : DiskDecorator(delegate_)
{
}
/// Reserves `bytes` on the wrapped disk under the shared restart lock.
/// A successful reservation is re-wrapped in ReservationDelegate so that it reports this proxy
/// as its disk; an empty reservation is returned as is.
ReservationPtr DiskRestartProxy::reserve(UInt64 bytes)
{
    ReadLock restart_guard{mutex};

    auto inner_reservation = DiskDecorator::reserve(bytes);
    if (!inner_reservation)
        return inner_reservation;

    auto self = std::static_pointer_cast<DiskRestartProxy>(shared_from_this());
    return std::make_unique<ReservationDelegate>(std::move(inner_reservation), self);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::getPath().
/// NOTE(review): the returned reference is used by the caller after the lock taken here is
/// released - confirm that is acceptable across a restart.
const String & DiskRestartProxy::getPath() const
{
    ReadLock restart_guard{mutex};
    return DiskDecorator::getPath();
}
/// Takes the shared restart lock, then delegates to DiskDecorator::getTotalSpace().
UInt64 DiskRestartProxy::getTotalSpace() const
{
    ReadLock restart_guard{mutex};
    return DiskDecorator::getTotalSpace();
}
/// Takes the shared restart lock, then delegates to DiskDecorator::getAvailableSpace().
UInt64 DiskRestartProxy::getAvailableSpace() const
{
    ReadLock restart_guard{mutex};
    return DiskDecorator::getAvailableSpace();
}
/// Takes the shared restart lock, then delegates to DiskDecorator::getUnreservedSpace().
UInt64 DiskRestartProxy::getUnreservedSpace() const
{
    ReadLock restart_guard{mutex};
    return DiskDecorator::getUnreservedSpace();
}
/// Takes the shared restart lock, then delegates to DiskDecorator::getKeepingFreeSpace().
UInt64 DiskRestartProxy::getKeepingFreeSpace() const
{
    ReadLock restart_guard{mutex};
    return DiskDecorator::getKeepingFreeSpace();
}
/// Takes the shared restart lock, then delegates to DiskDecorator::exists().
bool DiskRestartProxy::exists(const String & path) const
{
    ReadLock restart_guard{mutex};
    return DiskDecorator::exists(path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::isFile().
bool DiskRestartProxy::isFile(const String & path) const
{
    ReadLock restart_guard{mutex};
    return DiskDecorator::isFile(path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::isDirectory().
bool DiskRestartProxy::isDirectory(const String & path) const
{
    ReadLock restart_guard{mutex};
    return DiskDecorator::isDirectory(path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::getFileSize().
size_t DiskRestartProxy::getFileSize(const String & path) const
{
    ReadLock restart_guard{mutex};
    return DiskDecorator::getFileSize(path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::createDirectory().
void DiskRestartProxy::createDirectory(const String & path)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::createDirectory(path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::createDirectories().
void DiskRestartProxy::createDirectories(const String & path)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::createDirectories(path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::clearDirectory().
void DiskRestartProxy::clearDirectory(const String & path)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::clearDirectory(path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::moveDirectory().
void DiskRestartProxy::moveDirectory(const String & from_path, const String & to_path)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::moveDirectory(from_path, to_path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::iterateDirectory().
/// The lock covers only the creation of the iterator, not its later use.
DirectoryIteratorPtr DiskRestartProxy::iterateDirectory(const String & path) const
{
    ReadLock restart_guard{mutex};
    return DiskDecorator::iterateDirectory(path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::createFile().
void DiskRestartProxy::createFile(const String & path)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::createFile(path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::moveFile().
void DiskRestartProxy::moveFile(const String & from_path, const String & to_path)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::moveFile(from_path, to_path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::replaceFile().
void DiskRestartProxy::replaceFile(const String & from_path, const String & to_path)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::replaceFile(from_path, to_path);
}
/// Takes the shared restart lock of this proxy, then delegates to DiskDecorator::copy().
void DiskRestartProxy::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::copy(from_path, to_disk, to_path);
}
/// Takes the shared restart lock of this proxy, then delegates to DiskDecorator::copyDirectoryContent().
void DiskRestartProxy::copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::copyDirectoryContent(from_dir, to_disk, to_dir);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::listFiles().
void DiskRestartProxy::listFiles(const String & path, std::vector<String> & file_names) const
{
    ReadLock restart_guard{mutex};
    DiskDecorator::listFiles(path, file_names);
}
/// Opens `path` on the wrapped disk under the shared restart lock and wraps the resulting buffer
/// in RestartAwareReadBuffer, which holds its own shared lock until the buffer is destroyed.
/// NOTE(review): the buffer constructor locks `mutex` in shared mode while this thread already
/// holds it in shared mode - confirm this re-entrant shared locking is acceptable for
/// std::shared_timed_mutex.
std::unique_ptr<ReadBufferFromFileBase> DiskRestartProxy::readFile(
    const String & path, const ReadSettings & settings, std::optional<size_t> read_hint, std::optional<size_t> file_size) const
{
    ReadLock restart_guard{mutex};
    return std::make_unique<RestartAwareReadBuffer>(*this, DiskDecorator::readFile(path, settings, read_hint, file_size));
}
/// Opens `path` for writing on the wrapped disk under the shared restart lock and wraps the
/// resulting buffer in RestartAwareWriteBuffer, which holds its own shared lock until finalized.
/// NOTE(review): the buffer constructor locks `mutex` in shared mode while this thread already
/// holds it in shared mode - confirm this re-entrant shared locking is acceptable for
/// std::shared_timed_mutex.
std::unique_ptr<WriteBufferFromFileBase> DiskRestartProxy::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings)
{
    ReadLock restart_guard{mutex};
    return std::make_unique<RestartAwareWriteBuffer>(*this, DiskDecorator::writeFile(path, buf_size, mode, settings));
}
/// Takes the shared restart lock, then delegates to DiskDecorator::removeFile().
void DiskRestartProxy::removeFile(const String & path)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::removeFile(path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::removeFileIfExists().
void DiskRestartProxy::removeFileIfExists(const String & path)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::removeFileIfExists(path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::removeDirectory().
void DiskRestartProxy::removeDirectory(const String & path)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::removeDirectory(path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::removeRecursive().
void DiskRestartProxy::removeRecursive(const String & path)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::removeRecursive(path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::removeSharedFile().
void DiskRestartProxy::removeSharedFile(const String & path, bool keep_s3)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::removeSharedFile(path, keep_s3);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::removeSharedFiles().
void DiskRestartProxy::removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::removeSharedFiles(files, keep_all_batch_data, file_names_remove_metadata_only);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::removeSharedRecursive().
void DiskRestartProxy::removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::removeSharedRecursive(path, keep_all_batch_data, file_names_remove_metadata_only);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::setLastModified().
void DiskRestartProxy::setLastModified(const String & path, const Poco::Timestamp & timestamp)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::setLastModified(path, timestamp);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::getLastModified().
Poco::Timestamp DiskRestartProxy::getLastModified(const String & path) const
{
    ReadLock restart_guard{mutex};
    return DiskDecorator::getLastModified(path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::setReadOnly().
void DiskRestartProxy::setReadOnly(const String & path)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::setReadOnly(path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::createHardLink().
void DiskRestartProxy::createHardLink(const String & src_path, const String & dst_path)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::createHardLink(src_path, dst_path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::truncateFile().
void DiskRestartProxy::truncateFile(const String & path, size_t size)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::truncateFile(path, size);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::getUniqueId().
String DiskRestartProxy::getUniqueId(const String & path) const
{
    ReadLock restart_guard{mutex};
    return DiskDecorator::getUniqueId(path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::checkUniqueId().
bool DiskRestartProxy::checkUniqueId(const String & id) const
{
    ReadLock restart_guard{mutex};
    return DiskDecorator::checkUniqueId(id);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::getCacheBasePath().
/// NOTE(review): the returned reference is used by the caller after the lock taken here is
/// released - confirm that is acceptable across a restart.
const String & DiskRestartProxy::getCacheBasePath() const
{
    ReadLock restart_guard{mutex};
    return DiskDecorator::getCacheBasePath();
}
/// Takes the shared restart lock, then delegates to DiskDecorator::getStorageObjects().
StoredObjects DiskRestartProxy::getStorageObjects(const String & path) const
{
    ReadLock restart_guard{mutex};
    return DiskDecorator::getStorageObjects(path);
}
/// Takes the shared restart lock, then delegates to DiskDecorator::getRemotePathsRecursive();
/// results are written into `paths_map` by the callee.
void DiskRestartProxy::getRemotePathsRecursive(
    const String & path, std::vector<LocalPathWithObjectStoragePaths> & paths_map)
{
    ReadLock restart_guard{mutex};
    DiskDecorator::getRemotePathsRecursive(path, paths_map);
}
/// Returns the disk behind this proxy. If the wrapped disk is itself a DiskDecorator,
/// the result of its getNestedDisk() is returned instead.
/// Only the copy of the `delegate` pointer is made under the shared restart lock;
/// the unwrapping happens after the lock is released.
DiskPtr DiskRestartProxy::getNestedDisk() const
{
    DiskPtr wrapped = [this]
    {
        ReadLock restart_guard{mutex};
        return delegate;
    }();

    const auto * nested_decorator = dynamic_cast<const DiskDecorator *>(wrapped.get());
    if (nested_decorator != nullptr)
        return nested_decorator->getNestedDisk();

    return wrapped;
}
/// Restarts the wrapped disk: shuts it down, waits for exclusive access and starts it again.
/// The exclusive lock is polled in 10 ms attempts for up to 120 seconds; if it still cannot be
/// taken, an Exception with code DEADLOCK_AVOIDED is thrown and the disk is not started.
void DiskRestartProxy::restart(ContextPtr context)
{
    /// Speed up processing unhealthy requests.
    DiskDecorator::shutdown();

    WriteLock exclusive{mutex, std::defer_lock};

    LOG_INFO(log, "Acquiring lock to restart disk {}", DiskDecorator::getName());

    const auto wait_started = std::chrono::steady_clock::now();
    const auto max_wait = Seconds(120);

    /// Use a small timeout to not block read operations for a long time.
    bool acquired = exclusive.try_lock_for(Millis(10));
    while (!acquired && std::chrono::steady_clock::now() - wait_started < max_wait)
        acquired = exclusive.try_lock_for(Millis(10));

    if (!acquired)
        throw Exception("Failed to acquire restart lock within timeout. Client should retry.", ErrorCodes::DEADLOCK_AVOIDED);

    LOG_INFO(log, "Restart lock acquired. Restarting disk {}", DiskDecorator::getName());

    /// NOTE: access checking will cause deadlock here, so skip it.
    DiskDecorator::startup(context, /* skip_access_check= */ true);

    LOG_INFO(log, "Disk restarted {}", DiskDecorator::getName());
}
}

View File

@ -1,86 +0,0 @@
#pragma once
#include "DiskDecorator.h"
#include <Common/logger_useful.h>
#include <shared_mutex>
namespace DB
{
using ReadLock = std::shared_lock<std::shared_timed_mutex>;
using WriteLock = std::unique_lock<std::shared_timed_mutex>;
class RestartAwareReadBuffer;
class RestartAwareWriteBuffer;
/**
* Makes it possible to change the underlying disk's settings at runtime by calling the 'restart' method.
* All disk methods are protected by a read-lock. Read/write buffers produced by the disk hold the read-lock
* until the buffer is finalized (write buffers) or destructed (read buffers).
* When the 'restart' method is called, a write-lock is acquired to make sure that no operations are running on that disk.
*/
class DiskRestartProxy : public DiskDecorator
{
public:
/// Wraps `delegate_`; all calls below are forwarded to it through DiskDecorator.
explicit DiskRestartProxy(DiskPtr & delegate_);
/// Reserves space on the wrapped disk; a successful reservation reports this proxy as its disk.
ReservationPtr reserve(UInt64 bytes) override;
/// The overrides below take the shared (read) restart lock and then call the DiskDecorator implementation.
const String & getPath() const override;
UInt64 getTotalSpace() const override;
UInt64 getAvailableSpace() const override;
UInt64 getUnreservedSpace() const override;
UInt64 getKeepingFreeSpace() const override;
bool exists(const String & path) const override;
bool isFile(const String & path) const override;
bool isDirectory(const String & path) const override;
size_t getFileSize(const String & path) const override;
void createDirectory(const String & path) override;
void createDirectories(const String & path) override;
void clearDirectory(const String & path) override;
void moveDirectory(const String & from_path, const String & to_path) override;
DirectoryIteratorPtr iterateDirectory(const String & path) const override;
void createFile(const String & path) override;
void moveFile(const String & from_path, const String & to_path) override;
void replaceFile(const String & from_path, const String & to_path) override;
void copy(const String & from_path, const DiskPtr & to_disk, const String & to_path) override;
void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir) override;
void listFiles(const String & path, std::vector<String> & file_names) const override;
/// The returned buffer is a RestartAwareReadBuffer, which keeps the read-lock until it is destroyed.
std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,
const ReadSettings & settings,
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
/// The returned buffer is a RestartAwareWriteBuffer, which keeps the read-lock until it is finalized.
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) override;
void removeFile(const String & path) override;
void removeFileIfExists(const String & path) override;
void removeDirectory(const String & path) override;
void removeRecursive(const String & path) override;
void removeSharedFile(const String & path, bool keep_s3) override;
void removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override;
void removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override;
void setLastModified(const String & path, const Poco::Timestamp & timestamp) override;
Poco::Timestamp getLastModified(const String & path) const override;
void setReadOnly(const String & path) override;
void createHardLink(const String & src_path, const String & dst_path) override;
void truncateFile(const String & path, size_t size) override;
String getUniqueId(const String & path) const override;
bool checkUniqueId(const String & id) const override;
const String & getCacheBasePath() const override;
StoredObjects getStorageObjects(const String & path) const override;
void getRemotePathsRecursive(const String & path, std::vector<LocalPathWithObjectStoragePaths> & paths_map) override;
/// Shuts the wrapped disk down, waits (with a bounded timeout) for the write-lock and starts the disk again.
/// Throws an Exception with code DEADLOCK_AVOIDED if the write-lock cannot be acquired in time.
void restart(ContextPtr context);
/// Returns the wrapped disk, unwrapping it further if it is itself a DiskDecorator.
DiskPtr getNestedDisk() const override;
private:
/// The buffers lock `mutex` directly, hence the friendship.
friend class RestartAwareReadBuffer;
friend class RestartAwareWriteBuffer;
/// Mutex to protect RW access.
mutable std::shared_timed_mutex mutex;
Poco::Logger * log = &Poco::Logger::get("DiskRestartProxy");
};
}

View File

@ -256,15 +256,15 @@ public:
/// For one local path there might be multiple remote paths in case of Log family engines.
struct LocalPathWithObjectStoragePaths
{
std::string local_path;
std::string common_prefix_for_objects;
StoredObjects objects;
{
std::string local_path;
std::string common_prefix_for_objects;
StoredObjects objects;
LocalPathWithObjectStoragePaths(
const std::string & local_path_, const std::string & common_prefix_for_objects_, StoredObjects && objects_)
: local_path(local_path_), common_prefix_for_objects(common_prefix_for_objects_), objects(std::move(objects_)) {}
};
LocalPathWithObjectStoragePaths(
const std::string & local_path_, const std::string & common_prefix_for_objects_, StoredObjects && objects_)
: local_path(local_path_), common_prefix_for_objects(common_prefix_for_objects_), objects(std::move(objects_)) {}
};
virtual void getRemotePathsRecursive(const String &, std::vector<LocalPathWithObjectStoragePaths> &)
{

View File

@ -1,4 +1,5 @@
#include <Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h>
#include "Common/Exception.h"
#if USE_AZURE_BLOB_STORAGE
@ -176,7 +177,9 @@ void AzureObjectStorage::removeObject(const StoredObject & object)
auto client_ptr = client.get();
auto delete_info = client_ptr->DeleteBlob(path);
if (!delete_info.Value.Deleted)
throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file in AzureBlob Storage: {}", path);
throw Exception(
ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file (path: {}) in AzureBlob Storage, reason: {}",
path, delete_info.RawResponse ? delete_info.RawResponse->GetReasonPhrase() : "Unknown");
}
void AzureObjectStorage::removeObjects(const StoredObjects & objects)
@ -187,21 +190,49 @@ void AzureObjectStorage::removeObjects(const StoredObjects & objects)
LOG_TEST(log, "Removing object: {} (total: {})", object.absolute_path, objects.size());
auto delete_info = client_ptr->DeleteBlob(object.absolute_path);
if (!delete_info.Value.Deleted)
throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file in AzureBlob Storage: {}", object.absolute_path);
throw Exception(
ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file (path: {}) in AzureBlob Storage, reason: {}",
object.absolute_path, delete_info.RawResponse ? delete_info.RawResponse->GetReasonPhrase() : "Unknown");
}
}
/// Deletes the blob at `object.absolute_path`, treating "blob does not exist" as success.
/// A StorageException with HTTP status NotFound raised by the guarded DeleteBlob call is swallowed;
/// any other StorageException is logged and rethrown to the caller.
/// NOTE(review): this span is rendered from a diff; the DeleteBlob call placed before the `try`
/// looks like the pre-change line rather than part of the final function - confirm against the real file.
void AzureObjectStorage::removeObjectIfExists(const StoredObject & object)
{
auto client_ptr = client.get();
auto delete_info = client_ptr->DeleteBlob(object.absolute_path);
try
{
LOG_TEST(log, "Removing single object: {}", object.absolute_path);
auto delete_info = client_ptr->DeleteBlob(object.absolute_path);
}
catch (const Azure::Storage::StorageException & e)
{
/// If the object doesn't exist there is nothing left to delete, so this is not an error.
if (e.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound)
return;
/// Anything else is a real failure: log it here and let the caller handle it.
tryLogCurrentException(__PRETTY_FUNCTION__);
throw;
}
}
/// Deletes every blob listed in `objects`, tolerating blobs that are already gone.
/// For each object a DeleteBlob request is issued on `object.absolute_path`:
///  - a StorageException with HTTP status NotFound is ignored and processing moves on to the next object;
///  - any other StorageException is logged and rethrown, which stops the batch at that object.
void AzureObjectStorage::removeObjectsIfExist(const StoredObjects & objects)
{
    auto client_ptr = client.get();
    for (const auto & object : objects)
    {
        try
        {
            auto delete_info = client_ptr->DeleteBlob(object.absolute_path);
        }
        catch (const Azure::Storage::StorageException & e)
        {
            /// If object doesn't exist, skip it and keep going. Returning here would silently
            /// leave all the remaining objects of the batch undeleted.
            if (e.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound)
                continue;
            tryLogCurrentException(__PRETTY_FUNCTION__);
            throw;
        }
    }
}

View File

@ -4,8 +4,6 @@
#if USE_AZURE_BLOB_STORAGE
#include <Disks/DiskRestartProxy.h>
#include <Disks/ObjectStorages/DiskObjectStorageCommon.h>
#include <Disks/ObjectStorages/DiskObjectStorage.h>
@ -51,7 +49,7 @@ void registerDiskAzureBlobStorage(DiskFactory & factory, bool global_skip_access
bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false);
azure_blob_storage_disk->startup(context, skip_access_check);
return std::make_shared<DiskRestartProxy>(azure_blob_storage_disk);
return azure_blob_storage_disk;
};
factory.registerDiskType("azure_blob_storage", creator);

View File

@ -133,8 +133,13 @@ struct RemoveObjectStorageOperation final : public IDiskObjectStorageOperation
void finalize() override
{
/// The client for an object storage may do retries internally,
/// and there could be a situation when a query succeeded but the response was lost
/// due to a network error or similar. When the client then retries the operation it may receive
/// a 404 HTTP code. We don't want to treat this code as a real error for the deletion process
/// (e.g. by throwing an exception), and thus we use the method `removeObjectsIfExist`.
if (!delete_metadata_only && !objects_to_remove.empty())
object_storage.removeObjects(objects_to_remove);
object_storage.removeObjectsIfExist(objects_to_remove);
}
};
@ -213,8 +218,10 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati
void finalize() override
{
/// Read the comment inside the RemoveObjectStorageOperation class.
/// TL;DR: a 404 status code during deletion is not treated as an error,
/// which is why the "if exist" removal method is used here.
if (!objects_to_remove.empty())
object_storage.removeObjects(objects_to_remove);
object_storage.removeObjectsIfExist(objects_to_remove);
}
};
@ -307,7 +314,9 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp
remove_from_remote.insert(remove_from_remote.end(), remote_paths.begin(), remote_paths.end());
}
}
object_storage.removeObjects(remove_from_remote);
/// Read comment inside RemoveObjectStorageOperation class
/// TL;DR Don't pay any attention to 404 status code
object_storage.removeObjectsIfExist(remove_from_remote);
}
}
};
@ -352,8 +361,10 @@ struct ReplaceFileObjectStorageOperation final : public IDiskObjectStorageOperat
void finalize() override
{
/// Read the comment inside the RemoveObjectStorageOperation class.
/// TL;DR: a 404 status code during deletion is not treated as an error,
/// which is why the "if exist" removal method is used here.
if (!objects_to_remove.empty())
object_storage.removeObjects(objects_to_remove);
object_storage.removeObjectsIfExist(objects_to_remove);
}
};

View File

@ -3,7 +3,6 @@
#include <Disks/ObjectStorages/DiskObjectStorage.h>
#include <Disks/ObjectStorages/MetadataStorageFromDisk.h>
#include <Disks/DiskFactory.h>
#include <Disks/DiskRestartProxy.h>
#include <Storages/HDFS/HDFSCommon.h>
namespace DB
@ -55,7 +54,7 @@ void registerDiskHDFS(DiskFactory & factory, bool global_skip_access_check)
copy_thread_pool_size);
disk->startup(context, skip_access_check);
return std::make_shared<DiskRestartProxy>(disk);
return disk;
};
factory.registerDiskType("hdfs", creator);

View File

@ -20,7 +20,6 @@
#include <Disks/ObjectStorages/S3/ProxyListConfiguration.h>
#include <Disks/ObjectStorages/S3/ProxyResolverConfiguration.h>
#include <Disks/ObjectStorages/DiskObjectStorageCommon.h>
#include <Disks/DiskRestartProxy.h>
#include <Disks/DiskLocal.h>
namespace DB

View File

@ -11,7 +11,6 @@
#include <aws/core/client/DefaultRetryStrategy.h>
#include <base/getFQDNOrHostName.h>
#include <Disks/DiskRestartProxy.h>
#include <Disks/DiskLocal.h>
#include <Disks/ObjectStorages/IMetadataStorage.h>
#include <Disks/ObjectStorages/DiskObjectStorage.h>
@ -166,9 +165,7 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check)
s3disk->startup(context, skip_access_check);
std::shared_ptr<IDisk> disk_result = s3disk;
return std::make_shared<DiskRestartProxy>(disk_result);
return s3disk;
};
factory.registerDiskType("s3", creator);
factory.registerDiskType("s3_plain", creator);

View File

@ -427,9 +427,11 @@ String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, Fo
break;
case FormatSettings::EscapingRule::JSON:
result += fmt::format(
", try_infer_numbers_from_strings={}, read_bools_as_numbers={}, try_infer_objects={}",
", try_infer_numbers_from_strings={}, read_bools_as_numbers={}, read_objects_as_strings={}, read_numbers_as_strings={}, try_infer_objects={}",
settings.json.try_infer_numbers_from_strings,
settings.json.read_bools_as_numbers,
settings.json.read_objects_as_strings,
settings.json.read_numbers_as_strings,
settings.json.try_infer_objects);
break;
default:

View File

@ -317,6 +317,9 @@ static void addExistingProgressToOutputFormat(OutputFormatPtr format, ContextPtr
auto current_progress = element_id->getProgressIn();
Progress read_progress{current_progress.read_rows, current_progress.read_bytes, current_progress.total_rows_to_read};
format->onProgress(read_progress);
/// Update the start of the statistics to use the start of the query, and not the creation of the format class
format->setStartTime(element_id->getQueryCPUStartTime(), true);
}
}

View File

@ -833,7 +833,7 @@ namespace
if (settings.json.try_infer_objects)
return std::make_shared<DataTypeObject>("json", true);
if (settings.json.read_objects_as_strings)
return makeNullable(std::make_shared<DataTypeString>());
return std::make_shared<DataTypeString>();
return nullptr;
}

View File

@ -88,7 +88,7 @@ void ApplyWithGlobalVisitor::visit(ASTPtr & ast)
if (auto * ast_with_alias = dynamic_cast<ASTWithAlias *>(child.get()))
exprs[ast_with_alias->alias] = child;
}
for (auto it = node_union->list_of_selects->children.begin() + 1; it != node_union->list_of_selects->children.end(); ++it)
for (auto * it = node_union->list_of_selects->children.begin() + 1; it != node_union->list_of_selects->children.end(); ++it)
{
if (auto * union_child = (*it)->as<ASTSelectWithUnionQuery>())
visit(*union_child, exprs, with_expression_list);

View File

@ -17,7 +17,7 @@ class ComparisonGraph
{
public:
/// atomic_formulas are extracted from constraints.
explicit ComparisonGraph(const std::vector<ASTPtr> & atomic_formulas);
explicit ComparisonGraph(const ASTs & atomic_formulas);
enum class CompareResult
{
@ -43,7 +43,7 @@ public:
bool isAlwaysCompare(CompareResult expected, const ASTPtr & left, const ASTPtr & right) const;
/// Returns all expressions from component to which @ast belongs if any.
std::vector<ASTPtr> getEqual(const ASTPtr & ast) const;
ASTs getEqual(const ASTPtr & ast) const;
/// Returns constant expression from component to which @ast belongs if any.
std::optional<ASTPtr> getEqualConst(const ASTPtr & ast) const;
@ -52,7 +52,7 @@ public:
std::optional<std::size_t> getComponentId(const ASTPtr & ast) const;
/// Returns all expressions from component.
std::vector<ASTPtr> getComponent(size_t id) const;
ASTs getComponent(size_t id) const;
size_t getNumOfComponents() const { return graph.vertices.size(); }
@ -72,7 +72,7 @@ private:
struct EqualComponent
{
/// All these expressions are considered as equal.
std::vector<ASTPtr> asts;
ASTs asts;
std::optional<size_t> constant_index;
bool hasConstant() const;

View File

@ -31,7 +31,6 @@
#include <Storages/CompressionCodecSelector.h>
#include <Storages/StorageS3Settings.h>
#include <Disks/DiskLocal.h>
#include <Disks/DiskDecorator.h>
#include <Disks/ObjectStorages/DiskObjectStorage.h>
#include <Disks/ObjectStorages/IObjectStorage.h>
#include <Disks/IO/ThreadPoolRemoteFSReader.h>
@ -826,8 +825,6 @@ void Context::setTemporaryStoragePolicy(const String & policy_name, size_t max_s
/// Check that underlying disk is local (can be wrapped in decorator)
DiskPtr disk_ptr = disk;
if (const auto * disk_decorator = dynamic_cast<const DiskDecorator *>(disk_ptr.get()))
disk_ptr = disk_decorator->getNestedDisk();
if (dynamic_cast<const DiskLocal *>(disk_ptr.get()) == nullptr)
{

View File

@ -149,7 +149,7 @@ ASTPtr makeOnExpression(const std::vector<ASTPtr> & expressions)
if (expressions.size() == 1)
return expressions[0]->clone();
std::vector<ASTPtr> arguments;
ASTs arguments;
arguments.reserve(expressions.size());
for (const auto & ast : expressions)
arguments.emplace_back(ast->clone());

View File

@ -348,7 +348,7 @@ void splitConjunctionsAst(const ASTPtr & node, ASTs & result)
ASTs splitConjunctionsAst(const ASTPtr & node)
{
std::vector<ASTPtr> result;
ASTs result;
splitConjunctionsAst(node, result);
return result;
}

View File

@ -227,7 +227,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
if ((create.storage->engine->name == "MaterializeMySQL" || create.storage->engine->name == "MaterializedMySQL")
&& !getContext()->getSettingsRef().allow_experimental_database_materialized_mysql
&& !internal)
&& !internal && !create.attach)
{
throw Exception("MaterializedMySQL is an experimental database engine. "
"Enable allow_experimental_database_materialized_mysql to use it.", ErrorCodes::UNKNOWN_DATABASE_ENGINE);
@ -235,7 +235,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
if (create.storage->engine->name == "Replicated"
&& !getContext()->getSettingsRef().allow_experimental_database_replicated
&& !internal)
&& !internal && !create.attach)
{
throw Exception("Replicated is an experimental database engine. "
"Enable allow_experimental_database_replicated to use it.", ErrorCodes::UNKNOWN_DATABASE_ENGINE);
@ -243,7 +243,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
if (create.storage->engine->name == "MaterializedPostgreSQL"
&& !getContext()->getSettingsRef().allow_experimental_database_materialized_postgresql
&& !internal)
&& !internal && !create.attach)
{
throw Exception("MaterializedPostgreSQL is an experimental database engine. "
"Enable allow_experimental_database_materialized_postgresql to use it.", ErrorCodes::UNKNOWN_DATABASE_ENGINE);
@ -559,7 +559,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
ColumnsDescription res;
auto name_type_it = column_names_and_types.begin();
for (auto ast_it = columns_ast.children.begin(); ast_it != columns_ast.children.end(); ++ast_it, ++name_type_it)
for (const auto * ast_it = columns_ast.children.begin(); ast_it != columns_ast.children.end(); ++ast_it, ++name_type_it)
{
ColumnDescription column;

View File

@ -23,6 +23,7 @@ namespace ErrorCodes
{
extern const int TABLE_IS_READ_ONLY;
extern const int SUPPORT_IS_DISABLED;
extern const int BAD_ARGUMENTS;
}
@ -58,8 +59,7 @@ BlockIO InterpreterDeleteQuery::execute()
auto table_lock = table->lockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout);
auto metadata_snapshot = table->getInMemoryMetadataPtr();
auto merge_tree = std::dynamic_pointer_cast<MergeTreeData>(table);
if (!merge_tree)
if (table->supportsDelete())
{
/// Convert to MutationCommand
MutationCommands mutation_commands;
@ -75,39 +75,45 @@ BlockIO InterpreterDeleteQuery::execute()
table->mutate(mutation_commands, getContext());
return {};
}
else if (table->supportsLightweightDelete())
{
if (!getContext()->getSettingsRef().allow_experimental_lightweight_delete)
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Lightweight delete mutate is experimental. Set `allow_experimental_lightweight_delete` setting to enable it");
if (!getContext()->getSettingsRef().allow_experimental_lightweight_delete)
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Lightweight delete mutate is experimental. Set `allow_experimental_lightweight_delete` setting to enable it");
/// Convert to MutationCommand
MutationCommands mutation_commands;
MutationCommand mut_command;
/// Convert to MutationCommand
MutationCommands mutation_commands;
MutationCommand mut_command;
/// Build "UPDATE _row_exists = 0 WHERE predicate" query
mut_command.type = MutationCommand::Type::UPDATE;
mut_command.predicate = delete_query.predicate;
/// Build "UPDATE _row_exists = 0 WHERE predicate" query
mut_command.type = MutationCommand::Type::UPDATE;
mut_command.predicate = delete_query.predicate;
auto command = std::make_shared<ASTAlterCommand>();
command->type = ASTAlterCommand::UPDATE;
command->predicate = delete_query.predicate;
command->update_assignments = std::make_shared<ASTExpressionList>();
auto set_row_does_not_exist = std::make_shared<ASTAssignment>();
set_row_does_not_exist->column_name = LightweightDeleteDescription::FILTER_COLUMN.name;
auto zero_value = std::make_shared<ASTLiteral>(DB::Field(UInt8(0)));
set_row_does_not_exist->children.push_back(zero_value);
command->update_assignments->children.push_back(set_row_does_not_exist);
command->children.push_back(command->predicate);
command->children.push_back(command->update_assignments);
mut_command.column_to_update_expression[set_row_does_not_exist->column_name] = zero_value;
mut_command.ast = command->ptr();
auto command = std::make_shared<ASTAlterCommand>();
command->type = ASTAlterCommand::UPDATE;
command->predicate = delete_query.predicate;
command->update_assignments = std::make_shared<ASTExpressionList>();
auto set_row_does_not_exist = std::make_shared<ASTAssignment>();
set_row_does_not_exist->column_name = LightweightDeleteDescription::FILTER_COLUMN.name;
auto zero_value = std::make_shared<ASTLiteral>(DB::Field(UInt8(0)));
set_row_does_not_exist->children.push_back(zero_value);
command->update_assignments->children.push_back(set_row_does_not_exist);
command->children.push_back(command->predicate);
command->children.push_back(command->update_assignments);
mut_command.column_to_update_expression[set_row_does_not_exist->column_name] = zero_value;
mut_command.ast = command->ptr();
mutation_commands.emplace_back(mut_command);
mutation_commands.emplace_back(mut_command);
table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef());
MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), false).validate();
table->mutate(mutation_commands, getContext());
table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef());
MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), false).validate();
table->mutate(mutation_commands, getContext());
return {};
return {};
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "DELETE query is not supported for table");
}
}
}

View File

@ -42,7 +42,6 @@
#include <Access/Common/AllowedClientHosts.h>
#include <Databases/IDatabase.h>
#include <Databases/DatabaseReplicated.h>
#include <Disks/DiskRestartProxy.h>
#include <Storages/StorageDistributed.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/Freeze.h>
@ -62,6 +61,7 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
@ -483,6 +483,9 @@ BlockIO InterpreterSystemQuery::execute()
case Type::DROP_REPLICA:
dropReplica(query);
break;
case Type::DROP_DATABASE_REPLICA:
dropDatabaseReplica(query);
break;
case Type::SYNC_REPLICA:
syncReplica(query);
break;
@ -506,7 +509,6 @@ BlockIO InterpreterSystemQuery::execute()
break;
case Type::RESTART_DISK:
restartDisk(query.disk);
break;
case Type::FLUSH_LOGS:
{
getContext()->checkAccess(AccessType::SYSTEM_FLUSH_LOGS);
@ -781,6 +783,75 @@ bool InterpreterSystemQuery::dropReplicaImpl(ASTSystemQuery & query, const Stora
return true;
}
/// Handles the DROP_DATABASE_REPLICA system query: removes the replica named `query.replica`
/// from one Replicated database, from every Replicated database on this server,
/// or from the database identified by an explicit ZooKeeper path.
/// Throws BAD_ARGUMENTS for an empty replica name or a non-Replicated target database,
/// TABLE_WAS_NOT_DROPPED when the replica to drop is a local database itself,
/// and LOGICAL_ERROR when the query specifies none of the three supported forms.
void InterpreterSystemQuery::dropDatabaseReplica(ASTSystemQuery & query)
{
    if (query.replica.empty())
        throw Exception("Replica name is empty", ErrorCodes::BAD_ARGUMENTS);

    /// Refuses to continue when `local_db` has the replica name requested in the query
    /// (and, if the query gives a ZooKeeper path, the same path as well).
    auto check_not_local_replica = [](const DatabaseReplicated * local_db, const ASTSystemQuery & system_query)
    {
        const bool zk_path_differs = !system_query.replica_zk_path.empty()
            && fs::path(local_db->getZooKeeperPath()) != fs::path(system_query.replica_zk_path);

        /// A different path or a different replica name means this is not the local replica.
        if (zk_path_differs || local_db->getFullReplicaName() != system_query.replica)
            return;

        throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "There is a local database {}, which has the same path in ZooKeeper "
            "and the same replica name. Please check the path in query. "
            "If you want to drop replica of this database, use `DROP DATABASE`", local_db->getDatabaseName());
    };

    /// Form 1: a concrete database is named in the query.
    if (query.database)
    {
        getContext()->checkAccess(AccessType::SYSTEM_DROP_REPLICA, query.getDatabase());
        DatabasePtr target = DatabaseCatalog::instance().getDatabase(query.getDatabase());

        auto * replicated_db = dynamic_cast<DatabaseReplicated *>(target.get());
        if (!replicated_db)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database {} is not Replicated, cannot drop replica", query.getDatabase());

        check_not_local_replica(replicated_db, query);
        DatabaseReplicated::dropReplica(replicated_db, replicated_db->getZooKeeperPath(), query.replica);

        LOG_TRACE(log, "Dropped replica {} of Replicated database {}", query.replica, backQuoteIfNeed(target->getDatabaseName()));
        return;
    }

    /// Form 2: drop the replica from every Replicated database the user is allowed to touch.
    if (query.is_drop_whole_replica)
    {
        auto all_databases = DatabaseCatalog::instance().getDatabases();
        auto access = getContext()->getAccess();
        const bool granted_for_all = access->isGranted(AccessType::SYSTEM_DROP_REPLICA);

        for (auto & entry : all_databases)
        {
            const auto & database_name = entry.first;
            DatabasePtr & candidate = entry.second;

            auto * replicated_db = dynamic_cast<DatabaseReplicated *>(candidate.get());
            if (replicated_db == nullptr)
                continue;

            /// Per-database grants are consulted only when there is no global grant.
            const bool allowed = granted_for_all || access->isGranted(AccessType::SYSTEM_DROP_REPLICA, database_name);
            if (!allowed)
            {
                LOG_INFO(log, "Access {} denied, skipping database {}", "SYSTEM DROP REPLICA", database_name);
                continue;
            }

            check_not_local_replica(replicated_db, query);
            DatabaseReplicated::dropReplica(replicated_db, replicated_db->getZooKeeperPath(), query.replica);
            LOG_TRACE(log, "Dropped replica {} of Replicated database {}", query.replica, backQuoteIfNeed(candidate->getDatabaseName()));
        }
        return;
    }

    /// Neither a database, nor "whole replica", nor a ZooKeeper path: nothing to act on.
    if (query.replica_zk_path.empty())
        throw Exception("Invalid query", ErrorCodes::LOGICAL_ERROR);

    /// Form 3: the database is identified only by its ZooKeeper path.
    getContext()->checkAccess(AccessType::SYSTEM_DROP_REPLICA);

    /// This check is actually redundant, but it may protect against some user mistakes
    for (auto & entry : DatabaseCatalog::instance().getDatabases())
    {
        if (auto * replicated_db = dynamic_cast<DatabaseReplicated *>(entry.second.get()))
            check_not_local_replica(replicated_db, query);
    }

    DatabaseReplicated::dropReplica(nullptr, query.replica_zk_path, query.replica);
    LOG_INFO(log, "Dropped replica {} of Replicated database with path {}", query.replica, query.replica_zk_path);
}
void InterpreterSystemQuery::syncReplica(ASTSystemQuery &)
{
getContext()->checkAccess(AccessType::SYSTEM_SYNC_REPLICA, table_id);
@ -840,16 +911,10 @@ void InterpreterSystemQuery::flushDistributed(ASTSystemQuery &)
throw Exception("Table " + table_id.getNameForLogs() + " is not distributed", ErrorCodes::BAD_ARGUMENTS);
}
void InterpreterSystemQuery::restartDisk(String & name)
[[noreturn]] void InterpreterSystemQuery::restartDisk(String &)
{
getContext()->checkAccess(AccessType::SYSTEM_RESTART_DISK);
auto disk = getContext()->getDisk(name);
if (DiskRestartProxy * restart_proxy = dynamic_cast<DiskRestartProxy*>(disk.get()))
restart_proxy->restart(getContext());
else
throw Exception("Disk " + name + " doesn't have possibility to restart", ErrorCodes::BAD_ARGUMENTS);
throw Exception("SYSTEM RESTART DISK is not supported", ErrorCodes::NOT_IMPLEMENTED);
}
@ -981,6 +1046,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
break;
}
case Type::DROP_REPLICA:
case Type::DROP_DATABASE_REPLICA:
{
required_access.emplace_back(AccessType::SYSTEM_DROP_REPLICA, query.getDatabase(), query.getTable());
break;

View File

@ -66,8 +66,9 @@ private:
void dropReplica(ASTSystemQuery & query);
bool dropReplicaImpl(ASTSystemQuery & query, const StoragePtr & table);
void dropDatabaseReplica(ASTSystemQuery & query);
void flushDistributed(ASTSystemQuery & query);
void restartDisk(String & name);
[[noreturn]] void restartDisk(String & name);
AccessRightsElements getRequiredAccessForDDLOnCluster() const;
void startStopAction(StorageActionBlockType action_type, bool start);

View File

@ -153,7 +153,7 @@ private:
data.addTableColumns(identifier.name(), columns);
// QualifiedAsterisk's transformers start to appear at child 1
for (auto it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it)
for (const auto * it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it)
{
IASTColumnsTransformer::transform(*it, columns);
}
@ -209,7 +209,7 @@ struct RewriteTablesVisitorData
{
if (done)
return;
std::vector<ASTPtr> new_tables{left, right};
ASTs new_tables{left, right};
ast->children.swap(new_tables);
done = true;
}

View File

@ -313,7 +313,7 @@ void LogicalExpressionsOptimizer::cleanupOrExpressions()
for (const auto & entry : garbage_map)
{
const auto * function = entry.first;
auto first_erased = entry.second;
auto * first_erased = entry.second;
auto & operands = getFunctionOperands(function);
operands.erase(first_erased, operands.end());

View File

@ -155,7 +155,7 @@ static ColumnsDescription createColumnsDescription(const NamesAndTypesList & col
/// but this produces endless recursion in gcc-11 and leads to SIGSEGV
/// (see git blame for details).
auto column_name_and_type = columns_name_and_type.begin();
auto declare_column_ast = columns_definition->children.begin();
const auto * declare_column_ast = columns_definition->children.begin();
for (; column_name_and_type != columns_name_and_type.end(); column_name_and_type++, declare_column_ast++)
{
const auto & declare_column = (*declare_column_ast)->as<MySQLParser::ASTDeclareColumn>();

View File

@ -69,7 +69,8 @@ static bool isUnlimitedQuery(const IAST * ast)
}
ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr query_context)
ProcessList::EntryPtr
ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr query_context, UInt64 watch_start_nanoseconds)
{
EntryPtr res;
@ -218,7 +219,6 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as
user_process_list.user_temp_data_on_disk, settings.max_temporary_data_on_disk_size_for_query));
}
thread_group->query = query_;
thread_group->one_line_query = toOneLineQuery(query_);
thread_group->normalized_query_hash = normalizedQueryHash<false>(query_);
/// Set query-level memory trackers
@ -243,13 +243,16 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as
/// since allocation and deallocation could happen in different threads
}
auto process_it = processes.emplace(processes.end(), std::make_shared<QueryStatus>(
query_context,
query_,
client_info,
priorities.insert(static_cast<int>(settings.priority)),
std::move(thread_group),
query_kind));
auto process_it = processes.emplace(
processes.end(),
std::make_shared<QueryStatus>(
query_context,
query_,
client_info,
priorities.insert(static_cast<int>(settings.priority)),
std::move(thread_group),
query_kind,
watch_start_nanoseconds));
increaseQueryKindAmount(query_kind);
@ -344,11 +347,13 @@ QueryStatus::QueryStatus(
const ClientInfo & client_info_,
QueryPriorities::Handle && priority_handle_,
ThreadGroupStatusPtr && thread_group_,
IAST::QueryKind query_kind_)
IAST::QueryKind query_kind_,
UInt64 watch_start_nanoseconds)
: WithContext(context_)
, query(query_)
, client_info(client_info_)
, thread_group(std::move(thread_group_))
, watch(CLOCK_MONOTONIC, watch_start_nanoseconds, true)
, priority_handle(std::move(priority_handle_))
, global_overcommit_tracker(context_->getGlobalOvercommitTracker())
, query_kind(query_kind_)
@ -522,7 +527,7 @@ QueryStatusInfo QueryStatus::getInfo(bool get_thread_list, bool get_profile_even
res.query = query;
res.client_info = client_info;
res.elapsed_seconds = watch.elapsedSeconds();
res.elapsed_microseconds = watch.elapsedMicroseconds();
res.is_cancelled = is_killed.load(std::memory_order_relaxed);
res.is_all_data_sent = is_all_data_sent.load(std::memory_order_relaxed);
res.read_rows = progress_in.read_rows;

View File

@ -53,7 +53,7 @@ class ProcessListEntry;
struct QueryStatusInfo
{
String query;
double elapsed_seconds;
UInt64 elapsed_microseconds;
size_t read_rows;
size_t read_bytes;
size_t total_rows;
@ -142,15 +142,14 @@ protected:
CurrentMetrics::Increment num_queries_increment;
public:
QueryStatus(
ContextPtr context_,
const String & query_,
const ClientInfo & client_info_,
QueryPriorities::Handle && priority_handle_,
ThreadGroupStatusPtr && thread_group_,
IAST::QueryKind query_kind_
);
IAST::QueryKind query_kind_,
UInt64 watch_start_nanoseconds);
~QueryStatus();
@ -221,6 +220,9 @@ public:
bool checkTimeLimit();
/// Same as checkTimeLimit but it never throws
[[nodiscard]] bool checkTimeLimitSoft();
/// Get the reference for the start of the query. Used to synchronize with other Stopwatches
UInt64 getQueryCPUStartTime() { return watch.getStart(); }
};
using QueryStatusPtr = std::shared_ptr<QueryStatus>;
@ -382,7 +384,7 @@ public:
* If timeout is passed - throw an exception.
* Don't count KILL QUERY queries.
*/
EntryPtr insert(const String & query_, const IAST * ast, ContextMutablePtr query_context);
EntryPtr insert(const String & query_, const IAST * ast, ContextMutablePtr query_context, UInt64 watch_start_nanoseconds);
/// Number of currently executing queries.
size_t size() const { return processes.size(); }

View File

@ -97,7 +97,7 @@ void CurrentThread::defaultThreadDeleter()
void ThreadStatus::setupState(const ThreadGroupStatusPtr & thread_group_)
{
assertState({ThreadState::DetachedFromQuery}, __PRETTY_FUNCTION__);
assertState(ThreadState::DetachedFromQuery, __PRETTY_FUNCTION__);
/// Attach or init current thread to thread group and copy useful information from it
thread_group = thread_group_;
@ -324,7 +324,7 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits)
return;
}
assertState({ThreadState::AttachedToQuery}, __PRETTY_FUNCTION__);
assertState(ThreadState::AttachedToQuery, __PRETTY_FUNCTION__);
finalizeQueryProfiler();
finalizePerformanceCounters();

View File

@ -97,9 +97,6 @@ void TraceCollector::run()
Int64 size;
readPODBinary(size, in);
UInt64 ptr;
readPODBinary(ptr, in);
ProfileEvents::Event event;
readPODBinary(event, in);
@ -115,7 +112,7 @@ void TraceCollector::run()
UInt64 time = static_cast<UInt64>(ts.tv_sec * 1000000000LL + ts.tv_nsec);
UInt64 time_in_microseconds = static_cast<UInt64>((ts.tv_sec * 1000000LL) + (ts.tv_nsec / 1000));
TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, ptr, event, increment};
TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, event, increment};
trace_log->add(element);
}
}

View File

@ -38,7 +38,6 @@ NamesAndTypesList TraceLogElement::getNamesAndTypes()
{"query_id", std::make_shared<DataTypeString>()},
{"trace", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
{"size", std::make_shared<DataTypeInt64>()},
{"ptr", std::make_shared<DataTypeUInt64>()},
{"event", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"increment", std::make_shared<DataTypeInt64>()},
};
@ -58,7 +57,6 @@ void TraceLogElement::appendToBlock(MutableColumns & columns) const
columns[i++]->insertData(query_id.data(), query_id.size());
columns[i++]->insert(trace);
columns[i++]->insert(size);
columns[i++]->insert(ptr);
String event_name;
if (event != ProfileEvents::end())

View File

@ -27,10 +27,8 @@ struct TraceLogElement
UInt64 thread_id{};
String query_id{};
Array trace{};
/// Allocation size in bytes for TraceType::Memory and TraceType::MemorySample.
/// Allocation size in bytes for TraceType::Memory.
Int64 size{};
/// Allocation ptr for TraceType::MemorySample.
UInt64 ptr{};
/// ProfileEvent for TraceType::ProfileEvent.
ProfileEvents::Event event{ProfileEvents::end()};
/// Increment of profile event for TraceType::ProfileEvent.

View File

@ -299,7 +299,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
}
// QualifiedAsterisk's transformers start to appear at child 1
for (auto it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it)
for (const auto * it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it)
{
IASTColumnsTransformer::transform(*it, columns);
}

Some files were not shown because too many files have changed in this diff Show More