Merge branch 'master' into rewrite_uniq_to_count

This commit is contained in:
Yarik Briukhovetskyi 2023-07-21 15:30:03 +02:00 committed by GitHub
commit e7b9b42326
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
329 changed files with 6150 additions and 2375 deletions

View File

@ -306,7 +306,7 @@ namespace Net
DEFAULT_KEEP_ALIVE_TIMEOUT = 8
};
void reconnect();
virtual void reconnect();
/// Connects the underlying socket to the HTTP server.
int write(const char * buffer, std::streamsize length);

View File

@ -4,6 +4,8 @@ services:
kafka_zookeeper:
image: zookeeper:3.4.9
hostname: kafka_zookeeper
ports:
- 2181:2181
environment:
ZOO_MY_ID: 1
ZOO_PORT: 2181
@ -15,15 +17,14 @@ services:
image: confluentinc/cp-kafka:5.2.0
hostname: kafka1
ports:
- ${KAFKA_EXTERNAL_PORT:-8081}:${KAFKA_EXTERNAL_PORT:-8081}
- ${KAFKA_EXTERNAL_PORT}:${KAFKA_EXTERNAL_PORT}
environment:
KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:${KAFKA_EXTERNAL_PORT},OUTSIDE://kafka1:19092
KAFKA_ADVERTISED_HOST_NAME: kafka1
KAFKA_LISTENERS: INSIDE://0.0.0.0:${KAFKA_EXTERNAL_PORT},OUTSIDE://0.0.0.0:19092
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT
KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE
KAFKA_BROKER_ID: 1
KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181"
KAFKA_ZOOKEEPER_CONNECT: kafka_zookeeper:2181
KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
depends_on:
@ -35,13 +36,38 @@ services:
image: confluentinc/cp-schema-registry:5.2.0
hostname: schema-registry
ports:
- ${SCHEMA_REGISTRY_EXTERNAL_PORT:-12313}:${SCHEMA_REGISTRY_INTERNAL_PORT:-12313}
- ${SCHEMA_REGISTRY_EXTERNAL_PORT}:${SCHEMA_REGISTRY_EXTERNAL_PORT}
environment:
SCHEMA_REGISTRY_HOST_NAME: schema-registry
SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT
SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_EXTERNAL_PORT}
SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: noauth
depends_on:
- kafka_zookeeper
- kafka1
restart: always
security_opt:
- label:disable
schema-registry-auth:
image: confluentinc/cp-schema-registry:5.2.0
hostname: schema-registry-auth
ports:
- ${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}
environment:
SCHEMA_REGISTRY_HOST_NAME: schema-registry-auth
SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}
SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC
SCHEMA_REGISTRY_AUTHENTICATION_ROLES: user
SCHEMA_REGISTRY_AUTHENTICATION_REALM: RealmFooBar
SCHEMA_REGISTRY_OPTS: "-Djava.security.auth.login.config=/etc/schema-registry/secrets/schema_registry_jaas.conf"
SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: auth
volumes:
- ${SCHEMA_REGISTRY_DIR:-}/secrets:/etc/schema-registry/secrets
depends_on:
- kafka_zookeeper
- kafka1
restart: always
security_opt:
- label:disable

View File

@ -76,6 +76,7 @@ The supported formats are:
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | ✔ |
| [Native](#native) | ✔ | ✔ |
| [Null](#null) | ✗ | ✔ |
| [XML](#xml) | ✗ | ✔ |
@ -472,6 +473,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`.
- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`.
## CSVWithNames {#csvwithnames}
@ -1515,6 +1517,23 @@ If setting [input_format_with_types_use_header](/docs/en/operations/settings/set
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::
## RowBinaryWithDefaults {#rowbinarywithdefaults}
Similar to [RowBinary](#rowbinary), but with an extra byte before each column that indicates if default value should be used.
Examples:
```sql
:) select * from format('RowBinaryWithDefaults', 'x UInt32 default 42, y UInt32', x'010001000000')
┌──x─┬─y─┐
│ 42 │ 1 │
└────┴───┘
```
For column `x` there is only one byte `01` that indicates that default value should be used and no other data after this byte is provided.
For column `y` data starts with byte `00` that indicates that column has actual value that should be read from the subsequent data `01000000`.
## RowBinary format settings {#row-binary-format-settings}
- [format_binary_max_string_size](/docs/en/operations/settings/settings-formats.md/#format_binary_max_string_size) - The maximum allowed size for String in RowBinary format. Default value - `1GiB`.

View File

@ -30,7 +30,7 @@ description: In order to effectively mitigate possible human errors, you should
```
:::note ALL
`ALL` is only applicable to the `RESTORE` command prior to version 23.4 of Clickhouse.
Prior to version 23.4 of ClickHouse, `ALL` was only applicable to the `RESTORE` command.
:::
## Background

View File

@ -989,6 +989,28 @@ Result
a b
```
### input_format_csv_use_default_on_bad_values {#input_format_csv_use_default_on_bad_values}
Allow to set default value to column when CSV field deserialization failed on bad value
Default value: `false`.
**Examples**
Query
```bash
./clickhouse local -q "create table test_tbl (x String, y UInt32, z Date) engine=MergeTree order by x"
echo 'a,b,c' | ./clickhouse local -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV"
./clickhouse local -q "select * from test_tbl"
```
Result
```text
a 0 1971-01-01
```
## Values format settings {#values-format-settings}
### input_format_values_interpret_expressions {#input_format_values_interpret_expressions}
@ -1325,6 +1347,17 @@ Default value: 0.
Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format.
Format:
``` text
http://[user:password@]machine[:port]"
```
Examples:
``` text
http://registry.example.com:8081
http://admin:secret@registry.example.com:8081
```
Default value: `Empty`.
### output_format_avro_codec {#output_format_avro_codec}

View File

@ -6,9 +6,20 @@ sidebar_label: Arithmetic
# Arithmetic Functions
The result type of all arithmetic functions is the smallest type which can represent all possible results. Size promotion happens for integers up to 32 bit, e.g. `UInt8 + UInt16 = UInt32`. If one of the inters has 64 or more bits, the result is of the same type as the bigger of the input integers, e.g. `UInt16 + UInt128 = UInt128`. While this introduces a risk of overflows around the value range boundary, it ensures that calculations are performed quickly using the maximum native integer width of 64 bit.
Arithmetic functions work for any two operands of type `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`, `Float32`, or `Float64`.
The result of addition or multiplication of two integers is unsigned unless one of the integers is signed.
Before performing the operation, both operands are casted to the result type. The result type is determined as follows (unless specified
differently in the function documentation below):
- If both operands are up to 32 bits wide, the size of the result type will be the size of the next bigger type following the bigger of the
two operands (integer size promotion). For example, `UInt8 + UInt16 = UInt32` or `Float32 * Float32 = Float64`.
- If one of the operands has 64 or more bits, the size of the result type will be the same size as the bigger of the two operands. For
example, `UInt32 + UInt128 = UInt128` or `Float32 * Float64 = Float64`.
- If one of the operands is signed, the result type will also be signed, otherwise it will be signed. For example, `UInt32 * Int32 = Int64`.
These rules make sure that the result type will be the smallest type which can represent all possible results. While this introduces a risk
of overflows around the value range boundary, it ensures that calculations are performed quickly using the maximum native integer width of
64 bit. This behavior also guarantees compatibility with many other databases which provide 64 bit integers (BIGINT) as the biggest integer
type.
Example:
@ -22,8 +33,6 @@ SELECT toTypeName(0), toTypeName(0 + 0), toTypeName(0 + 0 + 0), toTypeName(0 + 0
└───────────────┴────────────────────────┴─────────────────────────────────┴──────────────────────────────────────────┘
```
Arithmetic functions work for any pair of `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`, `Float32`, or `Float64` values.
Overflows are produced the same way as in C++.
## plus
@ -68,7 +77,7 @@ Alias: `a \* b` (operator)
## divide
Calculates the quotient of two values `a` and `b`. The result is always a floating-point value. If you need integer division, you can use the `intDiv` function.
Calculates the quotient of two values `a` and `b`. The result type is always [Float64](../../sql-reference/data-types/float.md). Integer division is provided by the `intDiv` function.
Division by 0 returns `inf`, `-inf`, or `nan`.
@ -84,7 +93,7 @@ Alias: `a / b` (operator)
Performs an integer division of two values `a` by `b`, i.e. computes the quotient rounded down to the next smallest integer.
The result has the same type as the dividend (the first parameter).
The result has the same width as the dividend (the first parameter).
An exception is thrown when dividing by zero, when the quotient does not fit in the range of the dividend, or when dividing a minimal negative number by minus one.
@ -135,7 +144,7 @@ intDivOrZero(a, b)
Calculates the remainder of the division of two values `a` by `b`.
The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result is a floating-point number.
The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result type is [Float64](../../sql-reference/data-types/float.md).
The remainder is computed like in C++. Truncated division is used for negative numbers.

View File

@ -722,7 +722,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d
## age
Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 second.
Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 microsecond.
E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit.
For an alternative to `age`, see function `date\_diff`.
@ -738,6 +738,8 @@ age('unit', startdate, enddate, [timezone])
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
Possible values:
- `microsecond` (possible abbreviations: `us`, `u`)
- `millisecond` (possible abbreviations: `ms`)
- `second` (possible abbreviations: `ss`, `s`)
- `minute` (possible abbreviations: `mi`, `n`)
- `hour` (possible abbreviations: `hh`, `h`)
@ -813,6 +815,8 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
Possible values:
- `microsecond` (possible abbreviations: `us`, `u`)
- `millisecond` (possible abbreviations: `ms`)
- `second` (possible abbreviations: `ss`, `s`)
- `minute` (possible abbreviations: `mi`, `n`)
- `hour` (possible abbreviations: `hh`, `h`)
@ -1134,6 +1138,8 @@ Result:
Returns the current date and time at the moment of query analysis. The function is a constant expression.
Alias: `current_timestamp`.
**Syntax**
``` sql
@ -1264,6 +1270,8 @@ Result:
Accepts zero arguments and returns the current date at one of the moments of query analysis.
The same as toDate(now()).
Aliases: `curdate`, `current_date`.
## yesterday
Accepts zero arguments and returns yesterdays date at one of the moments of query analysis.

View File

@ -51,7 +51,7 @@ Calculates the MD5 from a string and returns the resulting set of bytes as Fixed
If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the sipHash128 function instead.
If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))).
## sipHash64 (#hash_functions-siphash64)
## sipHash64 {#hash_functions-siphash64}
Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.
@ -63,9 +63,9 @@ This is a cryptographic hash function. It works at least three times faster than
The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm:
1. The first and the second hash value are concatenated to an array which is hashed.
2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
3. This calculation is repeated for all remaining hash values of the original input.
1. The first and the second hash value are concatenated to an array which is hashed.
2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
3. This calculation is repeated for all remaining hash values of the original input.
**Arguments**

View File

@ -1267,3 +1267,36 @@ Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
## firstLine
Returns the first line from a multi-line string.
**Syntax**
```sql
firstLine(val)
```
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
**Returned value**
- The first line of the input value or the whole value if there is no line
separators. [String](../data-types/string.md)
**Example**
```sql
select firstLine('foo\nbar\nbaz');
```
Result:
```result
┌─firstLine('foo\nbar\nbaz')─┐
│ foo │
└────────────────────────────┘
```

View File

@ -97,7 +97,7 @@ This is an experimental feature that may change in backwards-incompatible ways i
:::
```sql
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
```
Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query.

View File

@ -68,7 +68,7 @@ $ /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/
$ rm -rf build
$ mkdir build
$ cd build
$ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER==$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
$ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
$ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
$ cmake --build . --config RelWithDebInfo
$ cd ..

View File

@ -625,7 +625,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d
## age
Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 секунду.
Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 микросекунду.
Например, разница между `2021-12-29` и `2022-01-01` 3 дня для единицы `day`, 0 месяцев для единицы `month`, 0 лет для единицы `year`.
**Синтаксис**
@ -639,6 +639,8 @@ age('unit', startdate, enddate, [timezone])
- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md).
Возможные значения:
- `microsecond` (возможные сокращения: `us`, `u`)
- `millisecond` (возможные сокращения: `ms`)
- `second` (возможные сокращения: `ss`, `s`)
- `minute` (возможные сокращения: `mi`, `n`)
- `hour` (возможные сокращения: `hh`, `h`)
@ -712,6 +714,8 @@ date_diff('unit', startdate, enddate, [timezone])
- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md).
Возможные значения:
- `microsecond` (возможные сокращения: `us`, `u`)
- `millisecond` (возможные сокращения: `ms`)
- `second` (возможные сокращения: `ss`, `s`)
- `minute` (возможные сокращения: `mi`, `n`)
- `hour` (возможные сокращения: `hh`, `h`)

View File

@ -1124,3 +1124,39 @@ Do Nothing for 2 Minutes 2:00  
Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным.
Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным.
Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено.
## firstLine
Возвращает первую строку в многострочном тексте.
**Синтаксис**
```sql
firstLine(val)
```
**Аргументы**
- `val` - текст для обработки. [String](../data-types/string.md)
**Returned value**
- Первая строка текста или весь текст, если переносы строк отсутствуют.
Тип: [String](../data-types/string.md)
**Пример**
Запрос:
```sql
select firstLine('foo\nbar\nbaz');
```
Результат:
```result
┌─firstLine('foo\nbar\nbaz')─┐
│ foo │
└────────────────────────────┘
```

View File

@ -73,7 +73,7 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
Чтобы использовать `LIVE VIEW` и запросы `WATCH`, включите настройку [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view).
:::
```sql
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
```
`LIVE VIEW` хранит результат запроса [SELECT](../../../sql-reference/statements/select/index.md), указанного при создании, и обновляется сразу же при изменении этого результата. Конечный результат запроса и промежуточные данные, из которых формируется результат, хранятся в оперативной памяти, и это обеспечивает высокую скорость обработки для повторяющихся запросов. LIVE-представления могут отправлять push-уведомления при изменении результата исходного запроса `SELECT`. Для этого используйте запрос [WATCH](../../../sql-reference/statements/watch.md).

View File

@ -643,6 +643,8 @@ date_diff('unit', startdate, enddate, [timezone])
- `unit``value`对应的时间单位。类型为[String](../../sql-reference/data-types/string.md)。
可能的值:
- `microsecond`
- `millisecond`
- `second`
- `minute`
- `hour`

View File

@ -72,7 +72,7 @@ ClickHouse 中的物化视图更像是插入触发器。 如果视图查询中
使用[allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view)设置启用实时视图和`WATCH`查询的使用。 输入命令`set allow_experimental_live_view = 1`。
```sql
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
```
实时视图存储相应[SELECT](../../../sql-reference/statements/select/index.md)查询的结果,并在查询结果更改时随时更新。 查询结果以及与新数据结合所需的部分结果存储在内存中,为重复查询提供更高的性能。当使用[WATCH](../../../sql-reference/statements/watch.md)查询更改查询结果时,实时视图可以提供推送通知。

View File

@ -1173,12 +1173,12 @@ void Client::processOptions(const OptionsDescription & options_description,
{
String traceparent = options["opentelemetry-traceparent"].as<std::string>();
String error;
if (!global_context->getClientInfo().client_trace_context.parseTraceparentHeader(traceparent, error))
if (!global_context->getClientTraceContext().parseTraceparentHeader(traceparent, error))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot parse OpenTelemetry traceparent '{}': {}", traceparent, error);
}
if (options.count("opentelemetry-tracestate"))
global_context->getClientInfo().client_trace_context.tracestate = options["opentelemetry-tracestate"].as<std::string>();
global_context->getClientTraceContext().tracestate = options["opentelemetry-tracestate"].as<std::string>();
}
@ -1238,10 +1238,9 @@ void Client::processConfig()
global_context->getSettingsRef().max_insert_block_size);
}
ClientInfo & client_info = global_context->getClientInfo();
client_info.setInitialQuery();
client_info.quota_key = config().getString("quota_key", "");
client_info.query_kind = query_kind;
global_context->setQueryKindInitial();
global_context->setQuotaClientKey(config().getString("quota_key", ""));
global_context->setQueryKind(query_kind);
}

View File

@ -737,9 +737,8 @@ void LocalServer::processConfig()
for (const auto & [key, value] : prompt_substitutions)
boost::replace_all(prompt_by_server_display_name, "{" + key + "}", value);
ClientInfo & client_info = global_context->getClientInfo();
client_info.setInitialQuery();
client_info.query_kind = query_kind;
global_context->setQueryKindInitial();
global_context->setQueryKind(query_kind);
}

View File

@ -887,6 +887,7 @@ try
#endif
global_context->setRemoteHostFilter(config());
global_context->setHTTPHeaderFilter(config());
std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH));
fs::path path = path_str;
@ -1200,6 +1201,7 @@ try
}
global_context->setRemoteHostFilter(*config);
global_context->setHTTPHeaderFilter(*config);
global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop);
global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop);

View File

@ -866,6 +866,14 @@
-->
<!--</remote_url_allow_hosts>-->
<!-- The list of HTTP headers forbidden to use in HTTP-related storage engines and table functions.
If this section is not present in configuration, all headers are allowed.
-->
<!-- <http_forbid_headers>
<header>exact_header</header>
<header_regexp>(?i)(case_insensitive_header)</header_regexp>
</http_forbid_headers> -->
<!-- If element has 'incl' attribute, then for it's value will be used corresponding substitution from another file.
By default, path to file with substitutions is /etc/metrika.xml. It could be changed in config in 'include_from' element.
Values for substitutions are specified in /clickhouse/name_of_substitution elements in that file.

View File

@ -88,3 +88,4 @@ endfunction()
add_rust_subdirectory (BLAKE3)
add_rust_subdirectory (skim)
add_rust_subdirectory (prql)

3
rust/prql/CMakeLists.txt Normal file
View File

@ -0,0 +1,3 @@
clickhouse_import_crate(MANIFEST_PATH Cargo.toml)
target_include_directories(_ch_rust_prql INTERFACE include)
add_library(ch_rust::prql ALIAS _ch_rust_prql)

569
rust/prql/Cargo.lock generated Normal file
View File

@ -0,0 +1,569 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "_ch_rust_prql"
version = "0.1.0"
dependencies = [
"prql-compiler",
"serde_json",
]
[[package]]
name = "addr2line"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3"
dependencies = [
"gimli",
]
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "ahash"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
dependencies = [
"getrandom",
"once_cell",
"version_check",
]
[[package]]
name = "aho-corasick"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
dependencies = [
"memchr",
]
[[package]]
name = "anyhow"
version = "1.0.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"
dependencies = [
"backtrace",
]
[[package]]
name = "ariadne"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "367fd0ad87307588d087544707bc5fbf4805ded96c7db922b70d368fa1cb5702"
dependencies = [
"unicode-width",
"yansi",
]
[[package]]
name = "backtrace"
version = "0.3.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12"
dependencies = [
"addr2line",
"cc",
"cfg-if",
"libc",
"miniz_oxide",
"object",
"rustc-demangle",
]
[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chumsky"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d"
dependencies = [
"hashbrown 0.12.3",
"stacker",
]
[[package]]
name = "csv"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086"
dependencies = [
"csv-core",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
dependencies = [
"memchr",
]
[[package]]
name = "either"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
[[package]]
name = "enum-as-inner"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "equivalent"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88bffebc5d80432c9b140ee17875ff173a8ab62faad5b257da912bd2f6c1c0a1"
[[package]]
name = "getrandom"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "gimli"
version = "0.27.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
dependencies = [
"ahash",
]
[[package]]
name = "hashbrown"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "indexmap"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d"
dependencies = [
"equivalent",
"hashbrown 0.14.0",
]
[[package]]
name = "itertools"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.147"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
[[package]]
name = "log"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4"
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
dependencies = [
"adler",
]
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "object"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1"
dependencies = [
"memchr",
]
[[package]]
name = "once_cell"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "proc-macro2"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb"
dependencies = [
"unicode-ident",
]
[[package]]
name = "prql-compiler"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c99b52154002ac7f286dd2293c2f8d4e30526c1d396b14deef5ada1deef3c9ff"
dependencies = [
"anyhow",
"ariadne",
"chumsky",
"csv",
"enum-as-inner",
"itertools",
"lazy_static",
"log",
"once_cell",
"regex",
"semver",
"serde",
"serde_json",
"serde_yaml",
"sqlformat",
"sqlparser",
"strum",
"strum_macros",
]
[[package]]
name = "psm"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
dependencies = [
"cc",
]
[[package]]
name = "quote"
version = "1.0.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89089e897c013b3deb627116ae56a6955a72b8bed395c9526af31c9fe528b484"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa250384981ea14565685dea16a9ccc4d1c541a13f82b9c168572264d1df8c56"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846"
[[package]]
name = "rustc-demangle"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "rustversion"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc31bd9b61a32c31f9650d18add92aa83a49ba979c143eefd27fe7177b05bd5f"
[[package]]
name = "ryu"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9"
[[package]]
name = "semver"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed"
dependencies = [
"serde",
]
[[package]]
name = "serde"
version = "1.0.166"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d01b7404f9d441d3ad40e6a636a7782c377d2abdbe4fa2440e2edcc2f4f10db8"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.166"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5dd83d6dde2b6b2d466e14d9d1acce8816dedee94f735eac6395808b3483c6d6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.23",
]
[[package]]
name = "serde_json"
version = "1.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f1e14e89be7aa4c4b78bdbdc9eb5bf8517829a600ae8eaa39a6e1d960b5185c"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "serde_yaml"
version = "0.9.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "452e67b9c20c37fa79df53201dc03839651086ed9bbe92b3ca585ca9fdaa7d85"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",
"unsafe-libyaml",
]
[[package]]
name = "sqlformat"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e"
dependencies = [
"itertools",
"nom",
"unicode_categories",
]
[[package]]
name = "sqlparser"
version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a"
dependencies = [
"log",
"serde",
]
[[package]]
name = "stacker"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
dependencies = [
"cc",
"cfg-if",
"libc",
"psm",
"winapi",
]
[[package]]
name = "strum"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
dependencies = [
"strum_macros",
]
[[package]]
name = "strum_macros"
version = "0.24.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn 1.0.109",
]
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73"
[[package]]
name = "unicode-width"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]]
name = "unicode_categories"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
[[package]]
name = "unsafe-libyaml"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1865806a559042e51ab5414598446a5871b561d21b6764f2eabb0dd481d880a6"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "yansi"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"

20
rust/prql/Cargo.toml Normal file
View File

@ -0,0 +1,20 @@
[package]
name = "_ch_rust_prql"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
prql-compiler = "0.8.1"
serde_json = "1.0"
[lib]
crate-type = ["staticlib"]
[profile.release]
debug = true
[profile.release-thinlto]
inherits = "release"
lto = true

18
rust/prql/include/prql.h Normal file
View File

@ -0,0 +1,18 @@
#pragma once
#include <cstdint>
extern "C" {
/// Converts a PRQL query to an SQL query.
/// @param query is a pointer to the beginning of the PRQL query.
/// @param size is the size of the PRQL query.
/// @param out is a pointer to a uint8_t pointer which will be set to the beginning of the null terminated SQL query or the error message.
/// @param out_size is the size of the string pointed by `out`.
/// @returns zero in case of success, non-zero in case of failure.
int64_t prql_to_sql(const uint8_t * query, uint64_t size, uint8_t ** out, uint64_t * out_size);
/// Frees the passed in pointer which's memory was allocated by Rust allocators previously.
void prql_free_pointer(uint8_t * ptr_to_free);
} // extern "C"

56
rust/prql/src/lib.rs Normal file
View File

@ -0,0 +1,56 @@
use prql_compiler::sql::Dialect;
use prql_compiler::{Options, Target};
use std::ffi::{c_char, CString};
use std::slice;
fn set_output(result: String, out: *mut *mut u8, out_size: *mut u64) {
assert!(!out_size.is_null());
let out_size_ptr = unsafe { &mut *out_size };
*out_size_ptr = (result.len() + 1).try_into().unwrap();
assert!(!out.is_null());
let out_ptr = unsafe { &mut *out };
*out_ptr = CString::new(result).unwrap().into_raw() as *mut u8;
}
#[no_mangle]
pub unsafe extern "C" fn prql_to_sql(
query: *const u8,
size: u64,
out: *mut *mut u8,
out_size: *mut u64,
) -> i64 {
let query_vec = unsafe { slice::from_raw_parts(query, size.try_into().unwrap()) }.to_vec();
let maybe_prql_query = String::from_utf8(query_vec);
if maybe_prql_query.is_err() {
set_output(
String::from("The PRQL query must be UTF-8 encoded!"),
out,
out_size,
);
return 1;
}
let prql_query = maybe_prql_query.unwrap();
let opts = &Options {
format: true,
target: Target::Sql(Some(Dialect::ClickHouse)),
signature_comment: false,
color: false,
};
let (is_err, res) = match prql_compiler::compile(&prql_query, &opts) {
Ok(sql_str) => (false, sql_str),
Err(err) => (true, err.to_string()),
};
set_output(res, out, out_size);
match is_err {
true => 1,
false => 0,
}
}
#[no_mangle]
pub unsafe extern "C" fn prql_free_pointer(ptr_to_free: *mut u8) {
std::mem::drop(CString::from_raw(ptr_to_free as *mut c_char));
}

204
rust/skim/Cargo.lock generated
View File

@ -42,17 +42,6 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi 0.1.19",
"libc",
"winapi",
]
[[package]]
name = "autocfg"
version = "1.1.0"
@ -104,31 +93,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "clap"
version = "3.2.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123"
dependencies = [
"atty",
"bitflags",
"clap_lex",
"indexmap",
"once_cell",
"strsim",
"termcolor",
"textwrap",
]
[[package]]
name = "clap_lex"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
dependencies = [
"os_str_bytes",
]
[[package]]
name = "codespan-reporting"
version = "0.11.1"
@ -214,9 +178,9 @@ dependencies = [
[[package]]
name = "cxx"
version = "1.0.97"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e88abab2f5abbe4c56e8f1fb431b784d710b709888f35755a160e62e33fe38e8"
checksum = "5032837c1384de3708043de9d4e97bb91290faca6c16529a28aa340592a78166"
dependencies = [
"cc",
"cxxbridge-flags",
@ -226,9 +190,9 @@ dependencies = [
[[package]]
name = "cxx-build"
version = "1.0.97"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c0c11acd0e63bae27dcd2afced407063312771212b7a823b4fd72d633be30fb"
checksum = "51368b3d0dbf356e10fcbfd455a038503a105ee556f7ee79b6bb8c53a7247456"
dependencies = [
"cc",
"codespan-reporting",
@ -236,24 +200,24 @@ dependencies = [
"proc-macro2",
"quote",
"scratch",
"syn 2.0.23",
"syn 2.0.26",
]
[[package]]
name = "cxxbridge-flags"
version = "1.0.97"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d3816ed957c008ccd4728485511e3d9aaf7db419aa321e3d2c5a2f3411e36c8"
checksum = "0d9062157072e4aafc8e56ceaf8325ce850c5ae37578c852a0d4de2cecdded13"
[[package]]
name = "cxxbridge-macro"
version = "1.0.97"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a26acccf6f445af85ea056362561a24ef56cdc15fcc685f03aec50b9c702cb6d"
checksum = "cf01e8a540f5a4e0f284595834f81cf88572f244b768f051724537afa99a2545"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.23",
"syn 2.0.26",
]
[[package]]
@ -359,19 +323,6 @@ version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
[[package]]
name = "env_logger"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7"
dependencies = [
"atty",
"humantime",
"log",
"regex",
"termcolor",
]
[[package]]
name = "fnv"
version = "1.0.7"
@ -398,32 +349,11 @@ dependencies = [
"wasi 0.11.0+wasi-snapshot-preview1",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]]
name = "hermit-abi"
version = "0.1.19"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
"libc",
]
[[package]]
name = "hermit-abi"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286"
[[package]]
name = "humantime"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b"
[[package]]
name = "iana-time-zone"
@ -454,16 +384,6 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "indexmap"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [
"autocfg",
"hashbrown",
]
[[package]]
name = "js-sys"
version = "0.3.64"
@ -487,9 +407,9 @@ checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
[[package]]
name = "link-cplusplus"
version = "1.0.8"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5"
checksum = "9d240c6f7e1ba3a28b0249f774e6a9dd0175054b52dfbb61b16eb8505c3785c9"
dependencies = [
"cc",
]
@ -564,7 +484,7 @@ version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
dependencies = [
"hermit-abi 0.3.1",
"hermit-abi",
"libc",
]
@ -574,12 +494,6 @@ version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "os_str_bytes"
version = "6.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d5d9eb14b174ee9aa2ef96dc2b94637a2d4b6e7cb873c7e171f0c20c6cf3eac"
[[package]]
name = "pin-utils"
version = "0.1.0"
@ -588,18 +502,18 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "proc-macro2"
version = "1.0.63"
version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb"
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.29"
version = "1.0.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0"
dependencies = [
"proc-macro2",
]
@ -648,9 +562,21 @@ dependencies = [
[[package]]
name = "regex"
version = "1.8.4"
version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f"
checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310"
dependencies = [
"aho-corasick",
"memchr",
@ -659,39 +585,33 @@ dependencies = [
[[package]]
name = "regex-syntax"
version = "0.7.2"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78"
checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
[[package]]
name = "rustversion"
version = "1.0.12"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06"
checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
[[package]]
name = "scopeguard"
version = "1.1.0"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "scratch"
version = "1.0.5"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1"
checksum = "a3cf7c11c38cb994f3d40e8a8cde3bbd1f72a435e4c49e85d6553d8312306152"
[[package]]
name = "serde"
version = "1.0.164"
version = "1.0.171"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d"
[[package]]
name = "shlex"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9"
[[package]]
name = "skim"
@ -699,23 +619,19 @@ version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5d28de0a6cb2cdd83a076f1de9d965b973ae08b244df1aa70b432946dda0f32"
dependencies = [
"atty",
"beef",
"bitflags",
"chrono",
"clap",
"crossbeam",
"defer-drop",
"derive_builder",
"env_logger",
"fuzzy-matcher",
"lazy_static",
"log",
"nix 0.25.1",
"rayon",
"regex",
"shlex",
"time 0.3.22",
"time 0.3.23",
"timer",
"tuikit",
"unicode-width",
@ -741,9 +657,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.23"
version = "2.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737"
checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970"
dependencies = [
"proc-macro2",
"quote",
@ -770,30 +686,24 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "textwrap"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
[[package]]
name = "thiserror"
version = "1.0.40"
version = "1.0.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac"
checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.40"
version = "1.0.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.23",
"syn 2.0.26",
]
[[package]]
@ -819,9 +729,9 @@ dependencies = [
[[package]]
name = "time"
version = "0.3.22"
version = "0.3.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea9e1b3cf1243ae005d9e74085d4d542f3125458f3a81af210d901dcd7411efd"
checksum = "59e399c068f43a5d116fedaf73b203fa4f9c519f17e2b34f63221d3792f81446"
dependencies = [
"serde",
"time-core",
@ -858,9 +768,9 @@ dependencies = [
[[package]]
name = "unicode-ident"
version = "1.0.9"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0"
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
[[package]]
name = "unicode-width"
@ -928,7 +838,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.23",
"syn 2.0.26",
"wasm-bindgen-shared",
]
@ -950,7 +860,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.23",
"syn 2.0.26",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]

View File

@ -6,7 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
skim = "0.10.2"
skim = { version = "0.10.2", default-features = false }
cxx = "1.0.83"
term = "0.7.0"

View File

@ -76,11 +76,13 @@ public:
auto x = cache.get(params);
if (x)
{
if ((*x)->tryGetUser())
if ((*x)->getUserID() && !(*x)->tryGetUser())
cache.remove(params); /// The user has been dropped while it was in the cache.
else
return *x;
/// No user, probably the user has been dropped while it was in the cache.
cache.remove(params);
}
/// TODO: There is no need to keep the `ContextAccessCache::mutex` locked while we're calculating access rights.
auto res = std::make_shared<ContextAccess>(access_control, params);
res->initialize();
cache.add(params, res);
@ -713,35 +715,6 @@ int AccessControl::getBcryptWorkfactor() const
}
std::shared_ptr<const ContextAccess> AccessControl::getContextAccess(
const UUID & user_id,
const std::vector<UUID> & current_roles,
bool use_default_roles,
const Settings & settings,
const String & current_database,
const ClientInfo & client_info) const
{
ContextAccessParams params;
params.user_id = user_id;
params.current_roles.insert(current_roles.begin(), current_roles.end());
params.use_default_roles = use_default_roles;
params.current_database = current_database;
params.readonly = settings.readonly;
params.allow_ddl = settings.allow_ddl;
params.allow_introspection = settings.allow_introspection_functions;
params.interface = client_info.interface;
params.http_method = client_info.http_method;
params.address = client_info.current_address.host();
params.quota_key = client_info.quota_key;
/// Extract the last entry from comma separated list of X-Forwarded-For addresses.
/// Only the last proxy can be trusted (if any).
params.forwarded_address = client_info.getLastForwardedFor();
return getContextAccess(params);
}
std::shared_ptr<const ContextAccess> AccessControl::getContextAccess(const ContextAccessParams & params) const
{
return context_access_cache->getContextAccess(params);

View File

@ -25,7 +25,7 @@ namespace Poco
namespace DB
{
class ContextAccess;
struct ContextAccessParams;
class ContextAccessParams;
struct User;
using UserPtr = std::shared_ptr<const User>;
class EnabledRoles;
@ -181,14 +181,6 @@ public:
void setSettingsConstraintsReplacePrevious(bool enable) { settings_constraints_replace_previous = enable; }
bool doesSettingsConstraintsReplacePrevious() const { return settings_constraints_replace_previous; }
std::shared_ptr<const ContextAccess> getContextAccess(
const UUID & user_id,
const std::vector<UUID> & current_roles,
bool use_default_roles,
const Settings & settings,
const String & current_database,
const ClientInfo & client_info) const;
std::shared_ptr<const ContextAccess> getContextAccess(const ContextAccessParams & params) const;
std::shared_ptr<const EnabledRoles> getEnabledRoles(

View File

@ -10,6 +10,7 @@
#include <Access/EnabledSettings.h>
#include <Access/SettingsProfilesInfo.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/Context.h>
#include <Common/Exception.h>
#include <Common/quoteString.h>
#include <Core/Settings.h>
@ -221,6 +222,12 @@ namespace
}
std::shared_ptr<const ContextAccess> ContextAccess::fromContext(const ContextPtr & context)
{
return context->getAccess();
}
ContextAccess::ContextAccess(const AccessControl & access_control_, const Params & params_)
: access_control(&access_control_)
, params(params_)
@ -228,48 +235,44 @@ ContextAccess::ContextAccess(const AccessControl & access_control_, const Params
}
ContextAccess::ContextAccess(FullAccess)
: is_full_access(true), access(std::make_shared<AccessRights>(AccessRights::getFullAccess())), access_with_implicit(access)
{
}
ContextAccess::~ContextAccess()
{
enabled_settings.reset();
enabled_quota.reset();
enabled_row_policies.reset();
access_with_implicit.reset();
access.reset();
roles_info.reset();
subscription_for_roles_changes.reset();
enabled_roles.reset();
subscription_for_user_change.reset();
user.reset();
}
ContextAccess::~ContextAccess() = default;
void ContextAccess::initialize()
{
std::lock_guard lock{mutex};
subscription_for_user_change = access_control->subscribeForChanges(
*params.user_id, [weak_ptr = weak_from_this()](const UUID &, const AccessEntityPtr & entity)
{
auto ptr = weak_ptr.lock();
if (!ptr)
return;
UserPtr changed_user = entity ? typeid_cast<UserPtr>(entity) : nullptr;
std::lock_guard lock2{ptr->mutex};
ptr->setUser(changed_user);
});
setUser(access_control->read<User>(*params.user_id));
std::lock_guard lock{mutex};
if (params.full_access)
{
access = std::make_shared<AccessRights>(AccessRights::getFullAccess());
access_with_implicit = access;
return;
}
if (!params.user_id)
throw Exception(ErrorCodes::LOGICAL_ERROR, "No user in current context, it's a bug");
subscription_for_user_change = access_control->subscribeForChanges(
*params.user_id,
[weak_ptr = weak_from_this()](const UUID &, const AccessEntityPtr & entity)
{
auto ptr = weak_ptr.lock();
if (!ptr)
return;
UserPtr changed_user = entity ? typeid_cast<UserPtr>(entity) : nullptr;
std::lock_guard lock2{ptr->mutex};
ptr->setUser(changed_user);
});
setUser(access_control->read<User>(*params.user_id));
}
void ContextAccess::setUser(const UserPtr & user_) const
{
user = user_;
if (!user)
if (!user_)
{
/// User has been dropped.
user_was_dropped = true;
@ -280,6 +283,7 @@ void ContextAccess::setUser(const UserPtr & user_) const
enabled_roles = nullptr;
roles_info = nullptr;
enabled_row_policies = nullptr;
row_policies_of_initial_user = nullptr;
enabled_quota = nullptr;
enabled_settings = nullptr;
return;
@ -294,10 +298,10 @@ void ContextAccess::setUser(const UserPtr & user_) const
current_roles = user->granted_roles.findGranted(user->default_roles);
current_roles_with_admin_option = user->granted_roles.findGrantedWithAdminOption(user->default_roles);
}
else
else if (params.current_roles)
{
current_roles = user->granted_roles.findGranted(params.current_roles);
current_roles_with_admin_option = user->granted_roles.findGrantedWithAdminOption(params.current_roles);
current_roles = user->granted_roles.findGranted(*params.current_roles);
current_roles_with_admin_option = user->granted_roles.findGrantedWithAdminOption(*params.current_roles);
}
subscription_for_roles_changes.reset();
@ -309,6 +313,11 @@ void ContextAccess::setUser(const UserPtr & user_) const
});
setRolesInfo(enabled_roles->getRolesInfo());
std::optional<UUID> initial_user_id;
if (!params.initial_user.empty())
initial_user_id = access_control->find<User>(params.initial_user);
row_policies_of_initial_user = initial_user_id ? access_control->tryGetDefaultRowPolicies(*initial_user_id) : nullptr;
}
@ -316,12 +325,15 @@ void ContextAccess::setRolesInfo(const std::shared_ptr<const EnabledRolesInfo> &
{
assert(roles_info_);
roles_info = roles_info_;
enabled_row_policies = access_control->getEnabledRowPolicies(
*params.user_id, roles_info->enabled_roles);
enabled_row_policies = access_control->getEnabledRowPolicies(*params.user_id, roles_info->enabled_roles);
enabled_quota = access_control->getEnabledQuota(
*params.user_id, user_name, roles_info->enabled_roles, params.address, params.forwarded_address, params.quota_key);
enabled_settings = access_control->getEnabledSettings(
*params.user_id, user->settings, roles_info->enabled_roles, roles_info->settings_from_enabled_roles);
calculateAccessRights();
}
@ -381,21 +393,24 @@ std::shared_ptr<const EnabledRolesInfo> ContextAccess::getRolesInfo() const
return no_roles;
}
std::shared_ptr<const EnabledRowPolicies> ContextAccess::getEnabledRowPolicies() const
RowPolicyFilterPtr ContextAccess::getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const
{
std::lock_guard lock{mutex};
if (enabled_row_policies)
return enabled_row_policies;
static const auto no_row_policies = std::make_shared<EnabledRowPolicies>();
return no_row_policies;
}
RowPolicyFilterPtr ContextAccess::getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type, RowPolicyFilterPtr combine_with_filter) const
{
std::lock_guard lock{mutex};
RowPolicyFilterPtr filter;
if (enabled_row_policies)
return enabled_row_policies->getFilter(database, table_name, filter_type, combine_with_filter);
return combine_with_filter;
filter = enabled_row_policies->getFilter(database, table_name, filter_type);
if (row_policies_of_initial_user)
{
/// Find and set extra row policies to be used based on `client_info.initial_user`, if the initial user exists.
/// TODO: we need a better solution here. It seems we should pass the initial row policy
/// because a shard is allowed to not have the initial user or it might be another user
/// with the same name.
filter = row_policies_of_initial_user->getFilter(database, table_name, filter_type, filter);
}
return filter;
}
std::shared_ptr<const EnabledQuota> ContextAccess::getQuota() const
@ -417,14 +432,6 @@ std::optional<QuotaUsage> ContextAccess::getQuotaUsage() const
}
std::shared_ptr<const ContextAccess> ContextAccess::getFullAccess()
{
static const std::shared_ptr<const ContextAccess> res =
[] { return std::shared_ptr<ContextAccess>(new ContextAccess{kFullAccess}); }();
return res;
}
SettingsChanges ContextAccess::getDefaultSettings() const
{
std::lock_guard lock{mutex};
@ -478,7 +485,7 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg
throw Exception(ErrorCodes::UNKNOWN_USER, "{}: User has been dropped", getUserName());
}
if (is_full_access)
if (params.full_access)
return true;
auto access_granted = [&]
@ -706,7 +713,7 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const
return false;
};
if (is_full_access)
if (params.full_access)
return true;
if (user_was_dropped)
@ -806,7 +813,7 @@ void ContextAccess::checkAdminOption(const std::vector<UUID> & role_ids, const s
void ContextAccess::checkGranteeIsAllowed(const UUID & grantee_id, const IAccessEntity & grantee) const
{
if (is_full_access)
if (params.full_access)
return;
auto current_user = getUser();
@ -816,7 +823,7 @@ void ContextAccess::checkGranteeIsAllowed(const UUID & grantee_id, const IAccess
void ContextAccess::checkGranteesAreAllowed(const std::vector<UUID> & grantee_ids) const
{
if (is_full_access)
if (params.full_access)
return;
auto current_user = getUser();

View File

@ -1,6 +1,7 @@
#pragma once
#include <Access/AccessRights.h>
#include <Access/ContextAccessParams.h>
#include <Access/EnabledRowPolicies.h>
#include <Interpreters/ClientInfo.h>
#include <Core/UUID.h>
@ -30,47 +31,18 @@ class AccessControl;
class IAST;
struct IAccessEntity;
using ASTPtr = std::shared_ptr<IAST>;
struct ContextAccessParams
{
std::optional<UUID> user_id;
boost::container::flat_set<UUID> current_roles;
bool use_default_roles = false;
UInt64 readonly = 0;
bool allow_ddl = false;
bool allow_introspection = false;
String current_database;
ClientInfo::Interface interface = ClientInfo::Interface::TCP;
ClientInfo::HTTPMethod http_method = ClientInfo::HTTPMethod::UNKNOWN;
Poco::Net::IPAddress address;
String forwarded_address;
String quota_key;
auto toTuple() const
{
return std::tie(
user_id, current_roles, use_default_roles, readonly, allow_ddl, allow_introspection,
current_database, interface, http_method, address, forwarded_address, quota_key);
}
friend bool operator ==(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return lhs.toTuple() == rhs.toTuple(); }
friend bool operator !=(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return !(lhs == rhs); }
friend bool operator <(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return lhs.toTuple() < rhs.toTuple(); }
friend bool operator >(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return rhs < lhs; }
friend bool operator <=(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return !(rhs < lhs); }
friend bool operator >=(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return !(lhs < rhs); }
};
class Context;
using ContextPtr = std::shared_ptr<const Context>;
class ContextAccess : public std::enable_shared_from_this<ContextAccess>
{
public:
static std::shared_ptr<const ContextAccess> fromContext(const ContextPtr & context);
using Params = ContextAccessParams;
const Params & getParams() const { return params; }
ContextAccess(const AccessControl & access_control_, const Params & params_);
/// Returns the current user. Throws if user is nullptr.
UserPtr getUser() const;
/// Same as above, but can return nullptr.
@ -81,12 +53,9 @@ public:
/// Returns information about current and enabled roles.
std::shared_ptr<const EnabledRolesInfo> getRolesInfo() const;
/// Returns information about enabled row policies.
std::shared_ptr<const EnabledRowPolicies> getEnabledRowPolicies() const;
/// Returns the row policy filter for a specified table.
/// The function returns nullptr if there is no filter to apply.
RowPolicyFilterPtr getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type, RowPolicyFilterPtr combine_with_filter = {}) const;
RowPolicyFilterPtr getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const;
/// Returns the quota to track resource consumption.
std::shared_ptr<const EnabledQuota> getQuota() const;
@ -161,22 +130,12 @@ public:
/// Checks if grantees are allowed for the current user, throws an exception if not.
void checkGranteesAreAllowed(const std::vector<UUID> & grantee_ids) const;
/// Makes an instance of ContextAccess which provides full access to everything
/// without any limitations. This is used for the global context.
static std::shared_ptr<const ContextAccess> getFullAccess();
ContextAccess(const AccessControl & access_control_, const Params & params_);
~ContextAccess();
private:
friend class AccessControl;
struct FullAccess {};
static const FullAccess kFullAccess;
/// Makes an instance of ContextAccess which provides full access to everything
/// without any limitations. This is used for the global context.
explicit ContextAccess(FullAccess);
void initialize();
void setUser(const UserPtr & user_) const TSA_REQUIRES(mutex);
void setRolesInfo(const std::shared_ptr<const EnabledRolesInfo> & roles_info_) const TSA_REQUIRES(mutex);
@ -223,7 +182,6 @@ private:
const AccessControl * access_control = nullptr;
const Params params;
const bool is_full_access = false;
mutable std::atomic<bool> user_was_dropped = false;
mutable std::atomic<Poco::Logger *> trace_log = nullptr;
@ -237,6 +195,7 @@ private:
mutable std::shared_ptr<const AccessRights> access TSA_GUARDED_BY(mutex);
mutable std::shared_ptr<const AccessRights> access_with_implicit TSA_GUARDED_BY(mutex);
mutable std::shared_ptr<const EnabledRowPolicies> enabled_row_policies TSA_GUARDED_BY(mutex);
mutable std::shared_ptr<const EnabledRowPolicies> row_policies_of_initial_user TSA_GUARDED_BY(mutex);
mutable std::shared_ptr<const EnabledQuota> enabled_quota TSA_GUARDED_BY(mutex);
mutable std::shared_ptr<const EnabledSettings> enabled_settings TSA_GUARDED_BY(mutex);

View File

@ -0,0 +1,177 @@
#include <Access/ContextAccessParams.h>
#include <Core/Settings.h>
#include <Common/typeid_cast.h>
namespace DB
{
ContextAccessParams::ContextAccessParams(
const std::optional<UUID> user_id_,
bool full_access_,
bool use_default_roles_,
const std::shared_ptr<const std::vector<UUID>> & current_roles_,
const Settings & settings_,
const String & current_database_,
const ClientInfo & client_info_)
: user_id(user_id_)
, full_access(full_access_)
, use_default_roles(use_default_roles_)
, current_roles(current_roles_)
, readonly(settings_.readonly)
, allow_ddl(settings_.allow_ddl)
, allow_introspection(settings_.allow_introspection_functions)
, current_database(current_database_)
, interface(client_info_.interface)
, http_method(client_info_.http_method)
, address(client_info_.current_address.host())
, forwarded_address(client_info_.getLastForwardedFor())
, quota_key(client_info_.quota_key)
, initial_user((client_info_.initial_user != client_info_.current_user) ? client_info_.initial_user : "")
{
}
String ContextAccessParams::toString() const
{
WriteBufferFromOwnString out;
auto separator = [&] { return out.stringView().empty() ? "" : ", "; };
if (user_id)
out << separator() << "user_id = " << *user_id;
if (full_access)
out << separator() << "full_access = " << full_access;
if (use_default_roles)
out << separator() << "use_default_roles = " << use_default_roles;
if (current_roles && !current_roles->empty())
{
out << separator() << "current_roles = [";
for (size_t i = 0; i != current_roles->size(); ++i)
{
if (i)
out << ", ";
out << (*current_roles)[i];
}
out << "]";
}
if (readonly)
out << separator() << "readonly = " << readonly;
if (allow_ddl)
out << separator() << "allow_ddl = " << allow_ddl;
if (allow_introspection)
out << separator() << "allow_introspection = " << allow_introspection;
if (!current_database.empty())
out << separator() << "current_database = " << current_database;
out << separator() << "interface = " << magic_enum::enum_name(interface);
if (http_method != ClientInfo::HTTPMethod::UNKNOWN)
out << separator() << "http_method = " << magic_enum::enum_name(http_method);
if (!address.isWildcard())
out << separator() << "address = " << address.toString();
if (!forwarded_address.empty())
out << separator() << "forwarded_address = " << forwarded_address;
if (!quota_key.empty())
out << separator() << "quota_key = " << quota_key;
if (!initial_user.empty())
out << separator() << "initial_user = " << initial_user;
return out.str();
}
bool operator ==(const ContextAccessParams & left, const ContextAccessParams & right)
{
auto check_equals = [](const auto & x, const auto & y)
{
if constexpr (::detail::is_shared_ptr_v<std::remove_cvref_t<decltype(x)>>)
{
if (!x)
return !y;
else if (!y)
return false;
else
return *x == *y;
}
else
{
return x == y;
}
};
#define CONTEXT_ACCESS_PARAMS_EQUALS(name) \
if (!check_equals(left.name, right.name)) \
return false;
CONTEXT_ACCESS_PARAMS_EQUALS(user_id)
CONTEXT_ACCESS_PARAMS_EQUALS(full_access)
CONTEXT_ACCESS_PARAMS_EQUALS(use_default_roles)
CONTEXT_ACCESS_PARAMS_EQUALS(current_roles)
CONTEXT_ACCESS_PARAMS_EQUALS(readonly)
CONTEXT_ACCESS_PARAMS_EQUALS(allow_ddl)
CONTEXT_ACCESS_PARAMS_EQUALS(allow_introspection)
CONTEXT_ACCESS_PARAMS_EQUALS(current_database)
CONTEXT_ACCESS_PARAMS_EQUALS(interface)
CONTEXT_ACCESS_PARAMS_EQUALS(http_method)
CONTEXT_ACCESS_PARAMS_EQUALS(address)
CONTEXT_ACCESS_PARAMS_EQUALS(forwarded_address)
CONTEXT_ACCESS_PARAMS_EQUALS(quota_key)
CONTEXT_ACCESS_PARAMS_EQUALS(initial_user)
#undef CONTEXT_ACCESS_PARAMS_EQUALS
return true; /// All fields are equal, operator == must return true.
}
bool operator <(const ContextAccessParams & left, const ContextAccessParams & right)
{
auto check_less = [](const auto & x, const auto & y)
{
if constexpr (::detail::is_shared_ptr_v<std::remove_cvref_t<decltype(x)>>)
{
if (!x)
return y ? -1 : 0;
else if (!y)
return 1;
else if (*x == *y)
return 0;
else if (*x < *y)
return -1;
else
return 1;
}
else
{
if (x == y)
return 0;
else if (x < y)
return -1;
else
return 1;
}
};
#define CONTEXT_ACCESS_PARAMS_LESS(name) \
if (auto cmp = check_less(left.name, right.name); cmp != 0) \
return cmp < 0;
CONTEXT_ACCESS_PARAMS_LESS(user_id)
CONTEXT_ACCESS_PARAMS_LESS(full_access)
CONTEXT_ACCESS_PARAMS_LESS(use_default_roles)
CONTEXT_ACCESS_PARAMS_LESS(current_roles)
CONTEXT_ACCESS_PARAMS_LESS(readonly)
CONTEXT_ACCESS_PARAMS_LESS(allow_ddl)
CONTEXT_ACCESS_PARAMS_LESS(allow_introspection)
CONTEXT_ACCESS_PARAMS_LESS(current_database)
CONTEXT_ACCESS_PARAMS_LESS(interface)
CONTEXT_ACCESS_PARAMS_LESS(http_method)
CONTEXT_ACCESS_PARAMS_LESS(address)
CONTEXT_ACCESS_PARAMS_LESS(forwarded_address)
CONTEXT_ACCESS_PARAMS_LESS(quota_key)
CONTEXT_ACCESS_PARAMS_LESS(initial_user)
#undef CONTEXT_ACCESS_PARAMS_LESS
return false; /// All fields are equal, operator < must return false.
}
bool ContextAccessParams::dependsOnSettingName(std::string_view setting_name)
{
return (setting_name == "readonly") || (setting_name == "allow_ddl") || (setting_name == "allow_introspection_functions");
}
}

View File

@ -0,0 +1,67 @@
#pragma once
#include <Interpreters/ClientInfo.h>
#include <Core/UUID.h>
#include <optional>
#include <vector>
namespace DB
{
struct Settings;
/// Parameters which are used to calculate access rights and some related stuff like roles or constraints.
class ContextAccessParams
{
public:
ContextAccessParams(
const std::optional<UUID> user_id_,
bool full_access_,
bool use_default_roles_,
const std::shared_ptr<const std::vector<UUID>> & current_roles_,
const Settings & settings_,
const String & current_database_,
const ClientInfo & client_info_);
const std::optional<UUID> user_id;
/// Full access to everything without any limitations.
/// This is used for the global context.
const bool full_access;
const bool use_default_roles;
const std::shared_ptr<const std::vector<UUID>> current_roles;
const UInt64 readonly;
const bool allow_ddl;
const bool allow_introspection;
const String current_database;
const ClientInfo::Interface interface;
const ClientInfo::HTTPMethod http_method;
const Poco::Net::IPAddress address;
/// The last entry from comma separated list of X-Forwarded-For addresses.
/// Only the last proxy can be trusted (if any).
const String forwarded_address;
const String quota_key;
/// Initial user is used to combine row policies with.
const String initial_user;
/// Outputs `ContextAccessParams` to string for logging.
String toString() const;
friend bool operator <(const ContextAccessParams & left, const ContextAccessParams & right);
friend bool operator ==(const ContextAccessParams & left, const ContextAccessParams & right);
friend bool operator !=(const ContextAccessParams & left, const ContextAccessParams & right) { return !(left == right); }
friend bool operator >(const ContextAccessParams & left, const ContextAccessParams & right) { return right < left; }
friend bool operator <=(const ContextAccessParams & left, const ContextAccessParams & right) { return !(right < left); }
friend bool operator >=(const ContextAccessParams & left, const ContextAccessParams & right) { return !(left < right); }
static bool dependsOnSettingName(std::string_view setting_name);
};
}

View File

@ -46,6 +46,7 @@
#include <Parsers/ASTColumnDeclaration.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <Parsers/PRQL/ParserPRQLQuery.h>
#include <Processors/Formats/Impl/NullFormat.h>
#include <Processors/Formats/IInputFormat.h>
@ -72,6 +73,7 @@
#include <iostream>
#include <filesystem>
#include <map>
#include <memory>
#include <unordered_map>
#include "config_version.h"
@ -338,6 +340,8 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu
if (dialect == Dialect::kusto)
parser = std::make_unique<ParserKQLStatement>(end, global_context->getSettings().allow_settings_after_format_in_insert);
else if (dialect == Dialect::prql)
parser = std::make_unique<ParserPRQLQuery>(max_length, settings.max_parser_depth);
else
parser = std::make_unique<ParserQuery>(end, global_context->getSettings().allow_settings_after_format_in_insert);

View File

@ -105,6 +105,8 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
for (auto it = addresses.begin(); it != addresses.end();)
{
have_more_addresses_to_connect = it != std::prev(addresses.end());
if (connected)
disconnect();

View File

@ -159,6 +159,8 @@ public:
out->setAsyncCallback(async_callback);
}
bool haveMoreAddressesToConnect() const { return have_more_addresses_to_connect; }
private:
String host;
UInt16 port;
@ -227,6 +229,8 @@ private:
std::shared_ptr<WriteBuffer> maybe_compressed_out;
std::unique_ptr<NativeWriter> block_out;
bool have_more_addresses_to_connect = false;
/// Logger is created lazily, for avoid to run DNS request in constructor.
class LoggerWrapper
{

View File

@ -179,7 +179,7 @@ bool ConnectionEstablisherAsync::checkTimeout()
is_timeout_alarmed = true;
}
if (is_timeout_alarmed && !is_socket_ready)
if (is_timeout_alarmed && !is_socket_ready && !haveMoreAddressesToConnect())
{
/// In not async case timeout exception would be thrown and caught in ConnectionEstablisher::run,
/// but in async case we process timeout outside and cannot throw exception. So, we just save fail message.
@ -225,6 +225,11 @@ void ConnectionEstablisherAsync::resetResult()
}
}
bool ConnectionEstablisherAsync::haveMoreAddressesToConnect()
{
return !result.entry.isNull() && result.entry->haveMoreAddressesToConnect();
}
#endif
}

View File

@ -104,6 +104,8 @@ private:
void resetResult();
bool haveMoreAddressesToConnect();
ConnectionEstablisher connection_establisher;
TryResult result;
std::string fail_message;

View File

@ -353,6 +353,8 @@ bool HedgedConnections::resumePacketReceiver(const HedgedConnections::ReplicaLoc
if (replica_state.packet_receiver->isPacketReady())
{
/// Reset the socket timeout after some packet received
replica_state.packet_receiver->setTimeout(hedged_connections_factory.getConnectionTimeouts().receive_timeout);
last_received_packet = replica_state.packet_receiver->getPacket();
return true;
}

View File

@ -319,24 +319,21 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac
throw Exception(ErrorCodes::NO_AVAILABLE_REPLICA, "Logical error: no available replica");
Packet packet;
try
{
AsyncCallbackSetter async_setter(current_connection, std::move(async_callback));
try
packet = current_connection->receivePacket();
}
catch (Exception & e)
{
if (e.code() == ErrorCodes::UNKNOWN_PACKET_FROM_SERVER)
{
packet = current_connection->receivePacket();
}
catch (Exception & e)
{
if (e.code() == ErrorCodes::UNKNOWN_PACKET_FROM_SERVER)
{
/// Exception may happen when packet is received, e.g. when got unknown packet.
/// In this case, invalidate replica, so that we would not read from it anymore.
current_connection->disconnect();
invalidateReplica(state);
}
throw;
/// Exception may happen when packet is received, e.g. when got unknown packet.
/// In this case, invalidate replica, so that we would not read from it anymore.
current_connection->disconnect();
invalidateReplica(state);
}
throw;
}
switch (packet.type)

View File

@ -1,26 +1,4 @@
#include "Allocator.h"
/** Keep definition of this constant in cpp file; otherwise its value
* is inlined into allocator code making it impossible to override it
* in third-party code.
*
* Note: extern may seem redundant, but is actually needed due to bug in GCC.
* See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html
*/
#ifdef NDEBUG
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 128 * (1ULL << 20);
#else
/**
* In debug build, use small mmap threshold to reproduce more memory
* stomping bugs. Along with ASLR it will hopefully detect more issues than
* ASan. The program may fail due to the limit on number of memory mappings.
*
* Not too small to avoid too quick exhaust of memory mappings.
*/
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 16384;
#endif
template class Allocator<false, false>;
template class Allocator<true, false>;
template class Allocator<false, true>;
template class Allocator<true, true>;
template class Allocator<false>;
template class Allocator<true>;

View File

@ -36,51 +36,26 @@
#include <Common/Allocator_fwd.h>
/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
/**
* Many modern allocators (for example, tcmalloc) do not do a mremap for
* realloc, even in case of large enough chunks of memory. Although this allows
* you to increase performance and reduce memory consumption during realloc.
* To fix this, we do mremap manually if the chunk of memory is large enough.
* The threshold (64 MB) is chosen quite large, since changing the address
* space is very slow, especially in the case of a large number of threads. We
* expect that the set of operations mmap/something to do/mremap can only be
* performed about 1000 times per second.
*
* P.S. This is also required, because tcmalloc can not allocate a chunk of
* memory greater than 16 GB.
*
* P.P.S. Note that MMAP_THRESHOLD symbol is intentionally made weak. It allows
* to override it during linkage when using ClickHouse as a library in
* third-party applications which may already use own allocator doing mmaps
* in the implementation of alloc/realloc.
*/
extern const size_t MMAP_THRESHOLD;
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
namespace CurrentMetrics
{
extern const Metric MMappedAllocs;
extern const Metric MMappedAllocBytes;
}
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int CANNOT_MUNMAP;
extern const int CANNOT_MREMAP;
extern const int LOGICAL_ERROR;
}
}
/** Previously there was a code which tried to use manual mmap and mremap (clickhouse_mremap.h) for large allocations/reallocations (64MB+).
* Most modern allocators (including jemalloc) don't use mremap, so the idea was to take advantage from mremap system call for large reallocs.
* Actually jemalloc had support for mremap, but it was intentionally removed from codebase https://github.com/jemalloc/jemalloc/commit/e2deab7a751c8080c2b2cdcfd7b11887332be1bb.
* Our performance tests also shows that without manual mmap/mremap/munmap clickhouse is overall faster for about 1-2% and up to 5-7x for some types of queries.
* That is why we don't do manuall mmap/mremap/munmap here and completely rely on jemalloc for allocations of any size.
*/
/** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
* Also used in hash tables.
* The interface is different from std::allocator
@ -88,10 +63,8 @@ namespace ErrorCodes
* - passing the size into the `free` method;
* - by the presence of the `alignment` argument;
* - the possibility of zeroing memory (used in hash tables);
* - random hint address for mmap
* - mmap_threshold for using mmap less or more
*/
template <bool clear_memory_, bool mmap_populate>
template <bool clear_memory_>
class Allocator
{
public:
@ -109,7 +82,7 @@ public:
try
{
checkSize(size);
freeNoTrack(buf, size);
freeNoTrack(buf);
CurrentMemoryTracker::free(size);
}
catch (...)
@ -132,49 +105,26 @@ public:
/// nothing to do.
/// BTW, it's not possible to change alignment while doing realloc.
}
else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD
&& alignment <= MALLOC_MIN_ALIGNMENT)
else if (alignment <= MALLOC_MIN_ALIGNMENT)
{
/// Resize malloc'd memory region with no special alignment requirement.
CurrentMemoryTracker::realloc(old_size, new_size);
void * new_buf = ::realloc(buf, new_size);
if (nullptr == new_buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
{
DB::throwFromErrno(
fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
}
buf = new_buf;
if constexpr (clear_memory)
if (new_size > old_size)
memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
}
else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
{
/// Resize mmap'd memory region.
CurrentMemoryTracker::realloc(old_size, new_size);
// On apple and freebsd self-implemented mremap used (common/mremap.h)
buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE,
PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
if (MAP_FAILED == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.",
ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP);
/// No need for zero-fill, because mmap guarantees it.
}
else if (new_size < MMAP_THRESHOLD)
{
/// Small allocs that requires a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once.
CurrentMemoryTracker::realloc(old_size, new_size);
void * new_buf = allocNoTrack(new_size, alignment);
memcpy(new_buf, buf, std::min(old_size, new_size));
freeNoTrack(buf, old_size);
buf = new_buf;
}
else
{
/// Big allocs that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods.
void * new_buf = alloc(new_size, alignment);
memcpy(new_buf, buf, std::min(old_size, new_size));
free(buf, old_size);
@ -192,83 +142,38 @@ protected:
static constexpr bool clear_memory = clear_memory_;
// Freshly mmapped pages are copy-on-write references to a global zero page.
// On the first write, a page fault occurs, and an actual writable page is
// allocated. If we are going to use this memory soon, such as when resizing
// hash tables, it makes sense to pre-fault the pages by passing
// MAP_POPULATE to mmap(). This takes some time, but should be faster
// overall than having a hot loop interrupted by page faults.
// It is only supported on Linux.
static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS
#if defined(OS_LINUX)
| (mmap_populate ? MAP_POPULATE : 0)
#endif
;
private:
void * allocNoTrack(size_t size, size_t alignment)
{
void * buf;
size_t mmap_min_alignment = ::getPageSize();
if (size >= MMAP_THRESHOLD)
if (alignment <= MALLOC_MIN_ALIGNMENT)
{
if (alignment > mmap_min_alignment)
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS,
"Too large alignment {}: more than page size when allocating {}.",
ReadableSize(alignment), ReadableSize(size));
if constexpr (clear_memory)
buf = ::calloc(size, 1);
else
buf = ::malloc(size);
buf = mmap(getMmapHint(), size, PROT_READ | PROT_WRITE,
mmap_flags, -1, 0);
if (MAP_FAILED == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot mmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
/// No need for zero-fill, because mmap guarantees it.
CurrentMetrics::add(CurrentMetrics::MMappedAllocs);
CurrentMetrics::add(CurrentMetrics::MMappedAllocBytes, size);
if (nullptr == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
}
else
{
if (alignment <= MALLOC_MIN_ALIGNMENT)
{
if constexpr (clear_memory)
buf = ::calloc(size, 1);
else
buf = ::malloc(size);
buf = nullptr;
int res = posix_memalign(&buf, alignment, size);
if (nullptr == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
}
else
{
buf = nullptr;
int res = posix_memalign(&buf, alignment, size);
if (0 != res)
DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)),
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
if (0 != res)
DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)),
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
if constexpr (clear_memory)
memset(buf, 0, size);
}
if constexpr (clear_memory)
memset(buf, 0, size);
}
return buf;
}
void freeNoTrack(void * buf, size_t size)
void freeNoTrack(void * buf)
{
if (size >= MMAP_THRESHOLD)
{
if (0 != munmap(buf, size))
DB::throwFromErrno(fmt::format("Allocator: Cannot munmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_MUNMAP);
CurrentMetrics::sub(CurrentMetrics::MMappedAllocs);
CurrentMetrics::sub(CurrentMetrics::MMappedAllocBytes, size);
}
else
{
::free(buf);
}
::free(buf);
}
void checkSize(size_t size)
@ -277,21 +182,6 @@ private:
if (size >= 0x8000000000000000ULL)
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to allocator. It indicates an error.", size);
}
#ifndef NDEBUG
/// In debug builds, request mmap() at random addresses (a kind of ASLR), to
/// reproduce more memory stomping bugs. Note that Linux doesn't do it by
/// default. This may lead to worse TLB performance.
void * getMmapHint()
{
return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(thread_local_rng));
}
#else
void * getMmapHint()
{
return nullptr;
}
#endif
};
@ -367,7 +257,5 @@ constexpr size_t allocatorInitialBytes<AllocatorWithStackMemory<
/// Prevent implicit template instantiation of Allocator
extern template class Allocator<false, false>;
extern template class Allocator<true, false>;
extern template class Allocator<false, true>;
extern template class Allocator<true, true>;
extern template class Allocator<false>;
extern template class Allocator<true>;

View File

@ -3,7 +3,7 @@
* This file provides forward declarations for Allocator.
*/
template <bool clear_memory_, bool mmap_populate = false>
template <bool clear_memory_>
class Allocator;
template <typename Base, size_t N = 64, size_t Alignment = 1>

View File

@ -5,7 +5,6 @@ namespace DB
AsyncTaskExecutor::AsyncTaskExecutor(std::unique_ptr<AsyncTask> task_) : task(std::move(task_))
{
createFiber();
}
void AsyncTaskExecutor::resume()
@ -13,6 +12,10 @@ void AsyncTaskExecutor::resume()
if (routine_is_finished)
return;
/// Create fiber lazily on first resume() call.
if (!fiber)
createFiber();
if (!checkBeforeTaskResume())
return;
@ -22,6 +25,11 @@ void AsyncTaskExecutor::resume()
return;
resumeUnlocked();
/// Destroy fiber when it's finished.
if (routine_is_finished)
destroyFiber();
if (exception)
processException(exception);
}
@ -46,9 +54,8 @@ void AsyncTaskExecutor::cancel()
void AsyncTaskExecutor::restart()
{
std::lock_guard guard(fiber_lock);
if (fiber)
if (!routine_is_finished)
destroyFiber();
createFiber();
routine_is_finished = false;
}

View File

@ -173,8 +173,6 @@
M(PartsInMemory, "In-memory parts.") \
M(MMappedFiles, "Total number of mmapped files.") \
M(MMappedFileBytes, "Sum size of mmapped file regions.") \
M(MMappedAllocs, "Total number of mmapped allocations") \
M(MMappedAllocBytes, "Sum bytes of mmapped allocations") \
M(AsynchronousReadWait, "Number of threads waiting for asynchronous read.") \
M(PendingAsyncInsert, "Number of asynchronous inserts that are waiting for flush.") \
M(KafkaConsumers, "Number of active Kafka consumers") \

View File

@ -0,0 +1,56 @@
#include <Common/HTTPHeaderFilter.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h>
#include <re2/re2.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
void HTTPHeaderFilter::checkHeaders(const HTTPHeaderEntries & entries) const
{
std::lock_guard guard(mutex);
for (const auto & entry : entries)
{
if (forbidden_headers.contains(entry.name))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "HTTP header \"{}\" is forbidden in configuration file, "
"see <http_forbid_headers>", entry.name);
for (const auto & header_regex : forbidden_headers_regexp)
if (re2::RE2::FullMatch(entry.name, header_regex))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "HTTP header \"{}\" is forbidden in configuration file, "
"see <http_forbid_headers>", entry.name);
}
}
void HTTPHeaderFilter::setValuesFromConfig(const Poco::Util::AbstractConfiguration & config)
{
std::lock_guard guard(mutex);
if (config.has("http_forbid_headers"))
{
std::vector<std::string> keys;
config.keys("http_forbid_headers", keys);
for (const auto & key : keys)
{
if (startsWith(key, "header_regexp"))
forbidden_headers_regexp.push_back(config.getString("http_forbid_headers." + key));
else if (startsWith(key, "header"))
forbidden_headers.insert(config.getString("http_forbid_headers." + key));
}
}
else
{
forbidden_headers.clear();
forbidden_headers_regexp.clear();
}
}
}

View File

@ -0,0 +1,27 @@
#pragma once
#include <IO/HTTPHeaderEntries.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <vector>
#include <unordered_set>
#include <mutex>
namespace DB
{
class HTTPHeaderFilter
{
public:
void setValuesFromConfig(const Poco::Util::AbstractConfiguration & config);
void checkHeaders(const HTTPHeaderEntries & entries) const;
private:
std::unordered_set<std::string> forbidden_headers;
std::vector<std::string> forbidden_headers_regexp;
mutable std::mutex mutex;
};
}

View File

@ -8,7 +8,7 @@
* table, so it makes sense to pre-fault the pages so that page faults don't
* interrupt the resize loop. Set the allocator parameter accordingly.
*/
using HashTableAllocator = Allocator<true /* clear_memory */, true /* mmap_populate */>;
using HashTableAllocator = Allocator<true /* clear_memory */>;
template <size_t initial_bytes = 64>
using HashTableAllocatorWithStackMemory = AllocatorWithStackMemory<HashTableAllocator, initial_bytes>;

View File

@ -3,8 +3,10 @@
#include "KeeperException.h"
#include "TestKeeper.h"
#include <functional>
#include <filesystem>
#include <functional>
#include <ranges>
#include <vector>
#include <Common/ZooKeeper/Types.h>
#include <Common/ZooKeeper/ZooKeeperCommon.h>
@ -350,15 +352,35 @@ void ZooKeeper::createIfNotExists(const std::string & path, const std::string &
void ZooKeeper::createAncestors(const std::string & path)
{
size_t pos = 1;
std::string data;
std::string path_created; // Ignored
std::vector<std::string> pending_nodes;
size_t last_pos = path.rfind('/');
if (last_pos == std::string::npos || last_pos == 0)
return;
std::string current_node = path.substr(0, last_pos);
while (true)
{
pos = path.find('/', pos);
if (pos == std::string::npos)
Coordination::Error code = createImpl(current_node, data, CreateMode::Persistent, path_created);
if (code == Coordination::Error::ZNONODE)
{
/// The parent node doesn't exist. Save the current node and try with the parent
last_pos = current_node.rfind('/');
if (last_pos == std::string::npos || last_pos == 0)
throw KeeperException(code, path);
pending_nodes.emplace_back(std::move(current_node));
current_node = path.substr(0, last_pos);
}
else if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS)
break;
createIfNotExists(path.substr(0, pos), "");
++pos;
else
throw KeeperException(code, path);
}
for (const std::string & pending : pending_nodes | std::views::reverse)
createIfNotExists(pending, data);
}
void ZooKeeper::checkExistsAndGetCreateAncestorsOps(const std::string & path, Coordination::Requests & requests)

View File

@ -54,6 +54,7 @@
#cmakedefine01 USE_BORINGSSL
#cmakedefine01 USE_BLAKE3
#cmakedefine01 USE_SKIM
#cmakedefine01 USE_PRQL
#cmakedefine01 USE_OPENSSL_INTREE
#cmakedefine01 USE_ULID
#cmakedefine01 FIU_ENABLE

View File

@ -41,9 +41,38 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config)
initializeDisks(config);
}
namespace
{
bool diskValidator(const Poco::Util::AbstractConfiguration & config, const std::string & disk_config_prefix)
{
const auto disk_type = config.getString(disk_config_prefix + ".type", "local");
using namespace std::literals;
static constexpr std::array supported_disk_types
{
"s3"sv,
"s3_plain"sv,
"local"sv
};
if (std::all_of(
supported_disk_types.begin(),
supported_disk_types.end(),
[&](const auto supported_type) { return disk_type != supported_type; }))
{
LOG_INFO(&Poco::Logger::get("KeeperContext"), "Disk type '{}' is not supported for Keeper", disk_type);
return false;
}
return true;
}
}
void KeeperContext::initializeDisks(const Poco::Util::AbstractConfiguration & config)
{
disk_selector->initialize(config, "storage_configuration.disks", Context::getGlobalContextInstance());
disk_selector->initialize(config, "storage_configuration.disks", Context::getGlobalContextInstance(), diskValidator);
log_storage = getLogsPathFromConfig(config);

View File

@ -473,23 +473,30 @@ void KeeperDispatcher::shutdown()
session_to_response_callback.clear();
}
// if there is no leader, there is no reason to do CLOSE because it's a write request
if (server && hasLeader() && !close_requests.empty())
if (server && !close_requests.empty())
{
LOG_INFO(log, "Trying to close {} session(s)", close_requests.size());
const auto raft_result = server->putRequestBatch(close_requests);
auto sessions_closing_done_promise = std::make_shared<std::promise<void>>();
auto sessions_closing_done = sessions_closing_done_promise->get_future();
raft_result->when_ready([my_sessions_closing_done_promise = std::move(sessions_closing_done_promise)](
nuraft::cmd_result<nuraft::ptr<nuraft::buffer>> & /*result*/,
nuraft::ptr<std::exception> & /*exception*/) { my_sessions_closing_done_promise->set_value(); });
// if there is no leader, there is no reason to do CLOSE because it's a write request
if (hasLeader())
{
LOG_INFO(log, "Trying to close {} session(s)", close_requests.size());
const auto raft_result = server->putRequestBatch(close_requests);
auto sessions_closing_done_promise = std::make_shared<std::promise<void>>();
auto sessions_closing_done = sessions_closing_done_promise->get_future();
raft_result->when_ready([my_sessions_closing_done_promise = std::move(sessions_closing_done_promise)](
nuraft::cmd_result<nuraft::ptr<nuraft::buffer>> & /*result*/,
nuraft::ptr<std::exception> & /*exception*/) { my_sessions_closing_done_promise->set_value(); });
auto session_shutdown_timeout = configuration_and_settings->coordination_settings->session_shutdown_timeout.totalMilliseconds();
if (sessions_closing_done.wait_for(std::chrono::milliseconds(session_shutdown_timeout)) != std::future_status::ready)
LOG_WARNING(
log,
"Failed to close sessions in {}ms. If they are not closed, they will be closed after session timeout.",
session_shutdown_timeout);
auto session_shutdown_timeout = configuration_and_settings->coordination_settings->session_shutdown_timeout.totalMilliseconds();
if (sessions_closing_done.wait_for(std::chrono::milliseconds(session_shutdown_timeout)) != std::future_status::ready)
LOG_WARNING(
log,
"Failed to close sessions in {}ms. If they are not closed, they will be closed after session timeout.",
session_shutdown_timeout);
}
else
{
LOG_INFO(log, "Sessions cannot be closed during shutdown because there is no active leader");
}
}
if (server)

View File

@ -390,7 +390,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
/// maybe some logs were preprocessed with log idx larger than the snapshot idx
/// we have to apply them to the new storage
storage->applyUncommittedState(*snapshot_deserialization_result.storage, s.get_last_log_idx());
storage->applyUncommittedState(*snapshot_deserialization_result.storage, snapshot_deserialization_result.storage->getZXID());
storage = std::move(snapshot_deserialization_result.storage);
latest_snapshot_meta = snapshot_deserialization_result.snapshot_meta;
cluster_config = snapshot_deserialization_result.cluster_config;

View File

@ -48,7 +48,11 @@ inline auto scaleMultiplier(UInt32 scale)
/** Components of DecimalX value:
* whole - represents whole part of decimal, can be negative or positive.
* fractional - for fractional part of decimal, always positive.
* fractional - for fractional part of decimal.
*
* 0.123 represents 0 / 0.123
* -0.123 represents 0 / -0.123
* -1.123 represents -1 / 0.123
*/
template <typename DecimalType>
struct DecimalComponents

View File

@ -40,9 +40,9 @@ namespace MySQLReplication
void EventHeader::dump(WriteBuffer & out) const
{
out << "\n=== " << to_string(this->type) << " ===" << '\n';
out << "\n=== " << magic_enum::enum_name(this->type) << " ===" << '\n';
out << "Timestamp: " << this->timestamp << '\n';
out << "Event Type: " << to_string(this->type) << '\n';
out << "Event Type: " << magic_enum::enum_name(this->type) << '\n';
out << "Server ID: " << this->server_id << '\n';
out << "Event Size: " << this->event_size << '\n';
out << "Log Pos: " << this->log_pos << '\n';
@ -121,6 +121,17 @@ namespace MySQLReplication
{
typ = QUERY_SAVEPOINT;
}
// https://dev.mysql.com/worklog/task/?id=13355
// When doing query "CREATE TABLE xx AS SELECT", the binlog will be
// "CREATE TABLE ... START TRANSACTION", the DDL will be failed
// so, just ignore the "START TRANSACTION" suffix
if (query.ends_with("START TRANSACTION"))
{
auto pos = query.rfind("START TRANSACTION");
if (pos > 0)
query.resize(pos);
}
}
void QueryEvent::dump(WriteBuffer & out) const

View File

@ -120,22 +120,6 @@ namespace MySQLReplication
BINLOG_CHECKSUM_ALG_UNDEF = 255
};
inline String to_string(BinlogChecksumAlg type)
{
switch (type)
{
case BINLOG_CHECKSUM_ALG_OFF:
return "BINLOG_CHECKSUM_ALG_OFF";
case BINLOG_CHECKSUM_ALG_CRC32:
return "BINLOG_CHECKSUM_ALG_CRC32";
case BINLOG_CHECKSUM_ALG_ENUM_END:
return "BINLOG_CHECKSUM_ALG_ENUM_END";
case BINLOG_CHECKSUM_ALG_UNDEF:
return "BINLOG_CHECKSUM_ALG_UNDEF";
}
return std::string("Unknown checksum alg: ") + std::to_string(static_cast<int>(type));
}
/// http://dev.mysql.com/doc/internals/en/binlog-event-type.html
enum EventType
{
@ -187,102 +171,6 @@ namespace MySQLReplication
MARIA_START_ENCRYPTION_EVENT = 164,
};
inline String to_string(EventType type)
{
switch (type)
{
case START_EVENT_V3:
return "StartEventV3";
case QUERY_EVENT:
return "QueryEvent";
case STOP_EVENT:
return "StopEvent";
case ROTATE_EVENT:
return "RotateEvent";
case INT_VAR_EVENT:
return "IntVarEvent";
case LOAD_EVENT:
return "LoadEvent";
case SLAVE_EVENT:
return "SlaveEvent";
case CREATE_FILE_EVENT:
return "CreateFileEvent";
case APPEND_BLOCK_EVENT:
return "AppendBlockEvent";
case EXEC_LOAD_EVENT:
return "ExecLoadEvent";
case DELETE_FILE_EVENT:
return "DeleteFileEvent";
case NEW_LOAD_EVENT:
return "NewLoadEvent";
case RAND_EVENT:
return "RandEvent";
case USER_VAR_EVENT:
return "UserVarEvent";
case FORMAT_DESCRIPTION_EVENT:
return "FormatDescriptionEvent";
case XID_EVENT:
return "XIDEvent";
case BEGIN_LOAD_QUERY_EVENT:
return "BeginLoadQueryEvent";
case EXECUTE_LOAD_QUERY_EVENT:
return "ExecuteLoadQueryEvent";
case TABLE_MAP_EVENT:
return "TableMapEvent";
case WRITE_ROWS_EVENT_V0:
return "WriteRowsEventV0";
case UPDATE_ROWS_EVENT_V0:
return "UpdateRowsEventV0";
case DELETE_ROWS_EVENT_V0:
return "DeleteRowsEventV0";
case WRITE_ROWS_EVENT_V1:
return "WriteRowsEventV1";
case UPDATE_ROWS_EVENT_V1:
return "UpdateRowsEventV1";
case DELETE_ROWS_EVENT_V1:
return "DeleteRowsEventV1";
case INCIDENT_EVENT:
return "IncidentEvent";
case HEARTBEAT_EVENT:
return "HeartbeatEvent";
case IGNORABLE_EVENT:
return "IgnorableEvent";
case ROWS_QUERY_EVENT:
return "RowsQueryEvent";
case WRITE_ROWS_EVENT_V2:
return "WriteRowsEventV2";
case UPDATE_ROWS_EVENT_V2:
return "UpdateRowsEventV2";
case DELETE_ROWS_EVENT_V2:
return "DeleteRowsEventV2";
case GTID_EVENT:
return "GTIDEvent";
case ANONYMOUS_GTID_EVENT:
return "AnonymousGTIDEvent";
case PREVIOUS_GTIDS_EVENT:
return "PreviousGTIDsEvent";
case TRANSACTION_CONTEXT_EVENT:
return "TransactionContextEvent";
case VIEW_CHANGE_EVENT:
return "ViewChangeEvent";
case XA_PREPARE_LOG_EVENT:
return "XAPrepareLogEvent";
case MARIA_ANNOTATE_ROWS_EVENT:
return "MariaAnnotateRowsEvent";
case MARIA_BINLOG_CHECKPOINT_EVENT:
return "MariaBinlogCheckpointEvent";
case MARIA_GTID_EVENT:
return "MariaGTIDEvent";
case MARIA_GTID_LIST_EVENT:
return "MariaGTIDListEvent";
case MARIA_START_ENCRYPTION_EVENT:
return "MariaStartEncryptionEvent";
default:
break;
}
return std::string("Unknown event: ") + std::to_string(static_cast<int>(type));
}
enum MySQLEventType
{
MYSQL_UNHANDLED_EVENT = 0,

View File

@ -127,8 +127,8 @@ class IColumn;
\
M(Bool, optimize_move_to_prewhere, true, "Allows disabling WHERE to PREWHERE optimization in SELECT queries from MergeTree.", 0) \
M(Bool, optimize_move_to_prewhere_if_final, false, "If query has `FINAL`, the optimization `move_to_prewhere` is not always correct and it is enabled only if both settings `optimize_move_to_prewhere` and `optimize_move_to_prewhere_if_final` are turned on", 0) \
M(Bool, move_all_conditions_to_prewhere, false, "Move all viable conditions from WHERE to PREWHERE", 0) \
M(Bool, enable_multiple_prewhere_read_steps, false, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \
M(Bool, move_all_conditions_to_prewhere, true, "Move all viable conditions from WHERE to PREWHERE", 0) \
M(Bool, enable_multiple_prewhere_read_steps, true, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \
\
M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \
M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \
@ -875,6 +875,7 @@ class IColumn;
M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \
M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \
M(Bool, input_format_csv_trim_whitespaces, true, "Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings", 0) \
M(Bool, input_format_csv_use_default_on_bad_values, false, "Allow to set default value to column when CSV field deserialization failed on bad value", 0) \
M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \
M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \
M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \

View File

@ -138,7 +138,9 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation, ErrorCodes::BAD_ARGUMENTS,
IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS,
{{"clickhouse", Dialect::clickhouse},
{"kusto", Dialect::kusto}})
{"kusto", Dialect::kusto},
{"kusto", Dialect::kusto},
{"prql", Dialect::prql}})
// FIXME: do not add 'kusto_auto' to the list. Maybe remove it from code completely?
IMPLEMENT_SETTING_ENUM(ParallelReplicasCustomKeyFilterType, ErrorCodes::BAD_ARGUMENTS,

View File

@ -207,6 +207,7 @@ enum class Dialect
clickhouse,
kusto,
kusto_auto,
prql,
};
DECLARE_SETTING_ENUM(Dialect)

View File

@ -814,8 +814,8 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
{
auto query_context = Context::createCopy(getContext());
query_context->makeQueryContext();
query_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
query_context->getClientInfo().is_replicated_database_internal = true;
query_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY);
query_context->setQueryKindReplicatedDatabaseInternal();
query_context->setCurrentDatabase(getDatabaseName());
query_context->setCurrentQueryId("");
auto txn = std::make_shared<ZooKeeperMetadataTransaction>(current_zookeeper, zookeeper_path, false, "");

View File

@ -3,6 +3,7 @@
#if USE_MYSQL
#include <Databases/MySQL/MaterializedMySQLSyncThread.h>
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
#include <cstdlib>
#include <random>
#include <string_view>
@ -59,7 +60,7 @@ static ContextMutablePtr createQueryContext(ContextPtr context)
query_context->setSettings(new_query_settings);
query_context->setInternalQuery(true);
query_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
query_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY);
query_context->setCurrentQueryId(""); // generate random query_id
return query_context;
}
@ -151,61 +152,6 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection, const S
}
}
static std::tuple<String, String> tryExtractTableNameFromDDL(const String & ddl)
{
String table_name;
String database_name;
if (ddl.empty()) return std::make_tuple(database_name, table_name);
bool parse_failed = false;
Tokens tokens(ddl.data(), ddl.data() + ddl.size());
IParser::Pos pos(tokens, 0);
Expected expected;
ASTPtr res;
ASTPtr table;
if (ParserKeyword("CREATE TEMPORARY TABLE").ignore(pos, expected) || ParserKeyword("CREATE TABLE").ignore(pos, expected))
{
ParserKeyword("IF NOT EXISTS").ignore(pos, expected);
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
parse_failed = true;
}
else if (ParserKeyword("ALTER TABLE").ignore(pos, expected))
{
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
parse_failed = true;
}
else if (ParserKeyword("DROP TABLE").ignore(pos, expected) || ParserKeyword("DROP TEMPORARY TABLE").ignore(pos, expected))
{
ParserKeyword("IF EXISTS").ignore(pos, expected);
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
parse_failed = true;
}
else if (ParserKeyword("TRUNCATE").ignore(pos, expected))
{
ParserKeyword("TABLE").ignore(pos, expected);
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
parse_failed = true;
}
else if (ParserKeyword("RENAME TABLE").ignore(pos, expected))
{
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
parse_failed = true;
}
else
{
parse_failed = true;
}
if (!parse_failed)
{
if (auto table_id = table->as<ASTTableIdentifier>()->getTableId())
{
database_name = table_id.database_name;
table_name = table_id.table_name;
}
}
return std::make_tuple(database_name, table_name);
}
MaterializedMySQLSyncThread::MaterializedMySQLSyncThread(
ContextPtr context_,
const String & database_name_,
@ -868,14 +814,12 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_even
String query = query_event.query;
if (!materialized_tables_list.empty())
{
auto [ddl_database_name, ddl_table_name] = tryExtractTableNameFromDDL(query_event.query);
if (!ddl_table_name.empty())
auto table_id = tryParseTableIDFromDDL(query, query_event.schema);
if (!table_id.table_name.empty())
{
ddl_database_name = ddl_database_name.empty() ? query_event.schema: ddl_database_name;
if (ddl_database_name != mysql_database_name || !materialized_tables_list.contains(ddl_table_name))
if (table_id.database_name != mysql_database_name || !materialized_tables_list.contains(table_id.table_name))
{
LOG_DEBUG(log, "Skip MySQL DDL: \n {}", query_event.query);
LOG_DEBUG(log, "Skip MySQL DDL for {}.{}:\n{}", table_id.database_name, table_id.table_name, query);
return;
}
}

View File

@ -0,0 +1,185 @@
#include "config.h"
#include <gtest/gtest.h>
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
using namespace DB;
struct ParseTableIDFromDDLTestCase
{
String query;
String database_name;
String table_name;
ParseTableIDFromDDLTestCase(
const String & query_,
const String & database_name_,
const String & table_name_)
: query(query_)
, database_name(database_name_)
, table_name(table_name_)
{
}
};
std::ostream & operator<<(std::ostream & ostr, const ParseTableIDFromDDLTestCase & test_case)
{
return ostr << '"' << test_case.query << "\" extracts `" << test_case.database_name << "`.`" << test_case.table_name << "`";
}
class ParseTableIDFromDDLTest : public ::testing::TestWithParam<ParseTableIDFromDDLTestCase>
{
};
TEST_P(ParseTableIDFromDDLTest, parse)
{
const auto & [query, expected_database_name, expected_table_name] = GetParam();
auto table_id = tryParseTableIDFromDDL(query, "default");
EXPECT_EQ(expected_database_name, table_id.database_name);
EXPECT_EQ(expected_table_name, table_id.table_name);
}
INSTANTIATE_TEST_SUITE_P(MaterializedMySQL, ParseTableIDFromDDLTest, ::testing::ValuesIn(std::initializer_list<ParseTableIDFromDDLTestCase>{
{
"SELECT * FROM db.table",
"",
""
},
{
"CREATE TEMPORARY TABLE db.table",
"db",
"table"
},
{
"CREATE TEMPORARY TABLE IF NOT EXISTS db.table",
"db",
"table"
},
{
"CREATE TEMPORARY TABLE table",
"default",
"table"
},
{
"CREATE TEMPORARY TABLE IF NOT EXISTS table",
"default",
"table"
},
{
"CREATE TABLE db.table",
"db",
"table"
},
{
"CREATE TABLE IF NOT EXISTS db.table",
"db",
"table"
},
{
"CREATE TABLE table",
"default",
"table"
},
{
"CREATE TABLE IF NOT EXISTS table",
"default",
"table"
},
{
"ALTER TABLE db.table",
"db",
"table"
},
{
"ALTER TABLE table",
"default",
"table"
},
{
"DROP TABLE db.table",
"db",
"table"
},
{
"DROP TABLE IF EXISTS db.table",
"db",
"table"
},
{
"DROP TABLE table",
"default",
"table"
},
{
"DROP TABLE IF EXISTS table",
"default",
"table"
},
{
"DROP TEMPORARY TABLE db.table",
"db",
"table"
},
{
"DROP TEMPORARY TABLE IF EXISTS db.table",
"db",
"table"
},
{
"DROP TEMPORARY TABLE table",
"default",
"table"
},
{
"DROP TEMPORARY TABLE IF EXISTS table",
"default",
"table"
},
{
"TRUNCATE db.table",
"db",
"table"
},
{
"TRUNCATE TABLE db.table",
"db",
"table"
},
{
"TRUNCATE table1",
"default",
"table1"
},
{
"TRUNCATE TABLE table",
"default",
"table"
},
{
"RENAME TABLE db.table",
"db",
"table"
},
{
"RENAME TABLE table",
"default",
"table"
},
{
"DROP DATABASE db",
"",
""
},
{
"DROP DATA`BASE db",
"",
""
},
{
"NOT A SQL",
"",
""
},
}));

View File

@ -0,0 +1,44 @@
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/CommonParsers.h>
#include <Parsers/ExpressionElementParsers.h>
namespace DB
{
StorageID tryParseTableIDFromDDL(const String & query, const String & default_database_name)
{
bool is_ddl = false;
Tokens tokens(query.data(), query.data() + query.size());
IParser::Pos pos(tokens, 0);
Expected expected;
if (ParserKeyword("CREATE TEMPORARY TABLE").ignore(pos, expected) || ParserKeyword("CREATE TABLE").ignore(pos, expected))
{
ParserKeyword("IF NOT EXISTS").ignore(pos, expected);
is_ddl = true;
}
else if (ParserKeyword("ALTER TABLE").ignore(pos, expected) || ParserKeyword("RENAME TABLE").ignore(pos, expected))
{
is_ddl = true;
}
else if (ParserKeyword("DROP TABLE").ignore(pos, expected) || ParserKeyword("DROP TEMPORARY TABLE").ignore(pos, expected))
{
ParserKeyword("IF EXISTS").ignore(pos, expected);
is_ddl = true;
}
else if (ParserKeyword("TRUNCATE").ignore(pos, expected))
{
ParserKeyword("TABLE").ignore(pos, expected);
is_ddl = true;
}
ASTPtr table;
if (!is_ddl || !ParserCompoundIdentifier(true).parse(pos, table, expected))
return StorageID::createEmpty();
auto table_id = table->as<ASTTableIdentifier>()->getTableId();
if (table_id.database_name.empty())
table_id.database_name = default_database_name;
return table_id;
}
}

View File

@ -0,0 +1,11 @@
#pragma once
#include <base/types.h>
#include <Storages/IStorage.h>
namespace DB
{
StorageID tryParseTableIDFromDDL(const String & query, const String & default_database_name);
}

View File

@ -257,7 +257,6 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory)
const auto & headers_prefix = settings_config_prefix + ".headers";
if (config.has(headers_prefix))
{
Poco::Util::AbstractConfiguration::Keys config_keys;
@ -297,7 +296,10 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory)
auto context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix);
if (created_from_ddl)
{
context->getRemoteHostFilter().checkURL(Poco::URI(configuration.url));
context->getHTTPHeaderFilter().checkHeaders(configuration.header_entries);
}
return std::make_unique<HTTPDictionarySource>(dict_struct, configuration, credentials, sample_block, context);
};

View File

@ -27,7 +27,7 @@ void DiskSelector::assertInitialized() const
}
void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context)
void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator)
{
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(config_prefix, keys);
@ -46,6 +46,9 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config,
auto disk_config_prefix = config_prefix + "." + disk_name;
if (disk_validator && !disk_validator(config, disk_config_prefix))
continue;
disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context, disks));
}
if (!has_default_disk)

View File

@ -23,7 +23,8 @@ public:
DiskSelector() = default;
DiskSelector(const DiskSelector & from) = default;
void initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context);
using DiskValidator = std::function<bool(const Poco::Util::AbstractConfiguration & config, const String & disk_config_prefix)>;
void initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator = {});
DiskSelectorPtr updateFromConfig(
const Poco::Util::AbstractConfiguration & config,

View File

@ -60,7 +60,7 @@ ClientConfigurationPerRequest ProxyResolverConfiguration::getConfiguration(const
{
auto resolved_endpoint = endpoint;
resolved_endpoint.setHost(resolved_hosts[i].toString());
session = makeHTTPSession(resolved_endpoint, timeouts, false);
session = makeHTTPSession(resolved_endpoint, timeouts);
try
{

View File

@ -23,10 +23,6 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_)
: TemporaryFileOnDisk(disk_, "")
{}
TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_, CurrentMetrics::Metric metric_scope)
: TemporaryFileOnDisk(disk_)
{

View File

@ -16,9 +16,8 @@ using DiskPtr = std::shared_ptr<IDisk>;
class TemporaryFileOnDisk
{
public:
explicit TemporaryFileOnDisk(const DiskPtr & disk_);
explicit TemporaryFileOnDisk(const DiskPtr & disk_, CurrentMetrics::Metric metric_scope);
explicit TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix);
explicit TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix = "tmp");
~TemporaryFileOnDisk();

View File

@ -32,7 +32,7 @@ namespace
/// We need a unique name for a created custom disk, but it needs to be the same
/// after table is reattached or server is restarted, so take a hash of the disk
/// configuration serialized ast as a disk name suffix.
auto disk_setting_string = serializeAST(function, true);
auto disk_setting_string = serializeAST(function);
disk_name = DiskSelector::TMP_INTERNAL_DISK_PREFIX
+ toString(sipHash128(disk_setting_string.data(), disk_setting_string.size()));
}

View File

@ -73,6 +73,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces;
format_settings.csv.allow_whitespace_or_tab_as_delimiter = settings.input_format_csv_allow_whitespace_or_tab_as_delimiter;
format_settings.csv.allow_variable_number_of_columns = settings.input_format_csv_allow_variable_number_of_columns;
format_settings.csv.use_default_on_bad_values = settings.input_format_csv_use_default_on_bad_values;
format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter;
format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter;
format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter;

View File

@ -152,6 +152,7 @@ struct FormatSettings
bool trim_whitespaces = true;
bool allow_whitespace_or_tab_as_delimiter = false;
bool allow_variable_number_of_columns = false;
bool use_default_on_bad_values = false;
} csv;
struct HiveText

View File

@ -19,6 +19,9 @@
namespace DB
{
static constexpr auto microsecond_multiplier = 1000000;
static constexpr auto millisecond_multiplier = 1000;
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
@ -1377,6 +1380,36 @@ struct ToRelativeSecondNumImpl
using FactorTransform = ZeroTransform;
};
template <Int64 scale_multiplier>
struct ToRelativeSubsecondNumImpl
{
static constexpr auto name = "toRelativeSubsecondNumImpl";
static inline Int64 execute(const DateTime64 & t, DateTime64::NativeType scale, const DateLUTImpl &)
{
static_assert(scale_multiplier == 1000 || scale_multiplier == 1000000);
if (scale == scale_multiplier)
return t.value;
if (scale > scale_multiplier)
return t.value / (scale / scale_multiplier);
return t.value * (scale_multiplier / scale);
}
static inline Int64 execute(UInt32 t, const DateLUTImpl &)
{
return t * scale_multiplier;
}
static inline Int64 execute(Int32 d, const DateLUTImpl & time_zone)
{
return static_cast<Int64>(time_zone.fromDayNum(ExtendedDayNum(d))) * scale_multiplier;
}
static inline Int64 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return static_cast<Int64>(time_zone.fromDayNum(DayNum(d)) * scale_multiplier);
}
using FactorTransform = ZeroTransform;
};
struct ToYYYYMMImpl
{
static constexpr auto name = "toYYYYMM";
@ -1476,25 +1509,47 @@ struct ToYYYYMMDDhhmmssImpl
using FactorTransform = ZeroTransform;
};
struct DateTimeComponentsWithFractionalPart : public DateLUTImpl::DateTimeComponents
{
UInt16 millisecond;
UInt16 microsecond;
};
struct ToDateTimeComponentsImpl
{
static constexpr auto name = "toDateTimeComponents";
static inline DateLUTImpl::DateTimeComponents execute(Int64 t, const DateLUTImpl & time_zone)
static inline DateTimeComponentsWithFractionalPart execute(const DateTime64 & t, DateTime64::NativeType scale_multiplier, const DateLUTImpl & time_zone)
{
return time_zone.toDateTimeComponents(t);
auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier);
if (t.value < 0 && components.fractional)
{
components.fractional = scale_multiplier + (components.whole ? Int64(-1) : Int64(1)) * components.fractional;
--components.whole;
}
Int64 fractional = components.fractional;
if (scale_multiplier > microsecond_multiplier)
fractional = fractional / (scale_multiplier / microsecond_multiplier);
else if (scale_multiplier < microsecond_multiplier)
fractional = fractional * (microsecond_multiplier / scale_multiplier);
constexpr Int64 divider = microsecond_multiplier/ millisecond_multiplier;
UInt16 millisecond = static_cast<UInt16>(fractional / divider);
UInt16 microsecond = static_cast<UInt16>(fractional % divider);
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(components.whole), millisecond, microsecond};
}
static inline DateLUTImpl::DateTimeComponents execute(UInt32 t, const DateLUTImpl & time_zone)
static inline DateTimeComponentsWithFractionalPart execute(UInt32 t, const DateLUTImpl & time_zone)
{
return time_zone.toDateTimeComponents(static_cast<DateLUTImpl::Time>(t));
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(static_cast<DateLUTImpl::Time>(t)), 0, 0};
}
static inline DateLUTImpl::DateTimeComponents execute(Int32 d, const DateLUTImpl & time_zone)
static inline DateTimeComponentsWithFractionalPart execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toDateTimeComponents(ExtendedDayNum(d));
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(ExtendedDayNum(d)), 0, 0};
}
static inline DateLUTImpl::DateTimeComponents execute(UInt16 d, const DateLUTImpl & time_zone)
static inline DateTimeComponentsWithFractionalPart execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toDateTimeComponents(DayNum(d));
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(DayNum(d)), 0, 0};
}
using FactorTransform = ZeroTransform;

View File

@ -79,28 +79,51 @@ namespace impl
UInt64 key1 = 0;
};
static SipHashKey parseSipHashKey(const ColumnWithTypeAndName & key)
struct SipHashKeyColumns
{
SipHashKey ret{};
ColumnPtr key0;
ColumnPtr key1;
bool is_const;
const auto * tuple = checkAndGetColumn<ColumnTuple>(key.column.get());
size_t size() const
{
assert(key0 && key1);
assert(key0->size() == key1->size());
return key0->size();
}
SipHashKey getKey(size_t i) const
{
if (is_const)
i = 0;
const auto & key0data = assert_cast<const ColumnUInt64 &>(*key0).getData();
const auto & key1data = assert_cast<const ColumnUInt64 &>(*key1).getData();
return {key0data[i], key1data[i]};
}
};
static SipHashKeyColumns parseSipHashKeyColumns(const ColumnWithTypeAndName & key)
{
const ColumnTuple * tuple = nullptr;
const auto * column = key.column.get();
bool is_const = false;
if (isColumnConst(*column))
{
is_const = true;
tuple = checkAndGetColumnConstData<ColumnTuple>(column);
}
else
tuple = checkAndGetColumn<ColumnTuple>(column);
if (!tuple)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "key must be a tuple");
if (tuple->tupleSize() != 2)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64");
if (tuple->empty())
return ret;
if (const auto * key0col = checkAndGetColumn<ColumnUInt64>(&(tuple->getColumn(0))))
ret.key0 = key0col->get64(0);
else
SipHashKeyColumns ret{tuple->getColumnPtr(0), tuple->getColumnPtr(1), is_const};
assert(ret.key0);
if (!checkColumn<ColumnUInt64>(*ret.key0))
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "first element of the key tuple is not UInt64");
if (const auto * key1col = checkAndGetColumn<ColumnUInt64>(&(tuple->getColumn(1))))
ret.key1 = key1col->get64(0);
else
assert(ret.key1);
if (!checkColumn<ColumnUInt64>(*ret.key1))
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "second element of the key tuple is not UInt64");
return ret;
@ -329,8 +352,10 @@ struct SipHash64KeyedImpl
static constexpr auto name = "sipHash64Keyed";
using ReturnType = UInt64;
using Key = impl::SipHashKey;
using KeyColumns = impl::SipHashKeyColumns;
static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); }
static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
static UInt64 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash64Keyed(key.key0, key.key1, begin, size); }
@ -371,8 +396,10 @@ struct SipHash128KeyedImpl
static constexpr auto name = "sipHash128Keyed";
using ReturnType = UInt128;
using Key = impl::SipHashKey;
using KeyColumns = impl::SipHashKeyColumns;
static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); }
static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
static UInt128 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash128Keyed(key.key0, key.key1, begin, size); }
@ -398,13 +425,43 @@ struct SipHash128ReferenceImpl
using ReturnType = UInt128;
static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc<UInt128, SipHash128Impl>(h1, h2); }
static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc<UInt128, SipHash128ReferenceImpl>(h1, h2); }
static UInt128 apply(const char * data, const size_t size) { return sipHash128Reference(data, size); }
static constexpr bool use_int_hash_for_pods = false;
};
struct SipHash128ReferenceKeyedImpl
{
static constexpr auto name = "sipHash128ReferenceKeyed";
using ReturnType = UInt128;
using Key = impl::SipHashKey;
using KeyColumns = impl::SipHashKeyColumns;
static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
static UInt128 applyKeyed(const Key & key, const char * begin, size_t size)
{
return sipHash128ReferenceKeyed(key.key0, key.key1, begin, size);
}
static UInt128 combineHashesKeyed(const Key & key, UInt128 h1, UInt128 h2)
{
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
UInt128 tmp;
reverseMemcpy(&tmp, &h1, sizeof(UInt128));
h1 = tmp;
reverseMemcpy(&tmp, &h2, sizeof(UInt128));
h2 = tmp;
#endif
UInt128 hashes[] = {h1, h2};
return applyKeyed(key, reinterpret_cast<const char *>(hashes), 2 * sizeof(UInt128));
}
static constexpr bool use_int_hash_for_pods = false;
};
/** Why we need MurmurHash2?
* MurmurHash2 is an outdated hash function, superseded by MurmurHash3 and subsequently by CityHash, xxHash, HighwayHash.
@ -1023,7 +1080,7 @@ private:
DECLARE_MULTITARGET_CODE(
template <typename Impl, bool Keyed, typename KeyType>
template <typename Impl, bool Keyed, typename KeyType, typename KeyColumnsType>
class FunctionAnyHash : public IFunction
{
public:
@ -1033,9 +1090,12 @@ private:
using ToType = typename Impl::ReturnType;
template <typename FromType, bool first>
void executeIntType(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
void executeIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
{
using ColVecType = ColumnVectorOrDecimal<FromType>;
KeyType key{};
if constexpr (Keyed)
key = Impl::getKey(key_cols, 0);
if (const ColVecType * col_from = checkAndGetColumn<ColVecType>(column))
{
@ -1044,6 +1104,9 @@ private:
for (size_t i = 0; i < size; ++i)
{
ToType hash;
if constexpr (Keyed)
if (!key_cols.is_const && i != 0)
key = Impl::getKey(key_cols, i);
if constexpr (Impl::use_int_hash_for_pods)
{
@ -1077,6 +1140,14 @@ private:
}
else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
{
if constexpr (Keyed)
{
if (!key_cols.is_const)
{
ColumnPtr full_column = col_from_const->convertToFullColumn();
return executeIntType<FromType, first>(key_cols, full_column.get(), vec_to);
}
}
auto value = col_from_const->template getValue<FromType>();
ToType hash;
@ -1107,8 +1178,15 @@ private:
if constexpr (first)
vec_to.assign(size, hash);
else
{
for (size_t i = 0; i < size; ++i)
{
if constexpr (Keyed)
if (!key_cols.is_const && i != 0)
key = Impl::getKey(key_cols, i);
vec_to[i] = combineHashes(key, vec_to[i], hash);
}
}
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
@ -1116,9 +1194,12 @@ private:
}
template <typename FromType, bool first>
void executeBigIntType(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
void executeBigIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
{
using ColVecType = ColumnVectorOrDecimal<FromType>;
KeyType key{};
if constexpr (Keyed)
key = Impl::getKey(key_cols, 0);
if (const ColVecType * col_from = checkAndGetColumn<ColVecType>(column))
{
@ -1127,6 +1208,9 @@ private:
for (size_t i = 0; i < size; ++i)
{
ToType hash;
if constexpr (Keyed)
if (!key_cols.is_const && i != 0)
key = Impl::getKey(key_cols, i);
if constexpr (std::endian::native == std::endian::little)
hash = apply(key, reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
else
@ -1143,6 +1227,14 @@ private:
}
else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
{
if constexpr (Keyed)
{
if (!key_cols.is_const)
{
ColumnPtr full_column = col_from_const->convertToFullColumn();
return executeBigIntType<FromType, first>(key_cols, full_column.get(), vec_to);
}
}
auto value = col_from_const->template getValue<FromType>();
ToType hash;
@ -1158,8 +1250,15 @@ private:
if constexpr (first)
vec_to.assign(size, hash);
else
{
for (size_t i = 0; i < size; ++i)
{
if constexpr (Keyed)
if (!key_cols.is_const && i != 0)
key = Impl::getKey(key_cols, i);
vec_to[i] = combineHashes(key, vec_to[i], hash);
}
}
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
@ -1167,10 +1266,16 @@ private:
}
template <bool first>
void executeGeneric(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
void executeGeneric(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
{
KeyType key{};
if constexpr (Keyed)
key = Impl::getKey(key_cols, 0);
for (size_t i = 0, size = column->size(); i < size; ++i)
{
if constexpr (Keyed)
if (!key_cols.is_const && i != 0)
key = Impl::getKey(key_cols, i);
StringRef bytes = column->getDataAt(i);
const ToType hash = apply(key, bytes.data, bytes.size);
if constexpr (first)
@ -1181,8 +1286,11 @@ private:
}
template <bool first>
void executeString(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
void executeString(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
{
KeyType key{};
if constexpr (Keyed)
key = Impl::getKey(key_cols, 0);
if (const ColumnString * col_from = checkAndGetColumn<ColumnString>(column))
{
const typename ColumnString::Chars & data = col_from->getChars();
@ -1192,6 +1300,9 @@ private:
ColumnString::Offset current_offset = 0;
for (size_t i = 0; i < size; ++i)
{
if constexpr (Keyed)
if (!key_cols.is_const && i != 0)
key = Impl::getKey(key_cols, i);
const ToType hash = apply(key,
reinterpret_cast<const char *>(&data[current_offset]),
offsets[i] - current_offset - 1);
@ -1212,6 +1323,9 @@ private:
for (size_t i = 0; i < size; ++i)
{
if constexpr (Keyed)
if (!key_cols.is_const && i != 0)
key = Impl::getKey(key_cols, i);
const ToType hash = apply(key, reinterpret_cast<const char *>(&data[i * n]), n);
if constexpr (first)
vec_to[i] = hash;
@ -1221,6 +1335,14 @@ private:
}
else if (const ColumnConst * col_from_const = checkAndGetColumnConstStringOrFixedString(column))
{
if constexpr (Keyed)
{
if (!key_cols.is_const)
{
ColumnPtr full_column = col_from_const->convertToFullColumn();
return executeString<first>(key_cols, full_column.get(), vec_to);
}
}
String value = col_from_const->getValue<String>();
const ToType hash = apply(key, value.data(), value.size());
const size_t size = vec_to.size();
@ -1228,8 +1350,15 @@ private:
if constexpr (first)
vec_to.assign(size, hash);
else
{
for (size_t i = 0; i < size; ++i)
{
if constexpr (Keyed)
if (!key_cols.is_const && i != 0)
key = Impl::getKey(key_cols, i);
vec_to[i] = combineHashes(key, vec_to[i], hash);
}
}
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
@ -1237,7 +1366,7 @@ private:
}
template <bool first>
void executeArray(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
void executeArray(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
{
const IDataType * nested_type = typeid_cast<const DataTypeArray &>(*type).getNestedType().get();
@ -1249,13 +1378,19 @@ private:
typename ColumnVector<ToType>::Container vec_temp(nested_size);
bool nested_is_first = true;
executeForArgument(key, nested_type, nested_column, vec_temp, nested_is_first);
executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first);
const size_t size = offsets.size();
ColumnArray::Offset current_offset = 0;
KeyType key{};
if constexpr (Keyed)
key = Impl::getKey(key_cols, 0);
for (size_t i = 0; i < size; ++i)
{
if constexpr (Keyed)
if (!key_cols.is_const && i != 0)
key = Impl::getKey(key_cols, i);
ColumnArray::Offset next_offset = offsets[i];
ToType hash;
@ -1279,7 +1414,7 @@ private:
{
/// NOTE: here, of course, you can do without the materialization of the column.
ColumnPtr full_column = col_from_const->convertToFullColumn();
executeArray<first>(key, type, full_column.get(), vec_to);
executeArray<first>(key_cols, type, full_column.get(), vec_to);
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
@ -1287,7 +1422,7 @@ private:
}
template <bool first>
void executeAny(const KeyType & key, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector<ToType>::Container & vec_to) const
void executeAny(const KeyColumnsType & key_cols, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector<ToType>::Container & vec_to) const
{
WhichDataType which(from_type);
@ -1295,40 +1430,45 @@ private:
throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}",
icolumn->getName(), icolumn->size(), vec_to.size(), getName());
if (which.isUInt8()) executeIntType<UInt8, first>(key, icolumn, vec_to);
else if (which.isUInt16()) executeIntType<UInt16, first>(key, icolumn, vec_to);
else if (which.isUInt32()) executeIntType<UInt32, first>(key, icolumn, vec_to);
else if (which.isUInt64()) executeIntType<UInt64, first>(key, icolumn, vec_to);
else if (which.isUInt128()) executeBigIntType<UInt128, first>(key, icolumn, vec_to);
else if (which.isUInt256()) executeBigIntType<UInt256, first>(key, icolumn, vec_to);
else if (which.isInt8()) executeIntType<Int8, first>(key, icolumn, vec_to);
else if (which.isInt16()) executeIntType<Int16, first>(key, icolumn, vec_to);
else if (which.isInt32()) executeIntType<Int32, first>(key, icolumn, vec_to);
else if (which.isInt64()) executeIntType<Int64, first>(key, icolumn, vec_to);
else if (which.isInt128()) executeBigIntType<Int128, first>(key, icolumn, vec_to);
else if (which.isInt256()) executeBigIntType<Int256, first>(key, icolumn, vec_to);
else if (which.isUUID()) executeBigIntType<UUID, first>(key, icolumn, vec_to);
else if (which.isIPv4()) executeIntType<IPv4, first>(key, icolumn, vec_to);
else if (which.isIPv6()) executeBigIntType<IPv6, first>(key, icolumn, vec_to);
else if (which.isEnum8()) executeIntType<Int8, first>(key, icolumn, vec_to);
else if (which.isEnum16()) executeIntType<Int16, first>(key, icolumn, vec_to);
else if (which.isDate()) executeIntType<UInt16, first>(key, icolumn, vec_to);
else if (which.isDate32()) executeIntType<Int32, first>(key, icolumn, vec_to);
else if (which.isDateTime()) executeIntType<UInt32, first>(key, icolumn, vec_to);
if constexpr (Keyed)
if ((!key_cols.is_const && key_cols.size() != vec_to.size())
|| (key_cols.is_const && key_cols.size() != 1))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Key column size {} doesn't match result column size {} of function {}", key_cols.size(), vec_to.size(), getName());
if (which.isUInt8()) executeIntType<UInt8, first>(key_cols, icolumn, vec_to);
else if (which.isUInt16()) executeIntType<UInt16, first>(key_cols, icolumn, vec_to);
else if (which.isUInt32()) executeIntType<UInt32, first>(key_cols, icolumn, vec_to);
else if (which.isUInt64()) executeIntType<UInt64, first>(key_cols, icolumn, vec_to);
else if (which.isUInt128()) executeBigIntType<UInt128, first>(key_cols, icolumn, vec_to);
else if (which.isUInt256()) executeBigIntType<UInt256, first>(key_cols, icolumn, vec_to);
else if (which.isInt8()) executeIntType<Int8, first>(key_cols, icolumn, vec_to);
else if (which.isInt16()) executeIntType<Int16, first>(key_cols, icolumn, vec_to);
else if (which.isInt32()) executeIntType<Int32, first>(key_cols, icolumn, vec_to);
else if (which.isInt64()) executeIntType<Int64, first>(key_cols, icolumn, vec_to);
else if (which.isInt128()) executeBigIntType<Int128, first>(key_cols, icolumn, vec_to);
else if (which.isInt256()) executeBigIntType<Int256, first>(key_cols, icolumn, vec_to);
else if (which.isUUID()) executeBigIntType<UUID, first>(key_cols, icolumn, vec_to);
else if (which.isIPv4()) executeIntType<IPv4, first>(key_cols, icolumn, vec_to);
else if (which.isIPv6()) executeBigIntType<IPv6, first>(key_cols, icolumn, vec_to);
else if (which.isEnum8()) executeIntType<Int8, first>(key_cols, icolumn, vec_to);
else if (which.isEnum16()) executeIntType<Int16, first>(key_cols, icolumn, vec_to);
else if (which.isDate()) executeIntType<UInt16, first>(key_cols, icolumn, vec_to);
else if (which.isDate32()) executeIntType<Int32, first>(key_cols, icolumn, vec_to);
else if (which.isDateTime()) executeIntType<UInt32, first>(key_cols, icolumn, vec_to);
/// TODO: executeIntType() for Decimal32/64 leads to incompatible result
else if (which.isDecimal32()) executeBigIntType<Decimal32, first>(key, icolumn, vec_to);
else if (which.isDecimal64()) executeBigIntType<Decimal64, first>(key, icolumn, vec_to);
else if (which.isDecimal128()) executeBigIntType<Decimal128, first>(key, icolumn, vec_to);
else if (which.isDecimal256()) executeBigIntType<Decimal256, first>(key, icolumn, vec_to);
else if (which.isFloat32()) executeIntType<Float32, first>(key, icolumn, vec_to);
else if (which.isFloat64()) executeIntType<Float64, first>(key, icolumn, vec_to);
else if (which.isString()) executeString<first>(key, icolumn, vec_to);
else if (which.isFixedString()) executeString<first>(key, icolumn, vec_to);
else if (which.isArray()) executeArray<first>(key, from_type, icolumn, vec_to);
else executeGeneric<first>(key, icolumn, vec_to);
else if (which.isDecimal32()) executeBigIntType<Decimal32, first>(key_cols, icolumn, vec_to);
else if (which.isDecimal64()) executeBigIntType<Decimal64, first>(key_cols, icolumn, vec_to);
else if (which.isDecimal128()) executeBigIntType<Decimal128, first>(key_cols, icolumn, vec_to);
else if (which.isDecimal256()) executeBigIntType<Decimal256, first>(key_cols, icolumn, vec_to);
else if (which.isFloat32()) executeIntType<Float32, first>(key_cols, icolumn, vec_to);
else if (which.isFloat64()) executeIntType<Float64, first>(key_cols, icolumn, vec_to);
else if (which.isString()) executeString<first>(key_cols, icolumn, vec_to);
else if (which.isFixedString()) executeString<first>(key_cols, icolumn, vec_to);
else if (which.isArray()) executeArray<first>(key_cols, from_type, icolumn, vec_to);
else executeGeneric<first>(key_cols, icolumn, vec_to);
}
void executeForArgument(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to, bool & is_first) const
void executeForArgument(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to, bool & is_first) const
{
/// Flattening of tuples.
if (const ColumnTuple * tuple = typeid_cast<const ColumnTuple *>(column))
@ -1337,7 +1477,7 @@ private:
const DataTypes & tuple_types = typeid_cast<const DataTypeTuple &>(*type).getElements();
size_t tuple_size = tuple_columns.size();
for (size_t i = 0; i < tuple_size; ++i)
executeForArgument(key, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first);
executeForArgument(key_cols, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first);
}
else if (const ColumnTuple * tuple_const = checkAndGetColumnConstData<ColumnTuple>(column))
{
@ -1347,24 +1487,24 @@ private:
for (size_t i = 0; i < tuple_size; ++i)
{
auto tmp = ColumnConst::create(tuple_columns[i], column->size());
executeForArgument(key, tuple_types[i].get(), tmp.get(), vec_to, is_first);
executeForArgument(key_cols, tuple_types[i].get(), tmp.get(), vec_to, is_first);
}
}
else if (const auto * map = checkAndGetColumn<ColumnMap>(column))
{
const auto & type_map = assert_cast<const DataTypeMap &>(*type);
executeForArgument(key, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first);
executeForArgument(key_cols, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first);
}
else if (const auto * const_map = checkAndGetColumnConst<ColumnMap>(column))
{
executeForArgument(key, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first);
executeForArgument(key_cols, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first);
}
else
{
if (is_first)
executeAny<true>(key, type, column, vec_to);
executeAny<true>(key_cols, type, column, vec_to);
else
executeAny<false>(key, type, column, vec_to);
executeAny<false>(key_cols, type, column, vec_to);
}
is_first = false;
@ -1395,30 +1535,33 @@ public:
{
auto col_to = ColumnVector<ToType>::create(input_rows_count);
typename ColumnVector<ToType>::Container & vec_to = col_to->getData();
/// If using a "keyed" algorithm, the first argument is the key and
/// the data starts from the second argument.
/// Otherwise there is no key and all arguments are interpreted as data.
constexpr size_t first_data_argument = Keyed;
if (arguments.size() <= first_data_argument)
if (input_rows_count != 0)
{
/// Return a fixed random-looking magic number when input is empty
vec_to.assign(input_rows_count, static_cast<ToType>(0xe28dbde7fe22e41c));
}
typename ColumnVector<ToType>::Container & vec_to = col_to->getData();
KeyType key{};
if constexpr (Keyed)
if (!arguments.empty())
key = Impl::parseKey(arguments[0]);
/// If using a "keyed" algorithm, the first argument is the key and
/// the data starts from the second argument.
/// Otherwise there is no key and all arguments are interpreted as data.
constexpr size_t first_data_argument = Keyed;
/// The function supports arbitrary number of arguments of arbitrary types.
bool is_first_argument = true;
for (size_t i = first_data_argument; i < arguments.size(); ++i)
{
const auto & col = arguments[i];
executeForArgument(key, col.type.get(), col.column.get(), vec_to, is_first_argument);
if (arguments.size() <= first_data_argument)
{
/// Return a fixed random-looking magic number when input is empty
vec_to.assign(input_rows_count, static_cast<ToType>(0xe28dbde7fe22e41c));
}
KeyColumnsType key_cols{};
if constexpr (Keyed)
if (!arguments.empty())
key_cols = Impl::parseKeyColumns(arguments[0]);
/// The function supports arbitrary number of arguments of arbitrary types.
bool is_first_argument = true;
for (size_t i = first_data_argument; i < arguments.size(); ++i)
{
const auto & col = arguments[i];
executeForArgument(key_cols, col.type.get(), col.column.get(), vec_to, is_first_argument);
}
}
if constexpr (std::is_same_v<ToType, UInt128>) /// backward-compatible
@ -1450,17 +1593,19 @@ public:
) // DECLARE_MULTITARGET_CODE
template <typename Impl, bool Keyed = false, typename KeyType = char>
class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType>
template <typename Impl, bool Keyed = false, typename KeyType = char, typename KeyColumnsType = char>
class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>
{
public:
explicit FunctionAnyHash(ContextPtr context) : selector(context)
{
selector.registerImplementation<TargetArch::Default, TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType>>();
selector
.registerImplementation<TargetArch::Default, TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
#if USE_MULTITARGET_CODE
selector.registerImplementation<TargetArch::AVX2, TargetSpecific::AVX2::FunctionAnyHash<Impl, Keyed, KeyType>>();
selector.registerImplementation<TargetArch::AVX512F, TargetSpecific::AVX512F::FunctionAnyHash<Impl, Keyed, KeyType>>();
selector.registerImplementation<TargetArch::AVX2, TargetSpecific::AVX2::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
selector
.registerImplementation<TargetArch::AVX512F, TargetSpecific::AVX512F::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
#endif
}
@ -1696,7 +1841,7 @@ struct NameIntHash32 { static constexpr auto name = "intHash32"; };
struct NameIntHash64 { static constexpr auto name = "intHash64"; };
using FunctionSipHash64 = FunctionAnyHash<SipHash64Impl>;
using FunctionSipHash64Keyed = FunctionAnyHash<SipHash64KeyedImpl, true, SipHash64KeyedImpl::Key>;
using FunctionSipHash64Keyed = FunctionAnyHash<SipHash64KeyedImpl, true, SipHash64KeyedImpl::Key, SipHash64KeyedImpl::KeyColumns>;
using FunctionIntHash32 = FunctionIntHash<IntHash32Impl, NameIntHash32>;
using FunctionIntHash64 = FunctionIntHash<IntHash64Impl, NameIntHash64>;
#if USE_SSL
@ -1710,8 +1855,10 @@ using FunctionSHA384 = FunctionStringHashFixedString<SHA384Impl>;
using FunctionSHA512 = FunctionStringHashFixedString<SHA512Impl>;
#endif
using FunctionSipHash128 = FunctionAnyHash<SipHash128Impl>;
using FunctionSipHash128Keyed = FunctionAnyHash<SipHash128KeyedImpl, true, SipHash128KeyedImpl::Key>;
using FunctionSipHash128Keyed = FunctionAnyHash<SipHash128KeyedImpl, true, SipHash128KeyedImpl::Key, SipHash128KeyedImpl::KeyColumns>;
using FunctionSipHash128Reference = FunctionAnyHash<SipHash128ReferenceImpl>;
using FunctionSipHash128ReferenceKeyed
= FunctionAnyHash<SipHash128ReferenceKeyedImpl, true, SipHash128ReferenceKeyedImpl::Key, SipHash128ReferenceKeyedImpl::KeyColumns>;
using FunctionCityHash64 = FunctionAnyHash<ImplCityHash64>;
using FunctionFarmFingerprint64 = FunctionAnyHash<ImplFarmFingerprint64>;
using FunctionFarmHash64 = FunctionAnyHash<ImplFarmHash64>;

View File

@ -20,6 +20,11 @@ REGISTER_FUNCTION(Hashing)
.examples{{"hash", "SELECT hex(sipHash128Reference('foo', '\\x01', 3))", ""}},
.categories{"Hash"}
});
factory.registerFunction<FunctionSipHash128ReferenceKeyed>(FunctionDocumentation{
.description = "Same as [sipHash128Reference](#hash_functions-siphash128reference) but additionally takes an explicit key argument "
"instead of using a fixed key.",
.examples{{"hash", "SELECT hex(sipHash128ReferenceKeyed((506097522914230528, 1084818905618843912),'foo', '\\x01', 3));", ""}},
.categories{"Hash"}});
factory.registerFunction<FunctionCityHash64>();
factory.registerFunction<FunctionFarmFingerprint64>();
factory.registerFunction<FunctionFarmHash64>();

View File

@ -7,8 +7,8 @@
namespace DB
{
/** URL processing functions. See implementation in separate .cpp files.
* All functions are not strictly follow RFC, instead they are maximally simplified for performance reasons.
/** These helpers are used by URL processing functions. See implementation in separate .cpp files.
* All functions do not strictly follow RFC, instead they are maximally simplified for performance reasons.
*
* Functions for extraction parts of URL.
* If URL has nothing like, then empty string is returned.
@ -101,7 +101,7 @@ struct ExtractSubstringImpl
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by URL functions");
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function");
}
};
@ -156,7 +156,7 @@ struct CutSubstringImpl
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by URL functions");
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function");
}
};

View File

@ -5,7 +5,7 @@
namespace DB
{
/** Tansform-type wrapper for DateTime64, simplifies DateTime64 support for given Transform.
/** Transform-type wrapper for DateTime64, simplifies DateTime64 support for given Transform.
*
* Depending on what overloads of Transform::execute() are available, when called with DateTime64 value,
* invokes Transform::execute() with either:
@ -80,7 +80,10 @@ public:
}
else
{
const auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier);
auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier);
if (t.value < 0 && components.fractional)
--components.whole;
return wrapped_transform.execute(static_cast<Int64>(components.whole), std::forward<Args>(args)...);
}
}

View File

@ -1,8 +1,8 @@
#pragma once
#include <Functions/FunctionFactory.h>
#include <Functions/URL/FunctionsURL.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/StringHelpers.h>
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>

View File

@ -1,7 +1,7 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include <Functions/StringHelpers.h>
#include <base/find_symbols.h>
#include "FunctionsURL.h"
namespace DB
{

View File

@ -1,7 +1,7 @@
#pragma once
#include "FunctionsURL.h"
#include <base/find_symbols.h>
#include <Functions/StringHelpers.h>
namespace DB
{

View File

@ -1,7 +1,7 @@
#include <Common/StringUtils/StringUtils.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include <Functions/URL/FunctionsURL.h>
#include <Functions/StringHelpers.h>
namespace DB
@ -154,4 +154,3 @@ REGISTER_FUNCTION(Netloc)
}
}

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include "FunctionsURL.h"
#include <Functions/StringHelpers.h>
#include "path.h"
#include <base/find_symbols.h>

View File

@ -1,7 +1,7 @@
#pragma once
#include <base/find_symbols.h>
#include <Functions/URL/FunctionsURL.h>
#include <Functions/StringHelpers.h>
namespace DB

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include "FunctionsURL.h"
#include <Functions/StringHelpers.h>
#include "path.h"
#include <base/find_symbols.h>

View File

@ -1,7 +1,7 @@
#pragma once
#include "FunctionsURL.h"
#include <Common/StringUtils/StringUtils.h>
#include <Functions/StringHelpers.h>
namespace DB
@ -54,4 +54,3 @@ struct ExtractProtocol
};
}

View File

@ -1,7 +1,7 @@
#pragma once
#include "FunctionsURL.h"
#include <base/find_symbols.h>
#include <Functions/StringHelpers.h>
namespace DB

View File

@ -1,7 +1,7 @@
#pragma once
#include "FunctionsURL.h"
#include <base/find_symbols.h>
#include <Functions/StringHelpers.h>
namespace DB
@ -34,4 +34,3 @@ struct ExtractQueryStringAndFragment
};
}

View File

@ -174,12 +174,13 @@ public:
{
auto res = static_cast<Int64>(transform_y.execute(y, timezone_y))
- static_cast<Int64>(transform_x.execute(x, timezone_x));
DateLUTImpl::DateTimeComponents a_comp;
DateLUTImpl::DateTimeComponents b_comp;
DateTimeComponentsWithFractionalPart a_comp;
DateTimeComponentsWithFractionalPart b_comp;
Int64 adjust_value;
auto x_seconds = TransformDateTime64<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(transform_x.getScaleMultiplier()).execute(x, timezone_x);
auto y_seconds = TransformDateTime64<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(transform_y.getScaleMultiplier()).execute(y, timezone_y);
if (x_seconds <= y_seconds)
auto x_microseconds = TransformDateTime64<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(transform_x.getScaleMultiplier()).execute(x, timezone_x);
auto y_microseconds = TransformDateTime64<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(transform_y.getScaleMultiplier()).execute(y, timezone_y);
if (x_microseconds <= y_microseconds)
{
a_comp = TransformDateTime64<ToDateTimeComponentsImpl>(transform_x.getScaleMultiplier()).execute(x, timezone_x);
b_comp = TransformDateTime64<ToDateTimeComponentsImpl>(transform_y.getScaleMultiplier()).execute(y, timezone_y);
@ -192,14 +193,16 @@ public:
adjust_value = 1;
}
if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeYearNumImpl<ResultPrecision::Extended>>>)
{
if ((a_comp.date.month > b_comp.date.month)
|| ((a_comp.date.month == b_comp.date.month) && ((a_comp.date.day > b_comp.date.day)
|| ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour)
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))
)))))
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))))))
res += adjust_value;
}
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeQuarterNumImpl<ResultPrecision::Extended>>>)
@ -210,8 +213,9 @@ public:
|| ((x_month_in_quarter == y_month_in_quarter) && ((a_comp.date.day > b_comp.date.day)
|| ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour)
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))
)))))
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))))))
res += adjust_value;
}
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeMonthNumImpl<ResultPrecision::Extended>>>)
@ -219,8 +223,9 @@ public:
if ((a_comp.date.day > b_comp.date.day)
|| ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour)
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))
)))
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))))
res += adjust_value;
}
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeWeekNumImpl<ResultPrecision::Extended>>>)
@ -230,25 +235,44 @@ public:
if ((x_day_of_week > y_day_of_week)
|| ((x_day_of_week == y_day_of_week) && (a_comp.time.hour > b_comp.time.hour))
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))))
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))
res += adjust_value;
}
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeDayNumImpl<ResultPrecision::Extended>>>)
{
if ((a_comp.time.hour > b_comp.time.hour)
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))))
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))
res += adjust_value;
}
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeHourNumImpl<ResultPrecision::Extended>>>)
{
if ((a_comp.time.minute > b_comp.time.minute)
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))
res += adjust_value;
}
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>>)
{
if (a_comp.time.second > b_comp.time.second)
if ((a_comp.time.second > b_comp.time.second)
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))
res += adjust_value;
}
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeSecondNumImpl<ResultPrecision::Extended>>>)
{
if ((a_comp.millisecond > b_comp.millisecond)
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))
res += adjust_value;
}
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeSubsecondNumImpl<1000>>>)
{
if (a_comp.microsecond > b_comp.microsecond)
res += adjust_value;
}
return res;
@ -373,6 +397,10 @@ public:
impl.template dispatchForColumns<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
else if (unit == "second" || unit == "ss" || unit == "s")
impl.template dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
else if (unit == "millisecond" || unit == "ms")
impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<millisecond_multiplier>>(x, y, timezone_x, timezone_y, res->getData());
else if (unit == "microsecond" || unit == "us" || unit == "u")
impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(x, y, timezone_x, timezone_y, res->getData());
else
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Function {} does not support '{}' unit", getName(), unit);

View File

@ -0,0 +1,42 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include <Functions/StringHelpers.h>
#include <base/find_symbols.h>
namespace DB
{
struct FirstLine
{
static size_t getReserveLengthForElement() { return 16; }
static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
{
res_data = data;
const Pos end = data + size;
const Pos pos = find_first_symbols<'\r', '\n'>(data, end);
res_size = pos - data;
}
};
struct NameFirstLine
{
static constexpr auto name = "firstLine";
};
using FunctionFirstLine = FunctionStringToString<ExtractSubstringImpl<FirstLine>, NameFirstLine>;
REGISTER_FUNCTION(FirstLine)
{
factory.registerFunction<FunctionFirstLine>(FunctionDocumentation{
.description = "Returns first line of a multi-line string.",
.syntax = "firstLine(string)",
.arguments = {{.name = "string", .description = "The string to process."}},
.returned_value = {"The first line of the string or the whole string if there is no line separators."},
.examples = {
{.name = "Return first line", .query = "firstLine('Hello\\nWorld')", .result = "'Hello'"},
{.name = "Return whole string", .query = "firstLine('Hello World')", .result = "'Hello World'"},
}});
}
}

View File

@ -20,6 +20,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
extern const int BAD_ARGUMENTS;
}
namespace
@ -108,6 +109,12 @@ public:
/// S2 acceptes point as (latitude, longitude)
S2LatLng lat_lng = S2LatLng::FromDegrees(lat, lon);
if (!lat_lng.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Point is invalid. For valid point the latitude is between -90 and 90 degrees inclusive"
"and the longitude is between -180 and 180 degrees inclusive.");
S2CellId id(lat_lng);
dst_data[row] = id.id();

View File

@ -119,7 +119,7 @@ public:
if (!lhs_array->hasEqualOffsets(*rhs_array))
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"The argument 1 and argument {} of function {} have different array offsets",
"The argument 2 and argument {} of function {} have different array offsets",
i + 1,
getName());

View File

@ -138,6 +138,7 @@ private:
REGISTER_FUNCTION(Now)
{
factory.registerFunction<NowOverloadResolver>({}, FunctionFactory::CaseInsensitive);
factory.registerAlias("current_timestamp", NowOverloadResolver::name, FunctionFactory::CaseInsensitive);
}
}

View File

@ -114,13 +114,18 @@ public:
const auto hi = S2CellId(data_hi[row]);
const auto point = S2CellId(data_point[row]);
if (!lo.is_valid() || !hi.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Rectangle is not valid");
S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng());
if (!point.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid");
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Point is invalid. For valid point the latitude is between -90 and 90 degrees inclusive "
"and the longitude is between -180 and 180 degrees inclusive.");
S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng());
if (!rect.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Rectangle is invalid. For valid rectangles the latitude bounds do not exceed "
"Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. "
"Also, if either the latitude or longitude bound is empty then both must be. ");
rect.AddPoint(point.ToPoint());

View File

@ -107,13 +107,18 @@ public:
const auto hi = S2CellId(data_hi[row]);
const auto point = S2CellId(data_point[row]);
if (!lo.is_valid() || !hi.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Rectangle is not valid");
S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng());
if (!point.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid");
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Point is invalid. For valid point the latitude is between -90 and 90 degrees inclusive "
"and the longitude is between -180 and 180 degrees inclusive.");
S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng());
if (!rect.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Rectangle is invalid. For valid rectangles the latitude bounds do not exceed "
"Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. "
"Also, if either the latitude or longitude bound is empty then both must be. ");
dst_data.emplace_back(rect.Contains(point.ToLatLng()));
}

View File

@ -128,15 +128,15 @@ public:
const auto lo2 = S2CellId(data_lo2[row]);
const auto hi2 = S2CellId(data_hi2[row]);
if (!lo1.is_valid() || !hi1.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "First rectangle is not valid");
if (!lo2.is_valid() || !hi2.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second rectangle is not valid");
S2LatLngRect rect1(lo1.ToLatLng(), hi1.ToLatLng());
S2LatLngRect rect2(lo2.ToLatLng(), hi2.ToLatLng());
if (!rect1.is_valid() || !rect2.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Rectangle is invalid. For valid rectangles the latitude bounds do not exceed "
"Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. "
"Also, if either the latitude or longitude bound is empty then both must be.");
S2LatLngRect rect_intersection = rect1.Intersection(rect2);
vec_res_first.emplace_back(S2CellId(rect_intersection.lo()).id());

View File

@ -126,15 +126,15 @@ public:
const auto lo2 = S2CellId(data_lo2[row]);
const auto hi2 = S2CellId(data_hi2[row]);
if (!lo1.is_valid() || !hi1.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "First rectangle is not valid");
if (!lo2.is_valid() || !hi2.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second rectangle is not valid");
S2LatLngRect rect1(lo1.ToLatLng(), hi1.ToLatLng());
S2LatLngRect rect2(lo2.ToLatLng(), hi2.ToLatLng());
if (!rect1.is_valid() || !rect2.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Rectangle is invalid. For valid rectangles the latitude bounds do not exceed "
"Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. "
"Also, if either the latitude or longitude bound is empty then both must be. ");
S2LatLngRect rect_union = rect1.Union(rect2);
vec_res_first.emplace_back(S2CellId(rect_union.lo()).id());

View File

@ -97,7 +97,7 @@ public:
const auto id = S2CellId(data_id[row]);
if (!id.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid");
throw Exception(ErrorCodes::BAD_ARGUMENTS, "CellId is invalid.");
S2Point point = id.ToPoint();
S2LatLng ll(point);

View File

@ -10,7 +10,6 @@
#include <Functions/DateTimeTransforms.h>
#include <Functions/FunctionFactory.h>
#include <Functions/IFunction.h>
#include <Functions/TransformDateTime64.h>
#include <IO/WriteHelpers.h>

Some files were not shown because too many files have changed in this diff Show More