Merge remote-tracking branch 'origin/master' into less-contentaion-in-cache-part4

Commit c93510b7cf
@ -96,7 +96,6 @@ Checks: [
    '-modernize-use-default-member-init',
    '-modernize-use-emplace',
    '-modernize-use-nodiscard',
    '-modernize-use-override',
    '-modernize-use-trailing-return-type',
    '-performance-inefficient-string-concatenation',
@ -123,7 +123,6 @@
* Something was wrong with Apache Hive, which is experimental and not supported. [#60262](https://github.com/ClickHouse/ClickHouse/pull/60262) ([shanfengp](https://github.com/Aed-p)).
* An improvement for experimental parallel replicas: force reanalysis if parallel replicas changed [#60362](https://github.com/ClickHouse/ClickHouse/pull/60362) ([Raúl Marín](https://github.com/Algunenano)).
* Fix usage of plain metadata type with new disks configuration option [#60396](https://github.com/ClickHouse/ClickHouse/pull/60396) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Don't allow to set max_parallel_replicas to 0 as it doesn't make sense [#60430](https://github.com/ClickHouse/ClickHouse/pull/60430) ([Kruglov Pavel](https://github.com/Avogar)).
* Try to fix logical error 'Cannot capture column because it has incompatible type' in mapContainsKeyLike [#60451](https://github.com/ClickHouse/ClickHouse/pull/60451) ([Kruglov Pavel](https://github.com/Avogar)).
* Avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
@ -314,13 +314,13 @@ static int read_unicode(json_stream *json)

        if (l < 0xdc00 || l > 0xdfff) {
            json_error(json, "invalid surrogate pair continuation \\u%04lx out "
                             "of range (dc00-dfff)", l);
                             "of range (dc00-dfff)", (unsigned long)l);
            return -1;
        }

        cp = ((h - 0xd800) * 0x400) + ((l - 0xdc00) + 0x10000);
    } else if (cp >= 0xdc00 && cp <= 0xdfff) {
        json_error(json, "dangling surrogate \\u%04lx", cp);
        json_error(json, "dangling surrogate \\u%04lx", (unsigned long)cp);
        return -1;
    }
contrib/NuRaft (vendored)
@ -1 +1 @@
Subproject commit 4a12f99dfc9d47c687ff7700b927cc76856225d1
Subproject commit 08ac76ea80a37f89b12109c805eafe9f1dc9b991
@ -32,6 +32,7 @@ set(SRCS
    "${LIBRARY_DIR}/src/handle_custom_notification.cxx"
    "${LIBRARY_DIR}/src/handle_vote.cxx"
    "${LIBRARY_DIR}/src/launcher.cxx"
    "${LIBRARY_DIR}/src/log_entry.cxx"
    "${LIBRARY_DIR}/src/srv_config.cxx"
    "${LIBRARY_DIR}/src/snapshot_sync_req.cxx"
    "${LIBRARY_DIR}/src/snapshot_sync_ctx.cxx"
@ -257,10 +257,10 @@ do
        echo "$err"
        [[ "0" != "${#err}" ]] && failed_to_save_logs=1
        if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
            err=$( { clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 )
            err=$( { clickhouse-client --port 19000 -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 )
            echo "$err"
            [[ "0" != "${#err}" ]] && failed_to_save_logs=1
            err=$( { clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst; } 2>&1 )
            err=$( { clickhouse-client --port 29000 -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst; } 2>&1 )
            echo "$err"
            [[ "0" != "${#err}" ]] && failed_to_save_logs=1
        fi
@ -87,6 +87,25 @@ if [ "$cache_policy" = "SLRU" ]; then
    mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
fi

# Disable experimental WINDOW VIEW tests for stress tests, since they may be
# created with the old analyzer and then, after a server restart, the server
# will refuse to start.
# FIXME: remove once support for WINDOW VIEW is implemented in the analyzer.
sudo cat > /etc/clickhouse-server/users.d/stress_tests_overrides.xml <<EOL
<clickhouse>
    <profiles>
        <default>
            <allow_experimental_window_view>false</allow_experimental_window_view>
            <constraints>
                <allow_experimental_window_view>
                    <readonly/>
                </allow_experimental_window_view>
            </constraints>
        </default>
    </profiles>
</clickhouse>
EOL

start_server

clickhouse-client --query "SHOW TABLES FROM datasets"
@ -36,7 +36,7 @@ E.g. configuration option
    <s3>
        <type>s3</type>
        <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
        <use_invironment_credentials>1</use_invironment_credentials>
        <use_environment_credentials>1</use_environment_credentials>
    </s3>
```

@ -47,7 +47,7 @@ is equal to configuration (from `24.1`):
        <object_storage_type>s3</object_storage_type>
        <metadata_type>local</metadata_type>
        <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
        <use_invironment_credentials>1</use_invironment_credentials>
        <use_environment_credentials>1</use_environment_credentials>
    </s3>
```

@ -56,7 +56,7 @@ Configuration
    <s3_plain>
        <type>s3_plain</type>
        <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
        <use_invironment_credentials>1</use_invironment_credentials>
        <use_environment_credentials>1</use_environment_credentials>
    </s3_plain>
```

@ -67,7 +67,7 @@ is equal to
        <object_storage_type>s3</object_storage_type>
        <metadata_type>plain</metadata_type>
        <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
        <use_invironment_credentials>1</use_invironment_credentials>
        <use_environment_credentials>1</use_environment_credentials>
    </s3_plain>
```

@ -79,7 +79,7 @@ Example of full storage configuration will look like:
    <s3>
        <type>s3</type>
        <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
        <use_invironment_credentials>1</use_invironment_credentials>
        <use_environment_credentials>1</use_environment_credentials>
    </s3>
</disks>
<policies>

@ -105,7 +105,7 @@ Starting with 24.1 clickhouse version, it can also look like:
        <object_storage_type>s3</object_storage_type>
        <metadata_type>local</metadata_type>
        <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
        <use_invironment_credentials>1</use_invironment_credentials>
        <use_environment_credentials>1</use_environment_credentials>
    </s3>
</disks>
<policies>

@ -324,7 +324,7 @@ Configuration:
    <s3_plain>
        <type>s3_plain</type>
        <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
        <use_invironment_credentials>1</use_invironment_credentials>
        <use_environment_credentials>1</use_environment_credentials>
    </s3_plain>
```

@ -337,7 +337,7 @@ Configuration:
        <object_storage_type>azure</object_storage_type>
        <metadata_type>plain</metadata_type>
        <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
        <use_invironment_credentials>1</use_invironment_credentials>
        <use_environment_credentials>1</use_environment_credentials>
    </s3_plain>
```
|
||||
@ -520,13 +520,13 @@ Example of configuration for versions later or equal to 22.8:
|
||||
</cache>
|
||||
</disks>
|
||||
<policies>
|
||||
<s3-cache>
|
||||
<s3_cache>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>cache</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</s3-cache>
|
||||
</s3_cache>
|
||||
<policies>
|
||||
</storage_configuration>
|
||||
```
|
||||
@ -546,13 +546,13 @@ Example of configuration for versions earlier than 22.8:
|
||||
</s3>
|
||||
</disks>
|
||||
<policies>
|
||||
<s3-cache>
|
||||
<s3_cache>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>s3</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</s3-cache>
|
||||
</s3_cache>
|
||||
<policies>
|
||||
</storage_configuration>
|
||||
```
|
||||
|
@ -47,7 +47,7 @@ An example:
|
||||
<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024</engine>
|
||||
-->
|
||||
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
|
||||
<max_size_rows>1048576</max_size>
|
||||
<max_size_rows>1048576</max_size_rows>
|
||||
<reserved_size_rows>8192</reserved_size_rows>
|
||||
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
|
||||
<flush_on_crash>false</flush_on_crash>
|
||||
|
@ -483,7 +483,7 @@ Where:

- `r1` - the number of unique visitors who visited the site during 2020-01-01 (the `cond1` condition).
- `r2` - the number of unique visitors who visited the site during a specific time period between 2020-01-01 and 2020-01-02 (`cond1` and `cond2` conditions).
- `r3` - the number of unique visitors who visited the site during a specific time period between 2020-01-01 and 2020-01-03 (`cond1` and `cond3` conditions).
- `r3` - the number of unique visitors who visited the site during a specific time period on 2020-01-01 and 2020-01-03 (`cond1` and `cond3` conditions).

## uniqUpTo(N)(x)
@ -36,9 +36,9 @@ You can explicitly set a time zone for `DateTime`-type columns when creating a table

The [clickhouse-client](../../interfaces/cli.md) applies the server time zone by default if a time zone isn't explicitly set when initializing the data type. To use the client time zone, run `clickhouse-client` with the `--use_client_time_zone` parameter.

ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function.
ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings-formats.md#date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function.
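For instance, the effect of the output setting can be seen directly (a sketch; `iso` is one of the supported values of `date_time_output_format`, and the actual output depends on the server time zone):

```sql
-- hypothetical session: switch DateTime text output to ISO 8601 for this query
SELECT now() SETTINGS date_time_output_format = 'iso';
```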
When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format) setting.
When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings-formats.md#date_time_input_format) setting.

## Examples

@ -147,8 +147,8 @@ Time shifts for multiple days. Some pacific islands changed their timezone offset

- [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md)
- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md)
- [Functions for working with arrays](../../sql-reference/functions/array-functions.md)
- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#settings-date_time_input_format)
- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#settings-date_time_output_format)
- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#date_time_input_format)
- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#date_time_output_format)
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
- [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone)
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)
@ -395,3 +395,37 @@ SELECT v, variantType(v) FROM test ORDER by v;
│ 100 │ UInt32         │
└─────┴────────────────┘
```

## JSONExtract functions with Variant

All `JSONExtract*` functions support `Variant` type:

```sql
SELECT JSONExtract('{"a" : [1, 2, 3]}', 'a', 'Variant(UInt32, String, Array(UInt32))') AS variant, variantType(variant) AS variant_type;
```

```text
┌─variant─┬─variant_type──┐
│ [1,2,3] │ Array(UInt32) │
└─────────┴───────────────┘
```

```sql
SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Variant(UInt32, String, Array(UInt32)))') AS map_of_variants, mapApply((k, v) -> (k, variantType(v)), map_of_variants) AS map_of_variant_types
```

```text
┌─map_of_variants──────────────────┬─map_of_variant_types────────────────────────────┐
│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'UInt32','b':'String','c':'Array(UInt32)'} │
└──────────────────────────────────┴─────────────────────────────────────────────────┘
```

```sql
SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Variant(UInt32, String, Array(UInt32))') AS variants, arrayMap(x -> (x.1, variantType(x.2)), variants) AS variant_types
```

```text
┌─variants───────────────────────────────┬─variant_types─────────────────────────────────────────┐
│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','UInt32'),('b','String'),('c','Array(UInt32)')] │
└────────────────────────────────────────┴───────────────────────────────────────────────────────┘
```
@ -1670,7 +1670,7 @@ Like [fromDaysSinceYearZero](#fromDaysSinceYearZero) but returns a [Date32](...)

## age

Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 microsecond.
Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 nanosecond.
E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for the `day` unit, 0 months for the `month` unit, 0 years for the `year` unit.

For an alternative to `age`, see function `date_diff`.
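A quick sanity check of that example (per the description above; the unit is passed as a string, as in the syntax below):

```sql
SELECT age('day', toDate('2021-12-29'), toDate('2022-01-01')); -- returns 3
```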
@ -1686,16 +1686,17 @@ age('unit', startdate, enddate, [timezone])

- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
    Possible values:

    - `microsecond` `microseconds` `us` `u`
    - `millisecond` `milliseconds` `ms`
    - `second` `seconds` `ss` `s`
    - `minute` `minutes` `mi` `n`
    - `hour` `hours` `hh` `h`
    - `day` `days` `dd` `d`
    - `week` `weeks` `wk` `ww`
    - `month` `months` `mm` `m`
    - `quarter` `quarters` `qq` `q`
    - `year` `years` `yyyy` `yy`
    - `nanosecond`, `nanoseconds`, `ns`
    - `microsecond`, `microseconds`, `us`, `u`
    - `millisecond`, `milliseconds`, `ms`
    - `second`, `seconds`, `ss`, `s`
    - `minute`, `minutes`, `mi`, `n`
    - `hour`, `hours`, `hh`, `h`
    - `day`, `days`, `dd`, `d`
    - `week`, `weeks`, `wk`, `ww`
    - `month`, `months`, `mm`, `m`
    - `quarter`, `quarters`, `qq`, `q`
    - `year`, `years`, `yyyy`, `yy`

- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
@ -1763,16 +1764,17 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_DIFF`

- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
    Possible values:

    - `microsecond` `microseconds` `us` `u`
    - `millisecond` `milliseconds` `ms`
    - `second` `seconds` `ss` `s`
    - `minute` `minutes` `mi` `n`
    - `hour` `hours` `hh` `h`
    - `day` `days` `dd` `d`
    - `week` `weeks` `wk` `ww`
    - `month` `months` `mm` `m`
    - `quarter` `quarters` `qq` `q`
    - `year` `years` `yyyy` `yy`
    - `nanosecond`, `nanoseconds`, `ns`
    - `microsecond`, `microseconds`, `us`, `u`
    - `millisecond`, `milliseconds`, `ms`
    - `second`, `seconds`, `ss`, `s`
    - `minute`, `minutes`, `mi`, `n`
    - `hour`, `hours`, `hh`, `h`
    - `day`, `days`, `dd`, `d`
    - `week`, `weeks`, `wk`, `ww`
    - `month`, `months`, `mm`, `m`
    - `quarter`, `quarters`, `qq`, `q`
    - `year`, `years`, `yyyy`, `yy`

- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
@ -543,12 +543,64 @@ You can get similar result by using the [ternary operator](../../sql-reference/f

Returns 1 if the Float32 or Float64 argument is NaN, otherwise it returns 0.
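For example (a minimal check; `nan` is the floating-point NaN literal):

```sql
SELECT isNaN(nan), isNaN(1.5); -- returns 1, 0
```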
## hasColumnInTable(\[‘hostname’\[, ‘username’\[, ‘password’\]\],\] ‘database’, ‘table’, ‘column’)
## hasColumnInTable

Given the database name, the table name, and the column name as constant strings, returns 1 if the given column exists, otherwise 0.

**Syntax**

```sql
hasColumnInTable(['hostname'[, 'username'[, 'password']],] 'database', 'table', 'column')
```

**Parameters**

- `database` : name of the database. [String literal](../syntax#syntax-string-literal)
- `table` : name of the table. [String literal](../syntax#syntax-string-literal)
- `column` : name of the column. [String literal](../syntax#syntax-string-literal)
- `hostname` : remote server name to perform the check on. [String literal](../syntax#syntax-string-literal)
- `username` : username for the remote server. [String literal](../syntax#syntax-string-literal)
- `password` : password for the remote server. [String literal](../syntax#syntax-string-literal)

**Returned value**

- `1` if the given column exists.
- `0`, otherwise.

**Implementation details**

If the parameter `hostname` is given, the check is performed on a remote server.
If the table does not exist, an exception is thrown.
For elements in a nested data structure, the function checks for the existence of a column. For the nested data structure itself, the function returns 0.

**Example**

Query:

```sql
SELECT hasColumnInTable('system','metrics','metric')
```

```response
1
```

```sql
SELECT hasColumnInTable('system','metrics','non-existing_column')
```

```response
0
```

## hasThreadFuzzer

Returns whether Thread Fuzzer is effective. It can be used in tests to prevent runs from being too long.

**Syntax**

```sql
hasThreadFuzzer();
```

## bar

Builds a bar chart.
|
@ -99,7 +99,7 @@ Alias: `OCTET_LENGTH`
|
||||
Returns the length of a string in Unicode code points (not: in bytes or characters). It assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
|
||||
|
||||
Alias:
|
||||
- `CHAR_LENGTH``
|
||||
- `CHAR_LENGTH`
|
||||
- `CHARACTER_LENGTH`
|
||||
|
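A minimal illustration of the code-point semantics (a sketch, assuming the function described here is `lengthUTF8`, whose aliases are listed above; 'ёлка' is four code points in eight bytes):

```sql
SELECT length('ёлка'), lengthUTF8('ёлка'); -- returns 8, 4
```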
## leftPad
@ -74,6 +74,8 @@ SELECT
    position('Hello, world!', 'o', 7)
```

Result:

``` text
┌─position('Hello, world!', 'o', 1)─┬─position('Hello, world!', 'o', 7)─┐
│                                 5 │                                 9 │

@ -479,9 +481,9 @@ Alias: `haystack NOT ILIKE pattern` (operator)

## ngramDistance

Calculates the 4-gram distance between a `haystack` string and a `needle` string. For that, it counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities. Returns a Float32 between 0 and 1. The smaller the result is, the more similar the strings are to each other. Throws an exception if constant `needle` or `haystack` arguments are more than 32Kb in size. If any of the non-constant `haystack` or `needle` arguments is more than 32Kb in size, the distance is always 1.
Calculates the 4-gram distance between a `haystack` string and a `needle` string. For this, it counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities. Returns a [Float32](../../sql-reference/data-types/float.md/#float32-float64) between 0 and 1. The smaller the result is, the more similar the strings are to each other.

Functions `ngramDistanceCaseInsensitive`, `ngramDistanceUTF8`, `ngramDistanceCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.
Functions [`ngramDistanceCaseInsensitive`](#ngramdistancecaseinsensitive), [`ngramDistanceUTF8`](#ngramdistanceutf8), [`ngramDistanceCaseInsensitiveUTF8`](#ngramdistancecaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function.

**Syntax**
@ -489,15 +491,170 @@ Functions `ngramDistanceCaseInsensitive, ngramDistanceUTF8, ngramDistanceCaseIns
ngramDistance(haystack, needle)
```

**Parameters**

- `haystack`: First comparison string. [String literal](../syntax#string)
- `needle`: Second comparison string. [String literal](../syntax#string)

**Returned value**

- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64)

**Implementation details**

This function will throw an exception if constant `needle` or `haystack` arguments are more than 32Kb in size. If any non-constant `haystack` or `needle` arguments are more than 32Kb in size, then the distance is always 1.

**Examples**

The more similar two strings are to each other, the closer the result will be to 0 (identical).

Query:

```sql
SELECT ngramDistance('ClickHouse','ClickHouse!');
```

Result:

```response
0.06666667
```

The less similar two strings are to each other, the larger the result will be.

Query:

```sql
SELECT ngramDistance('ClickHouse','House');
```

Result:

```response
0.5555556
```
## ngramDistanceCaseInsensitive

Provides a case-insensitive variant of [ngramDistance](#ngramdistance).

**Syntax**

```sql
ngramDistanceCaseInsensitive(haystack, needle)
```

**Parameters**

- `haystack`: First comparison string. [String literal](../syntax#string)
- `needle`: Second comparison string. [String literal](../syntax#string)

**Returned value**

- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64)

**Examples**

With [ngramDistance](#ngramdistance), differences in case will affect the similarity value:

Query:

```sql
SELECT ngramDistance('ClickHouse','clickhouse');
```

Result:

```response
0.71428573
```

With [ngramDistanceCaseInsensitive](#ngramdistancecaseinsensitive), case is ignored, so two strings differing only in case will now return a low similarity value:

Query:

```sql
SELECT ngramDistanceCaseInsensitive('ClickHouse','clickhouse');
```

Result:

```response
0
```
## ngramDistanceUTF8

Provides a UTF-8 variant of [ngramDistance](#ngramdistance). Assumes that `needle` and `haystack` strings are UTF-8 encoded strings.

**Syntax**

```sql
ngramDistanceUTF8(haystack, needle)
```

**Parameters**

- `haystack`: First UTF-8 encoded comparison string. [String literal](../syntax#string)
- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#string)

**Returned value**

- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64)

**Example**

Query:

```sql
SELECT ngramDistanceUTF8('abcde','cde');
```

Result:

```response
0.5
```

## ngramDistanceCaseInsensitiveUTF8

Provides a case-insensitive variant of [ngramDistanceUTF8](#ngramdistanceutf8).

**Syntax**

```sql
ngramDistanceCaseInsensitiveUTF8(haystack, needle)
```

**Parameters**

- `haystack`: First UTF-8 encoded comparison string. [String literal](../syntax#string)
- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#string)

**Returned value**

- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64)

**Example**

Query:

```sql
SELECT ngramDistanceCaseInsensitiveUTF8('abcde','CDE');
```

Result:

```response
0.5
```
## ngramSearch

Like `ngramDistance` but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. the number of n-grams from the needle minus the common number of n-grams, normalized by the number of `needle` n-grams. Returns a Float32 between 0 and 1. The bigger the result is, the more likely `needle` is in the `haystack`. This function is useful for fuzzy string search. Also see function `soundex`.
Like `ngramDistance` but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. the number of n-grams from the needle minus the common number of n-grams, normalized by the number of `needle` n-grams. Returns a [Float32](../../sql-reference/data-types/float.md/#float32-float64) between 0 and 1. The bigger the result is, the more likely `needle` is in the `haystack`. This function is useful for fuzzy string search. Also see function [`soundex`](../../sql-reference/functions/string-functions#soundex).

Functions `ngramSearchCaseInsensitive`, `ngramSearchUTF8`, `ngramSearchCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.

:::note
The UTF-8 variants use the 3-gram distance. These are not perfectly fair n-gram distances. We use 2-byte hashes to hash n-grams and then calculate the (non-)symmetric difference between these hash tables; collisions may occur. With the UTF-8 case-insensitive format we do not use a fair `tolower` function; we zero the 5th bit (starting from zero) of each codepoint byte, and the first bit of the zeroth byte if there is more than one byte. This works for Latin and mostly for all Cyrillic letters.
:::
Functions [`ngramSearchCaseInsensitive`](#ngramsearchcaseinsensitive), [`ngramSearchUTF8`](#ngramsearchutf8), [`ngramSearchCaseInsensitiveUTF8`](#ngramsearchcaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function.

**Syntax**
@ -505,6 +662,140 @@ The UTF-8 variants use the 3-gram distance. These are not perfectly fair n-gram
ngramSearch(haystack, needle)
```

**Parameters**

- `haystack`: First comparison string. [String literal](../syntax#string)
- `needle`: Second comparison string. [String literal](../syntax#string)

**Returned value**

- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64)

**Implementation details**

:::note
The UTF-8 variants use the 3-gram distance. These are not perfectly fair n-gram distances. We use 2-byte hashes to hash n-grams and then calculate the (non-)symmetric difference between these hash tables; collisions may occur. With the UTF-8 case-insensitive format we do not use a fair `tolower` function; we zero the 5th bit (starting from zero) of each codepoint byte, and the first bit of the zeroth byte if there is more than one byte. This works for Latin and mostly for all Cyrillic letters.
:::

**Example**

Query:

```sql
SELECT ngramSearch('Hello World','World Hello');
```

Result:

```response
0.5
```
## ngramSearchCaseInsensitive

Provides a case-insensitive variant of [ngramSearch](#ngramsearch).

**Syntax**

```sql
ngramSearchCaseInsensitive(haystack, needle)
```

**Parameters**

- `haystack`: First comparison string. [String literal](../syntax#string)
- `needle`: Second comparison string. [String literal](../syntax#string)

**Returned value**

- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64)

The bigger the result is, the more likely `needle` is in the `haystack`.

**Example**

Query:

```sql
SELECT ngramSearchCaseInsensitive('Hello World','hello');
```

Result:

```response
1
```
## ngramSearchUTF8

Provides a UTF-8 variant of [ngramSearch](#ngramsearch) in which `needle` and `haystack` are assumed to be UTF-8 encoded strings.

**Syntax**

```sql
ngramSearchUTF8(haystack, needle)
```

**Parameters**

- `haystack`: First UTF-8 encoded comparison string. [String literal](../syntax#string)
- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#string)

**Returned value**

- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64)

The bigger the result is, the more likely `needle` is in the `haystack`.

**Example**

Query:

```sql
SELECT ngramSearchUTF8('абвгдеёжз', 'гдеёзд');
```

Result:

```response
0.5
```
## ngramSearchCaseInsensitiveUTF8

Provides a case-insensitive variant of [ngramSearchUTF8](#ngramsearchutf8) in which `needle` and `haystack` are assumed to be UTF-8 encoded strings.

**Syntax**

```sql
ngramSearchCaseInsensitiveUTF8(haystack, needle)
```

**Parameters**

- `haystack`: First UTF-8 encoded comparison string. [String literal](../syntax#string)
- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#string)

**Returned value**

- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64)

The bigger the result is, the more likely `needle` is in the `haystack`.

**Example**

Query:

```sql
SELECT ngramSearchCaseInsensitiveUTF8('абвГДЕёжз', 'АбвгдЕЁжз');
```

Result:

```response
0.57142854
```

## countSubstrings

Returns how often substring `needle` occurs in string `haystack`.
@ -610,7 +901,7 @@ Like `countMatches(haystack, pattern)` but matching ignores the case.

## regexpExtract

Extracts the first string in haystack that matches the regexp pattern and corresponds to the regex group index.
Extracts the first string in `haystack` that matches the regexp pattern and corresponds to the regex group index.

**Syntax**

@ -652,7 +943,7 @@ Result:

## hasSubsequence

Returns 1 if needle is a subsequence of haystack, or 0 otherwise.
Returns 1 if `needle` is a subsequence of `haystack`, or 0 otherwise.
A subsequence of a string is a sequence that can be derived from the given string by deleting zero or more elements without changing the order of the remaining elements.

@ -676,8 +967,10 @@ Type: `UInt8`.

**Examples**

Query:

``` sql
SELECT hasSubsequence('garbage', 'arg') ;
SELECT hasSubsequence('garbage', 'arg');
```

Result:
@ -692,10 +985,263 @@ Result:

Like [hasSubsequence](#hasSubsequence) but searches case-insensitively.

**Syntax**

``` sql
hasSubsequenceCaseInsensitive(haystack, needle)
```

**Arguments**

- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).

**Returned values**

- 1, if needle is a subsequence of haystack.
- 0, otherwise.

Type: `UInt8`.

**Examples**

Query:

``` sql
SELECT hasSubsequenceCaseInsensitive('garbage', 'ARG');
```

Result:

``` text
┌─hasSubsequenceCaseInsensitive('garbage', 'ARG')─┐
│                                               1 │
└─────────────────────────────────────────────────┘
```

## hasSubsequenceUTF8

Like [hasSubsequence](#hasSubsequence) but assumes `haystack` and `needle` are UTF-8 encoded strings.

**Syntax**

``` sql
hasSubsequenceUTF8(haystack, needle)
```

**Arguments**

- `haystack` — String in which the search is performed. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Subsequence to be searched. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).

**Returned values**

- 1, if needle is a subsequence of haystack.
- 0, otherwise.

Type: `UInt8`.

**Examples**

Query:

``` sql
select hasSubsequenceUTF8('ClickHouse - столбцовая система управления базами данных', 'система');
```

Result:

``` text
┌─hasSubsequenceUTF8('ClickHouse - столбцовая система управления базами данных', 'система')─┐
│                                                                                         1 │
└───────────────────────────────────────────────────────────────────────────────────────────┘
```

## hasSubsequenceCaseInsensitiveUTF8

Like [hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively.

**Syntax**

``` sql
hasSubsequenceCaseInsensitiveUTF8(haystack, needle)
```

**Arguments**

- `haystack` — String in which the search is performed. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Subsequence to be searched. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).

**Returned values**

- 1, if needle is a subsequence of haystack.
- 0, otherwise.

Type: `UInt8`.

**Examples**

Query:

``` sql
select hasSubsequenceCaseInsensitiveUTF8('ClickHouse - столбцовая система управления базами данных', 'СИСТЕМА');
```

Result:

``` text
┌─hasSubsequenceCaseInsensitiveUTF8('ClickHouse - столбцовая система управления базами данных', 'СИСТЕМА')─┐
│                                                                                                        1 │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
## hasToken

Returns 1 if a given token is present in a haystack, or 0 otherwise.

**Syntax**

```sql
hasToken(haystack, token)
```

**Parameters**

- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `token`: Maximal-length substring between two non-alphanumeric ASCII characters (or the boundaries of the haystack).

**Returned value**

- 1, if the token is present in the haystack.
- 0, if the token is not present.

**Implementation details**

The token must be a constant string. Supported by the tokenbf_v1 index specialization.
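As an illustration of that index support, a hypothetical skip-index definition that token search functions can take advantage of (the table and column names are invented; the `tokenbf_v1` parameters are the bloom filter size in bytes, the number of hash functions, and a seed):

```sql
-- sketch: token bloom filter index over a log message column
ALTER TABLE logs ADD INDEX message_tokens (message) TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 4;
```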
**Example**

Query:

```sql
SELECT hasToken('Hello World','Hello');
```

```response
1
```

## hasTokenOrNull

Returns 1 if a given token is present, 0 if not present, and null if the token is ill-formed.

**Syntax**

```sql
hasTokenOrNull(haystack, token)
```

**Parameters**

- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `token`: Maximal-length substring between two non-alphanumeric ASCII characters (or the boundaries of the haystack).

**Returned value**

- 1, if the token is present in the haystack.
- 0, if the token is not present in the haystack.
- null, if the token is ill-formed.

**Implementation details**

The token must be a constant string. Supported by the tokenbf_v1 index specialization.

**Example**

Where `hasToken` would throw an error for an ill-formed token, `hasTokenOrNull` returns `null` for an ill-formed token.

Query:

```sql
SELECT hasTokenOrNull('Hello World','Hello,World');
```

```response
null
```

## hasTokenCaseInsensitive

Returns 1 if a given token is present in a haystack, 0 otherwise. Ignores case.

**Syntax**

```sql
hasTokenCaseInsensitive(haystack, token)
```

**Parameters**

- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `token`: Maximal-length substring between two non-alphanumeric ASCII characters (or the boundaries of the haystack).

**Returned value**

- 1, if the token is present in the haystack.
- 0, otherwise.

**Implementation details**

The token must be a constant string. Supported by the tokenbf_v1 index specialization.

**Example**

Query:

```sql
SELECT hasTokenCaseInsensitive('Hello World','hello');
```

```response
1
```

## hasTokenCaseInsensitiveOrNull

Returns 1 if a given token is present in a haystack, 0 otherwise. Ignores case and returns null if the token is ill-formed.

**Syntax**

```sql
hasTokenCaseInsensitiveOrNull(haystack, token)
```

**Parameters**

- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `token`: Maximal-length substring between two non-alphanumeric ASCII characters (or the boundaries of the haystack).

**Returned value**

- 1, if the token is present in the haystack.
- 0, if the token is not present.
- null, if the token is ill-formed.

**Implementation details**

The token must be a constant string. Supported by the tokenbf_v1 index specialization.

**Example**

Where `hasTokenCaseInsensitive` would throw an error for an ill-formed token, `hasTokenCaseInsensitiveOrNull` returns `null` for an ill-formed token.

Query:

```sql
SELECT hasTokenCaseInsensitiveOrNull('Hello World','hello,world');
```

```response
null
```
@ -56,7 +56,9 @@ Entries for finished mutations are not deleted right away (the number of preserved entries

For non-replicated tables, all `ALTER` queries are performed synchronously. For replicated tables, the query just adds instructions for the appropriate actions to `ZooKeeper`, and the actions themselves are performed as soon as possible. However, the query can wait for these actions to be completed on all the replicas.

For all `ALTER` queries, you can use the [alter_sync](/docs/en/operations/settings/settings.md/#alter-sync) setting to set up waiting.
For `ALTER` queries that create mutations (e.g. including, but not limited to, `UPDATE`, `DELETE`, `MATERIALIZE INDEX`, `MATERIALIZE PROJECTION`, `MATERIALIZE COLUMN`, `APPLY DELETED MASK`, `CLEAR STATISTIC`, `MATERIALIZE STATISTIC`), the synchronicity is defined by the [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting.
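For example, a mutation can be made to wait for all replicas (a sketch with an invented table; per the `mutations_sync` documentation, `2` means waiting for all replicas):

```sql
-- blocks until the mutation finishes on all replicas
ALTER TABLE t UPDATE status = 'done' WHERE id = 42 SETTINGS mutations_sync = 2;
```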
For other `ALTER` queries which only modify the metadata, you can use the [alter_sync](/docs/en/operations/settings/settings.md/#alter-sync) setting to set up waiting.

You can specify how long (in seconds) to wait for inactive replicas to execute all `ALTER` queries with the [replication_wait_for_inactive_replica_timeout](/docs/en/operations/settings/settings.md/#replication-wait-for-inactive-replica-timeout) setting.

@ -64,8 +66,6 @@ You can specify how long (in seconds) to wait for inactive replicas to execute all
For all `ALTER` queries, if `alter_sync = 2` and some replicas are not active for more than the time specified in the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown.
:::

For `ALTER TABLE ... UPDATE|DELETE|MATERIALIZE INDEX|MATERIALIZE PROJECTION|MATERIALIZE COLUMN` queries the synchronicity is defined by the [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting.

## Related content

- Blog: [Handling Updates and Deletes in ClickHouse](https://clickhouse.com/blog/handling-updates-and-deletes-in-clickhouse)
@ -30,9 +30,11 @@ Also see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md)

Syntax:

``` sql
DROP [TEMPORARY] TABLE [IF EXISTS] [IF EMPTY] [db.]name [ON CLUSTER cluster] [SYNC]
DROP [TEMPORARY] TABLE [IF EXISTS] [IF EMPTY] [db1.]name_1[, [db2.]name_2, ...] [ON CLUSTER cluster] [SYNC]
```

Note that deleting multiple tables at the same time is a non-atomic deletion. If a table fails to be deleted, subsequent tables will not be deleted.
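For instance (a sketch with invented table names; if dropping `db1.t1` fails, `db2.t2` is left in place):

```sql
DROP TABLE IF EXISTS db1.t1, db2.t2 SYNC;
```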
## DROP DICTIONARY

Deletes the dictionary.
@ -64,6 +64,14 @@ RELOAD FUNCTIONS [ON CLUSTER cluster_name]
RELOAD FUNCTION [ON CLUSTER cluster_name] function_name
```

## RELOAD ASYNCHRONOUS METRICS

Re-calculates all [asynchronous metrics](../../operations/system-tables/asynchronous_metrics.md). Since asynchronous metrics are periodically updated based on the setting [asynchronous_metrics_update_period_s](../../operations/server-configuration-parameters/settings.md), updating them manually using this statement is typically not necessary.

```sql
RELOAD ASYNCHRONOUS METRICS [ON CLUSTER cluster_name]
```
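Assuming the usual `SYSTEM` prefix used for the statements on this page, a manual refresh would look like this (rarely needed, per the note above):

```sql
SYSTEM RELOAD ASYNCHRONOUS METRICS;
```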
## DROP DNS CACHE

Clears ClickHouse's internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries).
@ -476,7 +476,7 @@ FROM

- `r1` - the number of unique visitors during 2020-01-01 (`cond1`).
- `r2` - the number of unique visitors in the period between 2020-01-01 and 2020-01-02 (`cond1` and `cond2`).
- `r3` - the number of unique visitors in the period between 2020-01-01 and 2020-01-03 (`cond1` and `cond3`).
- `r3` - the number of unique visitors on 2020-01-01 and 2020-01-03 (`cond1` and `cond3`).

## uniqUpTo(N)(x) {#uniquptonx}
@ -120,7 +120,7 @@ FROM dt

- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md)
- [Functions for working with arrays](../../sql-reference/functions/array-functions.md)
- [The `date_time_input_format` setting](../../operations/settings/index.md#settings-date_time_input_format)
- [The `date_time_output_format` setting](../../operations/settings/index.md)
- [The `date_time_output_format` setting](../../operations/settings/index.md#settings-date_time_output_format)
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
- [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone)
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)
@ -627,7 +627,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d

## age

Calculates the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated with a precision of 1 microsecond.
Calculates the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated with a precision of 1 nanosecond.
For example, the difference between `2021-12-29` and `2022-01-01` is 3 days for the `day` unit, 0 months for the `month` unit, 0 years for the `year` unit.

**Syntax**

@ -641,6 +641,7 @@ age('unit', startdate, enddate, [timezone])

- `unit` — the unit of time in which the return value is expressed. [String](../../sql-reference/data-types/string.md).
    Possible values:

    - `nanosecond` (possible abbreviations: `ns`)
    - `microsecond` (possible abbreviations: `us`, `u`)
    - `millisecond` (possible abbreviations: `ms`)
    - `second` (possible abbreviations: `ss`, `s`)

@ -716,6 +717,7 @@ date_diff('unit', startdate, enddate, [timezone])

- `unit` — the unit of time in which the return value is expressed. [String](../../sql-reference/data-types/string.md).
    Possible values:

    - `nanosecond` (possible abbreviations: `ns`)
    - `microsecond` (possible abbreviations: `us`, `u`)
    - `millisecond` (possible abbreviations: `ms`)
    - `second` (possible abbreviations: `ss`, `s`)
@ -472,7 +472,7 @@ FROM

- `r1` - the number of unique visitors who visited the site during 2020-01-01 (the `cond1` condition).
- `r2` - the number of unique visitors who visited the site during a specific time period between 2020-01-01 and 2020-01-02 (the `cond1` and `cond2` conditions).
- `r3` - the number of unique visitors who visited the site during a specific time period between 2020-01-01 and 2020-01-03 (the `cond1` and `cond3` conditions).
- `r3` - the number of unique visitors who visited the site on 2020-01-01 and 2020-01-03 (the `cond1` and `cond3` conditions).

## uniqUpTo(N)(x) {#uniquptonx}
@ -643,6 +643,7 @@ date_diff('unit', startdate, enddate, [timezone])

- `unit` — the time unit corresponding to `value`. Type: [String](../../sql-reference/data-types/string.md).
    Possible values:

    - `nanosecond`
    - `microsecond`
    - `millisecond`
    - `second`
@ -1,128 +1,702 @@

---
slug: /zh/sql-reference/functions/string-search-functions
---

# String search functions {#zi-fu-chuan-sou-suo-han-shu}

All of the functions below are case-sensitive by default. Separate variants exist for case-insensitive search.
# String search functions

## position(haystack, needle), locate(haystack, needle) {#positionhaystack-needle-locatehaystack-needle}
All functions in this section search case-sensitively by default. Case-insensitive search is usually provided by separate function variants.
Note that case-insensitive search follows the lowercase/uppercase rules of the English language.
E.g. uppercase `i` in the English language is `I`, whereas in the Turkish language it is `İ`; results for languages other than English may be unexpected.

Searches for the substring `needle` in the string `haystack`.
Returns the position (in bytes) of the substring, counting from 1, or 0 if the substring was not found.
Functions in this section also assume that the search string and the searched string are single-byte encoded text (e.g. ASCII). If this assumption is violated, no exception is thrown and the results are undefined.
Search on UTF-8 encoded strings is usually provided by separate function variants. Likewise, if a UTF-8 function variant is used and the input strings are not UTF-8 encoded text, no exception is thrown and the results are undefined.
Note that no automatic Unicode normalization is performed; you can use the [normalizeUTF8*()](https://clickhouse.com/docs/zh/sql-reference/functions/string-functions/) functions for that.
[String functions](string-functions.md) and [string replace functions](string-replace-functions.md) are described separately.

For a case-insensitive search, use the function `positionCaseInsensitive`.
## position
## positionUTF8(haystack, needle) {#positionutf8haystack-needle}
Returns the position (in bytes, counting from 1) of the substring `needle` in the string `haystack`.

Same as `position`, but the position is returned in Unicode code points. Works under the assumption that the string is UTF-8 encoded text. For other encodings, it returns some unexpected result (it does not throw an exception).
**Syntax**

For a case-insensitive search, use the function `positionCaseInsensitiveUTF8`.
``` sql
position(haystack, needle[, start_pos])
```

## multiSearchAllPositions(haystack, [needle1, needle2, …, needleN]) {#multisearchallpositionshaystack-needle1-needle2-needlen}
Alias:
- `position(needle IN haystack)`

The same as `position`, but the function returns an array containing the positions of all matched needles.
**Parameters**

For a case-insensitive search and/or UTF-8 variants, use the functions `multiSearchAllPositionsCaseInsensitive`, `multiSearchAllPositionsUTF8`, `multiSearchAllPositionsCaseInsensitiveUTF8`.
- `haystack` — string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `start_pos` – position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional.

## multiSearchFirstPosition(haystack, [needle1, needle2, …, needleN]) {#multisearchfirstpositionhaystack-needle1-needle2-needlen}
**Returned value**

The same as `position`, but returns the leftmost offset in `haystack` that matches any of the needles.
- The position (in bytes, counting from 1) if the substring was found.
- 0 if the substring was not found.

For a case-insensitive search and/or UTF-8 variants, use the functions `multiSearchFirstPositionCaseInsensitive`, `multiSearchFirstPositionUTF8`, `multiSearchFirstPositionCaseInsensitiveUTF8`.
If the substring `needle` is empty, then:
- if `start_pos` is not specified, return `1`
- if `start_pos = 0`, return `1`
- if `start_pos >= 1` and `start_pos <= length(haystack) + 1`, return `start_pos`
- otherwise return `0`

## multiSearchFirstIndex(haystack, [needle1, needle2, …, needleN]) {#multisearchfirstindexhaystack-needle1-needle2-needlen}
The same rules also apply to the functions [locate](#locate), [positionCaseInsensitive](#positionCaseInsensitive), [positionUTF8](#positionUTF8) and [positionCaseInsensitiveUTF8](#positionCaseInsensitiveUTF8).

Returns the index `i` (counting from 1) of the first found needle in the string `haystack`, or 0 if nothing was matched.
Data type: `Integer`.

For a case-insensitive search and/or UTF-8 variants, use the functions `multiSearchFirstIndexCaseInsensitive`, `multiSearchFirstIndexUTF8`, `multiSearchFirstIndexCaseInsensitiveUTF8`.
**Examples**

## multiSearchAny(haystack, [needle1, needle2, …, needleN]) {#multisearchanyhaystack-needle1-needle2-needlen}
``` sql
SELECT position('Hello, world!', '!');
```

Returns 1 if at least one of the needles matches in `haystack`, and 0 otherwise.
Result:

``` text
┌─position('Hello, world!', '!')─┐
│                             13 │
└────────────────────────────────┘
```
Example with the `start_pos` argument:

``` sql
SELECT
    position('Hello, world!', 'o', 1),
    position('Hello, world!', 'o', 7)
```

Result:

``` text
┌─position('Hello, world!', 'o', 1)─┬─position('Hello, world!', 'o', 7)─┐
│                                 5 │                                 9 │
└───────────────────────────────────┴───────────────────────────────────┘
```

Example of the `needle IN haystack` syntax:

```sql
SELECT 6 = position('/' IN s) FROM (SELECT 'Hello/World' AS s);
```

Result:

```text
┌─equals(6, position(s, '/'))─┐
│                           1 │
└─────────────────────────────┘
```

Example with an empty substring `needle`:

``` sql
SELECT
    position('abc', ''),
    position('abc', '', 0),
    position('abc', '', 1),
    position('abc', '', 2),
    position('abc', '', 3),
    position('abc', '', 4),
    position('abc', '', 5)
```

Result:

``` text
┌─position('abc', '')─┬─position('abc', '', 0)─┬─position('abc', '', 1)─┬─position('abc', '', 2)─┬─position('abc', '', 3)─┬─position('abc', '', 4)─┬─position('abc', '', 5)─┐
│                   1 │                      1 │                      1 │                      2 │                      3 │                      4 │                      0 │
└─────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┘
```
## locate

Like [position](#position) but with the `haystack` and `needle` arguments swapped.

The behavior of this function depends on the ClickHouse version:
- in versions below v24.3, `locate` was an alias of function `position` and accepted arguments `(haystack, needle[, start_pos])`.
- in versions v24.3 and above, `locate` is an individual function (for better compatibility with MySQL) and accepts arguments `(needle, haystack[, start_pos])`. The previous behavior
  can be restored using the setting [function_locate_has_mysql_compatible_argument_order = false](../../operations/settings/settings.md#function-locate-has-mysql-compatible-argument-order);

**Syntax**

``` sql
locate(needle, haystack[, start_pos])
```
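For example, with the MySQL-compatible argument order of v24.3 and later (a sketch; the `needle` comes first):

```sql
SELECT locate('World', 'Hello World'); -- returns 7
```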
## positionCaseInsensitive

Like [position](#position) but searches case-insensitively.

## positionUTF8

Like [position](#position) but assumes `haystack` and `needle` are UTF-8 encoded strings.

**Examples**

Function `positionUTF8` correctly counts the character `ö` (represented by two bytes) as a single Unicode code point:

``` sql
SELECT positionUTF8('Motörhead', 'r');
```

Result:

``` text
┌─position('Motörhead', 'r')─┐
│                          5 │
└────────────────────────────┘
```

## positionCaseInsensitiveUTF8

Like [positionUTF8](#positionutf8) but searches case-insensitively.

## multiSearchAllPositions

Like [position](#position) but returns an array of positions (in bytes, counting from 1) of the `needle` substring in the `haystack` string.
For a case-insensitive search and/or UTF-8 variants, use the functions `multiSearchAnyCaseInsensitive`, `multiSearchAnyUTF8`, `multiSearchAnyCaseInsensitiveUTF8`.

:::note
In all `multiSearch*` functions, the number of needles should be less than 2^8 because of implementation specifics.
All functions starting with `multiSearch*()` support at most 2^8 needles.
:::

## match(haystack, pattern) {#matchhaystack-pattern}
**Syntax**

Checks whether the string matches the regular expression `pattern`. `pattern` can be an arbitrary `re2` regular expression. The [syntax](https://github.com/google/re2/wiki/Syntax) of `re2` regular expressions is more limited than the syntax of Perl regular expressions.
``` sql
multiSearchAllPositions(haystack, [needle1, needle2, ..., needleN])
```

Returns 0 if it does not match, or 1 if it matches.
**Parameters**

Note that the backslash symbol (`\`) is used for escaping in the regular expression. The same symbol is used for escaping in string literals. So in order to escape the symbol in a regular expression, you must write two backslashes (\\) in a string literal.
- `haystack` — string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — array of substrings to be searched. [Array](../../sql-reference/data-types/array.md)

The regular expression works with the string as if it were a set of bytes. The regular expression can't contain null bytes.
For patterns searching for substrings in a string, it is better to use LIKE or `position`, since they work much faster.
**Returned value**

## multiMatchAny(haystack, [pattern1, pattern2, …, patternN]) {#multimatchanyhaystack-pattern1-pattern2-patternn}
- An array of positions, with one element for each element of the `needle` array. If a substring was found in `haystack`, the element is its position (in bytes, counting from 1); if the substring was not found, the element is 0.

The same as `match`, but returns 0 if none of the regular expressions match and 1 if any of the patterns match. It uses the [hyperscan](https://github.com/intel/hyperscan) library. For patterns searching for substrings in a string, it is better to use `multiSearchAny`, since it works much faster.
**Examples**

``` sql
SELECT multiSearchAllPositions('Hello, World!', ['hello', '!', 'world']);
```

Result:

``` text
┌─multiSearchAllPositions('Hello, World!', ['hello', '!', 'world'])─┐
│ [0,13,0]                                                          │
└───────────────────────────────────────────────────────────────────┘
```
## multiSearchAllPositionsUTF8

Like [multiSearchAllPositions](#multiSearchAllPositions) but assumes that `haystack` and the `needle` substrings are UTF-8 encoded strings.

## multiSearchFirstPosition

Like `position`, but searches the string `haystack` for multiple `needle` substrings and returns the leftmost offset at which any of them matches.

Functions `multiSearchFirstPositionCaseInsensitive`, `multiSearchFirstPositionUTF8` and `multiSearchFirstPositionCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.

**Syntax**

```sql
multiSearchFirstPosition(haystack, [needle1, needle2, ..., needleN])
```
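An illustrative query (output assumed from the documented semantics — the leftmost match among all needles wins):

```sql
SELECT multiSearchFirstPosition('Hello World', ['llo', 'Wor', 'ld']);
```

```text
┌─multiSearchFirstPosition('Hello World', ['llo', 'Wor', 'ld'])─┐
│                                                             3 │
└───────────────────────────────────────────────────────────────┘
```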
## multiSearchFirstIndex

Returns the index `i` (counting from 1) of the leftmost found needle<sub>i</sub> in the string `haystack`, or 0 if nothing matched.

Functions `multiSearchFirstIndexCaseInsensitive`, `multiSearchFirstIndexUTF8` and `multiSearchFirstIndexCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.

**Syntax**

```sql
multiSearchFirstIndex(haystack, [needle1, needle2, ..., needleN])
```
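An illustrative query (output assumed from the documented semantics — note that the result is the array index of the leftmost matching needle, not its position in the string):

```sql
SELECT multiSearchFirstIndex('Hello World', ['World', 'Hello']);
```

```text
┌─multiSearchFirstIndex('Hello World', ['World', 'Hello'])─┐
│                                                        2 │
└──────────────────────────────────────────────────────────┘
```

`'Hello'` is the leftmost match (at position 1), and it is the second element of the needle array, so the returned index is 2.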
## multiSearchAny {#multisearchany}

Returns 1 if at least one of the substrings needle<sub>i</sub> matches the string `haystack`, otherwise 0.

Functions `multiSearchAnyCaseInsensitive`, `multiSearchAnyUTF8` and `multiSearchAnyCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.

**Syntax**

```sql
multiSearchAny(haystack, [needle1, needle2, ..., needleN])
```
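An illustrative query (output assumed from the documented semantics):

```sql
SELECT multiSearchAny('Hello World', ['hello', 'World']);
```

```text
┌─multiSearchAny('Hello World', ['hello', 'World'])─┐
│                                                 1 │
└───────────────────────────────────────────────────┘
```

The search is case-sensitive, so `'hello'` does not match, but `'World'` does, and one match is enough.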
## match {#match}

Returns whether the string `haystack` matches the regular expression `pattern` (in [re2 regular expression syntax](https://github.com/google/re2/wiki/Syntax)).

Matching is based on UTF-8, e.g. `.` matches the Unicode code point `¥` which is represented by two bytes in UTF-8. The regular expression must not contain null bytes. If `haystack` or `pattern` is not valid UTF-8, the behavior is undefined.

Unlike re2's default behavior, `.` matches line breaks. To disable this, prepend the pattern with `(?-s)`.

Note that the backslash symbol (`\`) is used for escaping in the regular expression. The same symbol is used for escaping in string literals, so to escape a symbol in a regular expression you must write two backslashes (`\\`) in the string literal.

If you only want to search for substrings, use functions [like](#like) or [position](#position) instead — they work much faster than this function.

**Syntax**

```sql
match(haystack, pattern)
```

Alias: `haystack REGEXP pattern` (operator)
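An illustrative query (output assumed from the documented semantics; note the doubled backslashes needed to get `\d` into the pattern):

```sql
SELECT match('Hello World', '^Hello.*$'), match('Hello World', '^\\d+$');
```

```text
┌─match('Hello World', '^Hello.*$')─┬─match('Hello World', '^\\d+$')─┐
│                                 1 │                              0 │
└───────────────────────────────────┴────────────────────────────────┘
```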
## multiMatchAny

Like `match`, but returns 1 if at least one of the patterns pattern<sub>i</sub> matches the string `haystack`, otherwise 0.

:::note
The `multi[Fuzzy]Match*()` family of functions uses the [Vectorscan](https://github.com/VectorCamp/vectorscan) library. As such, these functions are only enabled if ClickHouse is compiled with support for vectorscan.

To turn off all functions that use vectorscan (hyperscan), use setting `SET allow_hyperscan = 0;`.

Due to restrictions of vectorscan, the length of the `haystack` string must be less than 2<sup>32</sup> bytes, otherwise an exception is thrown.

Hyperscan is generally vulnerable to regular expression denial of service (ReDoS) attacks. For more information see
[https://www.usenix.org/conference/usenixsecurity22/presentation/turonova](https://www.usenix.org/conference/usenixsecurity22/presentation/turonova)
[https://doi.org/10.1007/s10664-021-10033-1](https://doi.org/10.1007/s10664-021-10033-1)
[https://doi.org/10.1145/3236024.3236027](https://doi.org/10.1145/3236024.3236027)
Users are advised to check the provided patterns carefully.
:::
If you only want to search for substrings, use function [multiSearchAny](#multisearchany) instead — it works much faster than this function.

**Syntax**

```sql
multiMatchAny(haystack, [pattern1, pattern2, ..., patternN])
```

## multiMatchAnyIndex

Like `multiMatchAny`, but returns any index of a pattern that matches `haystack`.

**Syntax**

```sql
multiMatchAnyIndex(haystack, [pattern1, pattern2, ..., patternN])
```
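An illustrative query (output assumed from the documented semantics; with only one matching pattern, the returned index is unambiguous):

```sql
SELECT multiMatchAnyIndex('Hello World', ['Never', 'World']);
```

```text
┌─multiMatchAnyIndex('Hello World', ['Never', 'World'])─┐
│                                                     2 │
└───────────────────────────────────────────────────────┘
```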
## multiMatchAllIndices

Like `multiMatchAny`, but returns the array of all indices of patterns that match `haystack`.

**Syntax**

```sql
multiMatchAllIndices(haystack, [pattern1, pattern2, ..., patternN])
```
## multiFuzzyMatchAny

Like `multiMatchAny`, but returns 1 if any pattern matches `haystack` within a constant [edit distance](https://en.wikipedia.org/wiki/Edit_distance). This function relies on the experimental feature of the [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) library and can be slow for some corner cases. The performance depends on the edit distance value and the patterns used, but it is always more expensive compared to a non-fuzzy search.

:::note
The `multiFuzzyMatch*()` family of functions does not support UTF-8 regular expressions (hyperscan treats them as a sequence of bytes) due to restrictions of hyperscan.
:::

**Syntax**

```sql
multiFuzzyMatchAny(haystack, distance, [pattern1, pattern2, ..., patternN])
```
## multiFuzzyMatchAnyIndex

Like `multiFuzzyMatchAny`, but returns any index of a pattern that matches `haystack` within a constant edit distance.

**Syntax**

```sql
multiFuzzyMatchAnyIndex(haystack, distance, [pattern1, pattern2, ..., patternN])
```

## multiFuzzyMatchAllIndices

Like `multiFuzzyMatchAny`, but returns the array of all indices of patterns that match `haystack` within a constant edit distance.

**Syntax**

```sql
multiFuzzyMatchAllIndices(haystack, distance, [pattern1, pattern2, ..., patternN])
```
## extract

Extracts a fragment of a string using a regular expression. If the string `haystack` does not match the regular expression `pattern`, an empty string is returned.

For a regular expression without subpatterns, the function uses the fragment that matches the entire regular expression. Otherwise, it uses the fragment that matches the first subpattern.

**Syntax**

```sql
extract(haystack, pattern)
```

## extractAll

Extracts all fragments of a string that match a regular expression. If the string `haystack` does not match the regular expression `pattern`, an empty string is returned.

Returns an array of strings consisting of all the matches.

The behavior with respect to subpatterns is the same as in function `extract`.

**Syntax**

```sql
extractAll(haystack, pattern)
```
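An illustrative comparison of the two functions (output assumed from the documented semantics):

```sql
SELECT
    extract('100-200', '\\d+') AS first_match,
    extractAll('100-200', '\\d+') AS all_matches;
```

```text
┌─first_match─┬─all_matches───┐
│ 100         │ ['100','200'] │
└─────────────┴───────────────┘
```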
## extractAllGroupsHorizontal

Matches all groups of the `haystack` string using the `pattern` regular expression.

Returns an array of arrays, where the first array includes all fragments matching the first group, the second array — those matching the second group, and so on.

This function is slower than [extractAllGroupsVertical](#extractallgroups-vertical).

**Syntax**

``` sql
extractAllGroupsHorizontal(haystack, pattern)
```

**Arguments**

- `haystack` — Input string. [String](../../sql-reference/data-types/string.md).
- `pattern` — Regular expression in [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. [String](../../sql-reference/data-types/string.md).

**Returned value**

- Type: [Array](../../sql-reference/data-types/array.md).

If `haystack` does not match the `pattern` regex, an array of empty arrays is returned.

**Example**

``` sql
SELECT extractAllGroupsHorizontal('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)');
```

Result:

``` text
┌─extractAllGroupsHorizontal('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')─┐
│ [['abc','def','ghi'],['111','222','333']]                                                │
└──────────────────────────────────────────────────────────────────────────────────────────┘
```
## extractAllGroupsVertical

Matches all groups of the `haystack` string using the `pattern` regular expression. Returns an array of arrays, where each inner array includes the matching fragments from every group. Fragments are grouped in order of their appearance in `haystack`.

**Syntax**

``` sql
extractAllGroupsVertical(haystack, pattern)
```

**Arguments**

- `haystack` — Input string. [String](../../sql-reference/data-types/string.md).
- `pattern` — Regular expression in [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. [String](../../sql-reference/data-types/string.md).

**Returned value**

- Type: [Array](../../sql-reference/data-types/array.md).

If `haystack` does not match the `pattern` regex, an empty array is returned.

**Example**

``` sql
SELECT extractAllGroupsVertical('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)');
```

Result:

``` text
┌─extractAllGroupsVertical('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')─┐
│ [['abc','111'],['def','222'],['ghi','333']]                                            │
└────────────────────────────────────────────────────────────────────────────────────────┘
```
## like {#like}

Returns whether the string `haystack` matches the LIKE expression `pattern`.

A LIKE expression can contain normal characters and the following metasymbols:

- `%` indicates an arbitrary number of arbitrary characters (including zero characters).
- `_` indicates a single arbitrary character.
- `\` is for escaping the literals `%`, `_` and `\`.

Matching is based on UTF-8, e.g. `_` matches the Unicode code point `¥` which is represented by two bytes in UTF-8.

If `haystack` or the LIKE expression is not valid UTF-8, the behavior is undefined.

No automatic Unicode normalization is performed; you can use the [normalizeUTF8*()](https://clickhouse.com/docs/zh/sql-reference/functions/string-functions/) functions for that.

To match against the literal characters `%`, `_` and `\` (which are LIKE metacharacters), prepend them with a backslash: `\%`, `\_` and `\\`.
The backslash loses its special meaning (i.e. is interpreted literally) if it prepends a character different than `%`, `_` or `\`.
Note that ClickHouse requires backslashes in strings [to be escaped as well](../syntax.md#string), so you would actually need to write `\\%`, `\\_` and `\\\\`.

For LIKE expressions of the form `%needle%`, the function is as fast as the `position` function.
All other LIKE expressions are internally converted to a regular expression and executed with a performance similar to function `match`.

**Syntax**

```sql
like(haystack, pattern)
```

Alias: `haystack LIKE pattern` (operator)
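An illustrative query showing both the function and the operator form (output assumed from the documented semantics):

```sql
SELECT like('Hello, World!', '%World%'), 'Hello, World!' LIKE 'Hello%';
```

```text
┌─like('Hello, World!', '%World%')─┬─like('Hello, World!', 'Hello%')─┐
│                                1 │                               1 │
└──────────────────────────────────┴─────────────────────────────────┘
```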
## notLike {#notlike}

Like `like` but negates the result.

Alias: `haystack NOT LIKE pattern` (operator)

## ilike

Like `like` but searches case-insensitively.

Alias: `haystack ILIKE pattern` (operator)

## notILike

Like `ilike` but negates the result.

Alias: `haystack NOT ILIKE pattern` (operator)
## ngramDistance

Calculates the 4-gram distance between a `haystack` string and a `needle` string. For this, it counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities. Returns a Float32 between 0 and 1. The smaller the result, the more similar the strings are to each other. Throws an exception if a constant `needle` or `haystack` argument is bigger than 32 KB. If any of the non-constant `haystack` or `needle` arguments is bigger than 32 KB, the distance is always 1.

Functions `ngramDistanceCaseInsensitive`, `ngramDistanceUTF8` and `ngramDistanceCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.

**Syntax**

```sql
ngramDistance(haystack, needle)
```
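A small sketch of how to read the result (the value 0 for identical strings follows directly from the definition; exact values for partially overlapping strings depend on their 4-gram multisets):

```sql
SELECT ngramDistance('ClickHouse', 'ClickHouse');
```

```text
┌─ngramDistance('ClickHouse', 'ClickHouse')─┐
│                                         0 │
└───────────────────────────────────────────┘
```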
## ngramSearch

Like `ngramDistance`, but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. the number of n-grams from `needle` minus the common number of n-grams, normalized by the number of `needle` n-grams. Returns a Float32 between 0 and 1. The bigger the result, the more likely `needle` is in `haystack`. This function is useful for fuzzy string search. Also see function `soundex`.

Functions `ngramSearchCaseInsensitive`, `ngramSearchUTF8` and `ngramSearchCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.

:::note
The UTF-8 variants use a 3-gram distance. These are not perfectly fair n-gram distances: we use 2-byte hashes to hash the n-grams and then calculate the (non-)symmetric difference between these hash tables — collisions may occur. With the UTF-8 case-insensitive variants, we do not use a fair `tolower` function: we zero the 5th bit (counting from zero) of each code point byte and the first bit of the zeroth byte if there are more than one byte — this works for Latin and mostly for all Cyrillic letters.
:::

**Syntax**

```sql
ngramSearch(haystack, needle)
```
## countSubstrings

Returns how often the substring `needle` occurs in the string `haystack`.

Functions `countSubstringsCaseInsensitive` and `countSubstringsCaseInsensitiveUTF8` provide case-insensitive and UTF-8 variants of this function.

**Syntax**

``` sql
countSubstrings(haystack, needle[, start_pos])
```

**Arguments**

- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional.

**Returned value**

- The number of occurrences of the substring.

Type: [UInt64](../../sql-reference/data-types/int-uint.md).

**Examples**

``` sql
SELECT countSubstrings('aaaa', 'aa');
```

Result:

``` text
┌─countSubstrings('aaaa', 'aa')─┐
│                             2 │
└───────────────────────────────┘
```

Example with the `start_pos` argument:

```sql
SELECT countSubstrings('abc___abc', 'abc', 4);
```

Result:

``` text
┌─countSubstrings('abc___abc', 'abc', 4)─┐
│                                      1 │
└────────────────────────────────────────┘
```
## countMatches

Returns the number of successful matches of the regular expression `pattern` in `haystack`.

**Syntax**

``` sql
countMatches(haystack, pattern)
```

**Arguments**

- `haystack` — Input string. [String](../../sql-reference/data-types/string.md).
- `pattern` — Regular expression in [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md).

**Returned value**

- The number of matches.

Type: [UInt64](../../sql-reference/data-types/int-uint.md).

**Examples**

``` sql
SELECT countMatches('foobar.com', 'o+');
```

Result:

``` text
┌─countMatches('foobar.com', 'o+')─┐
│                                2 │
└──────────────────────────────────┘
```

``` sql
SELECT countMatches('aaaa', 'aa');
```

Result:

``` text
┌─countMatches('aaaa', 'aa')─┐
│                          2 │
└────────────────────────────┘
```
## countMatchesCaseInsensitive

Like `countMatches(haystack, pattern)` but matching is performed case-insensitively.
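An illustrative query (output assumed from the documented semantics, analogous to the `countMatches('aaaa', 'aa')` example above):

``` sql
SELECT countMatchesCaseInsensitive('AAAA', 'aa');
```

``` text
┌─countMatchesCaseInsensitive('AAAA', 'aa')─┐
│                                         2 │
└───────────────────────────────────────────┘
```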
## regexpExtract

Extracts the first string in `haystack` that matches the regular expression `pattern` and corresponds to the given regex group index.

**Syntax**

``` sql
regexpExtract(haystack, pattern[, index])
```

Alias: `REGEXP_EXTRACT(haystack, pattern[, index])`.

**Arguments**

- `haystack` — String in which the regexp is matched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `pattern` — Regular expression; must be a constant. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `index` – An integer greater than or equal to 0 indicating which regex group to extract; defaults to 1. [UInt or Int](../../sql-reference/data-types/int-uint.md). Optional.

**Returned value**

`pattern` may contain multiple regex groups; `index` indicates which group to extract. An index of 0 returns the entire match.

Type: `String`.

**Example**

``` sql
SELECT
    regexpExtract('100-200', '(\\d+)-(\\d+)', 1),
    regexpExtract('100-200', '(\\d+)-(\\d+)', 2),
    regexpExtract('100-200', '(\\d+)-(\\d+)', 0),
    regexpExtract('100-200', '(\\d+)-(\\d+)');
```

Result:

``` text
┌─regexpExtract('100-200', '(\\d+)-(\\d+)', 1)─┬─regexpExtract('100-200', '(\\d+)-(\\d+)', 2)─┬─regexpExtract('100-200', '(\\d+)-(\\d+)', 0)─┬─regexpExtract('100-200', '(\\d+)-(\\d+)')─┐
│ 100                                          │ 200                                          │ 100-200                                      │ 100                                       │
└──────────────────────────────────────────────┴──────────────────────────────────────────────┴──────────────────────────────────────────────┴───────────────────────────────────────────┘
```
## hasSubsequence

Returns 1 if `needle` is a subsequence of `haystack`, otherwise 0.
A subsequence is a sequence that can be derived from a given string by deleting zero or more elements without changing the order of the remaining elements.

**Syntax**

``` sql
hasSubsequence(haystack, needle)
```

**Arguments**

- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).

**Returned values**

- 1, if `needle` is a subsequence of `haystack`.
- 0, if `needle` is not a subsequence of `haystack`.

Type: `UInt8`.

**Example**

``` sql
SELECT hasSubsequence('garbage', 'arg');
```

Result:

``` text
┌─hasSubsequence('garbage', 'arg')─┐
│                                1 │
└──────────────────────────────────┘
```

## hasSubsequenceCaseInsensitive

Like [hasSubsequence](#hasSubsequence) but searches case-insensitively.

## hasSubsequenceUTF8

Like [hasSubsequence](#hasSubsequence) but assumes that `haystack` and `needle` are UTF-8 encoded strings.

## hasSubsequenceCaseInsensitiveUTF8

Like [hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively.
@ -17,12 +17,13 @@

#include <Access/AccessControl.h>

#include <Common/config_version.h>
#include <Common/Exception.h>
#include <Common/formatReadable.h>
#include <Common/TerminalSize.h>
#include <Common/Config/ConfigProcessor.h>
#include <Common/Config/getClientConfigPath.h>
#include <Common/CurrentThread.h>
#include <Common/Exception.h>
#include <Common/TerminalSize.h>
#include <Common/config_version.h>
#include <Common/formatReadable.h>

#include <Columns/ColumnString.h>
#include <Poco/Util/Application.h>
@ -237,7 +237,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
        ASTPtr res = parseQueryAndMovePosition(
            parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth, cmd_settings.max_parser_backtracks);

        std::unique_ptr<ReadBuffer> insert_query_payload = nullptr;
        std::unique_ptr<ReadBuffer> insert_query_payload;
        /// If the query is INSERT ... VALUES, then we will try to parse the data.
        if (auto * insert_query = res->as<ASTInsertQuery>(); insert_query && insert_query->data)
        {
@ -35,7 +35,7 @@ public:
    ExternalDictionaryLibraryAPI::CStrings strings; // will pass pointer to lib

private:
    std::unique_ptr<ExternalDictionaryLibraryAPI::CString[]> ptr_holder = nullptr;
    std::unique_ptr<ExternalDictionaryLibraryAPI::CString[]> ptr_holder;
    Container strings_holder;
};
@ -96,7 +96,7 @@
            <to>https://{bucket}.s3.amazonaws.com</to>
        </s3>
        <gs>
            <to>https://{bucket}.storage.googleapis.com</to>
            <to>https://storage.googleapis.com/{bucket}</to>
        </gs>
        <oss>
            <to>https://{bucket}.oss.aliyuncs.com</to>
@ -1,6 +1,8 @@
#include <Access/AccessRights.h>
#include <Common/logger_useful.h>
#include <base/sort.h>
#include <Common/Exception.h>
#include <Common/logger_useful.h>

#include <boost/container/small_vector.hpp>
#include <boost/range/adaptor/map.hpp>
#include <unordered_map>
@ -205,7 +205,7 @@ enum class AccessType
    M(SYSTEM_FLUSH, "", GROUP, SYSTEM) \
    M(SYSTEM_THREAD_FUZZER, "SYSTEM START THREAD FUZZER, SYSTEM STOP THREAD FUZZER, START THREAD FUZZER, STOP THREAD FUZZER", GLOBAL, SYSTEM) \
    M(SYSTEM_UNFREEZE, "SYSTEM UNFREEZE", GLOBAL, SYSTEM) \
    M(SYSTEM_FAILPOINT, "SYSTEM ENABLE FAILPOINT, SYSTEM DISABLE FAILPOINT", GLOBAL, SYSTEM) \
    M(SYSTEM_FAILPOINT, "SYSTEM ENABLE FAILPOINT, SYSTEM DISABLE FAILPOINT, SYSTEM WAIT FAILPOINT", GLOBAL, SYSTEM) \
    M(SYSTEM_LISTEN, "SYSTEM START LISTEN, SYSTEM STOP LISTEN", GLOBAL, SYSTEM) \
    M(SYSTEM_JEMALLOC, "SYSTEM JEMALLOC PURGE, SYSTEM JEMALLOC ENABLE PROFILE, SYSTEM JEMALLOC DISABLE PROFILE, SYSTEM JEMALLOC FLUSH PROFILE", GLOBAL, SYSTEM) \
    M(SYSTEM, "", GROUP, ALL) /* allows to execute SYSTEM {SHUTDOWN|RELOAD CONFIG|...} */ \
@ -115,34 +115,34 @@ public:

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        this->data(place).add(*columns[0], row_num, arena);
        data(place).add(*columns[0], row_num, arena);
    }

    void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override
    {
        this->data(place).addManyDefaults(*columns[0], 0, arena);
        data(place).addManyDefaults(*columns[0], 0, arena);
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        this->data(place).add(this->data(rhs), arena);
        data(place).add(data(rhs), arena);
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        this->data(place).write(buf, *serialization);
        data(place).write(buf, *serialization);
    }

    void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        this->data(place).read(buf, *serialization, arena);
        data(place).read(buf, *serialization, arena);
    }

    bool allocatesMemoryInArena() const override { return singleValueTypeAllocatesMemoryInArena(value_type_index); }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        this->data(place).insertResultInto(to);
        data(place).insertResultInto(to);
    }
};
@ -1,11 +1,11 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.h>

#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
#include <Common/CurrentThread.h>

static constexpr size_t MAX_AGGREGATE_FUNCTION_NAME_LENGTH = 1000;
@ -559,7 +559,7 @@ public:
            ptr = ptrs[row_num];
        }

        this->data(place).add(ptr, allocated, trace_values.data() + prev_offset, trace_size, arena);
        data(place).add(ptr, allocated, trace_values.data() + prev_offset, trace_size, arena);
    }

    void addManyDefaults(
@ -572,7 +572,7 @@ public:

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        this->data(place).merge(this->data(rhs), arena);
        data(place).merge(data(rhs), arena);
    }

    void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional<size_t> /* version */) const override
@ -590,7 +590,7 @@ public:
        auto & array = assert_cast<ColumnArray &>(to);
        auto & str = assert_cast<ColumnString &>(array.getData());

        this->data(place).dumpFlameGraph(str.getChars(), str.getOffsets(), 0, 0);
        data(place).dumpFlameGraph(str.getChars(), str.getOffsets(), 0, 0);

        array.getOffsets().push_back(str.size());
    }
@ -89,10 +89,10 @@ struct GroupArraySamplerData
        chassert(lim != 0);

        /// With a large number of values, we will generate random numbers several times slower.
        if (lim <= static_cast<UInt64>(rng.max()))
        if (lim <= static_cast<UInt64>(pcg32_fast::max()))
            return rng() % lim;
        else
            return (static_cast<UInt64>(rng()) * (static_cast<UInt64>(rng.max()) + 1ULL) + static_cast<UInt64>(rng())) % lim;
            return (static_cast<UInt64>(rng()) * (static_cast<UInt64>(pcg32::max()) + 1ULL) + static_cast<UInt64>(rng())) % lim;
    }

    void randomShuffle()
@ -242,7 +242,7 @@ public:
    {
        Float64 x = getFloat64DataFromColumn(columns[0], row_num, this->x_type);
        Float64 y = getFloat64DataFromColumn(columns[1], row_num, this->y_type);
        this->data(place).add(x, y, arena);
        data(place).add(x, y, arena);
    }

    Float64 getFloat64DataFromColumn(const IColumn * column, size_t row_num, TypeIndex type_index) const
@ -264,25 +264,25 @@ public:

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        auto & a = this->data(place);
        const auto & b = this->data(rhs);
        auto & a = data(place);
        const auto & b = data(rhs);

        a.merge(b, arena);
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        this->data(place).write(buf);
        data(place).write(buf);
    }

    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        this->data(place).read(buf, arena);
        data(place).read(buf, arena);
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
    {
        auto res = this->data(place).getResult(total_buckets, arena);
        auto res = data(place).getResult(total_buckets, arena);

        auto & col = assert_cast<ColumnArray &>(to);
        auto & col_offsets = assert_cast<ColumnArray::ColumnOffsets &>(col.getOffsetsColumn());
@ -205,35 +205,35 @@ public:
        UInt8 is_second = columns[1]->getUInt(row_num);

        if (is_second)
            this->data(place).addY(value, arena);
            data(place).addY(value, arena);
        else
            this->data(place).addX(value, arena);
            data(place).addX(value, arena);
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        auto & a = this->data(place);
        const auto & b = this->data(rhs);
        auto & a = data(place);
        const auto & b = data(rhs);

        a.merge(b, arena);
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        this->data(place).write(buf);
        data(place).write(buf);
    }

    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        this->data(place).read(buf, arena);
        data(place).read(buf, arena);
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        if (!this->data(place).size_x || !this->data(place).size_y)
        if (!data(place).size_x || !data(place).size_y)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} require both samples to be non empty", getName());

        auto [u_statistic, p_value] = this->data(place).getResult(alternative, continuity_correction);
        auto [u_statistic, p_value] = data(place).getResult(alternative, continuity_correction);

        /// Because p-value is a probability.
        p_value = std::min(1.0, std::max(0.0, p_value));
@ -66,31 +66,31 @@ public:
    {
        Float64 new_x = columns[0]->getFloat64(row_num);
        Float64 new_y = columns[1]->getFloat64(row_num);
        this->data(place).addX(new_x, arena);
        this->data(place).addY(new_y, arena);
        data(place).addX(new_x, arena);
        data(place).addY(new_y, arena);
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        auto & a = this->data(place);
        const auto & b = this->data(rhs);
        auto & a = data(place);
        const auto & b = data(rhs);

        a.merge(b, arena);
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        this->data(place).write(buf);
        data(place).write(buf);
    }

    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        this->data(place).read(buf, arena);
        data(place).read(buf, arena);
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        auto answer = this->data(place).getResult();
        auto answer = data(place).getResult();

        auto & column = static_cast<ColumnVector<Float64> &>(to);
        column.getData().push_back(answer);
@ -102,24 +102,24 @@ public:
            auto event = assert_cast<const ColumnVector<UInt8> *>(columns[i])->getData()[row_num];
            if (event)
            {
                this->data(place).add(i);
                data(place).add(i);
            }
        }
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
    {
        this->data(place).merge(this->data(rhs));
        data(place).merge(data(rhs));
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        this->data(place).serialize(buf);
        data(place).serialize(buf);
    }

    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
    {
        this->data(place).deserialize(buf);
        data(place).deserialize(buf);
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
@ -130,13 +130,13 @@ public:
        ColumnArray::Offset current_offset = data_to.size();
        data_to.resize(current_offset + events_size);

        const bool first_flag = this->data(place).events.test(0);
        const bool first_flag = data(place).events.test(0);
        data_to[current_offset] = first_flag;
        ++current_offset;

        for (size_t i = 1; i < events_size; ++i)
        {
            data_to[current_offset] = (first_flag && this->data(place).events.test(i));
            data_to[current_offset] = (first_flag && data(place).events.test(i));
            ++current_offset;
        }
@ -123,22 +123,22 @@ public:
        Float64 x = columns[0]->getFloat64(row_num);
        Float64 y = columns[1]->getFloat64(row_num);

        this->data(place).add(x, y);
        data(place).add(x, y);
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
    {
        this->data(place).merge(this->data(rhs));
        data(place).merge(data(rhs));
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        this->data(place).serialize(buf);
        data(place).serialize(buf);
    }

    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
    {
        this->data(place).deserialize(buf);
        data(place).deserialize(buf);
    }

    static DataTypePtr createResultType()
@ -168,8 +168,8 @@ public:
        IColumn & to,
        Arena *) const override
    {
        Float64 k = this->data(place).getK();
        Float64 b = this->data(place).getB(k);
        Float64 k = data(place).getK();
        Float64 b = data(place).getB(k);

        auto & col_tuple = assert_cast<ColumnTuple &>(to);
        auto & col_k = assert_cast<ColumnVector<Float64> &>(col_tuple.getColumn(0));
@ -120,7 +120,7 @@ public:

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        this->data(place).add(*columns[0], row_num, arena);
        data(place).add(*columns[0], row_num, arena);
    }

    void addBatchSinglePlace(
@ -131,7 +131,7 @@ public:
        Arena * arena,
        ssize_t if_argument_pos) const override
    {
        if (this->data(place).isNull())
        if (data(place).isNull())
            return;
        IAggregateFunctionDataHelper<Data, AggregateFunctionSingleValueOrNull>::addBatchSinglePlace(
            row_begin, row_end, place, columns, arena, if_argument_pos);
@ -146,7 +146,7 @@ public:
        Arena * arena,
        ssize_t if_argument_pos) const override
    {
        if (this->data(place).isNull())
        if (data(place).isNull())
            return;
        IAggregateFunctionDataHelper<Data, AggregateFunctionSingleValueOrNull>::addBatchSinglePlaceNotNull(
            row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
@ -154,29 +154,29 @@ public:

    void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override
    {
        this->data(place).add(*columns[0], 0, arena);
        data(place).add(*columns[0], 0, arena);
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        this->data(place).add(this->data(rhs), arena);
        data(place).add(data(rhs), arena);
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        this->data(place).write(buf, *serialization);
        data(place).write(buf, *serialization);
    }

    void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        this->data(place).read(buf, *serialization, arena);
        data(place).read(buf, *serialization, arena);
    }

    bool allocatesMemoryInArena() const override { return singleValueTypeAllocatesMemoryInArena(value_type_index); }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        this->data(place).insertResultInto(to);
        data(place).insertResultInto(to);
    }
};
@ -150,13 +150,13 @@ private:

    Float64 getResult(ConstAggregateDataPtr __restrict place) const
    {
        const auto & data = this->data(place);
        const auto & dt = data(place);
        switch (kind)
        {
            case VarKind::varSampStable: return getVarSamp(data.m2, data.count);
            case VarKind::stddevSampStable: return getStddevSamp(data.m2, data.count);
            case VarKind::varPopStable: return getVarPop(data.m2, data.count);
            case VarKind::stddevPopStable: return getStddevPop(data.m2, data.count);
            case VarKind::varSampStable: return getVarSamp(dt.m2, dt.count);
            case VarKind::stddevSampStable: return getStddevSamp(dt.m2, dt.count);
            case VarKind::varPopStable: return getVarPop(dt.m2, dt.count);
            case VarKind::stddevPopStable: return getStddevPop(dt.m2, dt.count);
        }
    }
@ -182,22 +182,22 @@ public:

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
    {
        this->data(place).update(*columns[0], row_num);
        data(place).update(*columns[0], row_num);
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
    {
        this->data(place).mergeWith(this->data(rhs));
        data(place).mergeWith(data(rhs));
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        this->data(place).serialize(buf);
        data(place).serialize(buf);
    }

    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
    {
        this->data(place).deserialize(buf);
        data(place).deserialize(buf);
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
@ -457,9 +457,9 @@ public:
        detail::Adder<T, Data>::add(this->data(place), columns, num_args, row_begin, row_end, flags, null_map);
    }

    bool isParallelizeMergePrepareNeeded() const override { return is_parallelize_merge_prepare_needed;}
    bool isParallelizeMergePrepareNeeded() const override { return is_parallelize_merge_prepare_needed; }

    void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool) const override
    void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled) const override
    {
        if constexpr (is_parallelize_merge_prepare_needed)
        {
@ -469,7 +469,7 @@ public:
            for (size_t i = 0; i < data_vec.size(); ++i)
                data_vec[i] = &this->data(places[i]).set;

            DataSet::parallelizeMergePrepare(data_vec, thread_pool);
            DataSet::parallelizeMergePrepare(data_vec, thread_pool, is_cancelled);
        }
        else
        {
@ -485,10 +485,10 @@ public:
    bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; }
    bool canOptimizeEqualKeysRanges() const override { return !is_able_to_parallelize_merge; }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena *) const override
    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled, Arena *) const override
    {
        if constexpr (is_able_to_parallelize_merge)
            this->data(place).set.merge(this->data(rhs).set, &thread_pool);
            this->data(place).set.merge(this->data(rhs).set, &thread_pool, &is_cancelled);
        else
            this->data(place).set.merge(this->data(rhs).set);
    }
@ -579,10 +579,10 @@ public:
    bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; }
    bool canOptimizeEqualKeysRanges() const override { return !is_able_to_parallelize_merge; }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena *) const override
    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled, Arena *) const override
    {
        if constexpr (is_able_to_parallelize_merge)
            this->data(place).set.merge(this->data(rhs).set, &thread_pool);
            this->data(place).set.merge(this->data(rhs).set, &thread_pool, &is_cancelled);
        else
            this->data(place).set.merge(this->data(rhs).set);
    }
@ -144,9 +144,14 @@ public:
    bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); }
    bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
    void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled) const override
    {
        nested_func->merge(place, rhs, thread_pool, arena);
        nested_func->parallelizeMergePrepare(places, thread_pool, is_cancelled);
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled, Arena * arena) const override
    {
        nested_func->merge(place, rhs, thread_pool, is_cancelled, arena);
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> version) const override
@ -167,9 +167,14 @@ public:
    bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); }
    bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
    void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled) const override
    {
        nested_func->merge(place, rhs, thread_pool, arena);
        nested_func->parallelizeMergePrepare(places, thread_pool, is_cancelled);
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled, Arena * arena) const override
    {
        nested_func->merge(place, rhs, thread_pool, is_cancelled, arena);
    }

    void mergeBatch(
@ -113,9 +113,14 @@ public:
    bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); }
    bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
    void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled) const override
    {
        nested_func->merge(place, rhs, thread_pool, arena);
        nested_func->parallelizeMergePrepare(places, thread_pool, is_cancelled);
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled, Arena * arena) const override
    {
        nested_func->merge(place, rhs, thread_pool, is_cancelled, arena);
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> version) const override
@ -154,9 +154,18 @@ public:
    bool isAbleToParallelizeMerge() const override { return nested_function->isAbleToParallelizeMerge(); }
    bool canOptimizeEqualKeysRanges() const override { return nested_function->canOptimizeEqualKeysRanges(); }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
    void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled) const override
    {
        nested_function->merge(nestedPlace(place), nestedPlace(rhs), thread_pool, arena);
        AggregateDataPtrs nested_places(places.begin(), places.end());
        for (auto & nested_place : nested_places)
            nested_place = nestedPlace(nested_place);

        nested_function->parallelizeMergePrepare(nested_places, thread_pool, is_cancelled);
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled, Arena * arena) const override
    {
        nested_function->merge(nestedPlace(place), nestedPlace(rhs), thread_pool, is_cancelled, arena);
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> version) const override
@ -482,7 +491,7 @@ public:
        std::vector<const UInt8 *> nullable_filters;
        const IColumn * nested_columns[number_of_arguments];

        std::unique_ptr<UInt8[]> final_flags = nullptr;
        std::unique_ptr<UInt8[]> final_flags;
        const UInt8 * final_flags_ptr = nullptr;

        if (if_argument_pos >= 0)
@ -94,9 +94,14 @@ public:
    bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); }
    bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
    void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled) const override
    {
        nested_func->merge(place, rhs, thread_pool, arena);
        nested_func->parallelizeMergePrepare(places, thread_pool, is_cancelled);
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled, Arena * arena) const override
    {
        nested_func->merge(place, rhs, thread_pool, is_cancelled, arena);
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> version) const override
@ -151,7 +151,7 @@ public:

    virtual bool isParallelizeMergePrepareNeeded() const { return false; }

    virtual void parallelizeMergePrepare(AggregateDataPtrs & /*places*/, ThreadPool & /*thread_pool*/) const
    virtual void parallelizeMergePrepare(AggregateDataPtrs & /*places*/, ThreadPool & /*thread_pool*/, std::atomic<bool> & /*is_cancelled*/) const
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "parallelizeMergePrepare() with thread pool parameter isn't implemented for {} ", getName());
    }
@ -168,7 +168,7 @@ public:

    /// Should be used only if isAbleToParallelizeMerge() returned true.
    virtual void
    merge(AggregateDataPtr __restrict /*place*/, ConstAggregateDataPtr /*rhs*/, ThreadPool & /*thread_pool*/, Arena * /*arena*/) const
    merge(AggregateDataPtr __restrict /*place*/, ConstAggregateDataPtr /*rhs*/, ThreadPool & /*thread_pool*/, std::atomic<bool> & /*is_cancelled*/, Arena * /*arena*/) const
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "merge() with thread pool parameter isn't implemented for {} ", getName());
    }
@ -258,10 +258,10 @@ private:
        chassert(limit > 0);

        /// With a large number of values, we will generate random numbers several times slower.
        if (limit <= static_cast<UInt64>(rng.max()))
        if (limit <= static_cast<UInt64>(pcg32_fast::max()))
            return rng() % limit;
        else
            return (static_cast<UInt64>(rng()) * (static_cast<UInt64>(rng.max()) + 1ULL) + static_cast<UInt64>(rng())) % limit;
            return (static_cast<UInt64>(rng()) * (static_cast<UInt64>(pcg32_fast::max()) + 1ULL) + static_cast<UInt64>(rng())) % limit;
    }

    void sortIfNeeded()
@ -37,7 +37,7 @@ public:
    /// In merge, if one of the lhs and rhs is twolevelset and the other is singlelevelset, then the singlelevelset will need to convertToTwoLevel().
    /// It's not in parallel and will cost extra large time if the thread_num is large.
    /// This method will convert all the SingleLevelSet to TwoLevelSet in parallel if the hashsets are not all singlelevel or not all twolevel.
    static void parallelizeMergePrepare(const std::vector<UniqExactSet *> & data_vec, ThreadPool & thread_pool)
    static void parallelizeMergePrepare(const std::vector<UniqExactSet *> & data_vec, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled)
    {
        UInt64 single_level_set_num = 0;
        UInt64 all_single_hash_size = 0;
@ -63,7 +63,7 @@ public:
            try
            {
                auto data_vec_atomic_index = std::make_shared<std::atomic_uint32_t>(0);
                auto thread_func = [data_vec, data_vec_atomic_index, thread_group = CurrentThread::getGroup()]()
                auto thread_func = [data_vec, data_vec_atomic_index, &is_cancelled, thread_group = CurrentThread::getGroup()]()
                {
                    SCOPE_EXIT_SAFE(
                        if (thread_group)
@ -76,6 +76,9 @@ public:

                    while (true)
                    {
                        if (is_cancelled.load(std::memory_order_seq_cst))
                            return;

                        const auto i = data_vec_atomic_index->fetch_add(1);
                        if (i >= data_vec.size())
                            return;
@ -96,7 +99,7 @@ public:
            }
        }

    auto merge(const UniqExactSet & other, ThreadPool * thread_pool = nullptr)
    auto merge(const UniqExactSet & other, ThreadPool * thread_pool = nullptr, std::atomic<bool> * is_cancelled = nullptr)
    {
        if (isSingleLevel() && other.isTwoLevel())
            convertToTwoLevel();
@ -113,7 +116,9 @@ public:
            if (!thread_pool)
            {
                for (size_t i = 0; i < rhs.NUM_BUCKETS; ++i)
                {
                    lhs.impls[i].merge(rhs.impls[i]);
                }
            }
            else
            {
@ -121,7 +126,7 @@ public:
            {
                auto next_bucket_to_merge = std::make_shared<std::atomic_uint32_t>(0);

                auto thread_func = [&lhs, &rhs, next_bucket_to_merge, thread_group = CurrentThread::getGroup()]()
                auto thread_func = [&lhs, &rhs, next_bucket_to_merge, is_cancelled, thread_group = CurrentThread::getGroup()]()
                {
                    SCOPE_EXIT_SAFE(
                        if (thread_group)
@ -133,6 +138,9 @@ public:

                    while (true)
                    {
                        if (is_cancelled->load(std::memory_order_seq_cst))
                            return;

                        const auto bucket = next_bucket_to_merge->fetch_add(1);
                        if (bucket >= rhs.NUM_BUCKETS)
                            return;
@ -11,35 +11,37 @@ namespace DB
 * Example of usage:
 * std::unordered_map<QueryTreeNodeConstRawPtrWithHash, std::string> map;
 */
template <typename QueryTreeNodePtrType>
template <typename QueryTreeNodePtrType, bool compare_aliases = true>
struct QueryTreeNodeWithHash
{
    QueryTreeNodeWithHash(QueryTreeNodePtrType node_) /// NOLINT
        : node(std::move(node_))
        , hash(node->getTreeHash())
        , hash(node->getTreeHash({.compare_aliases = compare_aliases}))
    {}

    QueryTreeNodePtrType node = nullptr;
    CityHash_v1_0_2::uint128 hash;
};

template <typename T>
inline bool operator==(const QueryTreeNodeWithHash<T> & lhs, const QueryTreeNodeWithHash<T> & rhs)
template <typename T, bool compare_aliases>
inline bool operator==(const QueryTreeNodeWithHash<T, compare_aliases> & lhs, const QueryTreeNodeWithHash<T, compare_aliases> & rhs)
{
    return lhs.hash == rhs.hash && lhs.node->isEqual(*rhs.node);
    return lhs.hash == rhs.hash && lhs.node->isEqual(*rhs.node, {.compare_aliases = compare_aliases});
}

template <typename T>
inline bool operator!=(const QueryTreeNodeWithHash<T> & lhs, const QueryTreeNodeWithHash<T> & rhs)
template <typename T, bool compare_aliases>
inline bool operator!=(const QueryTreeNodeWithHash<T, compare_aliases> & lhs, const QueryTreeNodeWithHash<T, compare_aliases> & rhs)
{
    return !(lhs == rhs);
}

using QueryTreeNodePtrWithHash = QueryTreeNodeWithHash<QueryTreeNodePtr>;
using QueryTreeNodePtrWithHashWithoutAlias = QueryTreeNodeWithHash<QueryTreeNodePtr, /*compare_aliases*/ false>;
using QueryTreeNodeRawPtrWithHash = QueryTreeNodeWithHash<IQueryTreeNode *>;
using QueryTreeNodeConstRawPtrWithHash = QueryTreeNodeWithHash<const IQueryTreeNode *>;

using QueryTreeNodePtrWithHashSet = std::unordered_set<QueryTreeNodePtrWithHash>;
using QueryTreeNodePtrWithHashWithoutAliasSet = std::unordered_set<QueryTreeNodePtrWithHashWithoutAlias>;
using QueryTreeNodeConstRawPtrWithHashSet = std::unordered_set<QueryTreeNodeConstRawPtrWithHash>;

template <typename Value>
@ -50,10 +52,10 @@ using QueryTreeNodeConstRawPtrWithHashMap = std::unordered_map<QueryTreeNodeCons

}

template <typename T>
struct std::hash<DB::QueryTreeNodeWithHash<T>>
template <typename T, bool compare_aliases>
struct std::hash<DB::QueryTreeNodeWithHash<T, compare_aliases>>
{
    size_t operator()(const DB::QueryTreeNodeWithHash<T> & node_with_hash) const
    size_t operator()(const DB::QueryTreeNodeWithHash<T, compare_aliases> & node_with_hash) const
    {
        return node_with_hash.hash.low64;
    }
@ -164,7 +164,7 @@ bool IQueryTreeNode::isEqual(const IQueryTreeNode & rhs, CompareOptions compare_
    return true;
}

IQueryTreeNode::Hash IQueryTreeNode::getTreeHash() const
IQueryTreeNode::Hash IQueryTreeNode::getTreeHash(CompareOptions compare_options) const
{
    /** Compute tree hash with this node as root.
     *
@ -201,7 +201,7 @@ IQueryTreeNode::Hash IQueryTreeNode::getTreeHash() const
        }

        hash_state.update(static_cast<size_t>(node_to_process->getNodeType()));
        if (!node_to_process->alias.empty())
        if (compare_options.compare_aliases && !node_to_process->alias.empty())
        {
            hash_state.update(node_to_process->alias.size());
            hash_state.update(node_to_process->alias);
@ -114,7 +114,7 @@ public:
     * Alias of query tree node is part of query tree hash.
     * Original AST is not part of query tree hash.
     */
    Hash getTreeHash() const;
    Hash getTreeHash(CompareOptions compare_options = { .compare_aliases = true }) const;

    /// Get a deep copy of the query tree
    QueryTreeNodePtr clone() const;
@ -1,73 +0,0 @@
#include <Analyzer/ColumnNode.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/Passes/ConvertInToEqualPass.h>
#include <Functions/equals.h>
#include <Functions/notEquals.h>

namespace DB
{

class ConvertInToEqualPassVisitor : public InDepthQueryTreeVisitorWithContext<ConvertInToEqualPassVisitor>
{
public:
    using Base = InDepthQueryTreeVisitorWithContext<ConvertInToEqualPassVisitor>;
    using Base::Base;

    void enterImpl(QueryTreeNodePtr & node)
    {
        static const std::unordered_map<String, String> MAPPING = {
            {"in", "equals"},
            {"notIn", "notEquals"}
        };
        auto * func_node = node->as<FunctionNode>();
        if (!func_node
            || !MAPPING.contains(func_node->getFunctionName())
            || func_node->getArguments().getNodes().size() != 2)
            return ;
        auto args = func_node->getArguments().getNodes();
        auto * column_node = args[0]->as<ColumnNode>();
        auto * constant_node = args[1]->as<ConstantNode>();
        if (!column_node || !constant_node)
            return ;
        // IN multiple values is not supported
        if (constant_node->getValue().getType() == Field::Types::Which::Tuple
            || constant_node->getValue().getType() == Field::Types::Which::Array)
            return ;
        // x IN null not equivalent to x = null
        if (constant_node->getValue().isNull())
            return ;
        auto result_func_name = MAPPING.at(func_node->getFunctionName());
        auto equal = std::make_shared<FunctionNode>(result_func_name);
        QueryTreeNodes arguments{column_node->clone(), constant_node->clone()};
        equal->getArguments().getNodes() = std::move(arguments);
        FunctionOverloadResolverPtr resolver;
        bool decimal_check_overflow = getContext()->getSettingsRef().decimal_check_overflow;
        if (result_func_name == "equals")
        {
            resolver = createInternalFunctionEqualOverloadResolver(decimal_check_overflow);
        }
        else
        {
            resolver = createInternalFunctionNotEqualOverloadResolver(decimal_check_overflow);
        }
        try
        {
            equal->resolveAsFunction(resolver);
        }
        catch (...)
        {
            // When function resolver fails, we should not replace the function node
            return;
        }
        node = equal;
    }
};

void ConvertInToEqualPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
    ConvertInToEqualPassVisitor visitor(std::move(context));
    visitor.visit(query_tree_node);
}
}
@ -1,27 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <Analyzer/IQueryTreePass.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/** Optimize `in` to `equals` if possible.
|
||||
* 1. convert in single value to equal
|
||||
* Example: SELECT * from test where x IN (1);
|
||||
* Result: SELECT * from test where x = 1;
|
||||
*
|
||||
* 2. convert not in single value to notEqual
|
||||
* Example: SELECT * from test where x NOT IN (1);
|
||||
* Result: SELECT * from test where x != 1;
|
||||
*
|
||||
* If value is null or tuple, do not convert.
|
||||
*/
|
||||
class ConvertInToEqualPass final : public IQueryTreePass
|
||||
{
|
||||
public:
|
||||
String getName() override { return "ConvertInToEqualPass"; }
|
||||
|
||||
String getDescription() override { return "Convert in to equal"; }
|
||||
|
||||
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
|
||||
};
|
||||
}
|
@@ -146,7 +146,7 @@ void resolveGroupingFunctions(QueryTreeNodePtr & query_node, ContextPtr context)
    if (query_node_typed.hasGroupBy())
    {
        /// It is expected by the execution layer that if there is only 1 grouping set it will be removed
        if (query_node_typed.isGroupByWithGroupingSets() && query_node_typed.getGroupBy().getNodes().size() == 1)
        if (query_node_typed.isGroupByWithGroupingSets() && query_node_typed.getGroupBy().getNodes().size() == 1 && !context->getSettingsRef().group_by_use_nulls)
        {
            auto grouping_set_list_node = query_node_typed.getGroupBy().getNodes().front();
            auto & grouping_set_list_node_typed = grouping_set_list_node->as<ListNode &>();
@@ -65,6 +65,12 @@ public:
        auto multi_if_function = std::make_shared<FunctionNode>("multiIf");
        multi_if_function->getArguments().getNodes() = std::move(multi_if_arguments);
        multi_if_function->resolveAsFunction(multi_if_function_ptr->build(multi_if_function->getArgumentColumns()));

        /// Ignore if the returned type changed.
        /// Example: SELECT now64(if(Null, NULL, if(Null, nan, toFloat64(number))), Null) FROM numbers(2)
        if (!multi_if_function->getResultType()->equals(*function_node->getResultType()))
            return;

        node = std::move(multi_if_function);
    }

@@ -776,7 +776,13 @@ struct IdentifierResolveScope
    /// Table expression node to data
    std::unordered_map<QueryTreeNodePtr, TableExpressionData> table_expression_node_to_data;

    QueryTreeNodePtrWithHashSet nullable_group_by_keys;
    QueryTreeNodePtrWithHashWithoutAliasSet nullable_group_by_keys;
    /// Here we count the number of nullable GROUP BY keys we met while resolving an expression.
    /// E.g. for the query `SELECT tuple(tuple(number)) FROM numbers(10) GROUP BY (number, tuple(number)) WITH CUBE`
    /// both `number` and `tuple(number)` would be in nullable_group_by_keys.
    /// But when we resolve `tuple(tuple(number))` we should figure out that `tuple(number)` is already a key,
    /// and we should not convert `number` to nullable.
    size_t found_nullable_group_by_key_in_scope = 0;

    /** It's possible that after a JOIN, a column in the projection has a type different from the column in the source table.
      * (For example, after join_use_nulls or a USING column casted to the supertype)
@@ -1934,8 +1940,7 @@ std::vector<String> QueryAnalyzer::collectIdentifierTypoHints(const Identifier &
    for (const auto & valid_identifier : valid_identifiers)
        prompting_strings.push_back(valid_identifier.getFullName());

    NamePrompter<1> prompter;
    return prompter.getHints(unresolved_identifier.getFullName(), prompting_strings);
    return NamePrompter<1>::getHints(unresolved_identifier.getFullName(), prompting_strings);
}

/** Wrap expression node in tuple element function calls for nested paths.
@@ -2059,82 +2064,100 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden
    subquery_context->setSetting("use_structure_from_insertion_table_in_table_functions", false);

    auto options = SelectQueryOptions(QueryProcessingStage::Complete, scope.subquery_depth, true /*is_subquery*/);
    options.only_analyze = only_analyze;
    auto interpreter = std::make_unique<InterpreterSelectQueryAnalyzer>(node->toAST(), subquery_context, subquery_context->getViewSource(), options);

    auto io = interpreter->execute();
    PullingAsyncPipelineExecutor executor(io.pipeline);
    io.pipeline.setProgressCallback(context->getProgressCallback());
    io.pipeline.setProcessListElement(context->getProcessListElement());

    Block block;

    while (block.rows() == 0 && executor.pull(block))
    if (only_analyze)
    {
    }

    if (block.rows() == 0)
    {
        auto types = interpreter->getSampleBlock().getDataTypes();
        if (types.size() != 1)
            types = {std::make_shared<DataTypeTuple>(types)};

        auto & type = types[0];
        if (!type->isNullable())
        /// If the query is only analyzed, then the constants are not correct.
        scalar_block = interpreter->getSampleBlock();
        for (auto & column : scalar_block)
        {
            if (!type->canBeInsideNullable())
                throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY,
                    "Scalar subquery returned empty result of type {} which cannot be Nullable",
                    type->getName());

            type = makeNullable(type);
            if (column.column->empty())
            {
                auto mut_col = column.column->cloneEmpty();
                mut_col->insertDefault();
                column.column = std::move(mut_col);
            }
        }

        auto scalar_column = type->createColumn();
        scalar_column->insert(Null());
        scalar_block.insert({std::move(scalar_column), type, "null"});
    }
    else
    {
        if (block.rows() != 1)
            throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row");
        auto io = interpreter->execute();
        PullingAsyncPipelineExecutor executor(io.pipeline);
        io.pipeline.setProgressCallback(context->getProgressCallback());
        io.pipeline.setProcessListElement(context->getProcessListElement());

        Block tmp_block;
        while (tmp_block.rows() == 0 && executor.pull(tmp_block))
        Block block;

        while (block.rows() == 0 && executor.pull(block))
        {
        }

        if (tmp_block.rows() != 0)
            throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row");

        block = materializeBlock(block);
        size_t columns = block.columns();

        if (columns == 1)
        if (block.rows() == 0)
        {
            auto & column = block.getByPosition(0);
            /// Here we wrap the type into nullable if we can.
            /// It is needed because if the subquery returns no rows, its result will be Null.
            /// In case of many columns, do not check it because a tuple can't be nullable.
            if (!column.type->isNullable() && column.type->canBeInsideNullable())
            auto types = interpreter->getSampleBlock().getDataTypes();
            if (types.size() != 1)
                types = {std::make_shared<DataTypeTuple>(types)};

            auto & type = types[0];
            if (!type->isNullable())
            {
                column.type = makeNullable(column.type);
                column.column = makeNullable(column.column);
                if (!type->canBeInsideNullable())
                    throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY,
                        "Scalar subquery returned empty result of type {} which cannot be Nullable",
                        type->getName());

                type = makeNullable(type);
            }

            scalar_block = block;
            auto scalar_column = type->createColumn();
            scalar_column->insert(Null());
            scalar_block.insert({std::move(scalar_column), type, "null"});
        }
        else
        {
            /** Make unique column names for tuple.
              *
              * Example: SELECT (SELECT 2 AS x, x)
              */
            makeUniqueColumnNamesInBlock(block);
            if (block.rows() != 1)
                throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row");

            scalar_block.insert({
                ColumnTuple::create(block.getColumns()),
                std::make_shared<DataTypeTuple>(block.getDataTypes(), block.getNames()),
                "tuple"});
            Block tmp_block;
            while (tmp_block.rows() == 0 && executor.pull(tmp_block))
            {
            }

            if (tmp_block.rows() != 0)
                throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row");

            block = materializeBlock(block);
            size_t columns = block.columns();

            if (columns == 1)
            {
                auto & column = block.getByPosition(0);
                /// Here we wrap the type into nullable if we can.
                /// It is needed because if the subquery returns no rows, its result will be Null.
                /// In case of many columns, do not check it because a tuple can't be nullable.
                if (!column.type->isNullable() && column.type->canBeInsideNullable())
                {
                    column.type = makeNullable(column.type);
                    column.column = makeNullable(column.column);
                }

                scalar_block = block;
            }
            else
            {
                /** Make unique column names for tuple.
                  *
                  * Example: SELECT (SELECT 2 AS x, x)
                  */
                makeUniqueColumnNamesInBlock(block);

                scalar_block.insert({
                    ColumnTuple::create(block.getColumns()),
                    std::make_shared<DataTypeTuple>(block.getDataTypes(), block.getNames()),
                    "tuple"});
            }
        }
    }
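One reasoning step worth spelling out: in only_analyze mode the subquery is never executed, so the scalar block is assembled from the sample block with placeholder defaults. The next hunk therefore salts the scalar cache key, so these placeholder blocks are never reused by real execution:

    /// From the hunk below: analyzed-only scalars must not be cached under the key of executed ones.
    auto scalar_query_hash_string = DB::toString(node_with_hash.hash) + (only_analyze ? "_analyze" : "");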
@@ -2176,7 +2199,7 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden
    auto & nearest_query_scope_query_node = nearest_query_scope->scope_node->as<QueryNode &>();
    auto & mutable_context = nearest_query_scope_query_node.getMutableContext();

    auto scalar_query_hash_string = DB::toString(node_with_hash.hash);
    auto scalar_query_hash_string = DB::toString(node_with_hash.hash) + (only_analyze ? "_analyze" : "");

    if (mutable_context->hasQueryContext())
        mutable_context->getQueryContext()->addScalar(scalar_query_hash_string, scalar_block);
@@ -6131,6 +6154,12 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
        return resolved_expression_it->second;
    }

    bool is_nullable_group_by_key = scope.nullable_group_by_keys.contains(node) && !scope.expressions_in_resolve_process_stack.hasAggregateFunction();
    if (is_nullable_group_by_key)
        ++scope.found_nullable_group_by_key_in_scope;

    SCOPE_EXIT(scope.found_nullable_group_by_key_in_scope -= is_nullable_group_by_key);

    String node_alias = node->getAlias();
    ProjectionNames result_projection_names;

@@ -6422,7 +6451,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id

    validateTreeSize(node, scope.context->getSettingsRef().max_expanded_ast_elements, node_to_tree_size);

    if (scope.nullable_group_by_keys.contains(node) && !scope.expressions_in_resolve_process_stack.hasAggregateFunction())
    if (is_nullable_group_by_key && scope.found_nullable_group_by_key_in_scope == 1)
    {
        node = node->clone();
        node->convertToNullable();
@@ -6649,45 +6678,48 @@ void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierR

    if (query_node_typed.isGroupByWithGroupingSets())
    {
        QueryTreeNodes nullable_group_by_keys;
        for (auto & grouping_sets_keys_list_node : query_node_typed.getGroupBy().getNodes())
        {
            if (settings.enable_positional_arguments)
                replaceNodesWithPositionalArguments(grouping_sets_keys_list_node, query_node_typed.getProjection().getNodes(), scope);

            resolveExpressionNodeList(grouping_sets_keys_list_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);

            // Remove redundant calls to the `tuple` function. It simplifies checking if an expression is an aggregation key.
            // It's required to support queries like: SELECT number FROM numbers(3) GROUP BY (number, number % 2)
            auto & group_by_list = grouping_sets_keys_list_node->as<ListNode &>().getNodes();
            expandTuplesInList(group_by_list);

            if (scope.group_by_use_nulls)
                for (const auto & group_by_elem : group_by_list)
                    nullable_group_by_keys.push_back(group_by_elem->clone());

            resolveExpressionNodeList(grouping_sets_keys_list_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
        }

        if (scope.group_by_use_nulls)
        {
            for (const auto & grouping_set : query_node_typed.getGroupBy().getNodes())
            {
                for (const auto & group_by_elem : grouping_set->as<ListNode>()->getNodes())
                    scope.nullable_group_by_keys.insert(group_by_elem);
            }
        }
        for (auto & nullable_group_by_key : nullable_group_by_keys)
            scope.nullable_group_by_keys.insert(std::move(nullable_group_by_key));
    }
    else
    {
        if (settings.enable_positional_arguments)
            replaceNodesWithPositionalArguments(query_node_typed.getGroupByNode(), query_node_typed.getProjection().getNodes(), scope);

        resolveExpressionNodeList(query_node_typed.getGroupByNode(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);

        // Remove redundant calls to the `tuple` function. It simplifies checking if an expression is an aggregation key.
        // It's required to support queries like: SELECT number FROM numbers(3) GROUP BY (number, number % 2)
        auto & group_by_list = query_node_typed.getGroupBy().getNodes();
        expandTuplesInList(group_by_list);

        QueryTreeNodes nullable_group_by_keys;
        if (scope.group_by_use_nulls)
        {
            for (const auto & group_by_elem : query_node_typed.getGroupBy().getNodes())
                scope.nullable_group_by_keys.insert(group_by_elem);
                nullable_group_by_keys.push_back(group_by_elem->clone());
        }

        resolveExpressionNodeList(query_node_typed.getGroupByNode(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);

        for (auto & nullable_group_by_key : nullable_group_by_keys)
            scope.nullable_group_by_keys.insert(std::move(nullable_group_by_key));
    }
}
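As far as this hunk shows, the GROUP BY keys are now cloned before the key list is resolved and inserted into scope.nullable_group_by_keys only afterwards, so resolution cannot mutate the stored keys. For example, with SETTINGS group_by_use_nulls = 1, a query such as SELECT number, count() FROM numbers(3) GROUP BY GROUPING SETS ((number), ()) stores the pre-resolution clone of `number` as its nullable key (illustrative query, my reading of the change rather than a statement from the patch).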
@@ -5,6 +5,7 @@
#include <Analyzer/ColumnNode.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
#include <Functions/FunctionFactory.h>

namespace DB
@@ -83,7 +84,7 @@ public:
        rhs->getArguments().getNodes().push_back(rhs_count);
        resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName());

        const auto new_node = std::make_shared<FunctionNode>(Poco::toLower(func_plus_minus_node->getFunctionName()));
        auto new_node = std::make_shared<FunctionNode>(Poco::toLower(func_plus_minus_node->getFunctionName()));
        if (column_id == 0)
            new_node->getArguments().getNodes() = {lhs, rhs};
        else if (column_id == 1)
@@ -93,7 +94,12 @@ public:
        if (!new_node)
            return;

        node = new_node;
        QueryTreeNodePtr res = std::move(new_node);

        if (!res->getResultType()->equals(*function_node->getResultType()))
            res = createCastFunction(res, function_node->getResultType(), getContext());

        node = std::move(res);
    }
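The guard added here mirrors the earlier multiIf hunk: rewriting arithmetic over an aggregate (e.g. sum(x) * 2 into sum(x * 2), presumably the kind of rewrite this pass performs) can change the inferred result type, so the replacement is cast back when the types diverge. Restated as a standalone pattern, using the same calls as above:

    /// Keep the observable result type of a rewritten expression stable.
    if (!res->getResultType()->equals(*function_node->getResultType()))
        res = createCastFunction(res, function_node->getResultType(), getContext());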
@@ -444,8 +444,8 @@ QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_express
        nulls_sort_direction = order_by_element.nulls_direction == 1 ? SortDirection::ASCENDING : SortDirection::DESCENDING;

    std::shared_ptr<Collator> collator;
    if (order_by_element.collation)
        collator = std::make_shared<Collator>(order_by_element.collation->as<ASTLiteral &>().value.get<String &>());
    if (order_by_element.getCollation())
        collator = std::make_shared<Collator>(order_by_element.getCollation()->as<ASTLiteral &>().value.get<String &>());

    const auto & sort_expression_ast = order_by_element.children.at(0);
    auto sort_expression = buildExpression(sort_expression_ast, context);
@@ -455,12 +455,12 @@ QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_express
        std::move(collator),
        order_by_element.with_fill);

    if (order_by_element.fill_from)
        sort_node->getFillFrom() = buildExpression(order_by_element.fill_from, context);
    if (order_by_element.fill_to)
        sort_node->getFillTo() = buildExpression(order_by_element.fill_to, context);
    if (order_by_element.fill_step)
        sort_node->getFillStep() = buildExpression(order_by_element.fill_step, context);
    if (order_by_element.getFillFrom())
        sort_node->getFillFrom() = buildExpression(order_by_element.getFillFrom(), context);
    if (order_by_element.getFillTo())
        sort_node->getFillTo() = buildExpression(order_by_element.getFillTo(), context);
    if (order_by_element.getFillStep())
        sort_node->getFillStep() = buildExpression(order_by_element.getFillStep(), context);

    list_node->getNodes().push_back(std::move(sort_node));
}
@@ -558,7 +558,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
    }
    else if (const auto * function = expression->as<ASTFunction>())
    {
        if (function->is_lambda_function)
        if (function->is_lambda_function || isASTLambdaFunction(*function))
        {
            const auto & lambda_arguments_and_expression = function->arguments->as<ASTExpressionList &>().children;
            auto & lambda_arguments_tuple = lambda_arguments_and_expression.at(0)->as<ASTFunction &>();
@@ -28,7 +28,6 @@
#include <Analyzer/Passes/MultiIfToIfPass.h>
#include <Analyzer/Passes/IfConstantConditionPass.h>
#include <Analyzer/Passes/IfChainToMultiIfPass.h>
#include <Analyzer/Passes/ConvertInToEqualPass.h>
#include <Analyzer/Passes/OrderByTupleEliminationPass.h>
#include <Analyzer/Passes/NormalizeCountVariantsPass.h>
#include <Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.h>
@@ -264,7 +263,6 @@ void addQueryTreePasses(QueryTreePassManager & manager, bool only_analyze)
    manager.addPass(std::make_unique<SumIfToCountIfPass>());
    manager.addPass(std::make_unique<RewriteArrayExistsToHasPass>());
    manager.addPass(std::make_unique<NormalizeCountVariantsPass>());
    manager.addPass(std::make_unique<ConvertInToEqualPass>());

    /// should run before AggregateFunctionsArithmericOperationsPass
    manager.addPass(std::make_unique<AggregateFunctionOfGroupByKeysPass>());
@@ -1,14 +1,15 @@
#pragma once

#include <Core/Settings.h>
#include <Core/Block.h>

#include <DataTypes/IDataType.h>

#include <QueryPipeline/SizeLimits.h>
#include <memory>

namespace DB
{

class IDataType;
using DataTypePtr = std::shared_ptr<const IDataType>;

class Set;
using SetPtr = std::shared_ptr<Set>;

@@ -120,17 +120,18 @@ ASTPtr SortNode::toASTImpl(const ConvertToASTOptions & options) const

    result->nulls_direction_was_explicitly_specified = nulls_sort_direction.has_value();

    result->with_fill = with_fill;
    result->fill_from = hasFillFrom() ? getFillFrom()->toAST(options) : nullptr;
    result->fill_to = hasFillTo() ? getFillTo()->toAST(options) : nullptr;
    result->fill_step = hasFillStep() ? getFillStep()->toAST(options) : nullptr;
    result->children.push_back(getExpression()->toAST(options));

    if (collator)
    {
        result->children.push_back(std::make_shared<ASTLiteral>(Field(collator->getLocale())));
        result->collation = result->children.back();
    }
        result->setCollation(std::make_shared<ASTLiteral>(Field(collator->getLocale())));

    result->with_fill = with_fill;
    if (hasFillFrom())
        result->setFillFrom(getFillFrom()->toAST(options));
    if (hasFillTo())
        result->setFillTo(getFillTo()->toAST(options));
    if (hasFillStep())
        result->setFillStep(getFillStep()->toAST(options));

    return result;
}
@@ -124,11 +124,12 @@ BackupReaderS3::BackupReaderS3(
    bool allow_s3_native_copy,
    const ReadSettings & read_settings_,
    const WriteSettings & write_settings_,
    const ContextPtr & context_)
    const ContextPtr & context_,
    bool is_internal_backup)
    : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderS3"))
    , s3_uri(s3_uri_)
    , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false}
    , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName()))
    , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup))
{
    auto & request_settings = s3_settings.request_settings;
    request_settings.updateFromSettings(context_->getSettingsRef());
@@ -214,11 +215,12 @@ BackupWriterS3::BackupWriterS3(
    const String & storage_class_name,
    const ReadSettings & read_settings_,
    const WriteSettings & write_settings_,
    const ContextPtr & context_)
    const ContextPtr & context_,
    bool is_internal_backup)
    : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterS3"))
    , s3_uri(s3_uri_)
    , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false}
    , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName()))
    , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup))
{
    auto & request_settings = s3_settings.request_settings;
    request_settings.updateFromSettings(context_->getSettingsRef());
@@ -18,7 +18,15 @@ namespace DB
class BackupReaderS3 : public BackupReaderDefault
{
public:
    BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_);
    BackupReaderS3(
        const S3::URI & s3_uri_,
        const String & access_key_id_,
        const String & secret_access_key_,
        bool allow_s3_native_copy,
        const ReadSettings & read_settings_,
        const WriteSettings & write_settings_,
        const ContextPtr & context_,
        bool is_internal_backup);
    ~BackupReaderS3() override;

    bool fileExists(const String & file_name) override;
@@ -41,7 +49,16 @@ private:
class BackupWriterS3 : public BackupWriterDefault
{
public:
    BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const String & storage_class_name, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_);
    BackupWriterS3(
        const S3::URI & s3_uri_,
        const String & access_key_id_,
        const String & secret_access_key_,
        bool allow_s3_native_copy,
        const String & storage_class_name,
        const ReadSettings & read_settings_,
        const WriteSettings & write_settings_,
        const ContextPtr & context_,
        bool is_internal_backup);
    ~BackupWriterS3() override;

    bool fileExists(const String & file_name) override;
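A hedged sketch of how the new flag is threaded through, modelled on the registerBackupEngineS3 call sites in the next hunks (argument list abbreviated; reader construction shown for illustration only):

    /// is_internal_backup is supplied by the factory params; judging by the /*ignore_user=*/
    /// argument above, it makes the S3 settings lookup skip per-user overrides for internal backups.
    auto reader = std::make_shared<BackupReaderS3>(
        S3::URI{uri}, access_key_id, secret_access_key,
        params.allow_s3_native_copy,
        params.read_settings,
        params.write_settings,
        params.context,
        params.is_internal_backup);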
@@ -940,6 +940,7 @@ void BackupsWorker::doRestore(
    backup_open_params.use_same_s3_credentials_for_base_backup = restore_settings.use_same_s3_credentials_for_base_backup;
    backup_open_params.read_settings = getReadSettingsForRestore(context);
    backup_open_params.write_settings = getWriteSettingsForRestore(context);
    backup_open_params.is_internal_backup = restore_settings.internal;
    BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);

    String current_database = context->getCurrentDatabase();
@@ -110,7 +110,8 @@ void registerBackupEngineS3(BackupFactory & factory)
            params.allow_s3_native_copy,
            params.read_settings,
            params.write_settings,
            params.context);
            params.context,
            params.is_internal_backup);

        return std::make_unique<BackupImpl>(
            params.backup_info,
@@ -129,7 +130,8 @@ void registerBackupEngineS3(BackupFactory & factory)
            params.s3_storage_class,
            params.read_settings,
            params.write_settings,
            params.context);
            params.context,
            params.is_internal_backup);

        return std::make_unique<BackupImpl>(
            params.backup_info,
@@ -753,7 +753,7 @@ void ClientBase::setDefaultFormatsFromConfiguration()
        else
            default_output_format = "TSV";
    }
    else if (is_interactive || stdout_is_a_tty)
    else if (is_interactive)
    {
        default_output_format = "PrettyCompact";
    }
@@ -207,11 +207,17 @@ std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::g
        max_entries = nested_pools.size();
    }
    else if (pool_mode == PoolMode::GET_ONE)
    {
        max_entries = 1;
    }
    else if (pool_mode == PoolMode::GET_MANY)
    {
        max_entries = settings.max_parallel_replicas;
    }
    else
    {
        throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown pool allocation mode");
    }

    if (!priority_func)
        priority_func = makeGetPriorityFunc(settings);
@@ -82,7 +82,7 @@ std::vector<Connection *> HedgedConnectionsFactory::getManyConnections(PoolMode
        }
        case PoolMode::GET_MANY:
        {
            max_entries = max_parallel_replicas;
            max_entries = std::min(max_parallel_replicas, shuffled_pools.size());
            break;
        }
    }
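The GET_MANY clamp above is worth spelling out: previously max_entries could exceed the number of available pools; now it cannot. Standalone form of the same line:

    /// Never request more hedged connections than there are shuffled pools to serve them.
    max_entries = std::min(max_parallel_replicas, shuffled_pools.size());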
@@ -158,7 +158,7 @@ private:
    /// checking the number of requested replicas that are still in process).
    size_t requested_connections_count = 0;

    const size_t max_parallel_replicas = 0;
    const size_t max_parallel_replicas = 1;
    const bool skip_unavailable_shards = false;
};

@@ -346,7 +346,7 @@ ColumnPtr ColumnSparse::filter(const Filter & filt, ssize_t) const
    }

    auto res_values = values->filter(values_filter, values_result_size_hint);
    return this->create(res_values, std::move(res_offsets), res_offset);
    return create(res_values, std::move(res_offsets), res_offset);
}

void ColumnSparse::expand(const Filter & mask, bool inverted)
@@ -671,7 +671,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update)
            ReadableSize(rss),
            ReadableSize(difference));

        total_memory_tracker.setRSS(rss, free_memory_in_allocator_arenas);
        MemoryTracker::setRSS(rss, free_memory_in_allocator_arenas);
    }
}

@@ -2,10 +2,13 @@

#include <memory>

#include <base/types.h>
#include <Common/Logger.h>
#include <Common/SharedMutex.h>
#include <Common/SharedLockGuard.h>
#include <Common/SharedMutex.h>

namespace DB
{

/** AtomicLogger allows to atomically change the logger.
  * The standard library does not have atomic_shared_ptr, and we do not use std::atomic* operations,
@@ -49,3 +52,5 @@ private:
    mutable DB::SharedMutex log_mutex;
    LoggerPtr logger;
};

}

src/Common/CurrentThreadHelpers.cpp (new file, 16 lines)
@@ -0,0 +1,16 @@
#include <Common/CurrentThread.h>
#include <Common/CurrentThreadHelpers.h>

namespace DB
{

bool currentThreadHasGroup()
{
    return DB::CurrentThread::getGroup() != nullptr;
}

LogsLevel currentThreadLogsLevel()
{
    return DB::CurrentThread::get().getClientLogsLevel();
}
}

src/Common/CurrentThreadHelpers.h (new file, 9 lines)
@@ -0,0 +1,9 @@
#pragma once

#include <Core/LogsLevel.h>

namespace DB
{
bool currentThreadHasGroup();
LogsLevel currentThreadLogsLevel();
}
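These helpers let logging code query the client log level without including the heavyweight CurrentThread.h everywhere; the logger_useful.h hunk near the end of this diff consumes them exactly in this shape (priority here is a placeholder for the macro argument):

    /// Sketch of the intended call site (mirrors the LOG_IMPL change below).
    const bool is_clients_log = DB::currentThreadHasGroup()
        && DB::currentThreadLogsLevel() >= priority;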
@@ -1,13 +1,15 @@
#include "DateLUT.h"

#include <Interpreters/Context.h>
#include <Common/CurrentThread.h>
#include <Common/filesystemHelpers.h>

#include <Poco/DigestStream.h>
#include <Poco/Exception.h>
#include <Poco/SHA1Engine.h>
#include <Common/filesystemHelpers.h>

#include <filesystem>
#include <fstream>
#include <Interpreters/Context.h>


namespace
@@ -140,6 +142,38 @@ std::string determineDefaultTimeZone()

}

const DateLUTImpl & DateLUT::instance()
{
    const auto & date_lut = getInstance();

    if (DB::CurrentThread::isInitialized())
    {
        std::string timezone_from_context;
        const DB::ContextPtr query_context = DB::CurrentThread::get().getQueryContext();

        if (query_context)
        {
            timezone_from_context = extractTimezoneFromContext(query_context);

            if (!timezone_from_context.empty())
                return date_lut.getImplementation(timezone_from_context);
        }

        /// On the server side, the timezone is passed in query_context,
        /// but on the CH client side we have no query context,
        /// and each time we modify the client's global context
        const DB::ContextPtr global_context = DB::CurrentThread::get().getGlobalContext();
        if (global_context)
        {
            timezone_from_context = extractTimezoneFromContext(global_context);

            if (!timezone_from_context.empty())
                return date_lut.getImplementation(timezone_from_context);
        }
    }
    return serverTimezoneInstance();
}

DateLUT::DateLUT()
{
    /// Initialize the pointer to the default DateLUTImpl.
@@ -1,17 +1,23 @@
#pragma once

#include "DateLUTImpl.h"

#include <base/defines.h>
#include <base/types.h>

#include <boost/noncopyable.hpp>
#include "Common/CurrentThread.h"

#include <atomic>
#include <memory>
#include <mutex>
#include <unordered_map>

namespace DB
{
class Context;
using ContextPtr = std::shared_ptr<const Context>;
}

class DateLUTImpl;


/// This class provides lazy initialization and lookup of singleton DateLUTImpl objects for a given timezone.
class DateLUT : private boost::noncopyable
@@ -20,38 +26,7 @@ public:
    /// Return the DateLUTImpl instance for the session timezone.
    /// session_timezone is a session-level setting.
    /// If the setting is not set, returns the server timezone.
    static ALWAYS_INLINE const DateLUTImpl & instance()
    {
        const auto & date_lut = getInstance();

        if (DB::CurrentThread::isInitialized())
        {
            std::string timezone_from_context;
            const DB::ContextPtr query_context = DB::CurrentThread::get().getQueryContext();

            if (query_context)
            {
                timezone_from_context = extractTimezoneFromContext(query_context);

                if (!timezone_from_context.empty())
                    return date_lut.getImplementation(timezone_from_context);
            }

            /// On the server side, the timezone is passed in query_context,
            /// but on the CH client side we have no query context,
            /// and each time we modify the client's global context
            const DB::ContextPtr global_context = DB::CurrentThread::get().getGlobalContext();
            if (global_context)
            {
                timezone_from_context = extractTimezoneFromContext(global_context);

                if (!timezone_from_context.empty())
                    return date_lut.getImplementation(timezone_from_context);
            }
        }
        return serverTimezoneInstance();
    }
    static const DateLUTImpl & instance();

    static ALWAYS_INLINE const DateLUTImpl & instance(const std::string & time_zone)
    {
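The DateLUT change is the usual header-to-translation-unit move: the large ALWAYS_INLINE body leaves the header behind a plain declaration, which lets the header drop into a lighter include set (a plausible motivation; the patch itself does not state one). A minimal sketch of the pattern with hypothetical names:

    /// header: declaration only, no heavy includes needed by the body
    struct Lut { static const Impl & instance(); };

    /// cpp: the body is compiled once, behind a stable symbol
    const Impl & Lut::instance() { static Impl impl; return impl; }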
@@ -1,8 +1,5 @@
#include "DateLUTImpl.h"

#include <cctz/civil_time.h>
#include <cctz/time_zone.h>
#include <cctz/zone_info_source.h>
#include <Core/DecimalFunctions.h>
#include <Common/DateLUTImpl.h>
#include <Common/Exception.h>

#include <algorithm>
@@ -11,6 +8,10 @@
#include <cstring>
#include <memory>

#include <cctz/civil_time.h>
#include <cctz/time_zone.h>
#include <cctz/zone_info_source.h>


namespace DB
{
@@ -214,6 +215,29 @@ DateLUTImpl::DateLUTImpl(const std::string & time_zone_)
    }
}

unsigned int DateLUTImpl::toMillisecond(const DB::DateTime64 & datetime, Int64 scale_multiplier) const
{
    constexpr Int64 millisecond_multiplier = 1'000;
    constexpr Int64 microsecond_multiplier = 1'000 * millisecond_multiplier;
    constexpr Int64 divider = microsecond_multiplier / millisecond_multiplier;

    auto components = DB::DecimalUtils::splitWithScaleMultiplier(datetime, scale_multiplier);

    if (datetime.value < 0 && components.fractional)
    {
        components.fractional = scale_multiplier + (components.whole ? Int64(-1) : Int64(1)) * components.fractional;
        --components.whole;
    }
    Int64 fractional = components.fractional;
    if (scale_multiplier > microsecond_multiplier)
        fractional = fractional / (scale_multiplier / microsecond_multiplier);
    else if (scale_multiplier < microsecond_multiplier)
        fractional = fractional * (microsecond_multiplier / scale_multiplier);

    UInt16 millisecond = static_cast<UInt16>(fractional / divider);
    return millisecond;
}


/// Prefer to load timezones from blobs linked into the binary.
/// The blobs are provided by the "tzdata" library.
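A hedged walk-through of the scale normalisation above for a positive DateTime64 at scale 9 (nanoseconds); negative values additionally take the borrow branch:

    /// value = 1.234567890 s, scale_multiplier = 1'000'000'000
    /// components.whole      = 1            (seconds)
    /// components.fractional = 234'567'890  (nanosecond ticks)
    /// scale_multiplier > microsecond_multiplier, so the fraction is first reduced
    /// to microseconds, then divided down to milliseconds:
    static_assert(234'567'890 / (1'000'000'000 / 1'000'000) / 1'000 == 234);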
@@ -3,7 +3,6 @@
#include <base/DayNum.h>
#include <base/defines.h>
#include <base/types.h>
#include <Core/DecimalFunctions.h>

#include <ctime>
#include <cassert>
@@ -50,6 +49,11 @@ enum class WeekDayMode
    WeekStartsSunday1 = 3
};

namespace DB
{
class DateTime64;
}

/** Lookup table for conversion of time to date, and to month / year / day of week / day of month and so on.
  * It was first implemented for OLAPServer, which needed to do billions of such transformations.
  */
@@ -593,29 +597,7 @@ public:
        return time % 60;
    }

    template <typename DateOrTime>
    unsigned toMillisecond(const DateOrTime & datetime, Int64 scale_multiplier) const
    {
        constexpr Int64 millisecond_multiplier = 1'000;
        constexpr Int64 microsecond_multiplier = 1'000 * millisecond_multiplier;
        constexpr Int64 divider = microsecond_multiplier / millisecond_multiplier;

        auto components = DB::DecimalUtils::splitWithScaleMultiplier(datetime, scale_multiplier);

        if (datetime.value < 0 && components.fractional)
        {
            components.fractional = scale_multiplier + (components.whole ? Int64(-1) : Int64(1)) * components.fractional;
            --components.whole;
        }
        Int64 fractional = components.fractional;
        if (scale_multiplier > microsecond_multiplier)
            fractional = fractional / (scale_multiplier / microsecond_multiplier);
        else if (scale_multiplier < microsecond_multiplier)
            fractional = fractional * (microsecond_multiplier / scale_multiplier);

        UInt16 millisecond = static_cast<UInt16>(fractional / divider);
        return millisecond;
    }
    unsigned toMillisecond(const DB::DateTime64 & datetime, Int64 scale_multiplier) const;

    unsigned toMinute(Time t) const
    {
@@ -1,26 +1,27 @@
#include "Exception.h"

#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <cxxabi.h>
#include <IO/Operators.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <base/demangle.h>
#include <Poco/String.h>
#include <Common/AtomicLogger.h>
#include <Common/ErrorCodes.h>
#include <Common/Exception.h>
#include <Common/LockMemoryExceptionInThread.h>
#include <Common/MemorySanitizer.h>
#include <Common/SensitiveDataMasker.h>
#include <Common/config_version.h>
#include <Common/filesystemHelpers.h>
#include <Common/formatReadable.h>
#include <Common/logger_useful.h>

#include <Common/config_version.h>
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <cxxabi.h>

#include <Poco/String.h>

namespace fs = std::filesystem;

@@ -1,22 +1,20 @@
#pragma once

#include <cerrno>
#include <exception>
#include <vector>
#include <memory>

#include <Poco/Exception.h>

#include <base/defines.h>
#include <base/errnoToString.h>
#include <base/int8_to_string.h>
#include <base/scope_guard.h>
#include <Common/AtomicLogger.h>
#include <Common/Logger.h>
#include <Common/LoggingFormatStringHelpers.h>
#include <Common/StackTrace.h>

#include <cerrno>
#include <exception>
#include <memory>
#include <vector>

#include <fmt/format.h>
#include <Poco/Exception.h>


namespace Poco { class Logger; }
@@ -24,6 +22,8 @@ namespace Poco { class Logger; }
namespace DB
{

class AtomicLogger;

[[noreturn]] void abortOnFailedAssertion(const String & description);

/// This flag can be set for testing purposes - to check that no exceptions are thrown.
@@ -50,7 +50,9 @@ static struct InitFiu
    REGULAR(check_table_query_delay_for_part) \
    REGULAR(dummy_failpoint) \
    REGULAR(prefetched_reader_pool_failpoint) \
    PAUSEABLE_ONCE(dummy_pausable_failpoint_once) \
    PAUSEABLE_ONCE(replicated_merge_tree_insert_retry_pause) \
    PAUSEABLE_ONCE(finish_set_quorum_failed_parts) \
    PAUSEABLE_ONCE(finish_clean_quorum_failed_parts) \
    PAUSEABLE(dummy_pausable_failpoint) \
    ONCE(execute_query_calling_empty_set_result_func_on_exception)

@@ -203,7 +203,7 @@ public:
        if (total_connections_in_group >= limits.warning_limit && total_connections_in_group >= mute_warning_until)
        {
            LOG_WARNING(log, "Too many active sessions in group {}, count {}, warning limit {}", type, total_connections_in_group, limits.warning_limit);
            mute_warning_until = roundUp(total_connections_in_group, limits.warning_step);
            mute_warning_until = roundUp(total_connections_in_group, HTTPConnectionPools::Limits::warning_step);
        }
    }

@@ -295,8 +295,13 @@ private:
    String getTarget() const
    {
        if (!Session::getProxyConfig().host.empty())
            return fmt::format("{} over proxy {}", Session::getHost(), Session::getProxyConfig().host);
        return Session::getHost();
            return fmt::format("{}:{} over proxy {}",
                Session::getHost(),
                Session::getPort(),
                Session::getProxyConfig().host);
        return fmt::format("{}:{}",
            Session::getHost(),
            Session::getPort());
    }

    void flushRequest() override
@@ -472,7 +477,8 @@
    String getTarget() const
    {
        if (!proxy_configuration.isEmpty())
            return fmt::format("{} over proxy {}", host, proxy_configuration.host);
            return fmt::format("{} over proxy {}",
                host, proxy_configuration.host);
        return host;
    }

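With this change the session-level target string always carries the port, e.g. "clickhouse.example:8123" or "clickhouse.example:8123 over proxy proxy.local" (host names here are illustrative), while the second getTarget variant only reflows the proxy case without adding the port.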
@@ -207,7 +207,7 @@ public:
    void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
    {
        DB::PrefetchingHelper prefetching;
        size_t prefetch_look_ahead = prefetching.getInitialLookAheadValue();
        size_t prefetch_look_ahead = DB::PrefetchingHelper::getInitialLookAheadValue();

        size_t i = 0;
        auto prefetch_it = advanceIterator(this->begin(), prefetch_look_ahead);
@@ -216,10 +216,10 @@
        {
            if constexpr (prefetch)
            {
                if (i == prefetching.iterationsToMeasure())
                if (i == DB::PrefetchingHelper::iterationsToMeasure())
                {
                    prefetch_look_ahead = prefetching.calcPrefetchLookAhead();
                    prefetch_it = advanceIterator(prefetch_it, prefetch_look_ahead - prefetching.getInitialLookAheadValue());
                    prefetch_it = advanceIterator(prefetch_it, prefetch_look_ahead - DB::PrefetchingHelper::getInitialLookAheadValue());
                }

                if (prefetch_it != end)
@@ -2,6 +2,7 @@

#if USE_JEMALLOC

#include <Common/Exception.h>
#include <Common/Stopwatch.h>
#include <Common/logger_useful.h>
#include <jemalloc/jemalloc.h>
@@ -1,9 +1,10 @@
#pragma once

#include <cstring>
#include <string>
#include <exception>
#include <string>
#include <Common/DateLUT.h>
#include <Common/DateLUTImpl.h>


/** Stores a calendar date in broken-down form (year, month, day-in-month).
@@ -1,15 +1,20 @@
#pragma once

#include <memory>

#include <base/defines.h>

#include <Poco/Channel.h>
#include <memory>

#include <Poco/Logger.h>
#include <Poco/Message.h>

using LoggerPtr = Poco::LoggerPtr;
namespace Poco
{
class Channel;
class Logger;
using LoggerPtr = std::shared_ptr<Logger>;
}

using LoggerPtr = std::shared_ptr<Poco::Logger>;
using LoggerRawPtr = Poco::Logger *;

/** RAII wrappers around Poco/Logger.h.
@@ -1,4 +1,5 @@
#include <Common/DateLUT.h>
#include <Common/DateLUTImpl.h>
#include <Common/LoggingFormatStringHelpers.h>
#include <Common/SipHash.h>
#include <Common/thread_local_rng.h>
@@ -22,7 +22,7 @@ void protectMemoryRegion(void * addr, size_t len, int prot)
}
#endif

size_t byte_size(size_t num_elements, size_t element_size)
ALWAYS_INLINE size_t byte_size(size_t num_elements, size_t element_size)
{
    size_t amount;
    if (__builtin_mul_overflow(num_elements, element_size, &amount))
@@ -30,7 +30,7 @@ size_t byte_size(size_t num_elements, size_t element_size)
    return amount;
}

size_t minimum_memory_for_elements(size_t num_elements, size_t element_size, size_t pad_left, size_t pad_right)
ALWAYS_INLINE size_t minimum_memory_for_elements(size_t num_elements, size_t element_size, size_t pad_left, size_t pad_right)
{
    size_t amount;
    if (__builtin_add_overflow(byte_size(num_elements, element_size), pad_left + pad_right, &amount))
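For readers unfamiliar with the idiom being inlined here, a self-contained sketch of the overflow-checked size computation; __builtin_mul_overflow is the same GCC/Clang builtin used above (the wrapper name and the thrown exception are illustrative):

    #include <cstddef>
    #include <stdexcept>

    /// Multiply element count by element size, failing loudly instead of wrapping on overflow.
    inline size_t checked_byte_size(size_t num_elements, size_t element_size)
    {
        size_t amount;
        if (__builtin_mul_overflow(num_elements, element_size, &amount))
            throw std::length_error("checked_byte_size: size_t overflow");
        return amount;
    }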
@@ -1,3 +1,4 @@
#include <Common/CurrentThread.h>
#include <Common/ProfileEventsScope.h>

namespace DB
@@ -1,7 +1,8 @@
#pragma once

#include <Common/ProfileEvents.h>
#include <Common/CurrentThread.h>

#include <boost/noncopyable.hpp>

namespace DB
{
@@ -1,15 +1,16 @@
#include "QueryProfiler.h"

#include <IO/WriteHelpers.h>
#include <Common/TraceSender.h>
#include <base/defines.h>
#include <base/errnoToString.h>
#include <base/phdr_cache.h>
#include <Common/CurrentMetrics.h>
#include <Common/Exception.h>
#include <Common/MemoryTracker.h>
#include <Common/StackTrace.h>
#include <Common/thread_local_rng.h>
#include <Common/TraceSender.h>
#include <Common/logger_useful.h>
#include <base/defines.h>
#include <base/phdr_cache.h>
#include <base/errnoToString.h>
#include <Common/thread_local_rng.h>

#include <random>


@@ -366,7 +366,7 @@ String demangleAndCollapseNames(std::optional<std::string_view> file, const char
    if (file.has_value())
    {
        std::string_view file_copy = file.value();
        if (auto trim_pos = file_copy.find_last_of('/'); trim_pos != file_copy.npos)
        if (auto trim_pos = file_copy.find_last_of('/'); trim_pos != std::string_view::npos)
            file_copy.remove_suffix(file_copy.size() - trim_pos);
        if (file_copy.ends_with("functional"))
            return "?";
@@ -15,6 +15,7 @@
#include <Common/logger_useful.h>

#include <Common/Exception.h>
#include <Common/MemoryTracker.h>
#include <Common/thread_local_rng.h>

#include <Common/ThreadFuzzer.h>

@@ -96,7 +96,7 @@ ThreadStatus::ThreadStatus(bool check_current_thread_on_destruction_)
    stack_t altstack_description{};
    altstack_description.ss_sp = alt_stack.getData();
    altstack_description.ss_flags = 0;
    altstack_description.ss_size = alt_stack.getSize();
    altstack_description.ss_size = ThreadStack::getSize();

    if (0 != sigaltstack(&altstack_description, nullptr))
    {
@@ -1,7 +1,8 @@
#include <Common/ZooKeeper/ZooKeeperConstants.h>
#include <Common/thread_local_rng.h>
#include <Common/ZooKeeper/ZooKeeperImpl.h>

#include <Compression/CompressedReadBuffer.h>
#include <Compression/CompressedWriteBuffer.h>
#include <Compression/CompressionFactory.h>
#include <IO/Operators.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
@@ -10,17 +11,17 @@
#include <Interpreters/Context.h>
#include <base/getThreadId.h>
#include <base/sleep.h>
#include <Common/CurrentThread.h>
#include <Common/EventNotifier.h>
#include <Common/Exception.h>
#include <Common/ProfileEvents.h>
#include <Common/ZooKeeper/IKeeper.h>
#include <Common/ZooKeeper/ZooKeeperCommon.h>
#include <Common/ZooKeeper/ZooKeeperIO.h>
#include <Common/ZooKeeper/ZooKeeperImpl.h>
#include <Common/logger_useful.h>
#include <Common/setThreadName.h>
#include <Compression/CompressedReadBuffer.h>
#include <Compression/CompressedWriteBuffer.h>
#include <Compression/CompressionFactory.h>
#include <Common/thread_local_rng.h>

#include "Coordination/KeeperConstants.h"
#include "config.h"
@@ -5,13 +5,11 @@
#include <fmt/format.h>
#include <Poco/Logger.h>
#include <Poco/Message.h>
#include <Common/CurrentThread.h>
#include <Common/ProfileEvents.h>
#include <Common/LoggingFormatStringHelpers.h>
#include <Common/Logger.h>
#include <Common/AtomicLogger.h>

namespace Poco { class Logger; }
#include <Common/CurrentThreadHelpers.h>
#include <Common/Logger.h>
#include <Common/LoggingFormatStringHelpers.h>
#include <Common/ProfileEvents.h>


#define LogToStr(x, y) std::make_unique<LogToStrImpl>(x, y)
@@ -22,7 +20,7 @@ using LogSeriesLimiterPtr = std::shared_ptr<LogSeriesLimiter>;
namespace impl
{
    [[maybe_unused]] inline LoggerPtr getLoggerHelper(const LoggerPtr & logger) { return logger; }
    [[maybe_unused]] inline LoggerPtr getLoggerHelper(const AtomicLogger & logger) { return logger.load(); }
    [[maybe_unused]] inline LoggerPtr getLoggerHelper(const DB::AtomicLogger & logger) { return logger.load(); }
    [[maybe_unused]] inline const ::Poco::Logger * getLoggerHelper(const ::Poco::Logger * logger) { return logger; }
    [[maybe_unused]] inline std::unique_ptr<LogToStrImpl> getLoggerHelper(std::unique_ptr<LogToStrImpl> && logger) { return logger; }
    [[maybe_unused]] inline std::unique_ptr<LogFrequencyLimiterIml> getLoggerHelper(std::unique_ptr<LogFrequencyLimiterIml> && logger) { return logger; }
@@ -66,8 +64,7 @@ namespace impl
#define LOG_IMPL(logger, priority, PRIORITY, ...) do \
{ \
    auto _logger = ::impl::getLoggerHelper(logger); \
    const bool _is_clients_log = (DB::CurrentThread::getGroup() != nullptr) && \
        (DB::CurrentThread::get().getClientLogsLevel() >= (priority)); \
    const bool _is_clients_log = DB::currentThreadHasGroup() && DB::currentThreadLogsLevel() >= (priority); \
    if (!_is_clients_log && !_logger->is((PRIORITY))) \
        break; \
    \
@@ -1,5 +1,7 @@
#pragma once

#include <Common/Logger.h>

#include <list>
#include <memory>
#include <mutex>
Some files were not shown because too many files have changed in this diff.