rebase master

This commit is contained in:
凌涛 2023-12-05 11:42:25 +08:00
commit 6794bbe196
289 changed files with 5912 additions and 1381 deletions

View File

@ -33,8 +33,6 @@ curl https://clickhouse.com/ | sh
## Upcoming Events
* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/296334923/) - Nov 14
* [**ClickHouse Meetup in Singapore**](https://www.meetup.com/clickhouse-singapore-meetup-group/events/296334976/) - Nov 15
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/296488501/) - Nov 30
* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/296488779/) - Dec 11
* [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/296488840/) - Dec 12

View File

@ -385,9 +385,25 @@ endif ()
include("${ClickHouse_SOURCE_DIR}/contrib/google-protobuf-cmake/protobuf_generate.cmake")
# These files need to be installed so that users can use the well-known protobuf types
set(google_proto_files
${protobuf_source_dir}/src/google/protobuf/any.proto
${protobuf_source_dir}/src/google/protobuf/api.proto
${protobuf_source_dir}/src/google/protobuf/descriptor.proto
${protobuf_source_dir}/src/google/protobuf/duration.proto
${protobuf_source_dir}/src/google/protobuf/empty.proto
${protobuf_source_dir}/src/google/protobuf/field_mask.proto
${protobuf_source_dir}/src/google/protobuf/source_context.proto
${protobuf_source_dir}/src/google/protobuf/struct.proto
${protobuf_source_dir}/src/google/protobuf/timestamp.proto
${protobuf_source_dir}/src/google/protobuf/type.proto
${protobuf_source_dir}/src/google/protobuf/wrappers.proto
)
add_library(_protobuf INTERFACE)
target_link_libraries(_protobuf INTERFACE _libprotobuf)
target_include_directories(_protobuf INTERFACE "${Protobuf_INCLUDE_DIR}")
set_target_properties(_protobuf PROPERTIES google_proto_files "${google_proto_files}")
add_library(ch_contrib::protobuf ALIAS _protobuf)
add_library(_protoc INTERFACE)

2
contrib/libpqxx vendored

@ -1 +1 @@
Subproject commit 791d68fd89902835133c50435e380ec7a73271b7
Subproject commit c995193a3a14d71f4711f1f421f65a1a1db64640

View File

@ -39,8 +39,8 @@ If you need to update rows frequently, we recommend using the [`ReplacingMergeTr
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [TTL expr1] [PRIMARY KEY],
name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [TTL expr2] [PRIMARY KEY],
name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [STATISTIC(stat1)] [TTL expr1] [PRIMARY KEY],
name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [STATISTIC(stat2)] [TTL expr2] [PRIMARY KEY],
...
INDEX index_name1 expr1 TYPE type1(...) [GRANULARITY value1],
INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2],
@ -1358,3 +1358,33 @@ In this sample configuration:
- `_partition_value` — Values (a tuple) of a `partition by` expression.
- `_sample_factor` — Sample factor (from the query).
- `_block_number` — Block number of the row, it is persisted on merges when `allow_experimental_block_number_column` is set to true.
## Column Statistics (Experimental) {#column-statistics}
The statistic declaration is in the columns section of the `CREATE` query for tables from the `*MergeTree*` Family when we enable `set allow_experimental_statistic = 1`.
``` sql
CREATE TABLE example_table
(
a Int64 STATISTIC(tdigest),
b Float64
)
ENGINE = MergeTree
ORDER BY a
```
We can also manipulate statistics with `ALTER` statements.
```sql
ALTER TABLE example_table ADD STATISTIC b TYPE tdigest;
ALTER TABLE example_table DROP STATISTIC a TYPE tdigest;
```
These lightweight statistics aggregate information about distribution of values in columns.
They can be used for query optimization when we enable `set allow_statistic_optimize = 1`.
#### Available Types of Column Statistics {#available-types-of-column-statistics}
- `tdigest`
Stores distribution of values from numeric columns in [TDigest](https://github.com/tdunning/t-digest) sketch.

View File

@ -1868,9 +1868,10 @@ Settings:
- `endpoint` HTTP endpoint for scraping metrics by prometheus server. Start from /.
- `port` Port for `endpoint`.
- `metrics` Flag that sets to expose metrics from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table.
- `events` Flag that sets to expose metrics from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
- `asynchronous_metrics` Flag that sets to expose current metrics values from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
- `metrics` Expose metrics from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table.
- `events` Expose metrics from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
- `asynchronous_metrics` Expose current metrics values from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
- `errors` - Expose the number of errors by error codes occurred since the last server restart. This information could be obtained from the [system.errors](../../operations/system-tables/asynchronous_metrics.md#system_tables-errors) as well.
**Example**
@ -1886,6 +1887,7 @@ Settings:
<metrics>true</metrics>
<events>true</events>
<asynchronous_metrics>true</asynchronous_metrics>
<errors>true</errors>
</prometheus>
<!-- highlight-end -->
</clickhouse>

View File

@ -4801,6 +4801,14 @@ a Tuple(
)
```
## allow_experimental_statistic {#allow_experimental_statistic}
Allows defining columns with [statistics](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) and [manipulate statistics](../../engines/table-engines/mergetree-family/mergetree.md#column-statistics).
## allow_statistic_optimize {#allow_statistic_optimize}
Allows using statistic to optimize the order of [prewhere conditions](../../sql-reference/statements/select/prewhere.md).
## analyze_index_with_space_filling_curves
If a table has a space-filling curve in its index, e.g. `ORDER BY mortonEncode(x, y)`, and the query has conditions on its arguments, e.g. `x >= 10 AND x <= 20 AND y >= 20 AND y <= 30`, use the space-filling curve for index analysis.

View File

@ -0,0 +1,48 @@
---
toc_priority: 112
---
# groupArraySorted {#groupArraySorted}
Returns an array with the first N items in ascending order.
``` sql
groupArraySorted(N)(column)
```
**Arguments**
- `N` The number of elements to return.
If the parameter is omitted, default value is the size of input.
- `column` The value (Integer, String, Float and other Generic types).
**Example**
Gets the first 10 numbers:
``` sql
SELECT groupArraySorted(10)(number) FROM numbers(100)
```
``` text
┌─groupArraySorted(10)(number)─┐
│ [0,1,2,3,4,5,6,7,8,9] │
└──────────────────────────────┘
```
Gets all the String implementations of all numbers in column:
``` sql
SELECT groupArraySorted(str) FROM (SELECT toString(number) as str FROM numbers(5));
```
``` text
┌─groupArraySorted(str)────────┐
│ ['0','1','2','3','4'] │
└──────────────────────────────┘
```

View File

@ -54,6 +54,7 @@ ClickHouse-specific aggregate functions:
- [groupArrayMovingAvg](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
- [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
- [groupArraySample](./grouparraysample.md)
- [groupArraySorted](/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md)
- [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md)
- [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md)
- [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md)

View File

@ -56,7 +56,7 @@ Functions:
## Related content
- [Reducing ClickHouse Storage Cost with the Low Cardinality Type Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/)
- [Reducing ClickHouse Storage Cost with the Low Cardinality Type Lessons from an Instana Engineer](https://altinity.com/blog/2020-5-20-reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer)
- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf)
- Blog: [Optimizing ClickHouse with Schemas and Codecs](https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema)
- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse)

View File

@ -2533,13 +2533,14 @@ formatDateTime(Time, Format[, Timezone])
Returns time and date values according to the determined format.
**Replacement fields**
Using replacement fields, you can define a pattern for the resulting string. “Example” column shows formatting result for `2018-01-02 22:33:44`.
| Placeholder | Description | Example |
| Placeholder | Description | Example |
|----------|---------------------------------------------------------|------------|
| %a | abbreviated weekday name (Mon-Sun) | Mon |
| %b | abbreviated month name (Jan-Dec) | Jan |
| %c | month as an integer number (01-12) | 01 |
| %c | month as an integer number (01-12), see 'Note 3' below | 01 |
| %C | year divided by 100 and truncated to integer (00-99) | 20 |
| %d | day of the month, zero-padded (01-31) | 02 |
| %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 |
@ -2553,8 +2554,8 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %i | minute (00-59) | 33 |
| %I | hour in 12h format (01-12) | 10 |
| %j | day of the year (001-366) | 002 |
| %k | hour in 24h format (00-23) | 22 |
| %l | hour in 12h format (01-12) | 09 |
| %k | hour in 24h format (00-23), see 'Note 3' below | 14 |
| %l | hour in 12h format (01-12), see 'Note 3' below | 09 |
| %m | month as an integer number (01-12) | 01 |
| %M | full month name (January-December), see 'Note 2' below | January |
| %n | new-line character () | |
@ -2579,6 +2580,8 @@ Note 1: In ClickHouse versions earlier than v23.4, `%f` prints a single zero (0)
Note 2: In ClickHouse versions earlier than v23.4, `%M` prints the minute (00-59) instead of the full month name (January-December). The previous behavior can be restored using setting `formatdatetime_parsedatetime_m_is_month_name = 0`.
Note 3: In ClickHouse versions earlier than v23.11, function `parseDateTime()` required leading zeros for formatters `%c` (month) and `%l`/`%k` (hour), e.g. `07`. In later versions, the leading zero may be omitted, e.g. `7`. The previous behavior can be restored using setting `parsedatetime_parse_without_leading_zeros = 0`. Note that function `formatDateTime()` by default still prints leading zeros for `%c` and `%l`/`%k` to not break existing use cases. This behavior can be changed by setting `formatdatetime_format_without_leading_zeros = 1`.
**Example**
``` sql

View File

@ -164,7 +164,7 @@ Consider a list of contacts that may specify multiple ways to contact a customer
└──────────┴──────┴───────────┴───────────┘
```
The `mail` and `phone` fields are of type String, but the `icq` field is `UInt32`, so it needs to be converted to `String`.
The `mail` and `phone` fields are of type String, but the `telegram` field is `UInt32`, so it needs to be converted to `String`.
Get the first available contact method for the customer from the contact list:

View File

@ -67,7 +67,45 @@ WHERE macro = 'test';
│ test │ Value │
└───────┴──────────────┘
```
## getClientHTTPHeader
Returns the value of the specified HTTP header. If there is no such header or the request method is not HTTP, it throws an exception.
**Syntax**
```sql
getClientHTTPHeader(name);
```
**Arguments**
- `name` — HTTP header name. [String](../../sql-reference/data-types/string.md#string)
**Returned value**
Value of the specified header.
Type: [String](../../sql-reference/data-types/string.md#string).
When we use `clickhouse-client` to execute this function, we always get an empty string, because the client does not use the HTTP protocol.
```sql
SELECT getClientHTTPHeader('test')
```
result:
```text
┌─getClientHTTPHeader('test')─┐
│ │
└─────────────────────────────┘
```
Try to use http request:
```shell
echo "select getClientHTTPHeader('X-Clickhouse-User')" | curl -H 'X-ClickHouse-User: default' -H 'X-ClickHouse-Key: ' 'http://localhost:8123/' -d @-
#result
default
```
## FQDN
Returns the fully qualified domain name of the ClickHouse server.

View File

@ -16,6 +16,7 @@ Most `ALTER TABLE` queries modify table settings or data:
- [INDEX](/docs/en/sql-reference/statements/alter/skipping-index.md)
- [CONSTRAINT](/docs/en/sql-reference/statements/alter/constraint.md)
- [TTL](/docs/en/sql-reference/statements/alter/ttl.md)
- [STATISTIC](/docs/en/sql-reference/statements/alter/statistic.md)
:::note
Most `ALTER TABLE` queries are supported only for [\*MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables, as well as [Merge](/docs/en/engines/table-engines/special/merge.md) and [Distributed](/docs/en/engines/table-engines/special/distributed.md).

View File

@ -0,0 +1,25 @@
---
slug: /en/sql-reference/statements/alter/statistic
sidebar_position: 45
sidebar_label: STATISTIC
---
# Manipulating Column Statistics
The following operations are available:
- `ALTER TABLE [db].table ADD STATISTIC (columns list) TYPE type` - Adds statistic description to tables metadata.
- `ALTER TABLE [db].table DROP STATISTIC (columns list) TYPE type` - Removes statistic description from tables metadata and deletes statistic files from disk.
- `ALTER TABLE [db].table CLEAR STATISTIC (columns list) TYPE type` - Deletes statistic files from disk.
- `ALTER TABLE [db.]table MATERIALIZE STATISTIC (columns list) TYPE type` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
The first two commands are lightweight in a sense that they only change metadata or remove files.
Also, they are replicated, syncing statistics metadata via ZooKeeper.
:::note
Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
:::

View File

@ -0,0 +1,86 @@
---
slug: /en/sql-reference/table-functions/fuzzJSON
sidebar_position: 75
sidebar_label: fuzzJSON
---
# fuzzJSON
Perturbs a JSON string with random variations.
``` sql
fuzzJSON({ named_collection [option=value [,..]] | json_str[, random_seed] })
```
**Arguments**
- `named_collection`- A [NAMED COLLECTION](/docs/en/sql-reference/statements/create/named-collection.md).
- `option=value` - Named collection optional parameters and their values.
- `json_str` (String) - The source string representing structured data in JSON format.
- `random_seed` (UInt64) - Manual random seed for producing stable results.
- `reuse_output` (boolean) - Reuse the output from a fuzzing process as input for the next fuzzer.
- `max_output_length` (UInt64) - Maximum allowable length of the generated or perturbed JSON string.
- `probability` (Float64) - The probability to fuzz a JSON field (a key-value pair). Must be within [0, 1] range.
- `max_nesting_level` (UInt64) - The maximum allowed depth of nested structures within the JSON data.
- `max_array_size` (UInt64) - The maximum allowed size of a JSON array.
- `max_object_size` (UInt64) - The maximum allowed number of fields on a single level of a JSON object.
- `max_string_value_length` (UInt64) - The maximum length of a String value.
- `min_key_length` (UInt64) - The minimum key length. Should be at least 1.
- `max_key_length` (UInt64) - The maximum key length. Should be greater or equal than the `min_key_length`, if specified.
**Returned Value**
A table object with a single column containing perturbed JSON strings.
## Usage Example
``` sql
CREATE NAMED COLLECTION json_fuzzer AS json_str='{}';
SELECT * FROM fuzzJSON(json_fuzzer) LIMIT 3;
```
``` text
{"52Xz2Zd4vKNcuP2":true}
{"UPbOhOQAdPKIg91":3405264103600403024}
{"X0QUWu8yT":[]}
```
``` sql
SELECT * FROM fuzzJSON(json_fuzzer, json_str='{"name" : "value"}', random_seed=1234) LIMIT 3;
```
``` text
{"key":"value", "mxPG0h1R5":"L-YQLv@9hcZbOIGrAn10%GA"}
{"BRE3":true}
{"key":"value", "SWzJdEJZ04nrpSfy":[{"3Q23y":[]}]}
```
``` sql
SELECT * FROM fuzzJSON(json_fuzzer, json_str='{"students" : ["Alice", "Bob"]}', reuse_output=true) LIMIT 3;
```
``` text
{"students":["Alice", "Bob"], "nwALnRMc4pyKD9Krv":[]}
{"students":["1rNY5ZNs0wU&82t_P", "Bob"], "wLNRGzwDiMKdw":[{}]}
{"xeEk":["1rNY5ZNs0wU&82t_P", "Bob"], "wLNRGzwDiMKdw":[{}, {}]}
```
``` sql
SELECT * FROM fuzzJSON(json_fuzzer, json_str='{"students" : ["Alice", "Bob"]}', max_output_length=512) LIMIT 3;
```
``` text
{"students":["Alice", "Bob"], "BREhhXj5":true}
{"NyEsSWzJdeJZ04s":["Alice", 5737924650575683711, 5346334167565345826], "BjVO2X9L":true}
{"NyEsSWzJdeJZ04s":["Alice", 5737924650575683711, 5346334167565345826], "BjVO2X9L":true, "k1SXzbSIz":[{}]}
```
``` sql
SELECT * FROM fuzzJSON('{"id":1}', 1234) LIMIT 3;
```
``` text
{"id":1, "mxPG0h1R5":"L-YQLv@9hcZbOIGrAn10%GA"}
{"BRjE":16137826149911306846}
{"XjKE":15076727133550123563}
```

View File

@ -1215,6 +1215,7 @@ ClickHouse использует потоки из глобального пул
- `metrics` флаг для экспорта текущих значений метрик из таблицы [system.metrics](../system-tables/metrics.md#system_tables-metrics).
- `events` флаг для экспорта текущих значений метрик из таблицы [system.events](../system-tables/events.md#system_tables-events).
- `asynchronous_metrics` флаг для экспорта текущих значений значения метрик из таблицы [system.asynchronous_metrics](../system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics).
- `errors` - флаг для экспорта количества ошибок (по кодам) случившихся с момента последнего рестарта сервера. Эта информация может быть получена из таблицы [system.errors](../system-tables/asynchronous_metrics.md#system_tables-errors)
**Пример**
@ -1225,6 +1226,7 @@ ClickHouse использует потоки из глобального пул
<metrics>true</metrics>
<events>true</events>
<asynchronous_metrics>true</asynchronous_metrics>
<errors>true</errors>
</prometheus>
```

View File

@ -44,6 +44,8 @@ contents:
dst: /usr/bin/clickhouse-odbc-bridge
- src: root/usr/share/bash-completion/completions
dst: /usr/share/bash-completion/completions
- src: root/usr/share/clickhouse
dst: /usr/share/clickhouse
# docs
- src: ../AUTHORS
dst: /usr/share/doc/clickhouse-common-static/AUTHORS

View File

@ -457,3 +457,10 @@ endif()
if (ENABLE_FUZZING)
add_compile_definitions(FUZZING_MODE=1)
endif ()
if (TARGET ch_contrib::protobuf)
get_property(google_proto_files TARGET ch_contrib::protobuf PROPERTY google_proto_files)
foreach (proto_file IN LISTS google_proto_files)
install(FILES ${proto_file} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/clickhouse/protos/google/protobuf)
endforeach()
endif ()

View File

@ -306,6 +306,10 @@ void Client::initialize(Poco::Util::Application & self)
/// Set path for format schema files
if (config().has("format_schema_path"))
global_context->setFormatSchemaPath(fs::weakly_canonical(config().getString("format_schema_path")));
/// Set the path for google proto files
if (config().has("google_protos_path"))
global_context->setGoogleProtosPath(fs::weakly_canonical(config().getString("google_protos_path")));
}

View File

@ -37,7 +37,7 @@
<production>{display_name} \e[1;31m:)\e[0m </production> <!-- if it matched to the substring "production" in the server display name -->
</prompt_by_server_display_name>
<!--
<!--
Settings adjustable via command-line parameters
can take their defaults from that config file, see examples:
@ -58,6 +58,9 @@
The same can be done on user-level configuration, just create & adjust: ~/.clickhouse-client/config.xml
-->
<!-- Directory containing the proto files for the well-known Protobuf types.
-->
<google_protos_path>/usr/share/clickhouse/protos/</google_protos_path>
<!-- Analog of .netrc -->
<![CDATA[

View File

@ -41,6 +41,7 @@
<min_session_timeout_ms>10000</min_session_timeout_ms>
<session_timeout_ms>100000</session_timeout_ms>
<raft_logs_level>information</raft_logs_level>
<compress_logs>false</compress_logs>
<!-- All settings listed in https://github.com/ClickHouse/ClickHouse/blob/master/src/Coordination/CoordinationSettings.h -->
</coordination_settings>

View File

@ -1279,6 +1279,8 @@ try
global_context->setHTTPHeaderFilter(*config);
global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop);
global_context->setClientHTTPHeaderForbiddenHeaders(server_settings_.get_client_http_header_forbidden_headers);
global_context->setAllowGetHTTPHeaderFunction(server_settings_.allow_get_client_http_header);
global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop);
global_context->setMaxTableNumToWarn(server_settings_.max_table_num_to_warn);
global_context->setMaxDatabaseNumToWarn(server_settings_.max_database_num_to_warn);
@ -1578,6 +1580,10 @@ try
global_context->setFormatSchemaPath(format_schema_path);
fs::create_directories(format_schema_path);
/// Set the path for google proto files
if (config().has("google_protos_path"))
global_context->setGoogleProtosPath(fs::weakly_canonical(config().getString("google_protos_path")));
/// Set path for filesystem caches
fs::path filesystem_caches_path(config().getString("filesystem_caches_path", ""));
if (!filesystem_caches_path.empty())

View File

@ -3,6 +3,7 @@
<tmp_path replace="replace">./tmp/</tmp_path>
<user_files_path replace="replace">./user_files/</user_files_path>
<format_schema_path replace="replace">./format_schemas/</format_schema_path>
<google_protos_path replace="replace">../../contrib/google-protobuf/src/</google_protos_path>
<access_control_path replace="replace">./access/</access_control_path>
<top_level_domains_path replace="replace">./top_level_domains/</top_level_domains_path>
</clickhouse>

View File

@ -1428,6 +1428,10 @@
-->
<format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>
<!-- Directory containing the proto files for the well-known Protobuf types.
-->
<google_protos_path>/usr/share/clickhouse/protos/</google_protos_path>
<!-- Default query masking rules, matching lines would be replaced with something else in the logs
(both text logs and system.query_log).
name - name for the rule (optional)

View File

@ -51,6 +51,11 @@ enum class AccessType
M(ALTER_CLEAR_INDEX, "CLEAR INDEX", TABLE, ALTER_INDEX) \
M(ALTER_INDEX, "INDEX", GROUP, ALTER_TABLE) /* allows to execute ALTER ORDER BY or ALTER {ADD|DROP...} INDEX */\
\
M(ALTER_ADD_STATISTIC, "ALTER ADD STATISTIC", TABLE, ALTER_STATISTIC) \
M(ALTER_DROP_STATISTIC, "ALTER DROP STATISTIC", TABLE, ALTER_STATISTIC) \
M(ALTER_MATERIALIZE_STATISTIC, "ALTER MATERIALIZE STATISTIC", TABLE, ALTER_STATISTIC) \
M(ALTER_STATISTIC, "STATISTIC", GROUP, ALTER_TABLE) /* allows to execute ALTER STATISTIC */\
\
M(ALTER_ADD_PROJECTION, "ADD PROJECTION", TABLE, ALTER_PROJECTION) \
M(ALTER_DROP_PROJECTION, "DROP PROJECTION", TABLE, ALTER_PROJECTION) \
M(ALTER_MATERIALIZE_PROJECTION, "MATERIALIZE PROJECTION", TABLE, ALTER_PROJECTION) \

View File

@ -77,7 +77,7 @@ public:
if (if_argument_pos >= 0)
{
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
data(place).count += countBytesInFilter(flags);
data(place).count += countBytesInFilter(flags.data(), row_begin, row_end);
}
else
{

View File

@ -0,0 +1,82 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionGroupArraySorted.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Common/Exception.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
}
namespace
{
/// Builds the aggregate function for a numeric-like argument type.
/// Date, DateTime and IPv4 are dispatched to their underlying integer
/// representations; all remaining types are tried as plain numeric types
/// (a null result pointer signals "not a supported numeric type").
template <template <typename> class AggregateFunctionTemplate, typename ... TArgs>
AggregateFunctionPtr createWithNumericOrTimeType(const IDataType & argument_type, TArgs && ... args)
{
    switch (WhichDataType(argument_type).idx)
    {
        case TypeIndex::Date:
            return std::make_shared<AggregateFunctionTemplate<UInt16>>(std::forward<TArgs>(args)...);
        case TypeIndex::DateTime:
            return std::make_shared<AggregateFunctionTemplate<UInt32>>(std::forward<TArgs>(args)...);
        case TypeIndex::IPv4:
            return std::make_shared<AggregateFunctionTemplate<IPv4>>(std::forward<TArgs>(args)...);
        default:
            return AggregateFunctionPtr(createWithNumericType<AggregateFunctionTemplate, TArgs...>(argument_type, std::forward<TArgs>(args)...));
    }
}
/// Instantiates the concrete groupArraySorted implementation for the argument type.
/// Numeric, Date, DateTime and IPv4 columns get the radix-sort based numeric
/// implementation; every other type falls back to the generic Field-based one.
/// Fix: removed an unused local `WhichDataType which(argument_type);` that was
/// declared before the generic fallback but never read.
template <typename ... TArgs>
inline AggregateFunctionPtr createAggregateFunctionGroupArraySortedImpl(const DataTypePtr & argument_type, const Array & parameters, TArgs ... args)
{
    if (auto res = createWithNumericOrTimeType<GroupArraySortedNumericImpl>(*argument_type, argument_type, parameters, std::forward<TArgs>(args)...))
        return AggregateFunctionPtr(res);

    /// Generic fallback: stores values as type-erased Fields.
    return std::make_shared<GroupArraySortedGeneralImpl<GroupArraySortedNodeGeneral>>(argument_type, parameters, std::forward<TArgs>(args)...);
}
/// Factory entry point for `groupArraySorted(N)(column)`.
/// Validates that exactly one column argument and exactly one positive integer
/// parameter N (the result-size limit) were supplied, then builds the
/// type-specific implementation.
AggregateFunctionPtr createAggregateFunctionGroupArraySorted(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
    /// Exactly one column argument is required.
    assertUnary(name, argument_types);

    UInt64 max_elems = std::numeric_limits<UInt64>::max();

    if (parameters.empty())
    {
        /// The limit parameter is mandatory in this implementation.
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should have limit argument", name);
    }
    else if (parameters.size() == 1)
    {
        auto type = parameters[0].getType();
        if (type != Field::Types::Int64 && type != Field::Types::UInt64)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);

        /// Reject negative Int64 and zero UInt64: N must be >= 1.
        if ((type == Field::Types::Int64 && parameters[0].get<Int64>() < 0) ||
            (type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);

        max_elems = parameters[0].get<UInt64>();
    }
    else
        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
            "Function {} does not support this number of arguments", name);

    return createAggregateFunctionGroupArraySortedImpl(argument_types[0], parameters, max_elems);
}
}
/// Registers `groupArraySorted` in the aggregate function factory.
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory)
{
    /// is_order_dependent = false: the result is sorted, so input row order
    /// does not affect the final value.
    AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = false };

    factory.registerFunction("groupArraySorted", { createAggregateFunctionGroupArraySorted, properties });
}
}

View File

@ -0,0 +1,355 @@
#pragma once
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Functions/array/arraySort.h>
#include <Common/Exception.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <Columns/ColumnConst.h>
#include <DataTypes/IDataType.h>
#include <base/sort.h>
#include <Columns/IColumn.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Common/RadixSort.h>
#include <algorithm>
#include <type_traits>
#include <utility>
#define AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE 0xFFFFFF
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int TOO_LARGE_ARRAY_SIZE;
}
template <typename T>
struct GroupArraySortedData;

/// Accumulator state for the numeric groupArraySorted implementation:
/// a flat arena-backed array of raw T values; sorting/trimming to the limit
/// is performed by the owning aggregate function, not here.
template <typename T>
struct GroupArraySortedData
{
    /// For easy serialization.
    static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);

    // Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
    using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
    using Array = PODArray<T, 32, Allocator>;

    Array value;
};
template <typename T>
class GroupArraySortedNumericImpl final
: public IAggregateFunctionDataHelper<GroupArraySortedData<T>, GroupArraySortedNumericImpl<T>>
{
using Data = GroupArraySortedData<T>;
UInt64 max_elems;
SerializationPtr serialization;
public:
explicit GroupArraySortedNumericImpl(
const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
: IAggregateFunctionDataHelper<GroupArraySortedData<T>, GroupArraySortedNumericImpl<T>>(
{data_type_}, parameters_, std::make_shared<DataTypeArray>(data_type_))
, max_elems(max_elems_)
, serialization(data_type_->getDefaultSerialization())
{
}
String getName() const override { return "groupArraySorted"; }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
const auto & row_value = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
auto & cur_elems = this->data(place);
cur_elems.value.push_back(row_value, arena);
/// To optimize, we sort (2 * max_size) elements of input array over and over again
/// and after each loop we delete the last half of sorted array
if (cur_elems.value.size() >= max_elems * 2)
{
RadixSort<RadixSortNumTraits<T>>::executeLSD(cur_elems.value.data(), cur_elems.value.size());
cur_elems.value.resize(max_elems, arena);
}
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & cur_elems = this->data(place);
auto & rhs_elems = this->data(rhs);
if (rhs_elems.value.empty())
return;
if (rhs_elems.value.size())
cur_elems.value.insertByOffsets(rhs_elems.value, 0, rhs_elems.value.size(), arena);
RadixSort<RadixSortNumTraits<T>>::executeLSD(cur_elems.value.data(), cur_elems.value.size());
size_t elems_size = cur_elems.value.size() < max_elems ? cur_elems.value.size() : max_elems;
cur_elems.value.resize(elems_size, arena);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
auto & value = this->data(place).value;
size_t size = value.size();
writeVarUInt(size, buf);
for (const auto & elem : value)
writeBinaryLittleEndian(elem, buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
size_t size = 0;
readVarUInt(size, buf);
if (unlikely(size > max_elems))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size, it should not exceed {}", max_elems);
auto & value = this->data(place).value;
value.resize(size, arena);
for (auto & element : value)
readBinaryLittleEndian(element, buf);
}
static void checkArraySize(size_t elems, size_t max_elems)
{
if (unlikely(elems > max_elems))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
"Too large array size {} (maximum: {})", elems, max_elems);
}
/// Writes the final sorted array for this state into the result column.
/// The state buffer is sorted and truncated to max_elems in place, then
/// bulk-copied into the destination ColumnArray.
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
{
    auto & value = this->data(place).value;

    RadixSort<RadixSortNumTraits<T>>::executeLSD(value.data(), value.size());
    size_t elems_size = value.size() < max_elems ? value.size() : max_elems;
    value.resize(elems_size, arena);

    size_t size = value.size();
    ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
    ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
    offsets_to.push_back(offsets_to.back() + size);

    if (size)
    {
        typename ColumnVector<T>::Container & data_to = assert_cast<ColumnVector<T> &>(arr_to.getData()).getData();
        /// `value` was already sorted and truncated above, so a single bulk insert suffices.
        /// (The previous implementation redundantly re-sorted and re-resized the buffer here.)
        data_to.insert(value.begin(), value.end());
    }
}
/// The state keeps its elements in Arena-allocated memory, so the Arena must outlive the state.
bool allocatesMemoryInArena() const override { return true; }
};
/// Aggregation state for the generic (non-numeric) groupArraySorted implementation.
/// Only the has_sampler == false specialization is defined in this file.
template <typename Node, bool has_sampler>
struct GroupArraySortedGeneralData;
/// State without reservoir sampling: a flat array of Fields accumulated per group.
template <typename Node>
struct GroupArraySortedGeneralData<Node, false>
{
    // Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
    using Allocator = MixedAlignedArenaAllocator<alignof(Node *), 4096>;
    using Array = PODArray<Field, 32, Allocator>;
    /// Accumulated values for one group.
    Array value;
};
/// Base for variable-size nodes: a fixed header followed immediately by the payload bytes.
/// The payload is addressed by offsetting past sizeof(Node) from `this`.
template <typename Node>
struct GroupArraySortedNodeBase
{
    UInt64 size; // size of payload
    /// Returns pointer to actual payload
    char * data() { return reinterpret_cast<char *>(this) + sizeof(Node); }
    /// Const overload of the payload accessor.
    const char * data() const { return reinterpret_cast<const char *>(this) + sizeof(Node); }
};
/// Node tag for string payloads.
struct GroupArraySortedNodeString : public GroupArraySortedNodeBase<GroupArraySortedNodeString>
{
    using Node = GroupArraySortedNodeString;
};
/// Node tag for arbitrary (generic serialized) payloads.
struct GroupArraySortedNodeGeneral : public GroupArraySortedNodeBase<GroupArraySortedNodeGeneral>
{
    using Node = GroupArraySortedNodeGeneral;
};
/// Implementation of groupArraySorted for Generic data via Array
/// Elements are stored as Fields and compared with Field's ordering;
/// the result is the sorted array of at most max_elems smallest values.
template <typename Node>
class GroupArraySortedGeneralImpl final
    : public IAggregateFunctionDataHelper<GroupArraySortedGeneralData<Node, false>, GroupArraySortedGeneralImpl<Node>>
{
    using Data = GroupArraySortedGeneralData<Node, false>;
    static Data & data(AggregateDataPtr __restrict place) { return *reinterpret_cast<Data *>(place); }
    static const Data & data(ConstAggregateDataPtr __restrict place) { return *reinterpret_cast<const Data *>(place); }

    /// Reference into argument_types[0]; kept as a reference so serialization stays consistent with it.
    DataTypePtr & data_type;
    UInt64 max_elems;
    SerializationPtr serialization;

public:
    GroupArraySortedGeneralImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
        : IAggregateFunctionDataHelper<GroupArraySortedGeneralData<Node, false>, GroupArraySortedGeneralImpl<Node>>(
            {data_type_}, parameters_, std::make_shared<DataTypeArray>(data_type_))
        , data_type(this->argument_types[0])
        , max_elems(max_elems_)
        , serialization(data_type->getDefaultSerialization())
    {
    }

    String getName() const override { return "groupArraySorted"; }

    /// Appends one input row (as a Field) to the state.
    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        auto & cur_elems = data(place);
        cur_elems.value.push_back(columns[0][0][row_num], arena);

        /// To optimize, we sort (2 * max_size) elements of input array over and over again and
        /// after each loop we delete the last half of sorted array
        if (cur_elems.value.size() >= max_elems * 2)
        {
            std::sort(cur_elems.value.begin(), cur_elems.value.begin() + (max_elems * 2));
            cur_elems.value.erase(cur_elems.value.begin() + max_elems, cur_elems.value.begin() + (max_elems * 2));
        }
    }

    /// Merges another state into this one: append, sort, and keep the smallest max_elems.
    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        auto & cur_elems = data(place);
        auto & rhs_elems = data(rhs);

        if (rhs_elems.value.empty())
            return;

        UInt64 new_elems = rhs_elems.value.size();
        for (UInt64 i = 0; i < new_elems; ++i)
            cur_elems.value.push_back(rhs_elems.value[i], arena);

        /// Sanity bound on the combined size before sorting.
        checkArraySize(cur_elems.value.size(), AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE);

        /// cur_elems is guaranteed non-empty here (rhs was non-empty and its elements were appended),
        /// so the previous emptiness check was removed.
        std::sort(cur_elems.value.begin(), cur_elems.value.end());
        if (cur_elems.value.size() > max_elems)
            cur_elems.value.resize(max_elems, arena);
    }

    /// Throws TOO_LARGE_ARRAY_SIZE if `elems` exceeds the allowed maximum.
    static void checkArraySize(size_t elems, size_t max_elems)
    {
        if (unlikely(elems > max_elems))
            throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
                "Too large array size {} (maximum: {})", elems, max_elems);
    }

    /// Serializes the state: size, then for each element a null flag followed by
    /// the element's binary representation (null elements carry only the flag).
    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        auto & value = data(place).value;
        size_t size = value.size();
        checkArraySize(size, AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE);
        writeVarUInt(size, buf);

        for (const Field & elem : value)
        {
            if (elem.isNull())
            {
                writeBinary(false, buf);
            }
            else
            {
                writeBinary(true, buf);
                serialization->serializeBinary(elem, buf, {});
            }
        }
    }

    /// Restores a state written by serialize(); validates the size before allocating.
    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        size_t size = 0;
        readVarUInt(size, buf);

        if (unlikely(size > max_elems))
            throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size, it should not exceed {}", max_elems);
        checkArraySize(size, AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE);

        auto & value = data(place).value;
        value.resize(size, arena);
        for (Field & elem : value)
        {
            UInt8 is_null = 0;
            readBinary(is_null, buf);
            if (!is_null)
                serialization->deserializeBinary(elem, buf, {});
        }
    }

    /// Writes the final sorted array into the result column.
    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
    {
        auto & column_array = assert_cast<ColumnArray &>(to);
        auto & value = data(place).value;

        if (!value.empty())
        {
            std::sort(value.begin(), value.end());
            if (value.size() > max_elems)
                value.resize_exact(max_elems, arena);
        }

        auto & offsets = column_array.getOffsets();
        offsets.push_back(offsets.back() + value.size());

        auto & column_data = column_array.getData();
        /// Node is a compile-time template argument, so use `if constexpr`
        /// instead of a runtime branch on a constant condition.
        if constexpr (std::is_same_v<Node, GroupArraySortedNodeString>)
        {
            auto & string_offsets = assert_cast<ColumnString &>(column_data).getOffsets();
            string_offsets.reserve(string_offsets.size() + value.size());
        }

        for (const Field & field : value)
            column_data.insert(field);
    }

    /// The state keeps its elements in Arena-allocated memory.
    bool allocatesMemoryInArena() const override { return true; }
};
#undef AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ARRAY_SIZE
}

View File

@ -142,6 +142,7 @@ struct AggregateFunctionSumData
), addManyConditionalInternalImpl, MULTITARGET_FUNCTION_BODY((const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end) /// NOLINT
{
ptr += start;
condition_map += start;
size_t count = end - start;
const auto * end_ptr = ptr + count;

View File

@ -100,7 +100,16 @@ public:
if (has_null_types)
{
/// Currently the only functions that returns not-NULL on all NULL arguments are count and uniq, and they returns UInt64.
/** Some functions, such as `count`, `uniq`, and others, return 0 :: UInt64 instead of NULL for a NULL argument.
* These functions have the `returns_default_when_only_null` property, so we explicitly specify the result type
* when replacing the function with `nothing`.
*
* Note: It's a bit dangerous to have the function result type depend on properties because we do not serialize properties in AST,
* and we can lose this information. For example, when we have `count(NULL)` replaced with `nothing(NULL) as "count(NULL)"` and send it
* to the remote server, the remote server will execute `nothing(NULL)` and return `NULL` while `0` is expected.
*
* To address this, we handle `nothing` in a special way in `FunctionNode::toASTImpl`.
*/
if (properties.returns_default_when_only_null)
return std::make_shared<AggregateFunctionNothing>(arguments, params, std::make_shared<DataTypeUInt64>());
else
@ -144,11 +153,18 @@ public:
}
else
{
return std::make_shared<AggregateFunctionNullVariadic<false, true>>(nested_function, arguments, params);
#if 0
if (serialize_flag)
return std::make_shared<AggregateFunctionNullVariadic<false, true>>(nested_function, arguments, params);
else
/// This should be <false, false> (no serialize flag) but it was initially added incorrectly and
/// changing it would break the binary compatibility of aggregation states using this method
// (such as AggregateFunction(argMaxOrNull, Nullable(Int64), UInt64)). The extra flag is harmless
return std::make_shared<AggregateFunctionNullVariadic<false, true>>(nested_function, arguments, params);
}
#endif
}
}
}
};

View File

@ -289,15 +289,6 @@ public:
Arena * arena,
ssize_t if_argument_pos = -1) const = 0;
virtual void addBatchSinglePlaceFromInterval( /// NOLINT
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** columns,
Arena * arena,
ssize_t if_argument_pos = -1)
const = 0;
/** In addition to addBatch, this method collects multiple rows of arguments into array "places"
* as long as they are between offsets[i-1] and offsets[i]. This is used for arrayReduce and
* -Array combinator. It might also be used generally to break data dependency when array
@ -586,31 +577,6 @@ public:
}
}
void addBatchSinglePlaceFromInterval( /// NOLINT
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** columns,
Arena * arena,
ssize_t if_argument_pos = -1)
const override
{
if (if_argument_pos >= 0)
{
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
for (size_t i = row_begin; i < row_end; ++i)
{
if (flags[i])
static_cast<const Derived *>(this)->add(place, columns, i, arena);
}
}
else
{
for (size_t i = row_begin; i < row_end; ++i)
static_cast<const Derived *>(this)->add(place, columns, i, arena);
}
}
void addBatchArray(
size_t row_begin,
size_t row_end,

View File

@ -43,6 +43,7 @@ namespace ErrorCodes
template <typename T>
class QuantileTDigest
{
friend class TDigestStatistic;
using Value = Float32;
using Count = Float32;
using BetterFloat = Float64; // For intermediate results and sum(Count). Must have better precision, than Count
@ -334,6 +335,44 @@ public:
compress(); // Allows reading/writing TDigests with different epsilon/max_centroids params
}
Float64 getCountLessThan(Float64 value) const
{
bool first = true;
Count sum = 0;
Count prev_count = 0;
Float64 prev_x = 0;
Value prev_mean = 0;
for (const auto & c : centroids)
{
/// std::cerr << "c "<< c.mean << " "<< c.count << std::endl;
Float64 current_x = sum + c.count * 0.5;
if (c.mean >= value)
{
/// value is smaller than any value.
if (first)
return 0;
Float64 left = prev_x + 0.5 * (prev_count == 1);
Float64 right = current_x - 0.5 * (c.count == 1);
Float64 result = checkOverflow<Float64>(interpolate(
static_cast<Value>(value),
prev_mean,
static_cast<Value>(left),
c.mean,
static_cast<Value>(right)));
return result;
}
sum += c.count;
prev_mean = c.mean;
prev_count = c.count;
prev_x = current_x;
first = false;
}
/// count is larger than any value.
return count;
}
/** Calculates the quantile q [0, 1] based on the digest.
* For an empty digest returns NaN.
*/

View File

@ -15,6 +15,7 @@ void registerAggregateFunctionCount(AggregateFunctionFactory &);
void registerAggregateFunctionDeltaSum(AggregateFunctionFactory &);
void registerAggregateFunctionDeltaSumTimestamp(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArray(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory);
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantile(AggregateFunctionFactory &);
@ -111,6 +112,7 @@ void registerAggregateFunctions()
registerAggregateFunctionDeltaSum(factory);
registerAggregateFunctionDeltaSumTimestamp(factory);
registerAggregateFunctionGroupArray(factory);
registerAggregateFunctionGroupArraySorted(factory);
registerAggregateFunctionGroupUniqArray(factory);
registerAggregateFunctionGroupArrayInsertAt(factory);
registerAggregateFunctionsQuantile(factory);

View File

@ -203,6 +203,18 @@ ASTPtr FunctionNode::toASTImpl(const ConvertToASTOptions & options) const
function_ast->name = function_name;
if (function_name == "nothing")
{
/** Inside AggregateFunctionCombinatorNull we may replace functions with `NULL` in arguments with `nothing`.
* Result type of `nothing` depends on `returns_default_when_only_null` property of nested function.
* If we convert `nothing` to AST, we will lose this information, so we use original function name instead.
*/
const auto & original_ast = getOriginalAST();
const auto & original_function_ast = original_ast ? original_ast->as<ASTFunction>() : nullptr;
if (original_function_ast)
function_ast->name = original_function_ast->name;
}
if (isWindowFunction())
{
function_ast->is_window_function = true;

View File

@ -278,6 +278,7 @@ QueryTreeNodePtr IQueryTreeNode::cloneAndReplace(const ReplacementMap & replacem
if (it != replacement_map.end())
continue;
node_clone->original_ast = node_to_clone->original_ast;
node_clone->setAlias(node_to_clone->alias);
node_clone->children = node_to_clone->children;
node_clone->weak_pointers = node_to_clone->weak_pointers;
@ -318,6 +319,7 @@ QueryTreeNodePtr IQueryTreeNode::cloneAndReplace(const ReplacementMap & replacem
*weak_pointer_ptr = it->second;
}
result_cloned_node_place->original_ast = original_ast;
return result_cloned_node_place;
}

View File

@ -1,134 +0,0 @@
#include <Analyzer/Passes/MoveFunctionsOutOfAnyPass.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/LambdaNode.h>
#include <Analyzer/ConstantNode.h>
namespace DB
{
namespace
{
class AnyFunctionViMoveFunctionsOutOfAnyVisitor : public InDepthQueryTreeVisitorWithContext<AnyFunctionViMoveFunctionsOutOfAnyVisitor>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<AnyFunctionViMoveFunctionsOutOfAnyVisitor>;
using Base::Base;
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_move_functions_out_of_any)
return;
auto * function_node = node->as<FunctionNode>();
if (!function_node)
return;
/// check function is any
const auto & function_name = function_node->getFunctionName();
if (function_name != "any" && function_name != "anyLast")
return;
auto & arguments = function_node->getArguments().getNodes();
if (arguments.size() != 1)
return;
auto * inside_function_node = arguments[0]->as<FunctionNode>();
/// check argument is a function
if (!inside_function_node)
return;
/// check arguments can not contain arrayJoin or lambda
if (!canRewrite(inside_function_node))
return;
auto & inside_function_node_arguments = inside_function_node->getArguments().getNodes();
/// case any(f())
if (inside_function_node_arguments.empty())
return;
auto it = node_to_rewritten_node.find(node.get());
if (it != node_to_rewritten_node.end())
{
node = it->second;
return;
}
/// checking done, rewrite function
bool changed_argument = false;
for (auto & inside_argument : inside_function_node_arguments)
{
if (inside_argument->as<ConstantNode>()) /// skip constant node
break;
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, {inside_argument->getResultType()}, {}, properties);
auto any_function = std::make_shared<FunctionNode>(function_name);
any_function->resolveAsAggregateFunction(std::move(aggregate_function));
auto & any_function_arguments = any_function->getArguments().getNodes();
any_function_arguments.push_back(std::move(inside_argument));
inside_argument = std::move(any_function);
changed_argument = true;
}
if (changed_argument)
{
node_to_rewritten_node.emplace(node.get(), arguments[0]);
node = arguments[0];
}
}
private:
bool canRewrite(const FunctionNode * function_node)
{
for (const auto & argument : function_node->getArguments().getNodes())
{
if (argument->as<LambdaNode>())
return false;
if (const auto * inside_function = argument->as<FunctionNode>())
{
/// Function arrayJoin is special and should be skipped (think about it as
/// an aggregate function), otherwise wrong result will be produced.
/// For example:
/// SELECT *, any(arrayJoin([[], []])) FROM numbers(1) GROUP BY number
/// ┌─number─┬─arrayJoin(array(array(), array()))─┐
/// │ 0 │ [] │
/// │ 0 │ [] │
/// └────────┴────────────────────────────────────┘
if (inside_function->getFunctionName() == "arrayJoin")
return false;
if (!canRewrite(inside_function))
return false;
}
}
return true;
}
/// After query analysis, alias identifier will be resolved to node whose memory address is same with the original one.
/// So we can reuse the rewritten function.
std::unordered_map<IQueryTreeNode *, QueryTreeNodePtr> node_to_rewritten_node;
};
}
void MoveFunctionsOutOfAnyPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
AnyFunctionViMoveFunctionsOutOfAnyVisitor visitor(context);
visitor.visit(query_tree_node);
}
}

View File

@ -1,27 +0,0 @@
#pragma once
#include <Analyzer/IQueryTreePass.h>
namespace DB
{
/** Rewrite 'any' and 'anyLast' functions pushing them inside original function.
*
* Example: SELECT any(f(x, y, g(z)));
* Result: SELECT f(any(x), any(y), g(any(z)));
*/
class MoveFunctionsOutOfAnyPass final : public IQueryTreePass
{
public:
String getName() override { return "MoveFunctionsOutOfAnyPass"; }
String getDescription() override
{
return "Rewrite 'any' and 'anyLast' functions pushing them inside original function.";
}
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}

View File

@ -44,7 +44,6 @@
#include <Analyzer/Passes/CrossToInnerJoinPass.h>
#include <Analyzer/Passes/ShardNumColumnToFunctionPass.h>
#include <Analyzer/Passes/ConvertQueryToCNFPass.h>
#include <Analyzer/Passes/MoveFunctionsOutOfAnyPass.h>
#include <Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.h>
@ -284,7 +283,6 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<CrossToInnerJoinPass>());
manager.addPass(std::make_unique<ShardNumColumnToFunctionPass>());
manager.addPass(std::make_unique<MoveFunctionsOutOfAnyPass>());
manager.addPass(std::make_unique<OptimizeDateOrDateTimeConverterWithPreimagePass>());
}

View File

@ -222,6 +222,7 @@ add_object_library(clickhouse_storages Storages)
add_object_library(clickhouse_storages_mysql Storages/MySQL)
add_object_library(clickhouse_storages_distributed Storages/Distributed)
add_object_library(clickhouse_storages_mergetree Storages/MergeTree)
add_object_library(clickhouse_storages_statistics Storages/Statistics)
add_object_library(clickhouse_storages_liveview Storages/LiveView)
add_object_library(clickhouse_storages_windowview Storages/WindowView)
add_object_library(clickhouse_storages_s3queue Storages/S3Queue)

View File

@ -2861,7 +2861,7 @@ void ClientBase::init(int argc, char ** argv)
("interactive", "Process queries-file or --query query and start interactive mode")
("pager", po::value<std::string>(), "Pipe all output into this command (less or similar)")
("max_memory_usage_in_client", po::value<int>(), "Set memory limit in client/local server")
("max_memory_usage_in_client", po::value<std::string>(), "Set memory limit in client/local server")
;
addOptions(options_description);
@ -2996,10 +2996,12 @@ void ClientBase::init(int argc, char ** argv)
clearPasswordFromCommandLine(argc, argv);
/// Limit on total memory usage
size_t max_client_memory_usage = config().getInt64("max_memory_usage_in_client", 0 /*default value*/);
if (max_client_memory_usage != 0)
std::string max_client_memory_usage = config().getString("max_memory_usage_in_client", "0" /*default value*/);
if (max_client_memory_usage != "0")
{
total_memory_tracker.setHardLimit(max_client_memory_usage);
UInt64 max_client_memory_usage_int = parseWithSizeSuffix<UInt64>(max_client_memory_usage.c_str(), max_client_memory_usage.length());
total_memory_tracker.setHardLimit(max_client_memory_usage_int);
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
}

View File

@ -587,6 +587,8 @@
M(705, TABLE_NOT_EMPTY) \
M(706, LIBSSH_ERROR) \
M(707, GCP_ERROR) \
M(708, ILLEGAL_STATISTIC) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \
M(1001, STD_EXCEPTION) \

View File

@ -1,4 +1,5 @@
#include <Common/LockMemoryExceptionInThread.h>
#include <base/defines.h>
/// LockMemoryExceptionInThread
thread_local uint64_t LockMemoryExceptionInThread::counter = 0;
@ -18,3 +19,19 @@ LockMemoryExceptionInThread::~LockMemoryExceptionInThread()
level = previous_level;
block_fault_injections = previous_block_fault_injections;
}
void LockMemoryExceptionInThread::addUniqueLock(VariableContext level_, bool block_fault_injections_)
{
chassert(counter == 0);
counter = 1;
level = level_;
block_fault_injections = block_fault_injections_;
}
void LockMemoryExceptionInThread::removeUniqueLock()
{
chassert(counter == 1);
counter = 0;
level = VariableContext::Global;
block_fault_injections = false;
}

View File

@ -33,6 +33,9 @@ public:
LockMemoryExceptionInThread(const LockMemoryExceptionInThread &) = delete;
LockMemoryExceptionInThread & operator=(const LockMemoryExceptionInThread &) = delete;
static void addUniqueLock(VariableContext level_ = VariableContext::User, bool block_fault_injections_ = true);
static void removeUniqueLock();
static bool isBlocked(VariableContext current_level, bool fault_injection)
{
return counter > 0 && current_level >= level && (!fault_injection || block_fault_injections);

View File

@ -516,7 +516,7 @@ public:
if (record.header.version > CURRENT_CHANGELOG_VERSION)
throw Exception(
ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported changelog version {} on path {}", record.header.version, filepath);
ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported changelog version {} on path {}", static_cast<uint8_t>(record.header.version), filepath);
/// Read data
if (record.header.blob_size != 0)
@ -1480,4 +1480,9 @@ void Changelog::setRaftServer(const nuraft::ptr<nuraft::raft_server> & raft_serv
raft_server = raft_server_;
}
bool Changelog::isInitialized() const
{
return initialized;
}
}

View File

@ -153,6 +153,8 @@ public:
void setRaftServer(const nuraft::ptr<nuraft::raft_server> & raft_server_);
bool isInitialized() const;
/// Fsync log to disk
~Changelog();

View File

@ -127,7 +127,8 @@ void KeeperLogStore::shutdownChangelog()
bool KeeperLogStore::flushChangelogAndShutdown()
{
std::lock_guard lock(changelog_lock);
changelog.flush();
if (changelog.isInitialized())
changelog.flush();
changelog.shutdown();
return true;
}

View File

@ -329,6 +329,20 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co
params.return_method_ = nuraft::raft_params::async_handler;
nuraft::asio_service::options asio_opts{};
/// If asio worker threads fail in any way, NuRaft will stop to make any progress
/// For that reason we need to suppress out of memory exceptions in such threads
/// TODO: use `get_active_workers` to detect when we have no active workers to abort
asio_opts.worker_start_ = [](uint32_t /*worker_id*/)
{
LockMemoryExceptionInThread::addUniqueLock(VariableContext::Global);
};
asio_opts.worker_stop_ = [](uint32_t /*worker_id*/)
{
LockMemoryExceptionInThread::removeUniqueLock();
};
if (state_manager->isSecure())
{
#if USE_SSL

View File

@ -779,7 +779,7 @@ void KeeperSnapshotManager::removeSnapshot(uint64_t log_idx)
if (itr == existing_snapshots.end())
throw Exception(ErrorCodes::UNKNOWN_SNAPSHOT, "Unknown snapshot with log index {}", log_idx);
const auto & [path, disk] = itr->second;
disk->removeFile(path);
disk->removeFileIfExists(path);
existing_snapshots.erase(itr);
}
@ -809,8 +809,16 @@ SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStor
disk->removeFile(tmp_snapshot_file_name);
existing_snapshots.emplace(up_to_log_idx, SnapshotFileInfo{snapshot_file_name, disk});
removeOutdatedSnapshotsIfNeeded();
moveSnapshotsIfNeeded();
try
{
removeOutdatedSnapshotsIfNeeded();
moveSnapshotsIfNeeded();
}
catch (...)
{
tryLogCurrentException(log, "Failed to cleanup and/or move older snapshots");
}
return {snapshot_file_name, disk};
}

View File

@ -101,6 +101,8 @@ namespace DB
M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
M(String, get_client_http_header_forbidden_headers, "", "Comma separated list of http header names that will not be returned by function getClientHTTPHeader.", 0) \
M(Bool, allow_get_client_http_header, false, "Allow function getClientHTTPHeader", 0) \
M(Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0) \
M(Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0) \

View File

@ -139,6 +139,9 @@ class IColumn;
M(Bool, enable_multiple_prewhere_read_steps, true, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \
M(Bool, move_primary_key_columns_to_end_of_prewhere, true, "Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.", 0) \
\
M(Bool, allow_statistic_optimize, false, "Allows using statistic to optimize queries", 0) \
M(Bool, allow_experimental_statistic, false, "Allows using statistic", 0) \
\
M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \
M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \
M(Bool, alter_move_to_space_execute_async, false, "Execute ALTER TABLE MOVE ... TO [DISK|VOLUME] asynchronously", 0) \
@ -512,8 +515,10 @@ class IColumn;
M(Bool, splitby_max_substrings_includes_remaining_string, false, "Functions 'splitBy*()' with 'max_substrings' argument > 0 include the remaining string as last element in the result", 0) \
\
M(Bool, allow_execute_multiif_columnar, true, "Allow execute multiIf function columnar", 0) \
M(Bool, formatdatetime_f_prints_single_zero, false, "Formatter '%f' in function 'formatDateTime()' produces a single zero instead of six zeros if the formatted value has no fractional seconds.", 0) \
M(Bool, formatdatetime_parsedatetime_m_is_month_name, true, "Formatter '%M' in functions 'formatDateTime()' and 'parseDateTime()' produces the month name instead of minutes.", 0) \
M(Bool, formatdatetime_f_prints_single_zero, false, "Formatter '%f' in function 'formatDateTime()' prints a single zero instead of six zeros if the formatted value has no fractional seconds.", 0) \
M(Bool, formatdatetime_parsedatetime_m_is_month_name, true, "Formatter '%M' in functions 'formatDateTime()' and 'parseDateTime()' print/parse the month name instead of minutes.", 0) \
M(Bool, parsedatetime_parse_without_leading_zeros, true, "Formatters '%c', '%l' and '%k' in function 'parseDateTime()' parse months and hours without leading zeros.", 0) \
M(Bool, formatdatetime_format_without_leading_zeros, false, "Formatters '%c', '%l' and '%k' in function 'formatDateTime()' print months and hours without leading zeros.", 0) \
\
M(UInt64, max_partitions_per_insert_block, 100, "Limit maximum number of partitions in single INSERTed block. Zero means unlimited. Throw exception if the block contains too many partitions. This setting is a safety threshold, because using large number of partitions is a common misconception.", 0) \
M(Bool, throw_on_max_partitions_per_insert_block, true, "Used with max_partitions_per_insert_block. If true (default), an exception will be thrown when max_partitions_per_insert_block is reached. If false, details of the insert query reaching this limit with the number of partitions will be logged. This can be useful if you're trying to understand the impact on users when changing max_partitions_per_insert_block.", 0) \
@ -554,7 +559,6 @@ class IColumn;
M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \
M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and \"undelete\" scenarios", 0) \
M(Bool, optimize_move_functions_out_of_any, false, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \
M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \
M(Bool, rewrite_count_distinct_if_with_count_distinct_implementation, false, "Rewrite countDistinctIf with count_distinct_implementation configuration", 0) \
@ -592,7 +596,7 @@ class IColumn;
M(Bool, allow_experimental_database_materialized_mysql, false, "Allow to create database with Engine=MaterializedMySQL(...).", 0) \
M(Bool, allow_experimental_database_materialized_postgresql, false, "Allow to create database with Engine=MaterializedPostgreSQL(...).", 0) \
M(Bool, system_events_show_zero_values, false, "When querying system.events or system.metrics tables, include all metrics, even with zero values.", 0) \
M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal', 'datetime64', 'date2Date32' or 'date2String'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \
M(MySQLDataTypesSupport, mysql_datatypes_support_level, MySQLDataTypesSupportList{}, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal', 'datetime64', 'date2Date32' or 'date2String'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \
M(Bool, optimize_trivial_insert_select, true, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \
M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
M(Bool, enable_global_with_statement, true, "Propagate WITH statements to UNION queries and all subqueries", 0) \
@ -823,7 +827,6 @@ class IColumn;
M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \
M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \
M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
M(Bool, allow_experimental_undrop_table_query, true, "Allow to use undrop query to restore dropped table in a limited time", 0) \
M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \
M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \
@ -893,6 +896,8 @@ class IColumn;
MAKE_OBSOLETE(M, UInt64, parallel_replicas_min_number_of_granules_to_enable, 0) \
MAKE_OBSOLETE(M, Bool, query_plan_optimize_projection, true) \
MAKE_OBSOLETE(M, Bool, query_cache_store_results_of_queries_with_nondeterministic_functions, false) \
MAKE_OBSOLETE(M, Bool, optimize_move_functions_out_of_any, false) \
MAKE_OBSOLETE(M, Bool, allow_experimental_undrop_table_query, true) \
/** The section above is for obsolete settings. Do not add anything there. */

View File

@ -124,6 +124,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}},
{"23.4", {{"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}}},
{"23.4", {{"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}},
{"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}},
{"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}},
{"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"},
{"enable_positional_arguments", false, true, "Enable positional arguments feature by default"},

View File

@ -459,22 +459,25 @@ template <typename Enum, typename Traits>
struct SettingFieldMultiEnum
{
using EnumType = Enum;
using ValueType = MultiEnum<Enum>;
using StorageType = typename ValueType::StorageType;
using ValueType = std::vector<Enum>;
ValueType value;
bool changed = false;
explicit SettingFieldMultiEnum(ValueType v = ValueType{}) : value{v} {}
explicit SettingFieldMultiEnum(EnumType e) : value{e} {}
explicit SettingFieldMultiEnum(StorageType s) : value(s) {}
explicit SettingFieldMultiEnum(const Field & f) : value(parseValueFromString(f.safeGet<const String &>())) {}
operator ValueType() const { return value; } /// NOLINT
explicit operator StorageType() const { return value.getValue(); }
explicit operator Field() const { return toString(); }
operator MultiEnum<EnumType>() const /// NOLINT
{
MultiEnum<EnumType> res;
for (const auto & v : value)
res.set(v);
return res;
}
SettingFieldMultiEnum & operator= (StorageType x) { changed = true; value.setValue(x); return *this; }
SettingFieldMultiEnum & operator= (ValueType x) { changed = true; value = x; return *this; }
SettingFieldMultiEnum & operator= (const Field & x) { parseFromString(x.safeGet<const String &>()); return *this; }
@ -482,14 +485,10 @@ struct SettingFieldMultiEnum
{
static const String separator = ",";
String result;
for (StorageType i = 0; i < Traits::getEnumSize(); ++i)
for (const auto & v : value)
{
const auto v = static_cast<Enum>(i);
if (value.isSet(v))
{
result += Traits::toString(v);
result += separator;
}
result += Traits::toString(v);
result += separator;
}
if (!result.empty())
@ -508,6 +507,7 @@ private:
static const String separators=", ";
ValueType result;
std::unordered_set<EnumType> values_set;
//to avoid allocating memory on substr()
const std::string_view str_view{str};
@ -519,7 +519,12 @@ private:
if (value_end == std::string::npos)
value_end = str_view.size();
result.set(Traits::fromString(str_view.substr(value_start, value_end - value_start)));
auto value = Traits::fromString(str_view.substr(value_start, value_end - value_start));
/// Deduplicate values
auto [_, inserted] = values_set.emplace(value);
if (inserted)
result.push_back(value);
value_start = str_view.find_first_not_of(separators, value_end);
}
@ -554,7 +559,8 @@ void SettingFieldMultiEnum<EnumT, Traits>::readBinary(ReadBuffer & in)
static EnumType fromString(std::string_view str); \
}; \
\
using SettingField##NEW_NAME = SettingFieldMultiEnum<ENUM_TYPE, SettingField##NEW_NAME##Traits>;
using SettingField##NEW_NAME = SettingFieldMultiEnum<ENUM_TYPE, SettingField##NEW_NAME##Traits>; \
using NEW_NAME##List = typename SettingField##NEW_NAME::ValueType;
/// NOLINTNEXTLINE
#define IMPLEMENT_SETTING_MULTI_ENUM(ENUM_TYPE, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) \

View File

@ -27,16 +27,16 @@ bool operator== (const Field & f, const SettingFieldMultiEnum<Enum, Traits> & se
}
GTEST_TEST(MySQLDataTypesSupport, WithDefault)
GTEST_TEST(SettingMySQLDataTypesSupport, WithDefault)
{
// Setting can be default-initialized and that means all values are unset.
const SettingMySQLDataTypesSupport setting;
ASSERT_EQ(0, setting.value.getValue());
ASSERT_EQ(std::vector<MySQLDataTypesSupport>{}, setting.value);
ASSERT_EQ("", setting.toString());
ASSERT_EQ(setting, Field(""));
ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64));
ASSERT_FALSE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_FALSE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DATETIME64));
}
GTEST_TEST(SettingMySQLDataTypesSupport, WithDECIMAL)
@ -44,10 +44,10 @@ GTEST_TEST(SettingMySQLDataTypesSupport, WithDECIMAL)
// Setting can be initialized with MySQLDataTypesSupport::DECIMAL
// and this value can be obtained in varios forms with getters.
const SettingMySQLDataTypesSupport setting(MySQLDataTypesSupport::DECIMAL);
ASSERT_EQ(1, setting.value.getValue());
ASSERT_EQ(std::vector<MySQLDataTypesSupport>{MySQLDataTypesSupport::DECIMAL}, setting.value);
ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64));
ASSERT_TRUE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_FALSE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DATETIME64));
ASSERT_EQ("decimal", setting.toString());
ASSERT_EQ(Field("decimal"), setting);
@ -57,95 +57,69 @@ GTEST_TEST(SettingMySQLDataTypesSupport, WithDATE)
{
SettingMySQLDataTypesSupport setting;
setting = String("date2Date32");
ASSERT_EQ(4, setting.value.getValue());
ASSERT_EQ(std::vector<MySQLDataTypesSupport>{MySQLDataTypesSupport::DATE2DATE32}, setting.value);
ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DATE2DATE32));
ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64));
ASSERT_TRUE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DATE2DATE32));
ASSERT_FALSE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_FALSE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DATETIME64));
ASSERT_EQ("date2Date32", setting.toString());
ASSERT_EQ(Field("date2Date32"), setting);
setting = String("date2String");
ASSERT_EQ(8, setting.value.getValue());
ASSERT_EQ(std::vector<MySQLDataTypesSupport>{MySQLDataTypesSupport::DATE2STRING}, setting.value);
ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DATE2STRING));
ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATE2DATE32));
ASSERT_TRUE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DATE2STRING));
ASSERT_FALSE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DATE2DATE32));
ASSERT_EQ("date2String", setting.toString());
ASSERT_EQ(Field("date2String"), setting);
}
GTEST_TEST(SettingMySQLDataTypesSupport, With1)
{
// Setting can be initialized with int value corresponding to DECIMAL
// and rest of the test is the same as for that value.
const SettingMySQLDataTypesSupport setting(1u);
ASSERT_EQ(1, setting.value.getValue());
ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64));
ASSERT_EQ("decimal", setting.toString());
ASSERT_EQ(Field("decimal"), setting);
}
GTEST_TEST(SettingMySQLDataTypesSupport, WithMultipleValues)
{
// Setting can be initialized with int value corresponding to (DECIMAL | DATETIME64)
const SettingMySQLDataTypesSupport setting(3u);
ASSERT_EQ(3, setting.value.getValue());
ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64));
ASSERT_EQ("decimal,datetime64", setting.toString());
ASSERT_EQ(Field("decimal,datetime64"), setting);
}
GTEST_TEST(SettingMySQLDataTypesSupport, SetString)
{
SettingMySQLDataTypesSupport setting;
setting = String("decimal");
ASSERT_TRUE(setting.changed);
ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64));
ASSERT_TRUE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_FALSE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DATETIME64));
ASSERT_EQ("decimal", setting.toString());
ASSERT_EQ(Field("decimal"), setting);
setting = "datetime64,decimal";
ASSERT_TRUE(setting.changed);
ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64));
ASSERT_EQ("decimal,datetime64", setting.toString());
ASSERT_EQ(Field("decimal,datetime64"), setting);
ASSERT_TRUE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_TRUE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DATETIME64));
ASSERT_EQ("datetime64,decimal", setting.toString());
ASSERT_EQ(Field("datetime64,decimal"), setting);
// comma with spaces
setting = " datetime64 , decimal "; /// bad punctuation is ok here
ASSERT_TRUE(setting.changed);
ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64));
ASSERT_EQ("decimal,datetime64", setting.toString());
ASSERT_EQ(Field("decimal,datetime64"), setting);
ASSERT_TRUE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_TRUE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DATETIME64));
ASSERT_EQ("datetime64,decimal", setting.toString());
ASSERT_EQ(Field("datetime64,decimal"), setting);
setting = String(",,,,,,,, ,decimal");
ASSERT_TRUE(setting.changed);
ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64));
ASSERT_TRUE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_FALSE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DATETIME64));
ASSERT_EQ("decimal", setting.toString());
ASSERT_EQ(Field("decimal"), setting);
setting = String(",decimal,decimal,decimal,decimal,decimal,decimal,decimal,decimal,decimal,");
ASSERT_TRUE(setting.changed); //since previous value was DECIMAL
ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64));
ASSERT_TRUE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_FALSE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DATETIME64));
ASSERT_EQ("decimal", setting.toString());
ASSERT_EQ(Field("decimal"), setting);
setting = String("");
ASSERT_TRUE(setting.changed);
ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64));
ASSERT_FALSE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_FALSE(MultiEnum<MySQLDataTypesSupport>(setting).isSet(MySQLDataTypesSupport::DATETIME64));
ASSERT_EQ("", setting.toString());
ASSERT_EQ(Field(""), setting);
}
@ -156,13 +130,13 @@ GTEST_TEST(SettingMySQLDataTypesSupport, SetInvalidString)
SettingMySQLDataTypesSupport setting;
EXPECT_THROW(setting = String("FOOBAR"), Exception);
ASSERT_FALSE(setting.changed);
ASSERT_EQ(0, setting.value.getValue());
ASSERT_EQ(std::vector<MySQLDataTypesSupport>{}, setting.value);
EXPECT_THROW(setting = String("decimal,datetime64,123"), Exception);
ASSERT_FALSE(setting.changed);
ASSERT_EQ(0, setting.value.getValue());
ASSERT_EQ(std::vector<MySQLDataTypesSupport>{}, setting.value);
EXPECT_NO_THROW(setting = String(", "));
ASSERT_TRUE(setting.changed);
ASSERT_EQ(0, setting.value.getValue());
ASSERT_EQ(std::vector<MySQLDataTypesSupport>{}, setting.value);
}

View File

@ -60,22 +60,22 @@ void MetadataStorageFromStaticFilesWebServer::assertExists(const std::string & p
/// Returns true if `path` exists in the web object storage and is a regular file.
/// Existence is checked up-front (assertExists throws on a missing path); the
/// metadata lookup and its locking are delegated to WebObjectStorage::getFileInfo().
bool MetadataStorageFromStaticFilesWebServer::isFile(const std::string & path) const
{
    assertExists(path);
    auto file_info = object_storage.getFileInfo(path);
    return file_info.type == WebObjectStorage::FileType::File;
}
/// Returns true if `path` exists in the web object storage and is a directory.
/// Existence is checked up-front (assertExists throws on a missing path); the
/// metadata lookup and its locking are delegated to WebObjectStorage::getFileInfo().
bool MetadataStorageFromStaticFilesWebServer::isDirectory(const std::string & path) const
{
    assertExists(path);
    auto file_info = object_storage.getFileInfo(path);
    return file_info.type == WebObjectStorage::FileType::Directory;
}
/// Returns the size in bytes of the file at `path`.
/// Existence is checked up-front (assertExists throws on a missing path); the
/// metadata lookup and its locking are delegated to WebObjectStorage::getFileInfo().
uint64_t MetadataStorageFromStaticFilesWebServer::getFileSize(const String & path) const
{
    assertExists(path);
    auto file_info = object_storage.getFileInfo(path);
    return file_info.size;
}
StoredObjects MetadataStorageFromStaticFilesWebServer::getStorageObjects(const std::string & path) const
@ -86,8 +86,8 @@ StoredObjects MetadataStorageFromStaticFilesWebServer::getStorageObjects(const s
std::string remote_path = fs_path.parent_path() / (escapeForFileName(fs_path.stem()) + fs_path.extension().string());
remote_path = remote_path.substr(object_storage.url.size());
std::shared_lock shared_lock(object_storage.metadata_mutex);
return {StoredObject(remote_path, path, object_storage.files.at(path).size)};
auto file_info = object_storage.getFileInfo(path);
return {StoredObject(remote_path, path, file_info.size)};
}
std::vector<std::string> MetadataStorageFromStaticFilesWebServer::listDirectory(const std::string & path) const

View File

@ -28,6 +28,7 @@ namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int FILE_DOESNT_EXIST;
}
void WebObjectStorage::initialize(const String & uri_path, const std::unique_lock<std::shared_mutex> & lock) const
@ -124,7 +125,19 @@ bool WebObjectStorage::exists(const StoredObject & object) const
/// A path exists iff its file metadata can be resolved.
bool WebObjectStorage::exists(const std::string & path) const
{
    LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Checking existence of path: {}", path);
    return tryGetFileInfo(path).has_value();
}
/// Resolves file metadata for `path`.
/// Unlike tryGetFileInfo(), a missing path is an error: throws FILE_DOESNT_EXIST.
WebObjectStorage::FileData WebObjectStorage::getFileInfo(const String & path) const
{
    if (auto file_info = tryGetFileInfo(path))
        return *file_info;
    throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "No such file: {}", path);
}
std::optional<WebObjectStorage::FileData> WebObjectStorage::tryGetFileInfo(const String & path) const
{
std::shared_lock shared_lock(metadata_mutex);
if (files.find(path) == files.end())
@ -145,10 +158,10 @@ bool WebObjectStorage::exists(const std::string & path) const
}
if (files.empty())
return false;
return std::nullopt;
if (files.contains(path))
return true;
if (auto it = files.find(path); it != files.end())
return it->second;
/// `object_storage.files` contains files + directories only inside `metadata_path / uuid_3_digit / uuid /`
/// (specific table files only), but we need to be able to also tell if `exists(<metadata_path>)`, for example.
@ -158,7 +171,7 @@ bool WebObjectStorage::exists(const std::string & path) const
);
if (it == files.end())
return false;
return std::nullopt;
if (startsWith(it->first, path)
|| (it != files.begin() && startsWith(std::prev(it)->first, path)))
@ -166,20 +179,15 @@ bool WebObjectStorage::exists(const std::string & path) const
shared_lock.unlock();
std::unique_lock unique_lock(metadata_mutex);
/// The code relies on invariant that if this function returned true
/// the file exists in files.
/// In this case we have a directory which doesn't explicitly exists (like store/xxx/yyy)
/// ^^^^^
/// Adding it to the files
/// Add this directory path not files cache to simplify further checks for this path.
files.emplace(std::make_pair(path, FileData({.type = FileType::Directory})));
unique_lock.unlock();
shared_lock.lock();
return true;
return FileData{ .type = FileType::Directory };
}
return false;
return std::nullopt;
}
std::unique_ptr<ReadBufferFromFileBase> WebObjectStorage::readObjects( /// NOLINT

View File

@ -118,6 +118,9 @@ protected:
mutable Files files;
mutable std::shared_mutex metadata_mutex;
std::optional<FileData> tryGetFileInfo(const String & path) const;
FileData getFileInfo(const String & path) const;
private:
void initialize(const String & path, const std::unique_lock<std::shared_mutex> &) const;

View File

@ -151,6 +151,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.protobuf.output_nullables_with_google_wrappers = settings.output_format_protobuf_nullables_with_google_wrappers;
format_settings.protobuf.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference;
format_settings.protobuf.use_autogenerated_schema = settings.format_protobuf_use_autogenerated_schema;
format_settings.protobuf.google_protos_path = context->getGoogleProtosPath();
format_settings.regexp.escaping_rule = settings.format_regexp_escaping_rule;
format_settings.regexp.regexp = settings.format_regexp;
format_settings.regexp.skip_unmatched = settings.format_regexp_skip_unmatched;
@ -292,7 +293,7 @@ InputFormatPtr FormatFactory::getInput(
// Decide whether to use ParallelParsingInputFormat.
bool parallel_parsing =
max_parsing_threads > 1 && settings.input_format_parallel_parsing && creators.file_segmentation_engine &&
max_parsing_threads > 1 && settings.input_format_parallel_parsing && creators.file_segmentation_engine_creator &&
!creators.random_access_input_creator && !need_only_count;
if (settings.max_memory_usage && settings.min_chunk_bytes_for_parallel_parsing * max_parsing_threads * 2 > settings.max_memory_usage)
@ -322,7 +323,7 @@ InputFormatPtr FormatFactory::getInput(
{ return input_getter(input, sample, row_input_format_params, format_settings); };
ParallelParsingInputFormat::Params params{
buf, sample, parser_creator, creators.file_segmentation_engine, name, max_parsing_threads,
buf, sample, parser_creator, creators.file_segmentation_engine_creator, name, format_settings, max_parsing_threads,
settings.min_chunk_bytes_for_parallel_parsing, max_block_size, context->getApplicationType() == Context::ApplicationType::SERVER};
format = std::make_shared<ParallelParsingInputFormat>(params);
@ -668,10 +669,22 @@ String FormatFactory::getFormatFromFileDescriptor(int fd)
/// Registers a fixed (settings-independent) file segmentation engine for format `name`.
/// Both registration paths share the single `file_segmentation_engine_creator` slot,
/// so the plain engine is wrapped into a creator that ignores the FormatSettings.
/// Throws LOGICAL_ERROR if an engine was already registered under this name.
void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegmentationEngine file_segmentation_engine)
{
    auto & target = dict[name].file_segmentation_engine_creator;
    if (target)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: File segmentation engine {} is already registered", name);
    /// Adapt the settings-independent engine to the creator interface.
    auto creator = [file_segmentation_engine](const FormatSettings &)
    {
        return file_segmentation_engine;
    };
    target = std::move(creator);
}
/// Registers a settings-aware factory of file segmentation engines for format `name`.
/// Throws LOGICAL_ERROR if a creator was already registered under this name.
void FormatFactory::registerFileSegmentationEngineCreator(const String & name, FileSegmentationEngineCreator file_segmentation_engine_creator)
{
    auto & slot = dict[name].file_segmentation_engine_creator;
    if (slot)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: File segmentation engine creator {} is already registered", name);
    slot = std::move(file_segmentation_engine_creator);
}
void FormatFactory::registerSchemaReader(const String & name, SchemaReaderCreator schema_reader_creator)

View File

@ -71,6 +71,9 @@ public:
size_t min_bytes,
size_t max_rows)>;
using FileSegmentationEngineCreator = std::function<FileSegmentationEngine(
const FormatSettings & settings)>;
private:
// On the input side, there are two kinds of formats:
// * InputCreator - formats parsed sequentially, e.g. CSV. Almost all formats are like this.
@ -132,7 +135,7 @@ private:
InputCreator input_creator;
RandomAccessInputCreator random_access_input_creator;
OutputCreator output_creator;
FileSegmentationEngine file_segmentation_engine;
FileSegmentationEngineCreator file_segmentation_engine_creator;
SchemaReaderCreator schema_reader_creator;
ExternalSchemaReaderCreator external_schema_reader_creator;
bool supports_parallel_formatting{false};
@ -203,6 +206,8 @@ public:
void registerFileSegmentationEngine(const String & name, FileSegmentationEngine file_segmentation_engine);
void registerFileSegmentationEngineCreator(const String & name, FileSegmentationEngineCreator file_segmentation_engine_creator);
void registerNonTrivialPrefixAndSuffixChecker(const String & name, NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker);
void registerAppendSupportChecker(const String & name, AppendSupportChecker append_support_checker);

View File

@ -295,6 +295,7 @@ struct FormatSettings
bool allow_multiple_rows_without_delimiter = false;
bool skip_fields_with_unsupported_types_in_schema_inference = false;
bool use_autogenerated_schema = true;
std::string google_protos_path;
} protobuf;
struct

View File

@ -30,11 +30,11 @@ void ProtobufSchemas::clear()
class ProtobufSchemas::ImporterWithSourceTree : public google::protobuf::compiler::MultiFileErrorCollector
{
public:
/// Builds a protobuf importer over a virtual source tree.
/// Maps the user's schema directory first and then `google_protos_path` onto the
/// tree root, so that imports such as "google/protobuf/timestamp.proto" resolve
/// against the bundled well-known proto files.
explicit ImporterWithSourceTree(const String & schema_directory, const String & google_protos_path, WithEnvelope with_envelope_)
    : importer(&disk_source_tree, this), with_envelope(with_envelope_)
{
    disk_source_tree.MapPath("", schema_directory);
    disk_source_tree.MapPath("", google_protos_path);
}
~ImporterWithSourceTree() override = default;
@ -112,12 +112,17 @@ private:
};
/// Parses the format schema, then the corresponding proto file, and returns the
/// descriptor of the requested message type. Importers are cached per schema
/// directory under `mutex`; `google_protos_path` supplies the well-known google
/// .proto files for newly created importers.
const google::protobuf::Descriptor *
ProtobufSchemas::getMessageTypeForFormatSchema(const FormatSchemaInfo & info, WithEnvelope with_envelope, const String & google_protos_path)
{
    std::lock_guard lock(mutex);
    auto it = importers.find(info.schemaDirectory());
    if (it == importers.end())
        it = importers
                 .emplace(
                     info.schemaDirectory(),
                     std::make_unique<ImporterWithSourceTree>(info.schemaDirectory(), google_protos_path, with_envelope))
                 .first;
    auto * importer = it->second.get();
    return importer->import(info.schemaPath(), info.messageName());
}

View File

@ -59,7 +59,8 @@ public:
/// Parses the format schema, then parses the corresponding proto file, and returns the descriptor of the message type.
/// The function never returns nullptr, it throws an exception if it cannot load or parse the file.
const google::protobuf::Descriptor * getMessageTypeForFormatSchema(const FormatSchemaInfo & info, WithEnvelope with_envelope);
const google::protobuf::Descriptor *
getMessageTypeForFormatSchema(const FormatSchemaInfo & info, WithEnvelope with_envelope, const String & google_protos_path);
private:
class ImporterWithSourceTree;

View File

@ -195,7 +195,11 @@ struct ArrayElementNumImpl
if (index < array_size)
{
size_t j = !negative ? (current_offset + index) : (offsets[i] - index - 1);
size_t j;
if constexpr (negative)
j = offsets[i] - index - 1;
else
j = current_offset + index;
result[i] = data[j];
if (builder)
builder.update(j);
@ -260,7 +264,7 @@ struct ArrayElementNumImpl
struct ArrayElementStringImpl
{
template <bool negative>
template <bool negative, bool used_builder>
static void vectorConst(
const ColumnString::Chars & data, const ColumnArray::Offsets & offsets, const ColumnString::Offsets & string_offsets,
const ColumnArray::Offset index,
@ -269,21 +273,31 @@ struct ArrayElementStringImpl
{
size_t size = offsets.size();
result_offsets.resize(size);
result_data.reserve(data.size());
ColumnArray::Offset current_offset = 0;
ColumnArray::Offset current_result_offset = 0;
/// get the total result bytes at first, and reduce the cost of result_data.resize.
size_t total_result_bytes = 0;
ColumnString::Chars zero_buf(1);
zero_buf.push_back(0);
std::vector<std::pair<const ColumnString::Char *, UInt64>> selected_bufs;
selected_bufs.reserve(size);
for (size_t i = 0; i < size; ++i)
{
size_t array_size = offsets[i] - current_offset;
if (index < array_size)
{
size_t adjusted_index = !negative ? index : (array_size - index - 1);
size_t adjusted_index;
if constexpr (negative)
adjusted_index = array_size - index - 1;
else
adjusted_index = index;
size_t j = current_offset + adjusted_index;
if (builder)
if constexpr (used_builder)
{
size_t j = current_offset + adjusted_index;
builder.update(j);
}
ColumnArray::Offset string_pos = current_offset == 0 && adjusted_index == 0
? 0
@ -291,30 +305,36 @@ struct ArrayElementStringImpl
ColumnArray::Offset string_size = string_offsets[current_offset + adjusted_index] - string_pos;
result_data.resize(current_result_offset + string_size);
memcpySmallAllowReadWriteOverflow15(&result_data[current_result_offset], &data[string_pos], string_size);
current_result_offset += string_size;
result_offsets[i] = current_result_offset;
total_result_bytes += string_size;
selected_bufs.emplace_back(&data[string_pos], string_size);
result_offsets[i] = total_result_bytes;
}
else
{
/// Insert an empty row.
result_data.resize(current_result_offset + 1);
result_data[current_result_offset] = 0;
current_result_offset += 1;
result_offsets[i] = current_result_offset;
total_result_bytes += 1;
selected_bufs.emplace_back(zero_buf.data(), 1);
result_offsets[i] = total_result_bytes;
if (builder)
if constexpr (used_builder)
builder.update();
}
current_offset = offsets[i];
}
ColumnArray::Offset current_result_offset = 0;
result_data.resize(total_result_bytes);
for (const auto & buf : selected_bufs)
{
memcpySmallAllowReadWriteOverflow15(&result_data[current_result_offset], buf.first, buf.second);
current_result_offset += buf.second;
}
}
/** Implementation for non-constant index.
*/
template <typename TIndex>
template <typename TIndex, bool used_builder>
static void vector(
const ColumnString::Chars & data, const ColumnArray::Offsets & offsets, const ColumnString::Offsets & string_offsets,
const PaddedPODArray<TIndex> & indices,
@ -323,10 +343,14 @@ struct ArrayElementStringImpl
{
size_t size = offsets.size();
result_offsets.resize(size);
result_data.reserve(data.size());
ColumnString::Chars zero_buf(1);
zero_buf.push_back(0);
ColumnArray::Offset current_offset = 0;
ColumnArray::Offset current_result_offset = 0;
/// get the total result bytes at first, and reduce the cost of result_data.resize.
size_t total_result_bytes = 0;
std::vector<std::pair<const ColumnString::Char *, UInt64>> selected_bufs;
selected_bufs.reserve(size);
for (size_t i = 0; i < size; ++i)
{
size_t array_size = offsets[i] - current_offset;
@ -342,35 +366,43 @@ struct ArrayElementStringImpl
if (adjusted_index < array_size)
{
size_t j = current_offset + adjusted_index;
if (builder)
if constexpr (used_builder)
{
size_t j = current_offset + adjusted_index;
builder.update(j);
}
ColumnArray::Offset string_pos = current_offset == 0 && adjusted_index == 0
? 0
: string_offsets[current_offset + adjusted_index - 1];
ColumnArray::Offset string_size = string_offsets[current_offset + adjusted_index] - string_pos;
total_result_bytes += string_size;
selected_bufs.emplace_back(&data[string_pos], string_size);
result_data.resize(current_result_offset + string_size);
memcpySmallAllowReadWriteOverflow15(&result_data[current_result_offset], &data[string_pos], string_size);
current_result_offset += string_size;
result_offsets[i] = current_result_offset;
result_offsets[i] = total_result_bytes;
}
else
{
/// Insert empty string
result_data.resize(current_result_offset + 1);
result_data[current_result_offset] = 0;
current_result_offset += 1;
result_offsets[i] = current_result_offset;
total_result_bytes += 1;
selected_bufs.emplace_back(zero_buf.data(), 1);
result_offsets[i] = total_result_bytes;
if (builder)
if constexpr (used_builder)
builder.update();
}
current_offset = offsets[i];
}
ColumnArray::Offset current_result_offset = 0;
result_data.resize(total_result_bytes);
for (const auto & buf : selected_bufs)
{
memcpySmallAllowReadWriteOverflow15(&result_data[current_result_offset], buf.first, buf.second);
current_result_offset += buf.second;
}
}
};
@ -542,23 +574,47 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument
if (index.getType() == Field::Types::UInt64
|| (index.getType() == Field::Types::Int64 && index.get<Int64>() >= 0))
ArrayElementStringImpl::vectorConst<false>(
col_nested->getChars(),
col_array->getOffsets(),
col_nested->getOffsets(),
index.get<UInt64>() - 1,
col_res->getChars(),
col_res->getOffsets(),
builder);
{
if (builder)
ArrayElementStringImpl::vectorConst<false, true>(
col_nested->getChars(),
col_array->getOffsets(),
col_nested->getOffsets(),
index.get<UInt64>() - 1,
col_res->getChars(),
col_res->getOffsets(),
builder);
else
ArrayElementStringImpl::vectorConst<false, false>(
col_nested->getChars(),
col_array->getOffsets(),
col_nested->getOffsets(),
index.get<UInt64>() - 1,
col_res->getChars(),
col_res->getOffsets(),
builder);
}
else if (index.getType() == Field::Types::Int64)
ArrayElementStringImpl::vectorConst<true>(
col_nested->getChars(),
col_array->getOffsets(),
col_nested->getOffsets(),
-(UInt64(index.get<Int64>()) + 1),
col_res->getChars(),
col_res->getOffsets(),
builder);
{
if (builder)
ArrayElementStringImpl::vectorConst<true, true>(
col_nested->getChars(),
col_array->getOffsets(),
col_nested->getOffsets(),
-(UInt64(index.get<Int64>()) + 1),
col_res->getChars(),
col_res->getOffsets(),
builder);
else
ArrayElementStringImpl::vectorConst<true, false>(
col_nested->getChars(),
col_array->getOffsets(),
col_nested->getOffsets(),
-(UInt64(index.get<Int64>()) + 1),
col_res->getChars(),
col_res->getOffsets(),
builder);
}
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal type of array index");
@ -580,14 +636,25 @@ ColumnPtr FunctionArrayElement::executeString(
return nullptr;
auto col_res = ColumnString::create();
ArrayElementStringImpl::vector<IndexType>(
col_nested->getChars(),
col_array->getOffsets(),
col_nested->getOffsets(),
indices,
col_res->getChars(),
col_res->getOffsets(),
builder);
if (builder)
ArrayElementStringImpl::vector<IndexType, true>(
col_nested->getChars(),
col_array->getOffsets(),
col_nested->getOffsets(),
indices,
col_res->getChars(),
col_res->getOffsets(),
builder);
else
ArrayElementStringImpl::vector<IndexType, false>(
col_nested->getChars(),
col_array->getOffsets(),
col_nested->getOffsets(),
indices,
col_res->getChars(),
col_res->getOffsets(),
builder);
return col_res;
}

View File

@ -322,6 +322,18 @@ private:
return writeNumber2(dest, ToMonthImpl::execute(source, timezone));
}
size_t mysqlMonthWithoutLeadingZero(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
{
    /// Month number without a leading zero: writes "1".."12" and returns the number of bytes written.
    const auto month = ToMonthImpl::execute(source, timezone);
    if (month >= 10)
        return writeNumber2(dest, month);
    *dest = '0' + month;
    return 1;
}
static size_t monthOfYearText(char * dest, Time source, bool abbreviate, UInt64, UInt32, const DateLUTImpl & timezone)
{
auto month = ToMonthImpl::execute(source, timezone);
@ -404,10 +416,36 @@ private:
return writeNumber2(dest, ToHourImpl::execute(source, timezone));
}
size_t mysqlHour24WithoutLeadingZero(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
{
    /// Hour on a 24h clock without a leading zero: writes "0".."23" and returns the number of bytes written.
    const auto hour = ToHourImpl::execute(source, timezone);
    if (hour >= 10)
        return writeNumber2(dest, hour);
    *dest = '0' + hour;
    return 1;
}
size_t mysqlHour12(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
{
    /// Hour on a 12h clock with a leading zero: "01".."12" (hour 0 prints as "12").
    /// The original span contained two equivalent copies of this computation, the second
    /// unreachable after the first `return` (merge artifact); keep a single copy.
    auto hour = ToHourImpl::execute(source, timezone);
    hour = (hour == 0) ? 12 : (hour > 12 ? hour - 12 : hour);
    return writeNumber2(dest, hour);
}
size_t mysqlHour12WithoutLeadingZero(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
{
    /// Hour on a 12h clock without a leading zero: writes "1".."12" (hour 0 maps to 12)
    /// and returns the number of bytes written.
    auto hour = ToHourImpl::execute(source, timezone);
    if (hour > 12)
        hour -= 12;
    else if (hour == 0)
        hour = 12;
    if (hour >= 10)
        return writeNumber2(dest, hour);
    *dest = '0' + hour;
    return 1;
}
size_t mysqlMinute(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
@ -689,10 +727,11 @@ private:
throw Exception(ErrorCodes::BAD_ARGUMENTS, "'%' must not be the last character in the format string, use '%%' instead");
}
static bool containsOnlyFixedWidthMySQLFormatters(std::string_view format, bool mysql_M_is_month_name)
static bool containsOnlyFixedWidthMySQLFormatters(std::string_view format, bool mysql_M_is_month_name, bool mysql_format_ckl_without_leading_zeros)
{
static constexpr std::array variable_width_formatter = {'W'};
static constexpr std::array variable_width_formatter_M_is_month_name = {'W', 'M'};
static constexpr std::array variable_width_formatter_leading_zeros = {'c', 'l', 'k'};
for (size_t i = 0; i < format.size(); ++i)
{
@ -708,6 +747,13 @@ private:
[&](char c){ return c == format[i + 1]; }))
return false;
}
if (mysql_format_ckl_without_leading_zeros)
{
if (std::any_of(
variable_width_formatter_leading_zeros.begin(), variable_width_formatter_leading_zeros.end(),
[&](char c){ return c == format[i + 1]; }))
return false;
}
else
{
if (std::any_of(
@ -727,6 +773,7 @@ private:
const bool mysql_M_is_month_name;
const bool mysql_f_prints_single_zero;
const bool mysql_format_ckl_without_leading_zeros;
public:
static constexpr auto name = Name::name;
@ -736,6 +783,7 @@ public:
explicit FunctionFormatDateTimeImpl(ContextPtr context)
: mysql_M_is_month_name(context->getSettings().formatdatetime_parsedatetime_m_is_month_name)
, mysql_f_prints_single_zero(context->getSettings().formatdatetime_f_prints_single_zero)
, mysql_format_ckl_without_leading_zeros(context->getSettings().formatdatetime_format_without_leading_zeros)
{
}
@ -885,7 +933,7 @@ public:
/// column rows are NOT populated with the template and left uninitialized. We run the normal instructions for formatters AND
/// instructions that copy literal characters before/between/after formatters. As a result, each byte of each result row is
/// written which is obviously slow.
bool mysql_with_only_fixed_length_formatters = (format_syntax == FormatSyntax::MySQL) ? containsOnlyFixedWidthMySQLFormatters(format, mysql_M_is_month_name) : false;
bool mysql_with_only_fixed_length_formatters = (format_syntax == FormatSyntax::MySQL) ? containsOnlyFixedWidthMySQLFormatters(format, mysql_M_is_month_name, mysql_format_ckl_without_leading_zeros) : false;
using T = typename InstructionValueTypeMap<DataType>::InstructionValueType;
std::vector<Instruction<T>> instructions;
@ -1077,12 +1125,22 @@ public:
break;
}
// Month as a integer number (01-12)
// Month as a integer number:
// - if formatdatetime_format_without_leading_zeros = true: prints without leading zero, i.e. 1-12
// - otherwise: print with leading zeros: i.e. 01-12
case 'c':
{
Instruction<T> instruction;
instruction.setMysqlFunc(&Instruction<T>::mysqlMonth);
instructions.push_back(std::move(instruction));
if (mysql_format_ckl_without_leading_zeros)
{
instruction.setMysqlFunc(&Instruction<T>::mysqlMonthWithoutLeadingZero);
instructions.push_back(std::move(instruction));
}
else
{
instruction.setMysqlFunc(&Instruction<T>::mysqlMonth);
instructions.push_back(std::move(instruction));
}
out_template += "00";
break;
}
@ -1391,20 +1449,30 @@ public:
break;
}
// Hour in 24h format (00-23)
// Hour in 24h format:
// - if formatdatetime_format_without_leading_zeros = true: prints without leading zero, i.e. 0-23
// - otherwise: print with leading zeros: i.e. 00-23
case 'k':
{
static constexpr std::string_view val = "00";
add_time_instruction(&Instruction<T>::mysqlHour24, val);
if (mysql_format_ckl_without_leading_zeros)
add_time_instruction(&Instruction<T>::mysqlHour24WithoutLeadingZero, val);
else
add_time_instruction(&Instruction<T>::mysqlHour24, val);
out_template += val;
break;
}
// Hour in 12h format (01-12)
// Hour in 12h format:
// - if formatdatetime_format_without_leading_zeros = true: prints without leading zero, i.e. 1-12
// - otherwise: print with leading zeros: i.e. 01-12
case 'l':
{
static constexpr std::string_view val = "12";
add_time_instruction(&Instruction<T>::mysqlHour12, val);
if (mysql_format_ckl_without_leading_zeros)
add_time_instruction(&Instruction<T>::mysqlHour12WithoutLeadingZero, val);
else
add_time_instruction(&Instruction<T>::mysqlHour12, val);
out_template += val;
break;
}

View File

@ -0,0 +1,116 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnString.h>
#include <Interpreters/Context.h>
#include <Common/CurrentThread.h>
#include "Disks/DiskType.h"
#include "Interpreters/Context_fwd.h"
#include <Core/Field.h>
#include <Poco/Net/NameValueCollection.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
extern const int FUNCTION_NOT_ALLOWED;
extern const int BAD_ARGUMENTS;
}
namespace
{
/** Get the value of a parameter from the HTTP request headers.
  * If there is no such parameter, or the method of the request is not
  * HTTP, the function will throw an exception.
  */
/// Implements the SQL function getClientHTTPHeader('<name>'): returns the value of the
/// named header of the HTTP request that carries the current query.
/// Throws if the feature is disabled in the server config, if the header is absent,
/// or if the header is in the configured list of forbidden headers.
class FunctionGetClientHTTPHeader : public IFunction, WithContext
{
public:
    explicit FunctionGetClientHTTPHeader(ContextPtr context_): WithContext(context_) {}

    static constexpr auto name = "getClientHTTPHeader";

    static FunctionPtr create(ContextPtr context_)
    {
        return std::make_shared<FunctionGetClientHTTPHeader>(context_);
    }

    bool useDefaultImplementationForConstants() const override { return true; }

    String getName() const override { return name; }

    /// The result depends on the client's request, not only on the arguments.
    bool isDeterministic() const override { return false; }

    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }

    size_t getNumberOfArguments() const override
    {
        return 1;
    }

    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        /// The function must be explicitly enabled in the server configuration.
        if (!getContext()->allowGetHTTPHeaderFunction())
            throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "The function {} is not enabled, you can set allow_get_client_http_header in config file.", getName());

        if (!isString(arguments[0]))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The argument of function {} must have String type", getName());

        return std::make_shared<DataTypeString>();
    }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
    {
        const auto & client_info = getContext()->getClientInfo();
        const auto & method = client_info.http_method;
        const auto & headers = client_info.headers;

        const IColumn * arg_column = arguments[0].column.get();
        const ColumnString * arg_string = checkAndGetColumn<ColumnString>(arg_column);
        if (!arg_string)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The argument of function {} must be constant String", getName());

        /// Non-HTTP clients (and HTTP methods other than GET/POST) carry no headers: return default values.
        if (method != ClientInfo::HTTPMethod::GET && method != ClientInfo::HTTPMethod::POST)
            return result_type->createColumnConstWithDefaultValue(input_rows_count);

        auto result_column = ColumnString::create();

        /// Header names the administrator forbade reading via this function (from the server config).
        const std::unordered_set<String> & forbidden_header_list = getContext()->getClientHTTPHeaderForbiddenHeaders();

        for (size_t row = 0; row < input_rows_count; ++row)
        {
            auto header_name = arg_string->getDataAt(row).toString();

            if (!headers.has(header_name))
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} is not in HTTP request headers.", header_name);

            /// Refuse to disclose forbidden headers. (Removed an unused local `default_value`
            /// and an empty `private:` section present in the original.)
            auto it = forbidden_header_list.find(header_name);
            if (it != forbidden_header_list.end())
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "The header {} is in get_client_http_header_forbidden_headers, you can config it in config file.", header_name);

            const String & value = headers[header_name];
            result_column->insertData(value.data(), value.size());
        }

        return result_column;
    }
};
}
/// Registers getClientHTTPHeader in the function factory under its default name.
REGISTER_FUNCTION(GetHttpHeader)
{
factory.registerFunction<FunctionGetClientHTTPHeader>();
}
}

View File

@ -466,12 +466,14 @@ namespace
{
public:
const bool mysql_M_is_month_name;
const bool mysql_parse_ckl_without_leading_zeros;
static constexpr auto name = Name::name;
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionParseDateTimeImpl>(context); }
explicit FunctionParseDateTimeImpl(ContextPtr context)
: mysql_M_is_month_name(context->getSettings().formatdatetime_parsedatetime_m_is_month_name)
, mysql_parse_ckl_without_leading_zeros(context->getSettings().parsedatetime_parse_without_leading_zeros)
{
}
@ -835,6 +837,14 @@ namespace
return cur;
}
static Pos mysqlMonthWithoutLeadingZero(Pos cur, Pos end, const String & fragment, DateTime & date)
{
    /// Parse a month number of one or two digits (no leading zero required) and store it.
    Int32 parsed_month = 0;
    cur = readNumberWithVariableLength(cur, end, false, false, false, 1, 2, fragment, parsed_month);
    date.setMonth(parsed_month);
    return cur;
}
static Pos mysqlCentury(Pos cur, Pos end, const String & fragment, DateTime & date)
{
Int32 century;
@ -1131,6 +1141,14 @@ namespace
return cur;
}
static Pos mysqlHour12WithoutLeadingZero(Pos cur, Pos end, const String & fragment, DateTime & date)
{
    /// Parse an hour of one or two digits (no leading zero required).
    /// The two flags passed to setHour match the fixed-width 12h-clock parser.
    Int32 parsed_hour = 0;
    cur = readNumberWithVariableLength(cur, end, false, false, false, 1, 2, fragment, parsed_hour);
    date.setHour(parsed_hour, true, true);
    return cur;
}
static Pos mysqlHour24(Pos cur, Pos end, const String & fragment, DateTime & date)
{
Int32 hour;
@ -1139,6 +1157,14 @@ namespace
return cur;
}
static Pos mysqlHour24WithoutLeadingZero(Pos cur, Pos end, const String & fragment, DateTime & date)
{
    /// Parse an hour of one or two digits (no leading zero required).
    /// The two flags passed to setHour match the fixed-width 24h-clock parser.
    Int32 parsed_hour = 0;
    cur = readNumberWithVariableLength(cur, end, false, false, false, 1, 2, fragment, parsed_hour);
    date.setHour(parsed_hour, false, false);
    return cur;
}
static Pos readNumberWithVariableLength(
Pos cur,
Pos end,
@ -1490,9 +1516,14 @@ namespace
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMonthOfYearTextShort));
break;
// Month as a decimal number (01-12)
// Month as a decimal number:
// - if parsedatetime_parse_without_leading_zeros = true: possibly without leading zero, i.e. 1-12
// - else: with leading zero required, i.e. 01-12
case 'c':
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMonth));
if (mysql_parse_ckl_without_leading_zeros)
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMonthWithoutLeadingZero));
else
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMonth));
break;
// Year, divided by 100, zero-padded
@ -1645,14 +1676,24 @@ namespace
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour12));
break;
// Hour in 24h format (00-23)
// Hour in 24h format:
// - if parsedatetime_parse_without_leading_zeros = true, possibly without leading zero: i.e. 0-23
// - else with leading zero required: i.e. 00-23
case 'k':
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour24));
if (mysql_parse_ckl_without_leading_zeros)
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour24WithoutLeadingZero));
else
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour24));
break;
// Hour in 12h format (01-12)
// Hour in 12h format:
// - if parsedatetime_parse_without_leading_zeros = true: possibly without leading zero, i.e. 1-12
// - else with leading zero required: i.e. 01-12
case 'l':
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour12));
if (mysql_parse_ckl_without_leading_zeros)
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour12WithoutLeadingZero));
else
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour12));
break;
case 't':

View File

@ -225,10 +225,10 @@ private:
if constexpr (is_decimal<T>)
{
const auto * from_col = checkAndGetColumn<ColumnDecimal<T>>(arguments[0].column.get());
UInt8 from_scale = from_col->getScale();
if (from_col)
{
UInt8 from_scale = from_col->getScale();
if (precision_col_const)
vectorConstant(from_col->getData(), precision_col_const->template getValue<UInt8>(), result_chars, result_offsets, from_scale);
else if (precision_col)

View File

@ -649,7 +649,7 @@ Aws::String SSOCredentialsProvider::loadAccessTokenFile(const Aws::String & sso_
}
else
{
LOG_TRACE(logger, "Unable to open token file on path: {}", sso_access_token_path);
LOG_TEST(logger, "Unable to open token file on path: {}", sso_access_token_path);
return "";
}
}

View File

@ -26,7 +26,7 @@ struct URIConverter
static void modifyURI(Poco::URI & uri, std::unordered_map<std::string, std::string> mapper)
{
    /// Substitute the "{bucket}" macro (the URI host) into the URL template mapped to this
    /// scheme, then replace the URI with the expanded template plus the original path+query.
    /// If no mapping is configured for the scheme, the expansion is empty and the URI is left unchanged.
    /// The original evaluated `macros.expand(mapper[uri.getScheme()])` twice; compute it once.
    Macros macros({{"bucket", uri.getHost()}});
    const auto expanded = macros.expand(mapper[uri.getScheme()]);
    if (!expanded.empty())
        uri = Poco::URI(expanded + uri.getPathAndQuery());
}
};

View File

@ -1446,18 +1446,15 @@ void NO_INLINE Aggregator::executeOnIntervalWithoutKey(
for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst)
{
if (inst->offsets)
inst->batch_that->addBatchSinglePlaceFromInterval(
inst->batch_that->addBatchSinglePlace(
inst->offsets[static_cast<ssize_t>(row_begin) - 1],
inst->offsets[row_end - 1],
res + inst->state_offset,
inst->batch_arguments, data_variants.aggregates_pool);
else
inst->batch_that->addBatchSinglePlaceFromInterval(
row_begin,
row_end,
res + inst->state_offset,
inst->batch_arguments,
data_variants.aggregates_pool);
else
inst->batch_that->addBatchSinglePlace(
row_begin, row_end, res + inst->state_offset, inst->batch_arguments, data_variants.aggregates_pool);
}
}

View File

@ -206,10 +206,19 @@ Block ArrayJoinResultIterator::next()
bool is_left = array_join->is_left;
auto cut_any_col = any_array->cut(current_row, next_row - current_row);
const auto * cut_any_array = typeid_cast<const ColumnArray *>(cut_any_col.get());
for (size_t i = 0; i < num_columns; ++i)
{
ColumnWithTypeAndName current = block.safeGetByPosition(i);
current.column = current.column->cut(current_row, next_row - current_row);
/// Reuse cut_any_col if possible to avoid unnecessary cut.
if (!is_unaligned && !is_left && current.name == *columns.begin())
{
current.column = cut_any_col;
current.type = getArrayJoinDataType(current.type);
}
else
current.column = current.column->cut(current_row, next_row - current_row);
if (columns.contains(current.name))
{

View File

@ -2,6 +2,7 @@
#include <Core/UUID.h>
#include <Poco/Net/SocketAddress.h>
#include <Poco/Net/NameValueCollection.h>
#include <base/types.h>
#include <Common/OpenTelemetryTraceContext.h>
#include <Common/VersionNumber.h>
@ -96,6 +97,7 @@ public:
/// For mysql and postgresql
UInt64 connection_id = 0;
Poco::Net::NameValueCollection headers;
/// Comma separated list of forwarded IP addresses (from X-Forwarded-For for HTTP interface).
/// It's expected that proxy appends the forwarded address to the end of the list.

View File

@ -52,6 +52,7 @@ public:
size_t getTotalByteCount() const override;
bool alwaysReturnsEmptySet() const override;
bool supportParallelJoin() const override { return true; }
IBlocksStreamPtr
getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const override;

View File

@ -82,6 +82,7 @@
#include <Storages/StorageView.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <re2/re2.h>
#include <Poco/Net/NameValueCollection.h>
#include <Poco/UUID.h>
#include <Poco/Util/Application.h>
#include <Common/Config/AbstractConfigurationComparison.h>
@ -337,12 +338,15 @@ struct ContextSharedPart : boost::noncopyable
std::optional<MergeTreeSettings> merge_tree_settings TSA_GUARDED_BY(mutex); /// Settings of MergeTree* engines.
std::optional<MergeTreeSettings> replicated_merge_tree_settings TSA_GUARDED_BY(mutex); /// Settings of ReplicatedMergeTree* engines.
std::atomic_size_t max_table_size_to_drop = 50000000000lu; /// Protects MergeTree tables from accidental DROP (50GB by default)
std::unordered_set<String> get_client_http_header_forbidden_headers;
bool allow_get_client_http_header;
std::atomic_size_t max_partition_size_to_drop = 50000000000lu; /// Protects MergeTree partitions from accidental DROP (50GB by default)
std::atomic_size_t max_database_num_to_warn = 1000lu;
std::atomic_size_t max_table_num_to_warn = 5000lu;
std::atomic_size_t max_part_num_to_warn = 100000lu;
/// No lock required for format_schema_path modified only during initialization
String format_schema_path; /// Path to a directory that contains schema files used by input formats.
String google_protos_path; /// Path to a directory that contains the proto files for the well-known Protobuf types.
mutable OnceFlag action_locks_manager_initialized;
ActionLocksManagerPtr action_locks_manager; /// Set of storages' action lockers
OnceFlag system_logs_initialized;
@ -4142,6 +4146,28 @@ void Context::checkTableCanBeDropped(const String & database, const String & tab
}
void Context::setClientHTTPHeaderForbiddenHeaders(const String & forbidden_headers)
{
    /// Parse the comma-separated list of header names that getClientHTTPHeader() must not disclose.
    std::unordered_set<String> forbidden_header_list;
    boost::split(forbidden_header_list, forbidden_headers, [](char c) { return c == ','; });
    /// Move the freshly built set instead of copying it (the original copy-assigned).
    shared->get_client_http_header_forbidden_headers = std::move(forbidden_header_list);
}
/// Enables/disables the getClientHTTPHeader() SQL function.
/// NOTE(review): plain (non-atomic) bool on shared state written without a lock —
/// presumably only set during server startup / config load; confirm.
void Context::setAllowGetHTTPHeaderFunction(bool allow_get_http_header_function)
{
shared->allow_get_client_http_header = allow_get_http_header_function;
}
/// Returns the header names that getClientHTTPHeader() must refuse to disclose.
const std::unordered_set<String> & Context::getClientHTTPHeaderForbiddenHeaders() const
{
return shared->get_client_http_header_forbidden_headers;
}
/// Whether the getClientHTTPHeader() SQL function is enabled (see setAllowGetHTTPHeaderFunction).
bool Context::allowGetHTTPHeaderFunction() const
{
return shared->allow_get_client_http_header;
}
void Context::setMaxPartitionSizeToDrop(size_t max_size)
{
// Is initialized at server startup and updated at config reload
@ -4314,6 +4340,16 @@ void Context::setFormatSchemaPath(const String & path)
shared->format_schema_path = path;
}
/// Path to the directory containing .proto files for the well-known Protobuf types.
String Context::getGoogleProtosPath() const
{
return shared->google_protos_path;
}
/// Sets the directory with .proto files for the well-known Protobuf types.
/// NOTE(review): written without a lock — presumably only called during server
/// initialization (same pattern as format_schema_path); confirm.
void Context::setGoogleProtosPath(const String & path)
{
shared->google_protos_path = path;
}
Context::SampleBlockCache & Context::getSampleBlockCache() const
{
assert(hasQueryContext());
@ -4469,12 +4505,19 @@ void Context::setClientConnectionId(uint32_t connection_id_)
client_info.connection_id = connection_id_;
}
void Context::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer)
/// Records HTTP-specific client information (method, user agent, referer, request headers)
/// on this context's ClientInfo.
void Context::setHttpClientInfo(
ClientInfo::HTTPMethod http_method,
const String & http_user_agent,
const String & http_referer,
const Poco::Net::NameValueCollection & http_headers)
{
client_info.http_method = http_method;
client_info.http_user_agent = http_user_agent;
client_info.http_referer = http_referer;
/// Client info can affect access rights, so force them to be recalculated.
need_recalculate_access = true;
/// NOTE(review): headers are stored only when non-empty — presumably so a call
/// without headers does not clobber previously recorded ones; confirm.
if (!http_headers.empty())
client_info.headers = http_headers;
}
void Context::setForwardedFor(const String & forwarded_for)

View File

@ -26,6 +26,8 @@
#include <Server/HTTP/HTTPContext.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/IStorage_fwd.h>
#include <Poco/Net/NameValueCollection.h>
#include <Core/Types.h>
#include "config.h"
@ -640,7 +642,7 @@ public:
void setClientInterface(ClientInfo::Interface interface);
void setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version);
void setClientConnectionId(uint32_t connection_id);
void setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer);
void setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer, const Poco::Net::NameValueCollection & http_headers = {});
void setForwardedFor(const String & forwarded_for);
void setQueryKind(ClientInfo::QueryKind query_kind);
void setQueryKindInitial();
@ -1076,6 +1078,11 @@ public:
/// Prevents DROP TABLE if its size is greater than max_size (50GB by default, max_size=0 turn off this check)
void setMaxTableSizeToDrop(size_t max_size);
size_t getMaxTableSizeToDrop() const;
void setClientHTTPHeaderForbiddenHeaders(const String & forbidden_headers);
/// Return the forbidden headers that users can't get via the getClientHTTPHeader function
const std::unordered_set<String> & getClientHTTPHeaderForbiddenHeaders() const;
void setAllowGetHTTPHeaderFunction(bool allow_get_http_header_function);
bool allowGetHTTPHeaderFunction() const;
void checkTableCanBeDropped(const String & database, const String & table, const size_t & table_size) const;
/// Prevents DROP PARTITION if its size is greater than max_size (50GB by default, max_size=0 turn off this check)
@ -1143,6 +1150,10 @@ public:
String getFormatSchemaPath() const;
void setFormatSchemaPath(const String & path);
/// Path to the folder containing the proto files for the well-known Protobuf types
String getGoogleProtosPath() const;
void setGoogleProtosPath(const String & path);
SampleBlockCache & getSampleBlockCache() const;
/// Query parameters for prepared statements.

View File

@ -215,7 +215,7 @@ ContextMutablePtr DDLTaskBase::makeQueryContext(ContextPtr from_context, const Z
}
bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log)
bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, const ZooKeeperPtr & zookeeper)
{
bool host_in_hostlist = false;
std::exception_ptr first_exception = nullptr;
@ -262,6 +262,22 @@ bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log)
if (!host_in_hostlist && first_exception)
{
if (zookeeper->exists(getFinishedNodePath()))
{
LOG_WARNING(log, "Failed to find current host ID, but assuming that {} is finished because {} exists. Skipping the task. Error: {}",
entry_name, getFinishedNodePath(), getExceptionMessage(first_exception, /*with_stacktrace*/ true));
return false;
}
size_t finished_nodes_count = zookeeper->getChildren(fs::path(entry_path) / "finished").size();
if (entry.hosts.size() == finished_nodes_count)
{
LOG_WARNING(log, "Failed to find current host ID, but assuming that {} is finished because the number of finished nodes ({}) "
"equals to the number of hosts in list. Skipping the task. Error: {}",
entry_name, finished_nodes_count, getExceptionMessage(first_exception, /*with_stacktrace*/ true));
return false;
}
/// We don't know for sure if we should process task or not
std::rethrow_exception(first_exception);
}

View File

@ -143,7 +143,7 @@ struct DDLTask : public DDLTaskBase
{
DDLTask(const String & name, const String & path) : DDLTaskBase(name, path) {}
bool findCurrentHostID(ContextPtr global_context, Poco::Logger * log);
bool findCurrentHostID(ContextPtr global_context, Poco::Logger * log, const ZooKeeperPtr & zookeeper);
void setClusterInfo(ContextPtr context, Poco::Logger * log);

View File

@ -214,7 +214,7 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
/// Stage 2: resolve host_id and check if we should execute query or not
/// Multiple clusters can use single DDL queue path in ZooKeeper,
/// So we should skip task if we cannot find current host in cluster hosts list.
if (!task->findCurrentHostID(context, log))
if (!task->findCurrentHostID(context, log, zookeeper))
{
out_reason = "There is no a local address in host list";
return add_to_skip_set();

View File

@ -944,18 +944,16 @@ JoinPtr SelectQueryExpressionAnalyzer::appendJoin(
std::shared_ptr<DirectKeyValueJoin> tryKeyValueJoin(std::shared_ptr<TableJoin> analyzed_join, const Block & right_sample_block);
static std::shared_ptr<IJoin> chooseJoinAlgorithm(
std::shared_ptr<TableJoin> analyzed_join, const ColumnsWithTypeAndName & left_sample_columns, std::unique_ptr<QueryPlan> & joined_plan, ContextPtr context)
static std::shared_ptr<IJoin> tryCreateJoin(
JoinAlgorithm algorithm,
std::shared_ptr<TableJoin> analyzed_join,
const ColumnsWithTypeAndName & left_sample_columns,
const Block & right_sample_block,
std::unique_ptr<QueryPlan> & joined_plan,
ContextPtr context)
{
const auto & settings = context->getSettings();
Block right_sample_block = joined_plan->getCurrentDataStream().header;
std::vector<String> tried_algorithms;
if (analyzed_join->isEnabledAlgorithm(JoinAlgorithm::DIRECT))
if (algorithm == JoinAlgorithm::DIRECT || algorithm == JoinAlgorithm::DEFAULT)
{
tried_algorithms.push_back(toString(JoinAlgorithm::DIRECT));
JoinPtr direct_join = tryKeyValueJoin(analyzed_join, right_sample_block);
if (direct_join)
{
@ -965,54 +963,63 @@ static std::shared_ptr<IJoin> chooseJoinAlgorithm(
}
}
if (analyzed_join->isEnabledAlgorithm(JoinAlgorithm::PARTIAL_MERGE) ||
analyzed_join->isEnabledAlgorithm(JoinAlgorithm::PREFER_PARTIAL_MERGE))
if (algorithm == JoinAlgorithm::PARTIAL_MERGE ||
algorithm == JoinAlgorithm::PREFER_PARTIAL_MERGE)
{
tried_algorithms.push_back(toString(JoinAlgorithm::PARTIAL_MERGE));
if (MergeJoin::isSupported(analyzed_join))
return std::make_shared<MergeJoin>(analyzed_join, right_sample_block);
}
if (analyzed_join->isEnabledAlgorithm(JoinAlgorithm::HASH) ||
if (algorithm == JoinAlgorithm::HASH ||
/// partial_merge is preferred, but can't be used for specified kind of join, fallback to hash
analyzed_join->isEnabledAlgorithm(JoinAlgorithm::PREFER_PARTIAL_MERGE) ||
analyzed_join->isEnabledAlgorithm(JoinAlgorithm::PARALLEL_HASH))
algorithm == JoinAlgorithm::PREFER_PARTIAL_MERGE ||
algorithm == JoinAlgorithm::PARALLEL_HASH ||
algorithm == JoinAlgorithm::DEFAULT)
{
tried_algorithms.push_back(toString(JoinAlgorithm::HASH));
const auto & settings = context->getSettings();
if (analyzed_join->allowParallelHashJoin())
return std::make_shared<ConcurrentHashJoin>(context, analyzed_join, settings.max_threads, right_sample_block);
return std::make_shared<HashJoin>(analyzed_join, right_sample_block);
}
if (analyzed_join->isEnabledAlgorithm(JoinAlgorithm::FULL_SORTING_MERGE))
if (algorithm == JoinAlgorithm::FULL_SORTING_MERGE)
{
tried_algorithms.push_back(toString(JoinAlgorithm::FULL_SORTING_MERGE));
if (FullSortingMergeJoin::isSupported(analyzed_join))
return std::make_shared<FullSortingMergeJoin>(analyzed_join, right_sample_block);
}
if (analyzed_join->isEnabledAlgorithm(JoinAlgorithm::GRACE_HASH))
if (algorithm == JoinAlgorithm::GRACE_HASH)
{
tried_algorithms.push_back(toString(JoinAlgorithm::GRACE_HASH));
// Grace hash join requires that columns exist in left_sample_block.
Block left_sample_block(left_sample_columns);
if (sanitizeBlock(left_sample_block, false) && GraceHashJoin::isSupported(analyzed_join))
return std::make_shared<GraceHashJoin>(context, analyzed_join, left_sample_block, right_sample_block, context->getTempDataOnDisk());
}
if (analyzed_join->isEnabledAlgorithm(JoinAlgorithm::AUTO))
if (algorithm == JoinAlgorithm::AUTO)
{
tried_algorithms.push_back(toString(JoinAlgorithm::AUTO));
if (MergeJoin::isSupported(analyzed_join))
return std::make_shared<JoinSwitcher>(analyzed_join, right_sample_block);
return std::make_shared<HashJoin>(analyzed_join, right_sample_block);
}
return nullptr;
}
static std::shared_ptr<IJoin> chooseJoinAlgorithm(
std::shared_ptr<TableJoin> analyzed_join, const ColumnsWithTypeAndName & left_sample_columns, std::unique_ptr<QueryPlan> & joined_plan, ContextPtr context)
{
Block right_sample_block = joined_plan->getCurrentDataStream().header;
const auto & join_algorithms = analyzed_join->getEnabledJoinAlgorithms();
for (const auto alg : join_algorithms)
{
auto join = tryCreateJoin(alg, analyzed_join, left_sample_columns, right_sample_block, joined_plan, context);
if (join)
return join;
}
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"Can't execute {} join algorithm for this strictness/kind and right storage type",
fmt::join(tried_algorithms, " or "));
"Can't execute any of specified join algorithms for this strictness/kind and right storage type");
}
static std::unique_ptr<QueryPlan> buildJoinedPlan(
@ -1070,9 +1077,6 @@ static std::unique_ptr<QueryPlan> buildJoinedPlan(
std::shared_ptr<DirectKeyValueJoin> tryKeyValueJoin(std::shared_ptr<TableJoin> analyzed_join, const Block & right_sample_block)
{
if (!analyzed_join->isEnabledAlgorithm(JoinAlgorithm::DIRECT))
return nullptr;
auto storage = analyzed_join->getStorageKeyValue();
if (!storage)
return nullptr;

View File

@ -155,6 +155,11 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter)
}
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong parameter type in ALTER query");
if (!getContext()->getSettings().allow_experimental_statistic && (
command_ast->type == ASTAlterCommand::ADD_STATISTIC ||
command_ast->type == ASTAlterCommand::DROP_STATISTIC ||
command_ast->type == ASTAlterCommand::MATERIALIZE_STATISTIC))
throw Exception(ErrorCodes::INCORRECT_QUERY, "Alter table with statistic is now disabled. Turn on allow_experimental_statistic");
}
if (typeid_cast<DatabaseReplicated *>(database.get()))
@ -318,6 +323,21 @@ AccessRightsElements InterpreterAlterQuery::getRequiredAccessForCommand(const AS
required_access.emplace_back(AccessType::ALTER_SAMPLE_BY, database, table);
break;
}
case ASTAlterCommand::ADD_STATISTIC:
{
required_access.emplace_back(AccessType::ALTER_ADD_STATISTIC, database, table);
break;
}
case ASTAlterCommand::DROP_STATISTIC:
{
required_access.emplace_back(AccessType::ALTER_DROP_STATISTIC, database, table);
break;
}
case ASTAlterCommand::MATERIALIZE_STATISTIC:
{
required_access.emplace_back(AccessType::ALTER_MATERIALIZE_STATISTIC, database, table);
break;
}
case ASTAlterCommand::ADD_INDEX:
{
required_access.emplace_back(AccessType::ALTER_ADD_INDEX, database, table);

View File

@ -437,6 +437,12 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns)
column_declaration->children.push_back(column_declaration->codec);
}
if (column.stat)
{
column_declaration->stat_type = column.stat->ast;
column_declaration->children.push_back(column_declaration->stat_type);
}
if (column.ttl)
{
column_declaration->ttl = column.ttl;
@ -639,6 +645,13 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_deflate_qpl_codec);
}
if (col_decl.stat_type)
{
if (!attach && !context_->getSettingsRef().allow_experimental_statistic)
throw Exception(ErrorCodes::INCORRECT_QUERY, "Create table with statistic is now disabled. Turn on allow_experimental_statistic");
column.stat = StatisticDescription::getStatisticFromColumnDeclaration(col_decl);
}
if (col_decl.ttl)
column.ttl = col_decl.ttl;

View File

@ -66,8 +66,10 @@ namespace
static void visit(ASTSelectQuery & select, ASTPtr & node, Data & data)
{
/// we need to read statistic when `allow_statistic_optimize` is enabled.
bool only_analyze = !data.getContext()->getSettings().allow_statistic_optimize;
InterpreterSelectQuery interpreter(
node, data.getContext(), SelectQueryOptions(QueryProcessingStage::FetchColumns).analyze().modify());
node, data.getContext(), SelectQueryOptions(QueryProcessingStage::FetchColumns).analyze(only_analyze).modify());
const SelectQueryInfo & query_info = interpreter.getQueryInfo();
if (query_info.view_query)

View File

@ -659,6 +659,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
MergeTreeWhereOptimizer where_optimizer{
std::move(column_compressed_sizes),
metadata_snapshot,
storage->getConditionEstimatorByPredicate(query_info, storage_snapshot, context),
queried_columns,
supported_prewhere_columns,
log};

View File

@ -23,11 +23,6 @@ InterpreterUndropQuery::InterpreterUndropQuery(const ASTPtr & query_ptr_, Contex
BlockIO InterpreterUndropQuery::execute()
{
if (!getContext()->getSettingsRef().allow_experimental_undrop_table_query)
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
"Undrop table is experimental. "
"Set `allow_experimental_undrop_table_query` setting to enable it");
getContext()->checkAccess(AccessType::UNDROP_TABLE);
auto & undrop = query_ptr->as<ASTUndropQuery &>();
if (!undrop.cluster.empty() && !maybeRemoveOnCluster(query_ptr, getContext()))

View File

@ -308,6 +308,7 @@ std::shared_ptr<TableJoin> JoinedTables::makeTableJoin(const ASTSelectQuery & se
auto settings = context->getSettingsRef();
MultiEnum<JoinAlgorithm> join_algorithm = settings.join_algorithm;
bool try_use_direct_join = join_algorithm.isSet(JoinAlgorithm::DIRECT) || join_algorithm.isSet(JoinAlgorithm::DEFAULT);
auto table_join = std::make_shared<TableJoin>(settings, context->getGlobalTemporaryVolume());
const ASTTablesInSelectQueryElement * ast_join = select_query_.join();
@ -325,8 +326,8 @@ std::shared_ptr<TableJoin> JoinedTables::makeTableJoin(const ASTSelectQuery & se
table_join->setStorageJoin(storage_join);
}
if (auto storage_dict = std::dynamic_pointer_cast<StorageDictionary>(storage);
storage_dict && join_algorithm.isSet(JoinAlgorithm::DIRECT))
auto storage_dict = std::dynamic_pointer_cast<StorageDictionary>(storage);
if (storage_dict && try_use_direct_join && storage_dict->getDictionary()->getSpecialKeyType() != DictionarySpecialKeyType::Range)
{
FunctionDictHelper dictionary_helper(context);
@ -347,8 +348,7 @@ std::shared_ptr<TableJoin> JoinedTables::makeTableJoin(const ASTSelectQuery & se
table_join->setStorageJoin(dictionary_kv);
}
if (auto storage_kv = std::dynamic_pointer_cast<IKeyValueEntity>(storage);
storage_kv && join_algorithm.isSet(JoinAlgorithm::DIRECT))
if (auto storage_kv = std::dynamic_pointer_cast<IKeyValueEntity>(storage); storage_kv && try_use_direct_join)
{
table_join->setStorageJoin(storage_kv);
}

View File

@ -55,6 +55,7 @@ namespace ErrorCodes
extern const int CANNOT_UPDATE_COLUMN;
extern const int UNEXPECTED_EXPRESSION;
extern const int THERE_IS_NO_COLUMN;
extern const int ILLEGAL_STATISTIC;
}
@ -690,9 +691,15 @@ void MutationsInterpreter::prepare(bool dry_run)
{
if (column.default_desc.kind == ColumnDefaultKind::Materialized)
{
auto type_literal = std::make_shared<ASTLiteral>(column.type->getName());
auto materialized_column = makeASTFunction("_CAST",
column.default_desc.expression->clone(),
type_literal);
stages.back().column_to_updated.emplace(
column.name,
column.default_desc.expression->clone());
materialized_column);
}
}
}
@ -724,7 +731,7 @@ void MutationsInterpreter::prepare(bool dry_run)
}
else if (command.type == MutationCommand::MATERIALIZE_INDEX)
{
mutation_kind.set(MutationKind::MUTATE_INDEX_PROJECTION);
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION);
auto it = std::find_if(
std::cbegin(indices_desc), std::end(indices_desc),
[&](const IndexDescription & index)
@ -744,9 +751,20 @@ void MutationsInterpreter::prepare(bool dry_run)
materialized_indices.emplace(command.index_name);
}
}
else if (command.type == MutationCommand::MATERIALIZE_STATISTIC)
{
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION);
for (const auto & stat_column_name: command.statistic_columns)
{
if (!columns_desc.has(stat_column_name) || !columns_desc.get(stat_column_name).stat)
throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "Unknown statistic column: {}", stat_column_name);
dependencies.emplace(stat_column_name, ColumnDependency::STATISTIC);
materialized_statistics.emplace(stat_column_name);
}
}
else if (command.type == MutationCommand::MATERIALIZE_PROJECTION)
{
mutation_kind.set(MutationKind::MUTATE_INDEX_PROJECTION);
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION);
const auto & projection = projections_desc.get(command.projection_name);
if (!source.hasProjection(projection.name))
{
@ -757,12 +775,18 @@ void MutationsInterpreter::prepare(bool dry_run)
}
else if (command.type == MutationCommand::DROP_INDEX)
{
mutation_kind.set(MutationKind::MUTATE_INDEX_PROJECTION);
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION);
materialized_indices.erase(command.index_name);
}
else if (command.type == MutationCommand::DROP_STATISTIC)
{
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION);
for (const auto & stat_column_name: command.statistic_columns)
materialized_statistics.erase(stat_column_name);
}
else if (command.type == MutationCommand::DROP_PROJECTION)
{
mutation_kind.set(MutationKind::MUTATE_INDEX_PROJECTION);
mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION);
materialized_projections.erase(command.projection_name);
}
else if (command.type == MutationCommand::MATERIALIZE_TTL)
@ -812,7 +836,9 @@ void MutationsInterpreter::prepare(bool dry_run)
auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns, true, has_dependency);
for (const auto & dependency : new_dependencies)
{
if (dependency.kind == ColumnDependency::SKIP_INDEX || dependency.kind == ColumnDependency::PROJECTION)
if (dependency.kind == ColumnDependency::SKIP_INDEX
|| dependency.kind == ColumnDependency::PROJECTION
|| dependency.kind == ColumnDependency::STATISTIC)
dependencies.insert(dependency);
}
}
@ -1352,7 +1378,7 @@ QueryPipelineBuilder MutationsInterpreter::execute()
Block MutationsInterpreter::getUpdatedHeader() const
{
// If it's an index/projection materialization, we don't write any data columns, thus empty header is used
return mutation_kind.mutation_kind == MutationKind::MUTATE_INDEX_PROJECTION ? Block{} : *updated_header;
return mutation_kind.mutation_kind == MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION ? Block{} : *updated_header;
}
const ColumnDependencies & MutationsInterpreter::getColumnDependencies() const

View File

@ -91,6 +91,8 @@ public:
NameSet grabMaterializedIndices() { return std::move(materialized_indices); }
NameSet grabMaterializedStatistics() { return std::move(materialized_statistics); }
NameSet grabMaterializedProjections() { return std::move(materialized_projections); }
struct MutationKind
@ -98,7 +100,7 @@ public:
enum MutationKindEnum
{
MUTATE_UNKNOWN,
MUTATE_INDEX_PROJECTION,
MUTATE_INDEX_STATISTIC_PROJECTION,
MUTATE_OTHER,
} mutation_kind = MUTATE_UNKNOWN;
@ -214,6 +216,7 @@ private:
NameSet materialized_indices;
NameSet materialized_projections;
NameSet materialized_statistics;
MutationKind mutation_kind; /// Do we meet any index or projection mutation.

View File

@ -1,124 +0,0 @@
#include <Common/typeid_cast.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTSubquery.h>
#include <Interpreters/RewriteAnyFunctionVisitor.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Parsers/ASTTablesInSelectQuery.h>
namespace DB
{
namespace
{
/// Collects pointers to every identifier argument nested anywhere inside `func`,
/// recursing through ordinary function calls.
/// Returns false when the expression must NOT be rewritten, i.e. when it contains:
///  - arrayJoin(): behaves like an aggregate function, rewriting it changes results:
///      SELECT *, any(arrayJoin([[], []])) FROM numbers(1) GROUP BY number
///      would produce two rows instead of one;
///  - a lambda;
///  - a window function or a nested aggregate function (these are rejected later
///    by GetAggregatesMatcher, so they have to be skipped explicitly here).
bool extractIdentifiers(const ASTFunction & func, std::unordered_set<ASTPtr *> & identifiers)
{
    for (auto & child : func.arguments->children)
    {
        const auto * child_func = child->as<ASTFunction>();
        if (!child_func)
        {
            /// Plain identifiers are the rewrite targets; anything else (literals, etc.) is ignored.
            if (child->as<ASTIdentifier>())
                identifiers.emplace(&child);
            continue;
        }

        if (child_func->name == "arrayJoin" || child_func->name == "lambda")
            return false;

        if (child_func->is_window_function || AggregateUtils::isAggregateFunction(*child_func))
            return false;

        if (!extractIdentifiers(*child_func, identifiers))
            return false;
    }

    return true;
}
}
/// Entry point of the visitor: dispatches to the ASTFunction overload.
/// Window functions are left untouched — pushing any()/anyLast() inside them
/// would change semantics.
void RewriteAnyFunctionMatcher::visit(ASTPtr & ast, Data & data)
{
    auto * func = ast->as<ASTFunction>();
    if (!func || func->is_window_function)
        return;

    visit(*func, ast, data);
}
/// Rewrites any(f(x, y, g(z))) -> f(any(x), any(y), g(any(z))) in place.
/// Only applies to `any`/`anyLast` with a single argument that is itself a
/// function call; bare identifiers/literals inside any() are left alone.
void RewriteAnyFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data & data)
{
    if (!func.arguments || func.arguments->children.empty() || !func.arguments->children[0])
        return;

    if (func.name != "any" && func.name != "anyLast")
        return;

    auto & func_arguments = func.arguments->children;

    if (func_arguments.size() != 1)
        return;

    const auto * first_arg_func = func_arguments[0]->as<ASTFunction>();
    if (!first_arg_func || first_arg_func->arguments->children.empty())
        return;

    /// We have rewritten this function. Just unwrap its argument.
    /// (The visitor can reach the same node again; `data.rewritten` guards
    /// against wrapping identifiers in any() a second time.)
    if (data.rewritten.contains(ast.get()))
    {
        /// Preserve the alias of the outer any() on the unwrapped expression.
        func_arguments[0]->setAlias(func.alias);
        ast = func_arguments[0];
        return;
    }

    std::unordered_set<ASTPtr *> identifiers; /// implicit remove duplicates

    if (!extractIdentifiers(func, identifiers))
        return;

    /// Wrap identifiers: any(f(x, y, g(z))) -> any(f(any(x), any(y), g(any(z))))
    for (auto * ast_to_change : identifiers)
    {
        ASTPtr identifier_ast = *ast_to_change;
        *ast_to_change = makeASTFunction(func.name);
        (*ast_to_change)->as<ASTFunction>()->arguments->children.emplace_back(identifier_ast);
    }

    /// Mark before unwrapping so a revisit of this node takes the fast path above.
    data.rewritten.insert(ast.get());

    /// Unwrap function: any(f(any(x), any(y), g(any(z)))) -> f(any(x), any(y), g(any(z)))
    func_arguments[0]->setAlias(func.alias);
    ast = func_arguments[0];
}
/// Controls visitor descent: do not look for any()/anyLast() inside subqueries,
/// table expressions or ARRAY JOIN sections — the rewrite only applies to the
/// current SELECT level.
bool RewriteAnyFunctionMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
{
    if (node->as<ASTSubquery>())
        return false;
    if (node->as<ASTTableExpression>())
        return false;
    if (node->as<ASTArrayJoin>())
        return false;
    return true;
}
}

View File

@ -1,29 +0,0 @@
#pragma once
#include <unordered_set>
#include <Parsers/IAST.h>
#include <Interpreters/InDepthNodeVisitor.h>
namespace DB
{
class ASTFunction;
/// Rewrite 'any' and 'anyLast' functions pushing them inside original function.
/// any(f(x, y, g(z))) -> f(any(x), any(y), g(any(z)))
/// Used by the optimize_move_functions_out_of_any query optimization.
class RewriteAnyFunctionMatcher
{
public:
    struct Data
    {
        /// Nodes already rewritten; prevents wrapping identifiers twice on revisit.
        std::unordered_set<IAST *> rewritten;
    };

    /// Dispatches to the ASTFunction overload when `ast` is a function node.
    static void visit(ASTPtr & ast, Data & data);
    /// Performs the rewrite in place on an any()/anyLast() call.
    static void visit(const ASTFunction &, ASTPtr & ast, Data & data);
    /// Blocks descent into subqueries, table expressions and ARRAY JOIN.
    static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);
};
using RewriteAnyFunctionVisitor = InDepthNodeVisitor<RewriteAnyFunctionMatcher, false>;
}

View File

@ -15,6 +15,7 @@
#include <Interpreters/Cluster.h>
#include <magic_enum.hpp>
#include <Poco/Net/NameValueCollection.h>
#include <atomic>
#include <condition_variable>
@ -431,7 +432,7 @@ void Session::setClientConnectionId(uint32_t connection_id)
prepared_client_info->connection_id = connection_id;
}
void Session::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer)
void Session::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer, const Poco::Net::NameValueCollection & http_headers)
{
if (session_context)
{
@ -442,6 +443,7 @@ void Session::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String
prepared_client_info->http_method = http_method;
prepared_client_info->http_user_agent = http_user_agent;
prepared_client_info->http_referer = http_referer;
prepared_client_info->headers = http_headers;
}
}

View File

@ -5,6 +5,7 @@
#include <Interpreters/ClientInfo.h>
#include <Interpreters/Context_fwd.h>
#include <Interpreters/SessionTracker.h>
#include <Poco/Net/NameValueCollection.h>
#include <chrono>
#include <memory>
@ -64,7 +65,7 @@ public:
void setClientInterface(ClientInfo::Interface interface);
void setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version);
void setClientConnectionId(uint32_t connection_id);
void setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer);
void setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer, const Poco::Net::NameValueCollection & http_headers = {});
void setForwardedFor(const String & forwarded_for);
void setQuotaClientKey(const String & quota_key);
void setConnectionClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version);

View File

@ -936,7 +936,9 @@ void TableJoin::resetToCross()
bool TableJoin::allowParallelHashJoin() const
{
if (!right_storage_name.empty() || !join_algorithm.isSet(JoinAlgorithm::PARALLEL_HASH))
if (std::find(join_algorithm.begin(), join_algorithm.end(), JoinAlgorithm::PARALLEL_HASH) == join_algorithm.end())
return false;
if (!right_storage_name.empty())
return false;
if (table_join.kind != JoinKind::Left && table_join.kind != JoinKind::Inner)
return false;

View File

@ -140,7 +140,7 @@ private:
const size_t default_max_bytes = 0;
const bool join_use_nulls = false;
const size_t max_joined_block_rows = 0;
MultiEnum<JoinAlgorithm> join_algorithm = MultiEnum<JoinAlgorithm>(JoinAlgorithm::AUTO);
std::vector<JoinAlgorithm> join_algorithm;
const size_t partial_merge_join_rows_in_right_blocks = 0;
const size_t partial_merge_join_left_table_buffer_bytes = 0;
const size_t max_files_to_merge = 0;
@ -236,7 +236,7 @@ public:
: size_limits(limits)
, default_max_bytes(0)
, join_use_nulls(use_nulls)
, join_algorithm(JoinAlgorithm::DEFAULT)
, join_algorithm({JoinAlgorithm::DEFAULT})
{
clauses.emplace_back().key_names_right = key_names_right;
table_join.kind = kind;
@ -253,16 +253,16 @@ public:
ActionsDAGPtr createJoinedBlockActions(ContextPtr context) const;
const std::vector<JoinAlgorithm> & getEnabledJoinAlgorithms() const { return join_algorithm; }
bool isEnabledAlgorithm(JoinAlgorithm val) const
{
/// When join_algorithm = 'default' (not specified by user) we use hash or direct algorithm.
/// It's behaviour that was initially supported by clickhouse.
bool is_enabled_by_default = val == JoinAlgorithm::DEFAULT
|| val == JoinAlgorithm::HASH
|| val == JoinAlgorithm::DIRECT;
if (join_algorithm.isSet(JoinAlgorithm::DEFAULT) && is_enabled_by_default)
bool is_default_enabled = std::find(join_algorithm.begin(), join_algorithm.end(), JoinAlgorithm::DEFAULT) != join_algorithm.end();
if (is_default_enabled && (val == JoinAlgorithm::DEFAULT || val == JoinAlgorithm::HASH || val == JoinAlgorithm::DIRECT))
return true;
return join_algorithm.isSet(val);
return std::find(join_algorithm.begin(), join_algorithm.end(), val) != join_algorithm.end();
}
bool allowParallelHashJoin() const;

View File

@ -14,6 +14,11 @@
#include <Core/Defines.h>
#include <Interpreters/Cache/WriteBufferToFileSegment.h>
namespace ProfileEvents
{
extern const Event ExternalProcessingFilesTotal;
}
namespace DB
{
@ -97,6 +102,8 @@ FileSegmentsHolderPtr TemporaryDataOnDisk::createCacheFile(size_t max_file_size)
if (!file_cache)
throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryDataOnDiskScope has no cache");
ProfileEvents::increment(ProfileEvents::ExternalProcessingFilesTotal);
const auto key = FileSegment::Key::random();
auto holder = file_cache->set(key, 0, std::max(10_MiB, max_file_size), CreateFileSegmentSettings(FileSegmentKind::Temporary, /* unbounded */ true));
fs::create_directories(file_cache->getPathInLocalCache(key));
@ -120,7 +127,7 @@ TemporaryFileOnDiskHolder TemporaryDataOnDisk::createRegularFile(size_t max_file
{
disk = volume->getDisk();
}
/// We do not increment ProfileEvents::ExternalProcessingFilesTotal here because it is incremented in TemporaryFileOnDisk constructor.
return std::make_unique<TemporaryFileOnDisk>(disk, current_metric_scope);
}

View File

@ -11,7 +11,6 @@
#include <Interpreters/DuplicateOrderByVisitor.h>
#include <Interpreters/GroupByFunctionKeysVisitor.h>
#include <Interpreters/AggregateFunctionOfGroupByKeysVisitor.h>
#include <Interpreters/RewriteAnyFunctionVisitor.h>
#include <Interpreters/RemoveInjectiveFunctionsVisitor.h>
#include <Interpreters/FunctionMaskingArgumentCheckVisitor.h>
#include <Interpreters/RedundantFunctionsInOrderByVisitor.h>
@ -606,12 +605,6 @@ void optimizeAggregationFunctions(ASTPtr & query)
ArithmeticOperationsInAgrFuncVisitor(data).visit(query);
}
void optimizeAnyFunctions(ASTPtr & query)
{
RewriteAnyFunctionVisitor::Data data = {};
RewriteAnyFunctionVisitor(data).visit(query);
}
void optimizeSumIfFunctions(ASTPtr & query)
{
RewriteSumIfFunctionVisitor::Data data = {};
@ -764,10 +757,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
if (settings.optimize_group_by_function_keys)
optimizeGroupByFunctionKeys(select_query);
/// Move all operations out of any function
if (settings.optimize_move_functions_out_of_any)
optimizeAnyFunctions(query);
if (settings.optimize_normalize_count_variants)
optimizeCountConstantAndSumOne(query, context);

View File

@ -201,6 +201,33 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState &
partition->formatImpl(settings, state, frame);
}
}
else if (type == ASTAlterCommand::ADD_STATISTIC)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << "ADD STATISTIC " << (if_not_exists ? "IF NOT EXISTS " : "")
<< (settings.hilite ? hilite_none : "");
statistic_decl->formatImpl(settings, state, frame);
}
else if (type == ASTAlterCommand::DROP_STATISTIC)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << (clear_statistic ? "CLEAR " : "DROP ") << "STATISTIC "
<< (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : "");
statistic_decl->formatImpl(settings, state, frame);
if (partition)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : "");
partition->formatImpl(settings, state, frame);
}
}
else if (type == ASTAlterCommand::MATERIALIZE_STATISTIC)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << "MATERIALIZE STATISTIC " << (settings.hilite ? hilite_none : "");
statistic_decl->formatImpl(settings, state, frame);
if (partition)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : "");
partition->formatImpl(settings, state, frame);
}
}
else if (type == ASTAlterCommand::ADD_CONSTRAINT)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << "ADD CONSTRAINT " << (if_not_exists ? "IF NOT EXISTS " : "")

View File

@ -54,6 +54,10 @@ public:
DROP_PROJECTION,
MATERIALIZE_PROJECTION,
ADD_STATISTIC,
DROP_STATISTIC,
MATERIALIZE_STATISTIC,
DROP_PARTITION,
DROP_DETACHED_PARTITION,
ATTACH_PARTITION,
@ -129,6 +133,8 @@ public:
*/
ASTPtr projection;
ASTPtr statistic_decl;
/** Used in DROP PARTITION, ATTACH PARTITION FROM, UPDATE, DELETE queries.
* The value or ID of the partition is stored here.
*/
@ -167,6 +173,8 @@ public:
bool clear_index = false; /// for CLEAR INDEX (do not drop index from metadata)
bool clear_statistic = false; /// for CLEAR STATISTIC (do not drop statistic from metadata)
bool clear_projection = false; /// for CLEAR PROJECTION (do not drop projection from metadata)
bool if_not_exists = false; /// option for ADD_COLUMN

View File

@ -39,6 +39,12 @@ ASTPtr ASTColumnDeclaration::clone() const
res->children.push_back(res->codec);
}
if (stat_type)
{
res->stat_type = stat_type->clone();
res->children.push_back(res->stat_type);
}
if (ttl)
{
res->ttl = ttl->clone();
@ -99,6 +105,12 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta
codec->formatImpl(settings, state, frame);
}
if (stat_type)
{
settings.ostr << ' ';
stat_type->formatImpl(settings, state, frame);
}
if (ttl)
{
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "TTL" << (settings.hilite ? hilite_none : "") << ' ';

View File

@ -19,6 +19,7 @@ public:
bool ephemeral_default = false;
ASTPtr comment;
ASTPtr codec;
ASTPtr stat_type;
ASTPtr ttl;
ASTPtr collation;
bool primary_key_specifier = false;

View File

@ -0,0 +1,42 @@
#include <Parsers/ASTStatisticDeclaration.h>
#include <Parsers/ASTIdentifier.h>
#include <Common/quoteString.h>
#include <IO/Operators.h>
#include <Parsers/ASTFunction.h>
namespace DB
{
/// Deep copy of the statistic declaration: the column list is cloned and
/// re-attached as a child (IAST::set keeps `children` consistent), the type
/// name is copied by value.
ASTPtr ASTStatisticDeclaration::clone() const
{
    auto cloned = std::make_shared<ASTStatisticDeclaration>();
    cloned->type = type;
    cloned->set(cloned->columns, columns->clone());
    return cloned;
}
std::vector<String> ASTStatisticDeclaration::getColumnNames() const
{
std::vector<String> result;
result.reserve(columns->children.size());
for (const ASTPtr & column_ast : columns->children)
{
result.push_back(column_ast->as<ASTIdentifier &>().name());
}
return result;
}
/// Renders the declaration as "<columns> TYPE <type>", e.g. "a, b TYPE tdigest".
void ASTStatisticDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const
{
    columns->formatImpl(s, state, frame);
    if (s.hilite)
        s.ostr << hilite_keyword << " TYPE " << hilite_none;
    else
        s.ostr << " TYPE ";
    s.ostr << backQuoteIfNeed(type);
}
}

View File

@ -0,0 +1,28 @@
#pragma once
#include <Parsers/IAST.h>
namespace DB
{
class ASTFunction;
/** Statistic declaration in a CREATE/ALTER query:
  *   <columns> TYPE <typename>(args)
  * e.g. "a, b TYPE tdigest".
  */
class ASTStatisticDeclaration : public IAST
{
public:
    /// Expression list of ASTIdentifier column names; owned via IAST children
    /// (attached with IAST::set, hence a raw non-owning pointer here).
    IAST * columns;
    /// TODO type should be a list of ASTFunction, for example, 'tdigest(256), hyperloglog(128)', etc.
    String type;

    /** Get the text that identifies this element. */
    String getID(char) const override { return "Stat"; }

    /// Names of all columns the statistic is declared on.
    std::vector<String> getColumnNames() const;

    ASTPtr clone() const override;
    void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override;
};
}

Some files were not shown because too many files have changed in this diff Show More