Merge remote-tracking branch 'upstream/master' into ncb/hostname-system-log-tables

This commit is contained in:
Bharat Nallan Chakravarthy 2023-12-03 15:19:47 -08:00
commit 440dc66a5c
541 changed files with 11652 additions and 3411 deletions

View File

@ -33,10 +33,9 @@ curl https://clickhouse.com/ | sh
## Upcoming Events
* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/296334923/) - Nov 14
* [**ClickHouse Meetup in Singapore**](https://www.meetup.com/clickhouse-singapore-meetup-group/events/296334976/) - Nov 15
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/296488501/) - Nov 30
* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/296488779/) - Dec 11
* [**ClickHouse Meetup in Sydney**](https://www.meetup.com/clickhouse-sydney-user-group/events/297638812/) - Dec 12
* [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/296488840/) - Dec 12
Also, keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler <at> clickhouse <dot> com.

View File

@ -385,9 +385,25 @@ endif ()
include("${ClickHouse_SOURCE_DIR}/contrib/google-protobuf-cmake/protobuf_generate.cmake")
# These files need to be installed so that users can use the well-known protobuf types
set(google_proto_files
${protobuf_source_dir}/src/google/protobuf/any.proto
${protobuf_source_dir}/src/google/protobuf/api.proto
${protobuf_source_dir}/src/google/protobuf/descriptor.proto
${protobuf_source_dir}/src/google/protobuf/duration.proto
${protobuf_source_dir}/src/google/protobuf/empty.proto
${protobuf_source_dir}/src/google/protobuf/field_mask.proto
${protobuf_source_dir}/src/google/protobuf/source_context.proto
${protobuf_source_dir}/src/google/protobuf/struct.proto
${protobuf_source_dir}/src/google/protobuf/timestamp.proto
${protobuf_source_dir}/src/google/protobuf/type.proto
${protobuf_source_dir}/src/google/protobuf/wrappers.proto
)
add_library(_protobuf INTERFACE)
target_link_libraries(_protobuf INTERFACE _libprotobuf)
target_include_directories(_protobuf INTERFACE "${Protobuf_INCLUDE_DIR}")
set_target_properties(_protobuf PROPERTIES google_proto_files "${google_proto_files}")
add_library(ch_contrib::protobuf ALIAS _protobuf)
add_library(_protoc INTERFACE)

View File

@ -33,7 +33,7 @@ target_include_directories(cxxabi SYSTEM BEFORE
PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/include>
PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/src>
)
target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY)
target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DHAS_THREAD_LOCAL)
target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast.
target_link_libraries(cxxabi PUBLIC unwind)

2
contrib/libpqxx vendored

@ -1 +1 @@
Subproject commit 791d68fd89902835133c50435e380ec7a73271b7
Subproject commit c995193a3a14d71f4711f1f421f65a1a1db64640

2
contrib/qpl vendored

@ -1 +1 @@
Subproject commit faaf19350459c076e66bb5df11743c3fade59b73
Subproject commit a61bdd845fd7ca363b2bcc55454aa520dfcd8298

View File

@ -47,6 +47,12 @@ SELECT * FROM test_table;
└──────┴───────┘
```
## Virtual columns {#virtual-columns}
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
## See also
[Azure Blob Storage Table Function](/docs/en/sql-reference/table-functions/azureBlobStorage)

View File

@ -85,6 +85,10 @@ You can also change any [rocksdb options](https://github.com/facebook/rocksdb/wi
</rocksdb>
```
By default trivial approximate count optimization is turned off, which might affect the performance of `count()` queries. To enable this
optimization set up `optimize_trivial_approximate_count_query = 1`. Also, this setting affects `system.tables` for EmbeddedRocksDB engine,
turn on the setting to see approximate values for `total_rows` and `total_bytes`.
## Supported operations {#supported-operations}
### Inserts

View File

@ -230,8 +230,9 @@ libhdfs3 support HDFS namenode HA.
## Virtual Columns {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
## Storage Settings {#storage-settings}

View File

@ -142,8 +142,9 @@ Code: 48. DB::Exception: Received from localhost:9000. DB::Exception: Reading fr
## Virtual columns {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns).

View File

@ -39,8 +39,8 @@ If you need to update rows frequently, we recommend using the [`ReplacingMergeTr
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [TTL expr1] [PRIMARY KEY],
name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [TTL expr2] [PRIMARY KEY],
name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [STATISTIC(stat1)] [TTL expr1] [PRIMARY KEY],
name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [STATISTIC(stat2)] [TTL expr2] [PRIMARY KEY],
...
INDEX index_name1 expr1 TYPE type1(...) [GRANULARITY value1],
INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2],
@ -1358,3 +1358,33 @@ In this sample configuration:
- `_partition_value` — Values (a tuple) of a `partition by` expression.
- `_sample_factor` — Sample factor (from the query).
- `_block_number` — Block number of the row, it is persisted on merges when `allow_experimental_block_number_column` is set to true.
## Column Statistics (Experimental) {#column-statistics}
The statistic declaration is in the columns section of the `CREATE` query for tables from the `*MergeTree*` Family when we enable `set allow_experimental_statistic = 1`.
``` sql
CREATE TABLE example_table
(
a Int64 STATISTIC(tdigest),
b Float64
)
ENGINE = MergeTree
ORDER BY a
```
We can also manipulate statistics with `ALTER` statements.
```sql
ALTER TABLE example_table ADD STATISTIC b TYPE tdigest;
ALTER TABLE example_table DROP STATISTIC a TYPE tdigest;
```
These lightweight statistics aggregate information about distribution of values in columns.
They can be used for query optimization when we enable `set allow_statistic_optimize = 1`.
#### Available Types of Column Statistics {#available-types-of-column-statistics}
- `tdigest`
Stores distribution of values from numeric columns in [TDigest](https://github.com/tdunning/t-digest) sketch.

View File

@ -87,12 +87,18 @@ $ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64
- Indices
- Replication
## PARTITION BY
## PARTITION BY {#partition-by}
`PARTITION BY` — Optional. It is possible to create separate files by partitioning the data on a partition key. In most cases, you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression).
For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.
## Virtual Columns {#virtual-columns}
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
## Settings {#settings}
- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default.

View File

@ -103,6 +103,12 @@ SELECT * FROM url_engine_table
For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.
## Virtual Columns {#virtual-columns}
- `_path` — Path to the `URL`. Type: `LowCardinality(String)`.
- `_file` — Resource name of the `URL`. Type: `LowCardinality(String)`.
- `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
## Storage Settings {#storage-settings}
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.

View File

@ -56,7 +56,7 @@ On Linux, macOS and FreeBSD:
./clickhouse client
ClickHouse client version 23.2.1.1501 (official build).
Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 23.2.1 revision 54461.
Connected to ClickHouse server version 23.2.1.
local-host :)
```

View File

@ -16,7 +16,7 @@ ClickHouse provides a native command-line client: `clickhouse-client`. The clien
$ clickhouse-client
ClickHouse client version 20.13.1.5273 (official build).
Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 20.13.1 revision 54442.
Connected to ClickHouse server version 20.13.1.
:)
```

View File

@ -16,9 +16,9 @@ More information about PGO in ClickHouse you can read in the corresponding GitHu
There are two major kinds of PGO: [Instrumentation](https://clang.llvm.org/docs/UsersManual.html#profiling-with-instrumentation) and [Sampling](https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers) (also known as AutoFDO). This guide describes Instrumentation PGO with ClickHouse.
1. Build ClickHouse in Instrumented mode. In Clang it can be done via passing `-fprofile-instr-generate` option to `CXXFLAGS`.
1. Build ClickHouse in Instrumented mode. In Clang it can be done via passing `-fprofile-generate` option to `CXXFLAGS`.
2. Run instrumented ClickHouse on a sample workload. Here you need to use your usual workload. One of the approaches could be using [ClickBench](https://github.com/ClickHouse/ClickBench) as a sample workload. ClickHouse in the instrumentation mode could work slowly so be ready for that and do not run instrumented ClickHouse in performance-critical environments.
3. Recompile ClickHouse once again with `-fprofile-instr-use` compiler flags and profiles that are collected from the previous step.
3. Recompile ClickHouse once again with `-fprofile-use` compiler flags and profiles that are collected from the previous step.
A more detailed guide on how to apply PGO is in the Clang [documentation](https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization).

View File

@ -1835,9 +1835,10 @@ Settings:
- `endpoint` HTTP endpoint for scraping metrics by prometheus server. Start from /.
- `port` Port for `endpoint`.
- `metrics` Flag that sets to expose metrics from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table.
- `events` Flag that sets to expose metrics from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
- `asynchronous_metrics` Flag that sets to expose current metrics values from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
- `metrics` Expose metrics from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table.
- `events` Expose metrics from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
- `asynchronous_metrics` Expose current metrics values from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
- `errors` - Expose the number of errors, by error code, that occurred since the last server restart. This information can also be obtained from the [system.errors](../../operations/system-tables/errors.md) table.
**Example**
@ -1853,6 +1854,7 @@ Settings:
<metrics>true</metrics>
<events>true</events>
<asynchronous_metrics>true</asynchronous_metrics>
<errors>true</errors>
</prometheus>
<!-- highlight-end -->
</clickhouse>
@ -2350,7 +2352,7 @@ Path on the local filesystem to store temporary data for processing large querie
## user_files_path {#user_files_path}
The directory with user files. Used in the table function [file()](../../sql-reference/table-functions/file.md).
The directory with user files. Used in the table function [file()](../../sql-reference/table-functions/file.md), [fileCluster()](../../sql-reference/table-functions/fileCluster.md).
**Example**

View File

@ -149,7 +149,7 @@ Possible values:
- Any positive integer.
- 0 (disable deduplication)
Default value: 100.
Default value: 1000.
The `Insert` command creates one or more blocks (parts). For [insert deduplication](../../engines/table-engines/mergetree-family/replication.md), when writing into replicated tables, ClickHouse writes the hash sums of the created parts into ClickHouse Keeper. Hash sums are stored only for the most recent `replicated_deduplication_window` blocks. The oldest hash sums are removed from ClickHouse Keeper.
A large number of `replicated_deduplication_window` slows down `Inserts` because it needs to compare more entries.

View File

@ -4801,6 +4801,14 @@ a Tuple(
)
```
## allow_experimental_statistic {#allow_experimental_statistic}
Allows defining columns with [statistics](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) and [manipulating statistics](../../engines/table-engines/mergetree-family/mergetree.md#column-statistics).
## allow_statistic_optimize {#allow_statistic_optimize}
Allows using statistic to optimize the order of [prewhere conditions](../../sql-reference/statements/select/prewhere.md).
## analyze_index_with_space_filling_curves
If a table has a space-filling curve in its index, e.g. `ORDER BY mortonEncode(x, y)`, and the query has conditions on its arguments, e.g. `x >= 10 AND x <= 20 AND y >= 20 AND y <= 30`, use the space-filling curve for index analysis.

View File

@ -31,3 +31,26 @@ SELECT * FROM system.numbers LIMIT 10;
10 rows in set. Elapsed: 0.001 sec.
```
You can also limit the output by predicates.
```sql
SELECT * FROM system.numbers WHERE number < 10;
```
```response
┌─number─┐
│ 0 │
│ 1 │
│ 2 │
│ 3 │
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
│ 9 │
└────────┘
10 rows in set. Elapsed: 0.001 sec.
```

View File

@ -18,7 +18,9 @@ Columns:
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Timestamp of the sampling moment with microseconds precision.
- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the sampling moment in nanoseconds.
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision.
When connecting to the server by `clickhouse-client`, you see the string similar to `Connected to ClickHouse server version 19.18.1 revision 54429.`. This field contains the `revision`, but not the `version` of a server.
When connecting to the server by `clickhouse-client`, you see the string similar to `Connected to ClickHouse server version 19.18.1.`. This field contains the `revision`, but not the `version` of a server.
- `trace_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Trace type:
- `Real` represents collecting stack traces by wall-clock time.
- `CPU` represents collecting stack traces by CPU time.

View File

@ -5,7 +5,12 @@ sidebar_position: 6
# any
Selects the first encountered (non-NULL) value, unless all rows have NULL values in that column.
Selects the first encountered value of a column.
By default, it ignores NULL values and returns the first NOT NULL value found in the column. Like [`first_value`](../../../sql-reference/aggregate-functions/reference/first_value.md), it supports `RESPECT NULLS`, in which case it will select the first value passed, regardless of whether it is NULL or not.
The return type of the function is the same as the input, except for LowCardinality, which is discarded. This means that given no rows as input it will return the default value of that type (0 for integers, or Null for a Nullable() column). You might use the `-OrNull` [combinator](../../../sql-reference/aggregate-functions/combinators.md) to modify this behaviour.
The query can be executed in any order and even in a different order each time, so the result of this function is indeterminate.
To get a determinate result, you can use the min or max function instead of any.
@ -13,4 +18,4 @@ In some cases, you can rely on the order of execution. This applies to cases whe
When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function.
- Alias: `any_value`
- Alias: `any_value`, `first_value`.

View File

@ -5,9 +5,12 @@ sidebar_position: 7
# first_value
Selects the first encountered value, similar to `any`, but could accept NULL.
Mostly it should be used with [Window Functions](../../window-functions/index.md).
Without Window Functions the result will be random if the source stream is not ordered.
It is an alias for [`any`](../../../sql-reference/aggregate-functions/reference/any.md) but it was introduced for compatibility with [Window Functions](../../window-functions/index.md), where sometimes it's necessary to process `NULL` values (by default all ClickHouse aggregate functions ignore NULL values).
It supports declaring a modifier to respect nulls (`RESPECT NULLS`), both under [Window Functions](../../window-functions/index.md) and in normal aggregations.
As with `any`, without Window Functions the result will be random if the source stream is not ordered and the return type
matches the input type (Null is only returned if the input is Nullable or -OrNull combinator is added).
## examples
@ -23,15 +26,15 @@ INSERT INTO test_data (a, b) Values (1,null), (2,3), (4, 5), (6,null);
```
### example1
The NULL value is ignored at default.
By default, the NULL value is ignored.
```sql
select first_value(b) from test_data;
```
```text
┌─first_value_ignore_nulls(b)─┐
3 │
└─────────────────────────────
┌─any(b)─┐
│ 3 │
└────────┘
```
### example2
@ -41,9 +44,9 @@ select first_value(b) ignore nulls from test_data
```
```text
┌─first_value_ignore_nulls(b)─┐
3 │
└─────────────────────────────
┌─any(b) IGNORE NULLS ─┐
│ 3 │
└──────────────────────┘
```
### example3
@ -53,9 +56,9 @@ select first_value(b) respect nulls from test_data
```
```text
┌─first_value_respect_nulls(b)─┐
ᴺᵁᴸᴸ │
└──────────────────────────────
┌─any(b) RESPECT NULLS ─┐
│ ᴺᵁᴸᴸ │
└───────────────────────┘
```
### example4
@ -73,8 +76,8 @@ FROM
```
```text
┌─first_value_respect_nulls(b)─┬─first_value(b)─┐
ᴺᵁᴸᴸ │ 3 │
└──────────────────────────────────────────────┘
┌─any_respect_nulls(b)─┬─any(b)─┐
│ ᴺᵁᴸᴸ │ 3 │
└──────────────────────┴────────┘
```

View File

@ -0,0 +1,48 @@
---
toc_priority: 112
---
# groupArraySorted {#groupArraySorted}
Returns an array with the first N items in ascending order.
``` sql
groupArraySorted(N)(column)
```
**Arguments**
- `N` The number of elements to return.
If the parameter is omitted, default value is the size of input.
- `column` The value (Integer, String, Float and other Generic types).
**Example**
Gets the first 10 numbers:
``` sql
SELECT groupArraySorted(10)(number) FROM numbers(100)
```
``` text
┌─groupArraySorted(10)(number)─┐
│ [0,1,2,3,4,5,6,7,8,9] │
└──────────────────────────────┘
```
Gets the String representations of all numbers in the column:
``` sql
SELECT groupArraySorted(str) FROM (SELECT toString(number) as str FROM numbers(5));
```
``` text
┌─groupArraySorted(str)────────┐
│ ['0','1','2','3','4'] │
└──────────────────────────────┘
```

View File

@ -54,6 +54,7 @@ ClickHouse-specific aggregate functions:
- [groupArrayMovingAvg](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
- [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
- [groupArraySample](./grouparraysample.md)
- [groupArraySorted](/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md)
- [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md)
- [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md)
- [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md)

View File

@ -56,7 +56,7 @@ Functions:
## Related content
- [Reducing ClickHouse Storage Cost with the Low Cardinality Type Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/)
- [Reducing ClickHouse Storage Cost with the Low Cardinality Type Lessons from an Instana Engineer](https://altinity.com/blog/2020-5-20-reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer)
- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf)
- Blog: [Optimizing ClickHouse with Schemas and Codecs](https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema)
- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse)

View File

@ -1083,7 +1083,7 @@ Result:
**See also**
- [arrayFold](#arrayFold)
- [arrayFold](#arrayfold)
## arrayReduceInRanges
@ -1175,7 +1175,7 @@ FROM numbers(1,10);
**See also**
- [arrayReduce](#arrayReduce)
- [arrayReduce](#arrayreduce)
## arrayReverse(arr)

View File

@ -2533,13 +2533,14 @@ formatDateTime(Time, Format[, Timezone])
Returns time and date values according to the determined format.
**Replacement fields**
Using replacement fields, you can define a pattern for the resulting string. “Example” column shows formatting result for `2018-01-02 22:33:44`.
| Placeholder | Description | Example |
| Placeholder | Description | Example |
|----------|---------------------------------------------------------|------------|
| %a | abbreviated weekday name (Mon-Sun) | Mon |
| %b | abbreviated month name (Jan-Dec) | Jan |
| %c | month as an integer number (01-12) | 01 |
| %c | month as an integer number (01-12), see 'Note 3' below | 01 |
| %C | year divided by 100 and truncated to integer (00-99) | 20 |
| %d | day of the month, zero-padded (01-31) | 02 |
| %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 |
@ -2553,8 +2554,8 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %i | minute (00-59) | 33 |
| %I | hour in 12h format (01-12) | 10 |
| %j | day of the year (001-366) | 002 |
| %k | hour in 24h format (00-23) | 22 |
| %l | hour in 12h format (01-12) | 09 |
| %k | hour in 24h format (00-23), see 'Note 3' below | 14 |
| %l | hour in 12h format (01-12), see 'Note 3' below | 09 |
| %m | month as an integer number (01-12) | 01 |
| %M | full month name (January-December), see 'Note 2' below | January |
| %n | new-line character () | |
@ -2579,6 +2580,8 @@ Note 1: In ClickHouse versions earlier than v23.4, `%f` prints a single zero (0)
Note 2: In ClickHouse versions earlier than v23.4, `%M` prints the minute (00-59) instead of the full month name (January-December). The previous behavior can be restored using setting `formatdatetime_parsedatetime_m_is_month_name = 0`.
Note 3: In ClickHouse versions earlier than v23.11, function `parseDateTime()` required leading zeros for formatters `%c` (month) and `%l`/`%k` (hour), e.g. `07`. In later versions, the leading zero may be omitted, e.g. `7`. The previous behavior can be restored using setting `parsedatetime_parse_without_leading_zeros = 0`. Note that function `formatDateTime()` by default still prints leading zeros for `%c` and `%l`/`%k` to not break existing use cases. This behavior can be changed by setting `formatdatetime_format_without_leading_zeros = 1`.
**Example**
``` sql

View File

@ -164,7 +164,7 @@ Consider a list of contacts that may specify multiple ways to contact a customer
└──────────┴──────┴───────────┴───────────┘
```
The `mail` and `phone` fields are of type String, but the `icq` field is `UInt32`, so it needs to be converted to `String`.
The `mail` and `phone` fields are of type String, but the `telegram` field is `UInt32`, so it needs to be converted to `String`.
Get the first available contact method for the customer from the contact list:

View File

@ -67,7 +67,45 @@ WHERE macro = 'test';
│ test │ Value │
└───────┴──────────────┘
```
## getClientHTTPHeader
Returns the value of the specified HTTP header. If there is no such header or the request method is not HTTP, it will throw an exception.
**Syntax**
```sql
getClientHTTPHeader(name);
```
**Arguments**
- `name` — HTTP header name. [String](../../sql-reference/data-types/string.md#string)
**Returned value**
Value of the specified header.
Type: [String](../../sql-reference/data-types/string.md#string).
When we use `clickhouse-client` to execute this function, we'll always get empty string, because client doesn't use http protocol.
```sql
SELECT getClientHTTPHeader('test')
```
result:
```text
┌─getClientHTTPHeader('test')─┐
│ │
└─────────────────────────────┘
```
Try to use http request:
```shell
echo "select getClientHTTPHeader('X-Clickhouse-User')" | curl -H 'X-ClickHouse-User: default' -H 'X-ClickHouse-Key: ' 'http://localhost:8123/' -d @-
#result
default
```
## FQDN
Returns the fully qualified domain name of the ClickHouse server.

View File

@ -5,7 +5,7 @@ slug: /en/sql-reference/operators/exists
The `EXISTS` operator checks how many records are in the result of a subquery. If it is empty, then the operator returns `0`. Otherwise, it returns `1`.
`EXISTS` can be used in a [WHERE](../../sql-reference/statements/select/where.md) clause.
`EXISTS` can also be used in a [WHERE](../../sql-reference/statements/select/where.md) clause.
:::tip
References to main query tables and columns are not supported in a subquery.
@ -13,12 +13,26 @@ References to main query tables and columns are not supported in a subquery.
**Syntax**
```sql
WHERE EXISTS(subquery)
``` sql
EXISTS(subquery)
```
**Example**
Query checking existence of values in a subquery:
``` sql
SELECT EXISTS(SELECT * FROM numbers(10) WHERE number > 8), EXISTS(SELECT * FROM numbers(10) WHERE number > 11)
```
Result:
``` text
┌─in(1, _subquery1)─┬─in(1, _subquery2)─┐
│ 1 │ 0 │
└───────────────────┴───────────────────┘
```
Query with a subquery returning several rows:
``` sql

View File

@ -10,7 +10,7 @@ A set of queries that allow changing the table structure.
Syntax:
``` sql
ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ...
ALTER [TEMPORARY] TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ...
```
In the query, specify a list of one or more comma-separated actions.

View File

@ -16,6 +16,7 @@ Most `ALTER TABLE` queries modify table settings or data:
- [INDEX](/docs/en/sql-reference/statements/alter/skipping-index.md)
- [CONSTRAINT](/docs/en/sql-reference/statements/alter/constraint.md)
- [TTL](/docs/en/sql-reference/statements/alter/ttl.md)
- [STATISTIC](/docs/en/sql-reference/statements/alter/statistic.md)
:::note
Most `ALTER TABLE` queries are supported only for [\*MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables, as well as [Merge](/docs/en/engines/table-engines/special/merge.md) and [Distributed](/docs/en/engines/table-engines/special/distributed.md).

View File

@ -0,0 +1,25 @@
---
slug: /en/sql-reference/statements/alter/statistic
sidebar_position: 45
sidebar_label: STATISTIC
---
# Manipulating Column Statistics
The following operations are available:
- `ALTER TABLE [db.]table ADD STATISTIC (columns list) TYPE type` - Adds statistic description to the table's metadata.
- `ALTER TABLE [db.]table DROP STATISTIC (columns list) TYPE type` - Removes statistic description from the table's metadata and deletes statistic files from disk.
- `ALTER TABLE [db.]table CLEAR STATISTIC (columns list) TYPE type` - Deletes statistic files from disk.
- `ALTER TABLE [db.]table MATERIALIZE STATISTIC (columns list) TYPE type` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
The first two commands are lightweight in a sense that they only change metadata or remove files.
Also, they are replicated, syncing statistics metadata via ZooKeeper.
:::note
Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
:::

View File

@ -415,7 +415,7 @@ ExpressionTransform
ExpressionTransform × 2
(SettingQuotaAndLimits)
(ReadFromStorage)
NumbersMt × 2 0 → 1
NumbersRange × 2 0 → 1
```
### EXPLAIN ESTIMATE

View File

@ -67,6 +67,12 @@ SELECT count(*) FROM azureBlobStorage('DefaultEndpointsProtocol=https;AccountNam
└─────────┘
```
## Virtual Columns {#virtual-columns}
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.
**See Also**
- [AzureBlobStorage Table Engine](/docs/en/engines/table-engines/integrations/azureBlobStorage.md)

View File

@ -191,12 +191,13 @@ Query the total number of rows from all files `file002` inside any folder in dir
SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt32');
```
## Virtual Columns
## Virtual Columns {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.
## Settings
## Settings {#settings}
- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default.
- [engine_file_truncate_on_insert](/docs/en/operations/settings/settings.md#engine-file-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.

View File

@ -0,0 +1,85 @@
---
slug: /en/sql-reference/table-functions/fileCluster
sidebar_position: 61
sidebar_label: fileCluster
---
# fileCluster Table Function
Enables simultaneous processing of files matching a specified path across multiple nodes within a cluster. The initiator establishes connections to worker nodes, expands globs in the file path, and delegates file-reading tasks to worker nodes. Each worker node queries the initiator for the next file to process, repeating until all tasks are completed (all files are read).
:::note
This function will operate _correctly_ only in case the set of files matching the initially specified path is identical across all nodes, and their content is consistent among different nodes.
In case these files differ between nodes, the return value cannot be predetermined and depends on the order in which worker nodes request tasks from the initiator.
:::
**Syntax**
``` sql
fileCluster(cluster_name, path[, format, structure, compression_method])
```
**Arguments**
- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file also supports [globs](#globs_in_path).
- `format` — [Format](../../interfaces/formats.md#formats) of the files. Type: [String](../../sql-reference/data-types/string.md).
- `structure` — Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md).
- `compression_method` — Compression method. Supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.
**Returned value**
A table with the specified format and structure and with data from files matching the specified path.
**Example**
Given a cluster named `my_cluster` and given the following value of setting `user_files_path`:
``` bash
$ grep user_files_path /etc/clickhouse-server/config.xml
<user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
```
Also, given there are files `file1.csv` and `file2.csv` inside `user_files_path` of each cluster node, and their content is identical across different nodes:
```bash
$ cat /var/lib/clickhouse/user_files/file1.csv
1,"file1"
11,"file11"
$ cat /var/lib/clickhouse/user_files/file2.csv
2,"file2"
22,"file22"
```
For example, one can create these files by executing these two queries on every cluster node:
```sql
INSERT INTO TABLE FUNCTION file('file1.csv', 'CSV', 'i UInt32, s String') VALUES (1,'file1'), (11,'file11');
INSERT INTO TABLE FUNCTION file('file2.csv', 'CSV', 'i UInt32, s String') VALUES (2,'file2'), (22,'file22');
```
Now, read data contents of `file1.csv` and `file2.csv` via `fileCluster` table function:
```sql
SELECT * from fileCluster(
'my_cluster', 'file{1,2}.csv', 'CSV', 'i UInt32, s String'
) ORDER BY (i, s)
```
```
┌──i─┬─s──────┐
│ 1 │ file1 │
│ 11 │ file11 │
└────┴────────┘
┌──i─┬─s──────┐
│ 2 │ file2 │
│ 22 │ file22 │
└────┴────────┘
```
## Globs in Path {#globs_in_path}
All patterns supported by the [File](../../sql-reference/table-functions/file.md#globs-in-path) table function are also supported by `fileCluster`.
**See Also**
- [File table function](../../sql-reference/table-functions/file.md)

View File

@ -0,0 +1,86 @@
---
slug: /en/sql-reference/table-functions/fuzzJSON
sidebar_position: 75
sidebar_label: fuzzJSON
---
# fuzzJSON
Perturbs a JSON string with random variations.
``` sql
fuzzJSON({ named_collection [option=value [,..]] | json_str[, random_seed] })
```
**Arguments**
- `named_collection`- A [NAMED COLLECTION](/docs/en/sql-reference/statements/create/named-collection.md).
- `option=value` - Named collection optional parameters and their values.
- `json_str` (String) - The source string representing structured data in JSON format.
- `random_seed` (UInt64) - Manual random seed for producing stable results.
- `reuse_output` (boolean) - Reuse the output from a fuzzing process as input for the next fuzzer.
- `max_output_length` (UInt64) - Maximum allowable length of the generated or perturbed JSON string.
- `probability` (Float64) - The probability to fuzz a JSON field (a key-value pair). Must be within [0, 1] range.
- `max_nesting_level` (UInt64) - The maximum allowed depth of nested structures within the JSON data.
- `max_array_size` (UInt64) - The maximum allowed size of a JSON array.
- `max_object_size` (UInt64) - The maximum allowed number of fields on a single level of a JSON object.
- `max_string_value_length` (UInt64) - The maximum length of a String value.
- `min_key_length` (UInt64) - The minimum key length. Should be at least 1.
- `max_key_length` (UInt64) - The maximum key length. Should be greater than or equal to the `min_key_length`, if specified.
**Returned Value**
A table object with a single column containing perturbed JSON strings.
## Usage Example
``` sql
CREATE NAMED COLLECTION json_fuzzer AS json_str='{}';
SELECT * FROM fuzzJSON(json_fuzzer) LIMIT 3;
```
``` text
{"52Xz2Zd4vKNcuP2":true}
{"UPbOhOQAdPKIg91":3405264103600403024}
{"X0QUWu8yT":[]}
```
``` sql
SELECT * FROM fuzzJSON(json_fuzzer, json_str='{"name" : "value"}', random_seed=1234) LIMIT 3;
```
``` text
{"key":"value", "mxPG0h1R5":"L-YQLv@9hcZbOIGrAn10%GA"}
{"BRE3":true}
{"key":"value", "SWzJdEJZ04nrpSfy":[{"3Q23y":[]}]}
```
``` sql
SELECT * FROM fuzzJSON(json_fuzzer, json_str='{"students" : ["Alice", "Bob"]}', reuse_output=true) LIMIT 3;
```
``` text
{"students":["Alice", "Bob"], "nwALnRMc4pyKD9Krv":[]}
{"students":["1rNY5ZNs0wU&82t_P", "Bob"], "wLNRGzwDiMKdw":[{}]}
{"xeEk":["1rNY5ZNs0wU&82t_P", "Bob"], "wLNRGzwDiMKdw":[{}, {}]}
```
``` sql
SELECT * FROM fuzzJSON(json_fuzzer, json_str='{"students" : ["Alice", "Bob"]}', max_output_length=512) LIMIT 3;
```
``` text
{"students":["Alice", "Bob"], "BREhhXj5":true}
{"NyEsSWzJdeJZ04s":["Alice", 5737924650575683711, 5346334167565345826], "BjVO2X9L":true}
{"NyEsSWzJdeJZ04s":["Alice", 5737924650575683711, 5346334167565345826], "BjVO2X9L":true, "k1SXzbSIz":[{}]}
```
``` sql
SELECT * FROM fuzzJSON('{"id":1}', 1234) LIMIT 3;
```
``` text
{"id":1, "mxPG0h1R5":"L-YQLv@9hcZbOIGrAn10%GA"}
{"BRjE":16137826149911306846}
{"XjKE":15076727133550123563}
```

View File

@ -94,8 +94,9 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin
## Virtual Columns
- `_path` — Path to the file.
- `_file` — Name of the file.
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
## Storage Settings {#storage-settings}

View File

@ -17,6 +17,8 @@ The following queries are equivalent:
SELECT * FROM numbers(10);
SELECT * FROM numbers(0, 10);
SELECT * FROM system.numbers LIMIT 10;
SELECT * FROM system.numbers WHERE number BETWEEN 0 AND 9;
SELECT * FROM system.numbers WHERE number IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
```
Examples:

View File

@ -228,6 +228,12 @@ FROM s3(
LIMIT 5;
```
## Virtual Columns {#virtual-columns}
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.
## Storage Settings {#storage-settings}
- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.

View File

@ -50,8 +50,9 @@ Character `|` inside patterns is used to specify failover addresses. They are it
## Virtual Columns
- `_path` — Path to the `URL`.
- `_file` — Resource name of the `URL`.
- `_path` — Path to the `URL`. Type: `LowCardinality(String)`.
- `_file` — Resource name of the `URL`. Type: `LowCardinality(String)`.
- `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
## Storage Settings {#storage-settings}

View File

@ -14,7 +14,7 @@ ClickHouse предоставляет собственный клиент ком
$ clickhouse-client
ClickHouse client version 20.13.1.5273 (official build).
Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 20.13.1 revision 54442.
Connected to ClickHouse server version 20.13.1.
:)
```

View File

@ -1215,6 +1215,7 @@ ClickHouse использует потоки из глобального пул
- `metrics` флаг для экспорта текущих значений метрик из таблицы [system.metrics](../system-tables/metrics.md#system_tables-metrics).
- `events` флаг для экспорта текущих значений метрик из таблицы [system.events](../system-tables/events.md#system_tables-events).
- `asynchronous_metrics` флаг для экспорта текущих значений значения метрик из таблицы [system.asynchronous_metrics](../system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics).
- `errors` - флаг для экспорта количества ошибок (по кодам), случившихся с момента последнего рестарта сервера. Эта информация может быть получена из таблицы [system.errors](../system-tables/errors.md#system_tables-errors).
**Пример**
@ -1225,6 +1226,7 @@ ClickHouse использует потоки из глобального пул
<metrics>true</metrics>
<events>true</events>
<asynchronous_metrics>true</asynchronous_metrics>
<errors>true</errors>
</prometheus>
```
@ -1676,7 +1678,7 @@ TCP порт для защищённого обмена данными с кли
## user_files_path {#server_configuration_parameters-user_files_path}
Каталог с пользовательскими файлами. Используется в табличной функции [file()](../../operations/server-configuration-parameters/settings.md).
Каталог с пользовательскими файлами. Используется в табличных функциях [file()](../../sql-reference/table-functions/file.md) и [fileCluster()](../../sql-reference/table-functions/fileCluster.md).
**Пример**

View File

@ -119,7 +119,7 @@ Eсли суммарное число активных кусков во все
- Положительное целое число.
- 0 (без ограничений).
Значение по умолчанию: 100.
Значение по умолчанию: 1000.
Команда `Insert` создает один или несколько блоков (кусков). При вставке в Replicated таблицы ClickHouse для [дедупликации вставок](../../engines/table-engines/mergetree-family/replication.md) записывает в Zookeeper хеш-суммы созданных кусков. Но хранятся только последние `replicated_deduplication_window` хеш-сумм. Самые старые хеш-суммы удаляются из Zookeeper.
Большое значение `replicated_deduplication_window` замедляет `Insert`, так как приходится сравнивать большее количество хеш-сумм.

View File

@ -19,7 +19,7 @@ ClickHouse создает эту таблицу когда установлен
- `revision`([UInt32](../../sql-reference/data-types/int-uint.md)) — ревизия сборки сервера ClickHouse.
Во время соединения с сервером через `clickhouse-client`, вы видите строку похожую на `Connected to ClickHouse server version 19.18.1 revision 54429.`. Это поле содержит номер после `revision`, но не содержит строку после `version`.
Во время соединения с сервером через `clickhouse-client` вы видите строку, похожую на `Connected to ClickHouse server version 19.18.1.`. Это поле содержит номер ревизии (`revision`), но не версию (`version`) сервера.
- `trace_type`([Enum8](../../sql-reference/data-types/enum.md)) — тип трассировки:

View File

@ -11,7 +11,7 @@ sidebar_label: "Манипуляции со столбцами"
Синтаксис:
``` sql
ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ...
ALTER [TEMPORARY] TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ...
```
В запросе можно указать сразу несколько действий над одной таблицей через запятую.

View File

@ -371,7 +371,7 @@ ExpressionTransform
ExpressionTransform × 2
(SettingQuotaAndLimits)
(ReadFromStorage)
NumbersMt × 2 0 → 1
NumbersRange × 2 0 → 1
```
### EXPLAIN ESTIMATE {#explain-estimate}

View File

@ -13,7 +13,7 @@ sidebar_label: file
**Синтаксис**
``` sql
file(path [,format] [,structure])
file(path [,format] [,structure] [,compression])
```
**Параметры**
@ -21,6 +21,7 @@ file(path [,format] [,structure])
- `path` — относительный путь до файла от [user_files_path](../../sql-reference/table-functions/file.md#server_configuration_parameters-user_files_path). Путь к файлу поддерживает следующие шаблоны в режиме доступа только для чтения `*`, `?`, `{abc,def}` и `{N..M}`, где `N`, `M` — числа, `'abc', 'def'` — строки.
- `format` — [формат](../../interfaces/formats.md#formats) файла.
- `structure` — структура таблицы. Формат: `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — Используемый тип сжатия для запроса SELECT или желаемый тип сжатия для запроса INSERT. Поддерживаемые типы сжатия: `gz`, `br`, `xz`, `zst`, `lz4` и `bz2`.
**Возвращаемое значение**

View File

@ -0,0 +1,84 @@
---
slug: /ru/sql-reference/table-functions/fileCluster
sidebar_position: 38
sidebar_label: fileCluster
---
# fileCluster
Позволяет одновременно обрабатывать файлы, находящиеся по указанному пути, на нескольких узлах внутри кластера. Узел-инициатор устанавливает соединения с рабочими узлами (worker nodes), раскрывает шаблоны в пути к файлам и отдаёт задачи по чтению файлов рабочим узлам. Рабочий узел запрашивает у инициатора путь к следующему файлу для обработки, повторяя до тех пор, пока не завершатся все задачи (то есть пока не будут обработаны все файлы).
:::note
Эта табличная функция будет работать _корректно_ только в случае, если набор файлов, соответствующих изначально указанному пути, одинаков на всех узлах и содержание этих файлов идентично на различных узлах. В случае, если эти файлы различаются между узлами, результат не предопределён и зависит от очерёдности, с которой рабочие узлы будут запрашивать задачи у инициатора.
:::
**Синтаксис**
``` sql
fileCluster(cluster_name, path[, format, structure, compression_method])
```
**Аргументы**
- `cluster_name` — имя кластера, используемое для создания набора адресов и параметров подключения к удаленным и локальным серверам.
- `path` — относительный путь до файла от [user_files_path](../../sql-reference/table-functions/file.md#server_configuration_parameters-user_files_path). Путь к файлу поддерживает [шаблоны поиска (globs)](#globs_in_path).
- `format` — [формат](../../interfaces/formats.md#formats) файла.
- `structure` — структура таблицы. Формат: `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression_method` — Используемый тип сжатия. Поддерживаемые типы: `gz`, `br`, `xz`, `zst`, `lz4` и `bz2`.
**Возвращаемое значение**
Таблица с указанным форматом и структурой, содержащая данные из файлов, соответствующих указанному пути.
**Пример**
Пусть есть кластер с именем `my_cluster`, а также установлено нижеследующее значение параметра `user_files_path`:
``` bash
$ grep user_files_path /etc/clickhouse-server/config.xml
<user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
```
Пусть также на каждом узле кластера в директории `user_files_path` находятся файлы `file1.csv` и `file2.csv`, и их содержимое идентично на разных узлах:
```bash
$ cat /var/lib/clickhouse/user_files/file1.csv
1,"file1"
11,"file11"
$ cat /var/lib/clickhouse/user_files/file2.csv
2,"file2"
22,"file22"
```
Например, эти файлы можно создать, выполнив на каждом узле два запроса:
```sql
INSERT INTO TABLE FUNCTION file('file1.csv', 'CSV', 'i UInt32, s String') VALUES (1,'file1'), (11,'file11');
INSERT INTO TABLE FUNCTION file('file2.csv', 'CSV', 'i UInt32, s String') VALUES (2,'file2'), (22,'file22');
```
Прочитаем содержимое файлов `file1.csv` и `file2.csv` с помощью табличной функции `fileCluster`:
```sql
SELECT * from fileCluster(
'my_cluster', 'file{1,2}.csv', 'CSV', 'i UInt32, s String'
) ORDER BY (i, s)
```
```
┌──i─┬─s──────┐
│ 1 │ file1 │
│ 11 │ file11 │
└────┴────────┘
┌──i─┬─s──────┐
│ 2 │ file2 │
│ 22 │ file22 │
└────┴────────┘
```
## Шаблоны поиска в компонентах пути {#globs_in_path}
Поддерживаются все шаблоны поиска, что поддерживаются табличной функцией [File](../../sql-reference/table-functions/file.md#globs-in-path).
**Смотрите также**
- [File (табличная функция)](../../sql-reference/table-functions/file.md)

View File

@ -14,7 +14,7 @@ ClickHouse提供了一个原生命令行客户端`clickhouse-client`客户端支
$ clickhouse-client
ClickHouse client version 19.17.1.1579 (official build).
Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 19.17.1 revision 54428.
Connected to ClickHouse server version 19.17.1.
:)
```

View File

@ -22,7 +22,7 @@ ClickHouse创建此表时 [trace_log](../../operations/server-configuration-para
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision.
通过以下方式连接到服务器 `clickhouse-client`,你看到的字符串类似于 `Connected to ClickHouse server version 19.18.1 revision 54429.`. 该字段包含 `revision`,但不是 `version` 的服务器。
通过以下方式连接到服务器 `clickhouse-client`,你看到的字符串类似于 `Connected to ClickHouse server version 19.18.1.`. 该字段包含 `revision`,但不是 `version` 的服务器。
- `timer_type` ([枚举8](../../sql-reference/data-types/enum.md)) — Timer type:

View File

@ -44,6 +44,8 @@ contents:
dst: /usr/bin/clickhouse-odbc-bridge
- src: root/usr/share/bash-completion/completions
dst: /usr/share/bash-completion/completions
- src: root/usr/share/clickhouse
dst: /usr/share/clickhouse
# docs
- src: ../AUTHORS
dst: /usr/share/doc/clickhouse-common-static/AUTHORS

View File

@ -457,3 +457,10 @@ endif()
if (ENABLE_FUZZING)
add_compile_definitions(FUZZING_MODE=1)
endif ()
# When the ch_contrib::protobuf target exists, read the list of .proto files it
# publishes through its `google_proto_files` property and install each of them
# into ${CMAKE_INSTALL_DATAROOTDIR}/clickhouse/protos/google/protobuf.
if (TARGET ch_contrib::protobuf)
get_property(google_proto_files TARGET ch_contrib::protobuf PROPERTY google_proto_files)
foreach (proto_file IN LISTS google_proto_files)
install(FILES ${proto_file} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/clickhouse/protos/google/protobuf)
endforeach()
endif ()

View File

@ -306,6 +306,10 @@ void Client::initialize(Poco::Util::Application & self)
/// Set path for format schema files
if (config().has("format_schema_path"))
global_context->setFormatSchemaPath(fs::weakly_canonical(config().getString("format_schema_path")));
/// Set the path for google proto files
if (config().has("google_protos_path"))
global_context->setGoogleProtosPath(fs::weakly_canonical(config().getString("google_protos_path")));
}
@ -489,8 +493,7 @@ void Client::connect()
if (is_interactive)
{
std::cout << "Connected to " << server_name << " server version " << server_version << " revision " << server_revision << "."
<< std::endl << std::endl;
std::cout << "Connected to " << server_name << " server version " << server_version << "." << std::endl << std::endl;
auto client_version_tuple = std::make_tuple(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH);
auto server_version_tuple = std::make_tuple(server_version_major, server_version_minor, server_version_patch);

View File

@ -37,7 +37,7 @@
<production>{display_name} \e[1;31m:)\e[0m </production> <!-- if it matched to the substring "production" in the server display name -->
</prompt_by_server_display_name>
<!--
<!--
Settings adjustable via command-line parameters
can take their defaults from that config file, see examples:
@ -58,6 +58,9 @@
The same can be done on user-level configuration, just create & adjust: ~/.clickhouse-client/config.xml
-->
<!-- Directory containing the proto files for the well-known Protobuf types.
-->
<google_protos_path>/usr/share/clickhouse/protos/</google_protos_path>
<!-- Analog of .netrc -->
<![CDATA[

View File

@ -41,6 +41,7 @@
<min_session_timeout_ms>10000</min_session_timeout_ms>
<session_timeout_ms>100000</session_timeout_ms>
<raft_logs_level>information</raft_logs_level>
<compress_logs>false</compress_logs>
<!-- All settings listed in https://github.com/ClickHouse/ClickHouse/blob/master/src/Coordination/CoordinationSettings.h -->
</coordination_settings>

View File

@ -1279,6 +1279,8 @@ try
global_context->setHTTPHeaderFilter(*config);
global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop);
global_context->setClientHTTPHeaderForbiddenHeaders(server_settings_.get_client_http_header_forbidden_headers);
global_context->setAllowGetHTTPHeaderFunction(server_settings_.allow_get_client_http_header);
global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop);
ConcurrencyControl::SlotCount concurrent_threads_soft_limit = ConcurrencyControl::Unlimited;
@ -1575,6 +1577,10 @@ try
global_context->setFormatSchemaPath(format_schema_path);
fs::create_directories(format_schema_path);
/// Set the path for google proto files
if (config().has("google_protos_path"))
global_context->setGoogleProtosPath(fs::weakly_canonical(config().getString("google_protos_path")));
/// Set path for filesystem caches
fs::path filesystem_caches_path(config().getString("filesystem_caches_path", ""));
if (!filesystem_caches_path.empty())

View File

@ -3,6 +3,7 @@
<tmp_path replace="replace">./tmp/</tmp_path>
<user_files_path replace="replace">./user_files/</user_files_path>
<format_schema_path replace="replace">./format_schemas/</format_schema_path>
<google_protos_path replace="replace">../../contrib/google-protobuf/src/</google_protos_path>
<access_control_path replace="replace">./access/</access_control_path>
<top_level_domains_path replace="replace">./top_level_domains/</top_level_domains_path>
</clickhouse>

View File

@ -1428,6 +1428,10 @@
-->
<format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>
<!-- Directory containing the proto files for the well-known Protobuf types.
-->
<google_protos_path>/usr/share/clickhouse/protos/</google_protos_path>
<!-- Default query masking rules, matching lines would be replaced with something else in the logs
(both text logs and system.query_log).
name - name for the rule (optional)

View File

@ -51,6 +51,11 @@ enum class AccessType
M(ALTER_CLEAR_INDEX, "CLEAR INDEX", TABLE, ALTER_INDEX) \
M(ALTER_INDEX, "INDEX", GROUP, ALTER_TABLE) /* allows to execute ALTER ORDER BY or ALTER {ADD|DROP...} INDEX */\
\
M(ALTER_ADD_STATISTIC, "ALTER ADD STATISTIC", TABLE, ALTER_STATISTIC) \
M(ALTER_DROP_STATISTIC, "ALTER DROP STATISTIC", TABLE, ALTER_STATISTIC) \
M(ALTER_MATERIALIZE_STATISTIC, "ALTER MATERIALIZE STATISTIC", TABLE, ALTER_STATISTIC) \
M(ALTER_STATISTIC, "STATISTIC", GROUP, ALTER_TABLE) /* allows to execute ALTER STATISTIC */\
\
M(ALTER_ADD_PROJECTION, "ADD PROJECTION", TABLE, ALTER_PROJECTION) \
M(ALTER_DROP_PROJECTION, "DROP PROJECTION", TABLE, ALTER_PROJECTION) \
M(ALTER_MATERIALIZE_PROJECTION, "MATERIALIZE PROJECTION", TABLE, ALTER_PROJECTION) \

View File

@ -1,26 +1,213 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/HelpersMinMaxAny.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <base/defines.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int INCORRECT_DATA;
extern const int LOGICAL_ERROR;
}
namespace
{
/// Aggregation state for the `*_respect_nulls` variants of any / anyLast.
/// Besides the stored value it tracks whether anything was stored at all and whether that was a NULL.
struct AggregateFunctionAnyRespectNullsData
{
    /// The numeric values are written to and read from the serialized state, so they must stay stable.
    enum Status : UInt8
    {
        NotSet = 1,
        SetNull = 2,
        SetOther = 3
    };

    /// What the state currently holds: nothing, a NULL, or a regular value.
    Status status = Status::NotSet;

    /// The stored value; only written by the owning function when it marks the state as SetOther.
    Field value;

    /// True once either a NULL or a regular value has been recorded.
    bool isSet() const
    {
        const bool nothing_recorded = (status == Status::NotSet);
        return !nothing_recorded;
    }

    /// Mark the state as holding a NULL.
    void setNull()
    {
        status = Status::SetNull;
    }

    /// Mark the state as holding a regular (non-NULL) value.
    void setOther()
    {
        status = Status::SetOther;
    }
};
/// Aggregate function that keeps a single argument value and treats NULL as a value in its own right.
/// With First = true it is `any_respect_nulls` (once the state is set, later rows are ignored);
/// with First = false it is `anyLast_respect_nulls` (every new row overwrites the state).
template <bool First>
class AggregateFunctionAnyRespectNulls final
    : public IAggregateFunctionDataHelper<AggregateFunctionAnyRespectNullsData, AggregateFunctionAnyRespectNulls<First>>
{
public:
    using Data = AggregateFunctionAnyRespectNullsData;

    /// Default serialization of the argument type; used to (de)serialize the stored value.
    SerializationPtr serialization;

    /// Whether the argument type is Nullable; a NULL state is only legal in that case.
    const bool returns_nullable_type = false;

    /// `type` is the single argument type; the same type is handed to the base class as the result type.
    explicit AggregateFunctionAnyRespectNulls(const DataTypePtr & type)
        : IAggregateFunctionDataHelper<Data, AggregateFunctionAnyRespectNulls<First>>({type}, {}, type)
        , serialization(type->getDefaultSerialization())
        , returns_nullable_type(type->isNullable())
    {
    }

    String getName() const override
    {
        if constexpr (First)
            return "any_respect_nulls";
        else
            return "anyLast_respect_nulls";
    }

    bool allocatesMemoryInArena() const override { return false; }

    /// Record a NULL. For the `First` variant an already set state is left untouched.
    void addNull(AggregateDataPtr __restrict place) const
    {
        chassert(returns_nullable_type);
        auto & d = this->data(place);
        if (First && d.isSet())
            return;
        d.setNull();
    }

    /// Record the value at `row_num` of the first argument column (a NULL is routed to addNull).
    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
    {
        if (columns[0]->isNullable())
        {
            if (columns[0]->isNullAt(row_num))
                return addNull(place);
        }
        auto & d = this->data(place);
        if (First && d.isSet())
            return;
        d.setOther();
        columns[0]->get(row_num, d.value);
    }

    /// All added rows are default values: for a Nullable column that is recorded as NULL,
    /// otherwise row 0 of the column is added once.
    void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override
    {
        if (columns[0]->isNullable())
            addNull(place);
        else
            add(place, columns, 0, arena);
    }

    /// Only one row of the batch can matter: the first one for `First`, the last one otherwise.
    /// With an `-If` condition column it is the first/last row whose flag is set.
    void addBatchSinglePlace(
        size_t row_begin, size_t row_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos)
        const override
    {
        if (if_argument_pos >= 0)
        {
            const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
            size_t size = row_end - row_begin;
            for (size_t i = 0; i < size; ++i)
            {
                size_t pos = First ? row_begin + i : row_end - 1 - i;
                if (flags[pos])
                {
                    add(place, columns, pos, arena);
                    break;
                }
            }
        }
        else if (row_begin < row_end)
        {
            /// The guard above matters: for an empty batch there is no row to read, and
            /// `row_end - 1` would wrap around when row_end is 0.
            size_t pos = First ? row_begin : row_end - 1;
            add(place, columns, pos, arena);
        }
    }

    void addBatchSinglePlaceNotNull(
        size_t, size_t, AggregateDataPtr __restrict, const IColumn **, const UInt8 *, Arena *, ssize_t) const override
    {
        /// This should not happen since it means somebody else has preprocessed the data (NULLs or IFs) and might
        /// have discarded values that we need (NULLs)
        throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionAnyRespectNulls::addBatchSinglePlaceNotNull called");
    }

    /// Take over the state of `rhs` if it is set. For the `First` variant an already set `place` wins.
    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
    {
        auto & d = this->data(place);
        if (First && d.isSet())
            return;

        auto & other = this->data(rhs);
        if (other.isSet())
        {
            d.status = other.status;
            d.value = other.value;
        }
    }

    /// Format: one status byte, followed by the serialized value only when the status is SetOther.
    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        auto & d = this->data(place);
        UInt8 k = d.status;

        writeBinaryLittleEndian<UInt8>(k, buf);
        if (k == Data::Status::SetOther)
            serialization->serializeBinary(d.value, buf, {});
    }

    /// Reads the format written by serialize(). Throws INCORRECT_DATA for an unknown status byte
    /// or for a NULL state when the argument type is not Nullable.
    void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
    {
        auto & d = this->data(place);
        UInt8 k = Data::Status::NotSet;
        readBinaryLittleEndian<UInt8>(k, buf);
        d.status = static_cast<Data::Status>(k);
        if (d.status == Data::Status::NotSet)
            return;
        else if (d.status == Data::Status::SetNull)
        {
            if (!returns_nullable_type)
                throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type (NULL) in non-nullable {}State", getName());
            return;
        }
        else if (d.status == Data::Status::SetOther)
            serialization->deserializeBinary(d.value, buf, {});
        else
            /// Widen to an unsigned integer so that bytes above 127 are not reported as negative numbers.
            throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type ({}) in {}State", static_cast<UInt32>(k), getName());
    }

    /// Insert the stored value; for an unset or NULL state the column's default is inserted instead.
    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        auto & d = this->data(place);
        if (d.status == Data::Status::SetOther)
            to.insert(d.value);
        else
            to.insertDefault();
    }

    /// Hands back the original function unchanged: this class deals with NULLs itself
    /// (presumably so that no generic NULL-skipping adapter is wrapped around it).
    AggregateFunctionPtr getOwnNullAdapter(
        const AggregateFunctionPtr & original_function,
        const DataTypes & /*arguments*/,
        const Array & /*params*/,
        const AggregateFunctionProperties & /*properties*/) const override
    {
        return original_function;
    }
};
/// Validates that the call has no parameters and exactly one argument, then allocates the
/// `First` / last flavour of AggregateFunctionAnyRespectNulls for that argument type.
/// The result is a raw pointer; the callers in this file wrap it into an AggregateFunctionPtr.
template <bool First>
IAggregateFunction * createAggregateFunctionSingleValueRespectNulls(
    const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
    assertNoParameters(name, parameters);
    assertUnary(name, argument_types);

    const DataTypePtr & argument_type = argument_types[0];
    return new AggregateFunctionAnyRespectNulls<First>(argument_type);
}
/// Factory callback for `any`: builds the single-value aggregate function over AggregateFunctionAnyData
/// and wraps the result into an AggregateFunctionPtr.
AggregateFunctionPtr createAggregateFunctionAny(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
    auto * function
        = createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData>(name, argument_types, parameters, settings);
    return AggregateFunctionPtr(function);
}
template <bool RespectNulls = false>
AggregateFunctionPtr createAggregateFunctionNullableAny(
AggregateFunctionPtr createAggregateFunctionAnyRespectNulls(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(
createAggregateFunctionSingleNullableValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData, RespectNulls>(
name, argument_types, parameters, settings));
return AggregateFunctionPtr(createAggregateFunctionSingleValueRespectNulls<true>(name, argument_types, parameters, settings));
}
AggregateFunctionPtr createAggregateFunctionAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
@ -28,13 +215,10 @@ AggregateFunctionPtr createAggregateFunctionAnyLast(const std::string & name, co
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyLastData>(name, argument_types, parameters, settings));
}
template <bool RespectNulls = false>
AggregateFunctionPtr createAggregateFunctionNullableAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
AggregateFunctionPtr createAggregateFunctionAnyLastRespectNulls(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleNullableValue<
AggregateFunctionsSingleValue,
AggregateFunctionAnyLastData,
RespectNulls>(name, argument_types, parameters, settings));
return AggregateFunctionPtr(createAggregateFunctionSingleValueRespectNulls<false>(name, argument_types, parameters, settings));
}
AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
@ -46,26 +230,28 @@ AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, c
void registerAggregateFunctionsAny(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
AggregateFunctionProperties default_properties = {.returns_default_when_only_null = false, .is_order_dependent = true};
AggregateFunctionProperties default_properties_for_respect_nulls
= {.returns_default_when_only_null = false, .is_order_dependent = true, .is_window_function = true};
factory.registerFunction("any", { createAggregateFunctionAny, properties });
factory.registerFunction("any", {createAggregateFunctionAny, default_properties});
factory.registerAlias("any_value", "any", AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("anyLast", { createAggregateFunctionAnyLast, properties });
factory.registerFunction("anyHeavy", { createAggregateFunctionAnyHeavy, properties });
factory.registerAlias("first_value", "any", AggregateFunctionFactory::CaseInsensitive);
// Synonyms for use as window functions.
factory.registerFunction("first_value",
{ createAggregateFunctionAny, properties },
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("first_value_respect_nulls",
{ createAggregateFunctionNullableAny<true>, properties },
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("last_value",
{ createAggregateFunctionAnyLast, properties },
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("last_value_respect_nulls",
{ createAggregateFunctionNullableAnyLast<true>, properties },
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("any_respect_nulls", {createAggregateFunctionAnyRespectNulls, default_properties_for_respect_nulls});
factory.registerAlias("any_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("first_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("anyLast", {createAggregateFunctionAnyLast, default_properties});
factory.registerAlias("last_value", "anyLast", AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("anyLast_respect_nulls", {createAggregateFunctionAnyLastRespectNulls, default_properties_for_respect_nulls});
factory.registerAlias("last_value_respect_nulls", "anyLast_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("anyHeavy", {createAggregateFunctionAnyHeavy, default_properties});
factory.registerNullsActionTransformation("any", "any_respect_nulls");
factory.registerNullsActionTransformation("anyLast", "anyLast_respect_nulls");
}
}

View File

@ -77,7 +77,7 @@ public:
if (if_argument_pos >= 0)
{
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
data(place).count += countBytesInFilter(flags);
data(place).count += countBytesInFilter(flags.data(), row_begin, row_end);
}
else
{
@ -116,7 +116,7 @@ public:
/// Return normalized state type: count()
AggregateFunctionProperties properties;
return std::make_shared<DataTypeAggregateFunction>(
AggregateFunctionFactory::instance().get(getName(), {}, {}, properties), DataTypes{}, Array{});
AggregateFunctionFactory::instance().get(getName(), NullsAction::EMPTY, {}, {}, properties), DataTypes{}, Array{});
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
@ -267,7 +267,7 @@ public:
/// Return normalized state type: count()
AggregateFunctionProperties properties;
return std::make_shared<DataTypeAggregateFunction>(
AggregateFunctionFactory::instance().get(getName(), {}, {}, properties), DataTypes{}, Array{});
AggregateFunctionFactory::instance().get(getName(), NullsAction::EMPTY, {}, {}, properties), DataTypes{}, Array{});
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override

View File

@ -1,23 +1,11 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.h>
#include <DataTypes/DataTypeAggregateFunction.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/typeid_cast.h>
#include <Common/CurrentThread.h>
#include <Poco/String.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
static constexpr size_t MAX_AGGREGATE_FUNCTION_NAME_LENGTH = 1000;
@ -28,10 +16,11 @@ struct Settings;
namespace ErrorCodes
{
extern const int UNKNOWN_AGGREGATE_FUNCTION;
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_AGGREGATION;
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int TOO_LARGE_STRING_SIZE;
extern const int UNKNOWN_AGGREGATE_FUNCTION;
}
const String & getAggregateFunctionCanonicalNameIfAny(const String & name)
@ -59,6 +48,23 @@ void AggregateFunctionFactory::registerFunction(const String & name, Value creat
}
}
/// Registers a two-way mapping between an aggregate function that ignores NULLs and its NULL-respecting counterpart:
///   `source_ignores_nulls` + RESPECT NULLS is resolved to `target_respect_nulls`,
///   `target_respect_nulls` + IGNORE NULLS is resolved to `source_ignores_nulls`.
/// Both names must already be present in `aggregate_functions`, and each name may take part in only one mapping.
/// Any violation is reported as LOGICAL_ERROR.
void AggregateFunctionFactory::registerNullsActionTransformation(const String & source_ignores_nulls, const String & target_respect_nulls)
{
    /// The function name is passed explicitly: without it the '{}' placeholder would be left unfilled in the message.
    if (!aggregate_functions.contains(source_ignores_nulls))
        throw Exception(
            ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Source aggregation '{}' not found", source_ignores_nulls);

    if (!aggregate_functions.contains(target_respect_nulls))
        throw Exception(
            ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Target aggregation '{}' not found", target_respect_nulls);

    /// `emplace` reports through `.second` whether the key was newly inserted; an existing key means a duplicate registration.
    if (!respect_nulls.emplace(source_ignores_nulls, target_respect_nulls).second)
        throw Exception(
            ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Assignment from '{}' is not unique", source_ignores_nulls);

    if (!ignore_nulls.emplace(target_respect_nulls, source_ignores_nulls).second)
        throw Exception(
            ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Assignment from '{}' is not unique", target_respect_nulls);
}
static DataTypes convertLowCardinalityTypesToNested(const DataTypes & types)
{
DataTypes res_types;
@ -70,7 +76,11 @@ static DataTypes convertLowCardinalityTypesToNested(const DataTypes & types)
}
AggregateFunctionPtr AggregateFunctionFactory::get(
const String & name, const DataTypes & argument_types, const Array & parameters, AggregateFunctionProperties & out_properties) const
const String & name,
NullsAction action,
const DataTypes & argument_types,
const Array & parameters,
AggregateFunctionProperties & out_properties) const
{
/// This to prevent costly string manipulation in parsing the aggregate function combinators.
/// Example: avgArrayArrayArrayArray...(1000 times)...Array
@ -81,8 +91,9 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
/// If one of the types is Nullable, we apply aggregate function combinator "Null" if it's not window function.
/// Window functions are not real aggregate functions. Applying combinators doesn't make sense for them,
/// they must handle the nullability themselves
auto properties = tryGetProperties(name);
/// they must handle the nullability themselves.
/// Aggregate functions such as any_value_respect_nulls are considered window functions in that sense
auto properties = tryGetProperties(name, action);
bool is_window_function = properties.has_value() && properties->is_window_function;
if (!is_window_function && std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(),
[](const auto & type) { return type->isNullable(); }))
@ -98,8 +109,7 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
bool has_null_arguments = std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(),
[](const auto & type) { return type->onlyNull(); });
AggregateFunctionPtr nested_function = getImpl(
name, nested_types, nested_parameters, out_properties, has_null_arguments);
AggregateFunctionPtr nested_function = getImpl(name, action, nested_types, nested_parameters, out_properties, has_null_arguments);
// Pure window functions are not real aggregate functions. Applying
// combinators doesn't make sense for them, they must handle the
@ -110,22 +120,54 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
return combinator->transformAggregateFunction(nested_function, out_properties, types_without_low_cardinality, parameters);
}
auto with_original_arguments = getImpl(name, types_without_low_cardinality, parameters, out_properties, false);
auto with_original_arguments = getImpl(name, action, types_without_low_cardinality, parameters, out_properties, false);
if (!with_original_arguments)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: AggregateFunctionFactory returned nullptr");
return with_original_arguments;
}
/// Resolves the function that should actually be executed for `name` combined with a NULLS modifier.
/// Returns an empty optional when no substitution applies (no modifier, or IGNORE NULLS on a function without a mapping).
/// Throws NOT_IMPLEMENTED when RESPECT NULLS is requested for a function that has no registered counterpart,
/// and LOGICAL_ERROR when a mapping points to a function missing from `aggregate_functions`.
std::optional<AggregateFunctionWithProperties>
AggregateFunctionFactory::getAssociatedFunctionByNullsAction(const String & name, NullsAction action) const
{
    if (action == NullsAction::RESPECT_NULLS)
    {
        const auto mapping = respect_nulls.find(name);
        if (mapping == respect_nulls.end())
            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} does not support RESPECT NULLS", name);

        const auto target = aggregate_functions.find(mapping->second);
        if (target == aggregate_functions.end())
            throw Exception(
                ErrorCodes::LOGICAL_ERROR, "Unable to find the function {} (equivalent to '{} RESPECT NULLS')", mapping->second, name);

        return {target->second};
    }

    if (action == NullsAction::IGNORE_NULLS)
    {
        const auto mapping = ignore_nulls.find(name);

        /// We don't throw for IGNORE NULLS of other functions because that's the default in CH
        if (mapping == ignore_nulls.end())
            return {};

        const auto target = aggregate_functions.find(mapping->second);
        if (target == aggregate_functions.end())
            throw Exception(
                ErrorCodes::LOGICAL_ERROR, "Unable to find the function {} (equivalent to '{} IGNORE NULLS')", mapping->second, name);

        return {target->second};
    }

    return {};
}
AggregateFunctionPtr AggregateFunctionFactory::getImpl(
const String & name_param,
NullsAction action,
const DataTypes & argument_types,
const Array & parameters,
AggregateFunctionProperties & out_properties,
bool has_null_arguments) const
{
String name = getAliasToOrName(name_param);
String case_insensitive_name;
bool is_case_insensitive = false;
Value found;
@ -135,10 +177,14 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
found = it->second;
}
if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end())
if (!found.creator)
{
found = jt->second;
is_case_insensitive = true;
case_insensitive_name = Poco::toLower(name);
if (auto jt = case_insensitive_aggregate_functions.find(case_insensitive_name); jt != case_insensitive_aggregate_functions.end())
{
found = jt->second;
is_case_insensitive = true;
}
}
ContextPtr query_context;
@ -147,11 +193,14 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
if (found.creator)
{
out_properties = found.properties;
auto opt = getAssociatedFunctionByNullsAction(is_case_insensitive ? case_insensitive_name : name, action);
if (opt)
found = *opt;
out_properties = found.properties;
if (query_context && query_context->getSettingsRef().log_queries)
query_context->addQueryFactoriesInfo(
Context::QueryLogFactories::AggregateFunction, is_case_insensitive ? Poco::toLower(name) : name);
Context::QueryLogFactories::AggregateFunction, is_case_insensitive ? case_insensitive_name : name);
/// The case when aggregate function should return NULL on NULL arguments. This case is handled in "get" method.
if (!out_properties.returns_default_when_only_null && has_null_arguments)
@ -196,7 +245,7 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
DataTypes nested_types = combinator->transformArguments(argument_types);
Array nested_parameters = combinator->transformParameters(parameters);
AggregateFunctionPtr nested_function = get(nested_name, nested_types, nested_parameters, out_properties);
AggregateFunctionPtr nested_function = get(nested_name, action, nested_types, nested_parameters, out_properties);
return combinator->transformAggregateFunction(nested_function, out_properties, argument_types, parameters);
}
@ -213,16 +262,7 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION, "Unknown aggregate function {}{}", name, extra_info);
}
/// Non-throwing lookup: yields nullptr when `name` is not recognised by isAggregateFunctionName(),
/// otherwise delegates to get() with the same arguments.
AggregateFunctionPtr AggregateFunctionFactory::tryGet(
    const String & name, const DataTypes & argument_types, const Array & parameters, AggregateFunctionProperties & out_properties) const
{
    if (!isAggregateFunctionName(name))
        return nullptr;

    return get(name, argument_types, parameters, out_properties);
}
std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetProperties(String name) const
std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetProperties(String name, NullsAction action) const
{
if (name.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH)
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too long name of aggregate function, maximum: {}", MAX_AGGREGATE_FUNCTION_NAME_LENGTH);
@ -231,6 +271,8 @@ std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetPrope
{
name = getAliasToOrName(name);
Value found;
String lower_case_name;
bool is_case_insensitive = false;
/// Find by exact match.
if (auto it = aggregate_functions.find(name); it != aggregate_functions.end())
@ -238,11 +280,23 @@ std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetPrope
found = it->second;
}
if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end())
found = jt->second;
if (!found.creator)
{
lower_case_name = Poco::toLower(name);
if (auto jt = case_insensitive_aggregate_functions.find(lower_case_name); jt != case_insensitive_aggregate_functions.end())
{
is_case_insensitive = true;
found = jt->second;
}
}
if (found.creator)
{
auto opt = getAssociatedFunctionByNullsAction(is_case_insensitive ? lower_case_name : name, action);
if (opt)
return opt->properties;
return found.properties;
}
/// Combinators of aggregate functions.
/// For every aggregate function 'agg' and combiner '-Comb' there is a combined aggregate function with the name 'aggComb',
@ -262,27 +316,29 @@ std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetPrope
}
bool AggregateFunctionFactory::isAggregateFunctionName(String name) const
bool AggregateFunctionFactory::isAggregateFunctionName(const String & name_) const
{
if (name.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH)
if (name_.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH)
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too long name of aggregate function, maximum: {}", MAX_AGGREGATE_FUNCTION_NAME_LENGTH);
while (true)
if (aggregate_functions.contains(name_) || isAlias(name_))
return true;
String name_lowercase = Poco::toLower(name_);
if (case_insensitive_aggregate_functions.contains(name_lowercase) || isAlias(name_lowercase))
return true;
String name = name_;
while (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name))
{
if (aggregate_functions.contains(name) || isAlias(name))
return true;
name = name.substr(0, name.size() - combinator->getName().size());
name_lowercase = name_lowercase.substr(0, name_lowercase.size() - combinator->getName().size());
String name_lowercase = Poco::toLower(name);
if (case_insensitive_aggregate_functions.contains(name_lowercase) || isAlias(name_lowercase))
if (aggregate_functions.contains(name) || isAlias(name) || case_insensitive_aggregate_functions.contains(name_lowercase)
|| isAlias(name_lowercase))
return true;
if (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name))
{
name = name.substr(0, name.size() - combinator->getName().size());
}
else
return false;
}
return false;
}
AggregateFunctionFactory & AggregateFunctionFactory::instance()

View File

@ -1,9 +1,9 @@
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
#include <Common/IFactoryWithAliases.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/NullsAction.h>
#include <Common/IFactoryWithAliases.h>
#include <functional>
#include <memory>
@ -62,36 +62,44 @@ public:
Value creator,
CaseSensitiveness case_sensitiveness = CaseSensitive);
/// Register how to transform from one aggregate function to other based on NullsAction
/// Registers them both ways:
/// SOURCE + RESPECT NULLS will be transformed to TARGET
/// TARGET + IGNORE NULLS will be transformed to SOURCE
void registerNullsActionTransformation(const String & source_ignores_nulls, const String & target_respect_nulls);
/// Throws an exception if not found.
AggregateFunctionPtr
get(const String & name,
const DataTypes & argument_types,
const Array & parameters,
AggregateFunctionProperties & out_properties) const;
/// Returns nullptr if not found.
AggregateFunctionPtr tryGet(
const String & name,
NullsAction action,
const DataTypes & argument_types,
const Array & parameters,
AggregateFunctionProperties & out_properties) const;
/// Get properties if the aggregate function exists.
std::optional<AggregateFunctionProperties> tryGetProperties(String name) const;
std::optional<AggregateFunctionProperties> tryGetProperties(String name, NullsAction action) const;
bool isAggregateFunctionName(String name) const;
bool isAggregateFunctionName(const String & name) const;
private:
AggregateFunctionPtr getImpl(
const String & name,
NullsAction action,
const DataTypes & argument_types,
const Array & parameters,
AggregateFunctionProperties & out_properties,
bool has_null_arguments) const;
using AggregateFunctions = std::unordered_map<String, Value>;
using ActionMap = std::unordered_map<String, String>;
AggregateFunctions aggregate_functions;
/// Mapping from functions with `RESPECT NULLS` modifier to actual aggregate function names
/// Example: `any(x) RESPECT NULLS` should be executed as function `any_respect_nulls`
ActionMap respect_nulls;
/// Same as above for `IGNORE NULLS` modifier
ActionMap ignore_nulls;
std::optional<AggregateFunctionWithProperties> getAssociatedFunctionByNullsAction(const String & name, NullsAction action) const;
/// Case insensitive aggregate functions will be additionally added here with lowercased name.
AggregateFunctions case_insensitive_aggregate_functions;

View File

@ -0,0 +1,82 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionGroupArraySorted.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Common/Exception.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
}
namespace
{
/// Instantiates AggregateFunctionTemplate for the given argument type.
/// Date, DateTime and IPv4 are dispatched explicitly (to UInt16, UInt32 and IPv4 respectively);
/// every other type is delegated to createWithNumericType, whose result is wrapped into an AggregateFunctionPtr.
template <template <typename> class AggregateFunctionTemplate, typename ... TArgs>
AggregateFunctionPtr createWithNumericOrTimeType(const IDataType & argument_type, TArgs && ... args)
{
    const WhichDataType type_info(argument_type);
    const auto type_index = type_info.idx;

    if (type_index == TypeIndex::Date)
        return std::make_shared<AggregateFunctionTemplate<UInt16>>(std::forward<TArgs>(args)...);

    if (type_index == TypeIndex::DateTime)
        return std::make_shared<AggregateFunctionTemplate<UInt32>>(std::forward<TArgs>(args)...);

    if (type_index == TypeIndex::IPv4)
        return std::make_shared<AggregateFunctionTemplate<IPv4>>(std::forward<TArgs>(args)...);

    /// Generic numeric path.
    return AggregateFunctionPtr(
        createWithNumericType<AggregateFunctionTemplate, TArgs...>(argument_type, std::forward<TArgs>(args)...));
}
/// Picks the groupArraySorted implementation for `argument_type`:
/// the numeric implementation when createWithNumericOrTimeType can build one,
/// otherwise the generic Field-based implementation.
/// `args` are extra constructor arguments appended after the data type and the parameters.
template <typename ... TArgs>
inline AggregateFunctionPtr createAggregateFunctionGroupArraySortedImpl(const DataTypePtr & argument_type, const Array & parameters, TArgs ... args)
{
    /// `args` is still needed by the fallback below, so it is passed here as lvalues
    /// instead of being forwarded (i.e. moved from) twice.
    if (auto res = createWithNumericOrTimeType<GroupArraySortedNumericImpl>(*argument_type, argument_type, parameters, args...))
        return res;

    /// Last use of `args`: it is safe to give the values away.
    return std::make_shared<GroupArraySortedGeneralImpl<GroupArraySortedNodeGeneral>>(argument_type, parameters, std::forward<TArgs>(args)...);
}
/// Factory entry point for groupArraySorted(limit)(x).
/// Requires exactly one argument and exactly one parameter: a strictly positive integer limit.
/// Throws BAD_ARGUMENTS when the parameter is missing, is not an integer, or is not positive,
/// and NUMBER_OF_ARGUMENTS_DOESNT_MATCH when more than one parameter is given.
AggregateFunctionPtr createAggregateFunctionGroupArraySorted(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
    assertUnary(name, argument_types);

    if (parameters.empty())
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should have limit argument", name);

    if (parameters.size() != 1)
        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
            "Function {} does not support this number of arguments", name);

    const Field & limit = parameters[0];
    const auto limit_type = limit.getType();

    /// Zero doubles as the "invalid" marker: it is never an acceptable limit.
    /// The value is read with the accessor that matches the stored type, so a signed
    /// field is never reinterpreted as unsigned and a signed zero is rejected just like an unsigned one.
    UInt64 max_elems = 0;
    if (limit_type == Field::Types::UInt64)
    {
        max_elems = limit.get<UInt64>();
    }
    else if (limit_type == Field::Types::Int64)
    {
        const Int64 signed_limit = limit.get<Int64>();
        if (signed_limit > 0)
            max_elems = static_cast<UInt64>(signed_limit);
    }

    if (max_elems == 0)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);

    return createAggregateFunctionGroupArraySortedImpl(argument_types[0], parameters, max_elems);
}
}
/// Adds "groupArraySorted" to the factory with both `returns_default_when_only_null`
/// and `is_order_dependent` switched off.
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory)
{
    AggregateFunctionProperties group_array_sorted_properties{};
    group_array_sorted_properties.returns_default_when_only_null = false;
    group_array_sorted_properties.is_order_dependent = false;

    factory.registerFunction("groupArraySorted", { createAggregateFunctionGroupArraySorted, group_array_sorted_properties });
}
}

View File

@ -0,0 +1,355 @@
#pragma once
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Functions/array/arraySort.h>
#include <Common/Exception.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <Columns/ColumnConst.h>
#include <DataTypes/IDataType.h>
#include <base/sort.h>
#include <Columns/IColumn.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Common/RadixSort.h>
#include <algorithm>
#include <type_traits>
#include <utility>
#define AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE 0xFFFFFF
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int TOO_LARGE_ARRAY_SIZE;
}
template <typename T>
struct GroupArraySortedData;
/// Aggregation state of GroupArraySortedNumericImpl: a flat buffer of values of type T.
template <typename T>
struct GroupArraySortedData
{
/// For easy serialization.
/// (T must either have unique object representations or be a floating-point type.)
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
/// Buffer type used for the collected values (second template argument is 32).
using Array = PODArray<T, 32, Allocator>;
/// The collected values. It is not kept sorted between calls: the aggregate function sorts it when needed.
Array value;
};
/// Implementation of groupArraySorted for arguments stored in a ColumnVector<T>.
///
/// The state is an unsorted buffer. `add` appends to it and compacts it (sort, keep the first `max_elems`)
/// every time it reaches `2 * max_elems` entries, so between compactions the buffer may legitimately hold
/// up to `2 * max_elems - 1` values. `merge` and `insertResultInto` always leave at most `max_elems` values.
template <typename T>
class GroupArraySortedNumericImpl final
    : public IAggregateFunctionDataHelper<GroupArraySortedData<T>, GroupArraySortedNumericImpl<T>>
{
    using Data = GroupArraySortedData<T>;
    UInt64 max_elems;
    SerializationPtr serialization;

    /// Equivalent to `size >= 2 * max_elems`, written with a division so that a huge `max_elems` cannot overflow.
    bool reachedCompactionThreshold(size_t size) const { return size / 2 >= max_elems; }

    /// Sorts the buffer and drops everything after the first `max_elems` values.
    void sortAndTrim(typename Data::Array & value, Arena * arena) const
    {
        RadixSort<RadixSortNumTraits<T>>::executeLSD(value.data(), value.size());
        if (value.size() > max_elems)
            value.resize(max_elems, arena);
    }

public:
    explicit GroupArraySortedNumericImpl(
        const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
        : IAggregateFunctionDataHelper<GroupArraySortedData<T>, GroupArraySortedNumericImpl<T>>(
            {data_type_}, parameters_, std::make_shared<DataTypeArray>(data_type_))
        , max_elems(max_elems_)
        , serialization(data_type_->getDefaultSerialization())
    {
    }

    String getName() const override { return "groupArraySorted"; }

    /// Appends the value of row `row_num` of the first column to the state.
    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        const auto & row_value = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
        auto & cur_elems = this->data(place);

        cur_elems.value.push_back(row_value, arena);

        /// To optimize, we sort (2 * max_size) elements of input array over and over again
        /// and after each loop we delete the last half of sorted array
        if (reachedCompactionThreshold(cur_elems.value.size()))
            sortAndTrim(cur_elems.value, arena);
    }

    /// Appends all values of `rhs` to `place`, then compacts `place` to at most `max_elems` values.
    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        auto & cur_elems = this->data(place);
        auto & rhs_elems = this->data(rhs);

        if (rhs_elems.value.empty())
            return;

        cur_elems.value.insertByOffsets(rhs_elems.value, 0, rhs_elems.value.size(), arena);
        sortAndTrim(cur_elems.value, arena);
    }

    /// Wire format: element count as VarUInt, followed by every buffered value in little-endian binary form.
    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        auto & value = this->data(place).value;
        size_t size = value.size();
        writeVarUInt(size, buf);

        for (const auto & elem : value)
            writeBinaryLittleEndian(elem, buf);
    }

    /// Reads a state written by `serialize`.
    /// Throws TOO_LARGE_ARRAY_SIZE when the announced size could not have been produced by this function.
    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        size_t size = 0;
        readVarUInt(size, buf);

        /// A state that has not been compacted yet may hold up to `2 * max_elems - 1` values (see `add`),
        /// and `serialize` writes all of them, so only sizes at or above the compaction threshold are invalid.
        /// When this branch is taken `max_elems <= size / 2`, hence `max_elems * 2` cannot overflow here.
        if (unlikely(reachedCompactionThreshold(size)))
            throw Exception(
                ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size {}, it should be less than {}", size, max_elems * 2);

        auto & value = this->data(place).value;

        value.resize(size, arena);
        for (auto & element : value)
            readBinaryLittleEndian(element, buf);
    }

    static void checkArraySize(size_t elems, size_t max_elems)
    {
        if (unlikely(elems > max_elems))
            throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
                            "Too large array size {} (maximum: {})", elems, max_elems);
    }

    /// Compacts the state and appends it to `to` (a ColumnArray) as one array.
    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
    {
        auto & value = this->data(place).value;

        /// One sort is enough: the buffer is copied right after it and is not modified in between.
        sortAndTrim(value, arena);
        const size_t size = value.size();

        ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
        ColumnArray::Offsets & offsets_to = arr_to.getOffsets();

        offsets_to.push_back(offsets_to.back() + size);

        if (size)
        {
            typename ColumnVector<T>::Container & data_to = assert_cast<ColumnVector<T> &>(arr_to.getData()).getData();
            data_to.insert(value.begin(), value.end());
        }
    }

    bool allocatesMemoryInArena() const override { return true; }
};
/// Aggregation state of the generic (Field-based) groupArraySorted implementation.
/// Only the `has_sampler = false` specialization is defined in this file.
template <typename Node, bool has_sampler>
struct GroupArraySortedGeneralData;
template <typename Node>
struct GroupArraySortedGeneralData<Node, false>
{
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
// NOTE(review): the alignment is taken from `Node *` although the array below stores `Field` - confirm this is sufficient for Field.
using Allocator = MixedAlignedArenaAllocator<alignof(Node *), 4096>;
// NOTE(review): Field is stored in a PODArray; presumably PODArray neither constructs elements on resize
// nor destroys them on shrink - verify that Fields owning heap memory are handled correctly.
using Array = PODArray<Field, 32, Allocator>;
/// The collected values; the aggregate function sorts them when needed.
Array value;
};
/// Common header of a variable-length node: a size field with the payload bytes placed
/// directly after the `Node` object in memory.
/// `Node` is the derived type, used only for `sizeof(Node)` when locating the payload.
template <typename Node>
struct GroupArraySortedNodeBase
{
UInt64 size; // size of payload
/// Returns pointer to actual payload
/// (the address `sizeof(Node)` bytes past `this`; the caller must have allocated that space).
char * data() { return reinterpret_cast<char *>(this) + sizeof(Node); }
const char * data() const { return reinterpret_cast<const char *>(this) + sizeof(Node); }
};
/// Node tag for string values. GroupArraySortedGeneralImpl checks for this type
/// to reserve string offsets before inserting the result.
struct GroupArraySortedNodeString : public GroupArraySortedNodeBase<GroupArraySortedNodeString>
{
using Node = GroupArraySortedNodeString;
};
/// Node tag for values of any other type; adds nothing to the base besides the `Node` alias.
struct GroupArraySortedNodeGeneral : public GroupArraySortedNodeBase<GroupArraySortedNodeGeneral>
{
using Node = GroupArraySortedNodeGeneral;
};
/// Implementation of groupArraySorted for Generic data via Array
/// Implementation of groupArraySorted for Generic data via Array
///
/// Values are kept as `Field`s in an unsorted buffer. `add` appends to it and compacts it
/// (sort, keep the first `max_elems`) every time it reaches `2 * max_elems` entries, so between compactions
/// the buffer may legitimately hold up to `2 * max_elems - 1` values.
/// `merge` and `insertResultInto` always leave at most `max_elems` values.
template <typename Node>
class GroupArraySortedGeneralImpl final
    : public IAggregateFunctionDataHelper<GroupArraySortedGeneralData<Node, false>, GroupArraySortedGeneralImpl<Node>>
{
    using Data = GroupArraySortedGeneralData<Node, false>;
    static Data & data(AggregateDataPtr __restrict place) { return *reinterpret_cast<Data *>(place); }
    static const Data & data(ConstAggregateDataPtr __restrict place) { return *reinterpret_cast<const Data *>(place); }

    DataTypePtr & data_type;
    UInt64 max_elems;
    SerializationPtr serialization;

    /// Equivalent to `size >= 2 * max_elems`, written with a division so that a huge `max_elems` cannot overflow.
    bool reachedCompactionThreshold(size_t size) const { return size / 2 >= max_elems; }

public:
    GroupArraySortedGeneralImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
        : IAggregateFunctionDataHelper<GroupArraySortedGeneralData<Node, false>, GroupArraySortedGeneralImpl<Node>>(
            {data_type_}, parameters_, std::make_shared<DataTypeArray>(data_type_))
        , data_type(this->argument_types[0])
        , max_elems(max_elems_)
        , serialization(data_type->getDefaultSerialization())
    {
    }

    String getName() const override { return "groupArraySorted"; }

    /// Appends the value of row `row_num` of the first column to the state.
    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        auto & cur_elems = data(place);

        cur_elems.value.push_back(columns[0][0][row_num], arena);

        /// To optimize, we sort (2 * max_size) elements of input array over and over again and
        /// after each loop we delete the last half of sorted array
        if (reachedCompactionThreshold(cur_elems.value.size()))
        {
            std::sort(cur_elems.value.begin(), cur_elems.value.end());
            cur_elems.value.erase(cur_elems.value.begin() + max_elems, cur_elems.value.end());
        }
    }

    /// Appends all values of `rhs` to `place`, then compacts `place` to at most `max_elems` values.
    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        auto & cur_elems = data(place);
        auto & rhs_elems = data(rhs);

        if (rhs_elems.value.empty())
            return;

        for (const Field & elem : rhs_elems.value)
            cur_elems.value.push_back(elem, arena);

        checkArraySize(cur_elems.value.size(), AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE);

        /// The buffer is non-empty here because at least one element of `rhs` has just been appended.
        std::sort(cur_elems.value.begin(), cur_elems.value.end());

        if (cur_elems.value.size() > max_elems)
            cur_elems.value.resize(max_elems, arena);
    }

    static void checkArraySize(size_t elems, size_t max_elems)
    {
        if (unlikely(elems > max_elems))
            throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
                            "Too large array size {} (maximum: {})", elems, max_elems);
    }

    /// Wire format: element count as VarUInt, then for every element a one-byte flag
    /// (`true` = a value follows, `false` = the element is NULL) and, if flagged, the value itself.
    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        auto & value = data(place).value;
        size_t size = value.size();
        checkArraySize(size, AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE);
        writeVarUInt(size, buf);

        for (const Field & elem : value)
        {
            if (elem.isNull())
            {
                writeBinary(false, buf);
            }
            else
            {
                writeBinary(true, buf);
                serialization->serializeBinary(elem, buf, {});
            }
        }
    }

    /// Reads a state written by `serialize`.
    /// Throws TOO_LARGE_ARRAY_SIZE when the announced size could not have been produced by this function.
    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        size_t size = 0;
        readVarUInt(size, buf);

        /// A state that has not been compacted yet may hold up to `2 * max_elems - 1` values (see `add`),
        /// and `serialize` writes all of them, so only sizes at or above the compaction threshold are invalid.
        /// When this branch is taken `max_elems <= size / 2`, hence `max_elems * 2` cannot overflow here.
        if (unlikely(reachedCompactionThreshold(size)))
            throw Exception(
                ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size {}, it should be less than {}", size, max_elems * 2);

        checkArraySize(size, AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE);

        auto & value = data(place).value;

        /// Elements are built as proper Field objects and appended one by one,
        /// so no Field is ever assigned into storage that has not been constructed.
        value.clear();
        value.reserve(size, arena);
        for (size_t i = 0; i < size; ++i)
        {
            /// The flag mirrors `serialize`: non-zero means that a value follows, zero means NULL.
            UInt8 has_value = 0;
            readBinary(has_value, buf);

            Field elem; /// Stays default-constructed when no value follows.
            if (has_value)
                serialization->deserializeBinary(elem, buf, {});

            value.push_back(std::move(elem), arena);
        }
    }

    /// Compacts the state and appends it to `to` (a ColumnArray) as one array.
    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
    {
        auto & column_array = assert_cast<ColumnArray &>(to);
        auto & value = data(place).value;

        if (!value.empty())
        {
            std::sort(value.begin(), value.end());

            if (value.size() > max_elems)
                value.resize_exact(max_elems, arena);
        }

        auto & offsets = column_array.getOffsets();
        offsets.push_back(offsets.back() + value.size());

        auto & column_data = column_array.getData();

        /// Decided at compile time, so the ColumnString cast is only instantiated for the string node type.
        if constexpr (std::is_same_v<Node, GroupArraySortedNodeString>)
        {
            auto & string_offsets = assert_cast<ColumnString &>(column_data).getOffsets();
            string_offsets.reserve(string_offsets.size() + value.size());
        }

        for (const Field & field : value)
            column_data.insert(field);
    }

    bool allocatesMemoryInArena() const override { return true; }
};
/// Undefine the macro that this header defines (the name must match the #define above),
/// so that it does not leak into files including this header.
#undef AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE
}

View File

@ -771,26 +771,18 @@ static_assert(
/// For any other value types.
template <bool RESULT_IS_NULLABLE = false>
struct SingleValueDataGeneric
{
private:
using Self = SingleValueDataGeneric;
Field value;
bool has_value = false;
public:
static constexpr bool result_is_nullable = RESULT_IS_NULLABLE;
static constexpr bool should_skip_null_arguments = !RESULT_IS_NULLABLE;
static constexpr bool result_is_nullable = false;
static constexpr bool should_skip_null_arguments = true;
static constexpr bool is_any = false;
bool has() const
{
if constexpr (result_is_nullable)
return has_value;
return !value.isNull();
}
bool has() const { return !value.isNull(); }
void insertResultInto(IColumn & to) const
{
@ -820,19 +812,9 @@ public:
serialization.deserializeBinary(value, buf, {});
}
void change(const IColumn & column, size_t row_num, Arena *)
{
column.get(row_num, value);
if constexpr (result_is_nullable)
has_value = true;
}
void change(const IColumn & column, size_t row_num, Arena *) { column.get(row_num, value); }
void change(const Self & to, Arena *)
{
value = to.value;
if constexpr (result_is_nullable)
has_value = true;
}
void change(const Self & to, Arena *) { value = to.value; }
bool changeFirstTime(const IColumn & column, size_t row_num, Arena * arena)
{
@ -847,7 +829,7 @@ public:
bool changeFirstTime(const Self & to, Arena * arena)
{
if (!has() && (result_is_nullable || to.has()))
if (!has() && to.has())
{
change(to, arena);
return true;
@ -882,30 +864,15 @@ public:
}
else
{
if constexpr (result_is_nullable)
Field new_value;
column.get(row_num, new_value);
if (new_value < value)
{
Field new_value;
column.get(row_num, new_value);
if (!value.isNull() && (new_value.isNull() || new_value < value))
{
value = new_value;
return true;
}
else
return false;
value = new_value;
return true;
}
else
{
Field new_value;
column.get(row_num, new_value);
if (new_value < value)
{
value = new_value;
return true;
}
else
return false;
}
return false;
}
}
@ -913,30 +880,13 @@ public:
{
if (!to.has())
return false;
if constexpr (result_is_nullable)
if (!has() || to.value < value)
{
if (!has())
{
change(to, arena);
return true;
}
if (to.value.isNull() || (!value.isNull() && to.value < value))
{
value = to.value;
return true;
}
return false;
change(to, arena);
return true;
}
else
{
if (!has() || to.value < value)
{
change(to, arena);
return true;
}
else
return false;
}
return false;
}
bool changeIfGreater(const IColumn & column, size_t row_num, Arena * arena)
@ -948,29 +898,15 @@ public:
}
else
{
if constexpr (result_is_nullable)
Field new_value;
column.get(row_num, new_value);
if (new_value > value)
{
Field new_value;
column.get(row_num, new_value);
if (!value.isNull() && (new_value.isNull() || value < new_value))
{
value = new_value;
return true;
}
return false;
value = new_value;
return true;
}
else
{
Field new_value;
column.get(row_num, new_value);
if (new_value > value)
{
value = new_value;
return true;
}
else
return false;
}
return false;
}
}
@ -978,36 +914,18 @@ public:
{
if (!to.has())
return false;
if constexpr (result_is_nullable)
if (!has() || to.value > value)
{
if (!value.isNull() && (to.value.isNull() || value < to.value))
{
value = to.value;
return true;
}
return false;
change(to, arena);
return true;
}
else
{
if (!has() || to.value > value)
{
change(to, arena);
return true;
}
else
return false;
}
return false;
}
bool isEqualTo(const IColumn & column, size_t row_num) const
{
return has() && value == column[row_num];
}
bool isEqualTo(const IColumn & column, size_t row_num) const { return has() && value == column[row_num]; }
bool isEqualTo(const Self & to) const
{
return has() && to.value == value;
}
bool isEqualTo(const Self & to) const { return has() && to.value == value; }
static bool allocatesMemoryInArena()
{

View File

@ -150,7 +150,7 @@ public:
AggregateFunctionProperties properties;
return std::make_shared<DataTypeAggregateFunction>(
AggregateFunctionFactory::instance().get(
GatherFunctionQuantileData::toFusedNameOrSelf(getName()), this->argument_types, params, properties),
GatherFunctionQuantileData::toFusedNameOrSelf(getName()), NullsAction::EMPTY, this->argument_types, params, properties),
this->argument_types,
params);
}

View File

@ -142,6 +142,7 @@ struct AggregateFunctionSumData
), addManyConditionalInternalImpl, MULTITARGET_FUNCTION_BODY((const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end) /// NOLINT
{
ptr += start;
condition_map += start;
size_t count = end - start;
const auto * end_ptr = ptr + count;

View File

@ -20,7 +20,7 @@ template <template <typename> class Data>
class AggregateFunctionCombinatorArgMinMax final : public IAggregateFunctionCombinator
{
public:
String getName() const override { return Data<SingleValueDataGeneric<>>::name(); }
String getName() const override { return Data<SingleValueDataGeneric>::name(); }
DataTypes transformArguments(const DataTypes & arguments) const override
{
@ -66,7 +66,7 @@ public:
if (which.idx == TypeIndex::String)
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataString>>>(nested_function, arguments, params);
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataGeneric<>>>>(nested_function, arguments, params);
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataGeneric>>>(nested_function, arguments, params);
}
};

View File

@ -33,6 +33,8 @@ class AggregateFunctionIf final : public IAggregateFunctionHelper<AggregateFunct
private:
AggregateFunctionPtr nested_func;
size_t num_arguments;
/// We accept Nullable(Nothing) as condition, but callees always expect UInt8 so we need to avoid calling them
bool only_null_condition = false;
public:
AggregateFunctionIf(AggregateFunctionPtr nested, const DataTypes & types, const Array & params_)
@ -42,7 +44,9 @@ public:
if (num_arguments == 0)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require at least one argument", getName());
if (!isUInt8(types.back()) && !types.back()->onlyNull())
only_null_condition = types.back()->onlyNull();
if (!isUInt8(types.back()) && !only_null_condition)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Last argument for aggregate function {} must be UInt8", getName());
}
@ -108,6 +112,8 @@ public:
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
if (only_null_condition)
return;
if (assert_cast<const ColumnUInt8 &>(*columns[num_arguments - 1]).getData()[row_num])
nested_func->add(place, columns, row_num, arena);
}
@ -121,6 +127,8 @@ public:
Arena * arena,
ssize_t) const override
{
if (only_null_condition)
return;
nested_func->addBatch(row_begin, row_end, places, place_offset, columns, arena, num_arguments - 1);
}
@ -132,6 +140,8 @@ public:
Arena * arena,
ssize_t) const override
{
if (only_null_condition)
return;
nested_func->addBatchSinglePlace(row_begin, row_end, place, columns, arena, num_arguments - 1);
}
@ -144,6 +154,8 @@ public:
Arena * arena,
ssize_t) const override
{
if (only_null_condition)
return;
nested_func->addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, num_arguments - 1);
}

View File

@ -447,7 +447,8 @@ public:
{
AggregateFunctionProperties out_properties;
auto & aggr_func_factory = AggregateFunctionFactory::instance();
return aggr_func_factory.get(nested_func_name + "MappedArrays", arguments, params, out_properties);
auto action = NullsAction::EMPTY;
return aggr_func_factory.get(nested_func_name + "MappedArrays", action, arguments, params, out_properties);
}
else
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregation '{}Map' is not implemented for mapped arrays",

View File

@ -100,7 +100,16 @@ public:
if (has_null_types)
{
/// Currently the only functions that return non-NULL when all arguments are NULL are count and uniq, and they return UInt64.
/** Some functions, such as `count`, `uniq`, and others, return 0 :: UInt64 instead of NULL for a NULL argument.
* These functions have the `returns_default_when_only_null` property, so we explicitly specify the result type
* when replacing the function with `nothing`.
*
* Note: It's a bit dangerous to have the function result type depend on properties because we do not serialize properties in AST,
* and we can lose this information. For example, when we have `count(NULL)` replaced with `nothing(NULL) as "count(NULL)"` and send it
* to the remote server, the remote server will execute `nothing(NULL)` and return `NULL` while `0` is expected.
*
* To address this, we handle `nothing` in a special way in `FunctionNode::toASTImpl`.
*/
if (properties.returns_default_when_only_null)
return std::make_shared<AggregateFunctionNothing>(arguments, params, std::make_shared<DataTypeUInt64>());
else
@ -144,11 +153,18 @@ public:
}
else
{
return std::make_shared<AggregateFunctionNullVariadic<false, true>>(nested_function, arguments, params);
#if 0
if (serialize_flag)
return std::make_shared<AggregateFunctionNullVariadic<false, true>>(nested_function, arguments, params);
else
/// This should be <false, false> (no serialize flag) but it was initially added incorrectly and
/// changing it would break the binary compatibility of aggregation states using this method
// (such as AggregateFunction(argMaxOrNull, Nullable(Int64), UInt64)). The extra flag is harmless
return std::make_shared<AggregateFunctionNullVariadic<false, true>>(nested_function, arguments, params);
}
#endif
}
}
}
};

View File

@ -35,8 +35,8 @@ public:
auto storage_type_out = DataTypeFactory::instance().get(nested_->getResultType()->getName());
// Need to make a new function with promoted argument types because SimpleAggregates requires arg_type = return_type.
AggregateFunctionProperties properties;
auto function
= AggregateFunctionFactory::instance().get(nested_->getName(), {storage_type_out}, nested_->getParameters(), properties);
auto function = AggregateFunctionFactory::instance().get(
nested_->getName(), NullsAction::EMPTY, {storage_type_out}, nested_->getParameters(), properties);
// Need to make a clone because it'll be customized.
auto storage_type_arg = DataTypeFactory::instance().get(nested_->getResultType()->getName());

View File

@ -14,8 +14,9 @@ namespace DB
struct Settings;
/// min, max, any, anyLast, anyHeavy, etc...
template <template <typename> class AggregateFunctionTemplate, template <typename> class Data>
static IAggregateFunction * createAggregateFunctionSingleValue(const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
template <template <typename> class AggregateFunctionTemplate, template <typename, bool...> class Data>
static IAggregateFunction *
createAggregateFunctionSingleValue(const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertNoParameters(name, parameters);
assertUnary(name, argument_types);
@ -44,31 +45,9 @@ static IAggregateFunction * createAggregateFunctionSingleValue(const String & na
if (which.idx == TypeIndex::String)
return new AggregateFunctionTemplate<Data<SingleValueDataString>>(argument_type);
return new AggregateFunctionTemplate<Data<SingleValueDataGeneric<>>>(argument_type);
return new AggregateFunctionTemplate<Data<SingleValueDataGeneric>>(argument_type);
}
/** Creates a single-value aggregate function (min, max, any, anyLast, ...) whose handling of NULLs
  * is selected at compile time by `RespectNulls`.
  *
  * - `RespectNulls == false`: delegates to `createAggregateFunctionSingleValue`.
  * - `RespectNulls == true`: instantiates the function over `SingleValueDataGeneric<true>`.
  *
  * Throws (via `assertNoParameters` / `assertUnary`) if parameters are given or if the number of
  * arguments is not exactly one.
  */
template <template <typename> class AggregateFunctionTemplate, template <typename> class Data, bool RespectNulls = false>
static IAggregateFunction * createAggregateFunctionSingleNullableValue(const String & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
    assertNoParameters(name, parameters);
    assertUnary(name, argument_types);

    if constexpr (!RespectNulls)
    {
        return createAggregateFunctionSingleValue<AggregateFunctionTemplate, Data>(name, argument_types, Array(), settings);
    }
    else
    {
        // If the result value could be null (excluding the case that no row is matched),
        // use SingleValueDataGeneric.
        const DataTypePtr & argument_type = argument_types[0];
        return new AggregateFunctionTemplate<Data<SingleValueDataGeneric<true>>>(argument_type);
    }
    /// Both branches above return; this line is kept as in the original.
    UNREACHABLE();
}
/// argMin, argMax
template <template <typename> class MinMaxData, typename ResData>
static IAggregateFunction * createAggregateFunctionArgMinMaxSecond(const DataTypePtr & res_type, const DataTypePtr & val_type)
@ -98,7 +77,7 @@ static IAggregateFunction * createAggregateFunctionArgMinMaxSecond(const DataTyp
if (which.idx == TypeIndex::String)
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataString>>>(res_type, val_type);
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataGeneric<>>>>(res_type, val_type);
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataGeneric>>>(res_type, val_type);
}
template <template <typename> class MinMaxData>
@ -134,7 +113,7 @@ static IAggregateFunction * createAggregateFunctionArgMinMax(const String & name
if (which.idx == TypeIndex::String)
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataString>(res_type, val_type);
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataGeneric<>>(res_type, val_type);
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataGeneric>(res_type, val_type);
}
}

View File

@ -289,15 +289,6 @@ public:
Arena * arena,
ssize_t if_argument_pos = -1) const = 0;
/** Adds the rows in [row_begin, row_end) of `columns` to the single aggregation state `place`.
  * NOTE(review): in the helper implementation visible in this file, a non-negative `if_argument_pos`
  * selects `columns[if_argument_pos]` as a UInt8 flag column and only rows with a non-zero flag are added;
  * other overrides are presumably expected to follow the same contract - confirm.
  */
virtual void addBatchSinglePlaceFromInterval( /// NOLINT
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** columns,
Arena * arena,
ssize_t if_argument_pos = -1)
const = 0;
/** In addition to addBatch, this method collects multiple rows of arguments into array "places"
* as long as they are between offsets[i-1] and offsets[i]. This is used for arrayReduce and
* -Array combinator. It might also be used generally to break data dependency when array
@ -586,31 +577,6 @@ public:
}
}
/// Feeds rows [row_begin, row_end) into the single state `place` by calling Derived::add per row.
/// When `if_argument_pos` is non-negative, that column is read as UInt8 flags and rows whose
/// flag is zero are skipped.
void addBatchSinglePlaceFromInterval( /// NOLINT
    size_t row_begin,
    size_t row_end,
    AggregateDataPtr __restrict place,
    const IColumn ** columns,
    Arena * arena,
    ssize_t if_argument_pos = -1)
    const override
{
    const auto & derived = static_cast<const Derived &>(*this);

    /// Unconditional case: every row of the interval is added.
    if (if_argument_pos < 0)
    {
        for (size_t row = row_begin; row < row_end; ++row)
            derived.add(place, columns, row, arena);
        return;
    }

    /// Conditional case: consult the flag column for each row.
    const auto & condition = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
    for (size_t row = row_begin; row < row_end; ++row)
    {
        if (!condition[row])
            continue;
        derived.add(place, columns, row, arena);
    }
}
void addBatchArray(
size_t row_begin,
size_t row_end,

View File

@ -43,6 +43,7 @@ namespace ErrorCodes
template <typename T>
class QuantileTDigest
{
friend class TDigestStatistic;
using Value = Float32;
using Count = Float32;
using BetterFloat = Float64; // For intermediate results and sum(Count). Must have better precision, than Count
@ -334,6 +335,44 @@ public:
compress(); // Allows reading/writing TDigests with different epsilon/max_centroids params
}
/** Estimates how much accumulated centroid weight lies below `value`.
  *
  * Walks the centroids in stored order while accumulating their counts. At the first centroid
  * whose mean is >= `value`, the result is interpolated between the positions of the previous
  * and the current centroid. Returns 0 if that centroid is the very first one, and `count`
  * if no centroid has mean >= `value`.
  *
  * NOTE(review): presumably requires centroids to be sorted by mean (i.e. the digest to be
  * compressed) before the call - confirm with callers.
  */
Float64 getCountLessThan(Float64 value) const
{
bool first = true;
Count sum = 0;
Count prev_count = 0;
Float64 prev_x = 0;
Value prev_mean = 0;
for (const auto & c : centroids)
{
/// std::cerr << "c "<< c.mean << " "<< c.count << std::endl;
/// Position of the current centroid: the weight seen so far plus half of its own weight.
Float64 current_x = sum + c.count * 0.5;
if (c.mean >= value)
{
/// value is not greater than the mean of the first centroid.
if (first)
return 0;
/// A centroid with count == 1 has its interpolation bound shifted by 0.5
/// (the comparison yields 0 or 1, so the shift applies only in that case).
Float64 left = prev_x + 0.5 * (prev_count == 1);
Float64 right = current_x - 0.5 * (c.count == 1);
/// Interpolate at `value` between (prev_mean, left) and (c.mean, right).
/// The arguments are narrowed to Value before the call.
Float64 result = checkOverflow<Float64>(interpolate(
static_cast<Value>(value),
prev_mean,
static_cast<Value>(left),
c.mean,
static_cast<Value>(right)));
return result;
}
sum += c.count;
prev_mean = c.mean;
prev_count = c.count;
prev_x = current_x;
first = false;
}
/// value is larger than the mean of every centroid: return the member `count`.
return count;
}
/** Calculates the quantile q [0, 1] based on the digest.
* For an empty digest returns NaN.
*/

View File

@ -15,6 +15,7 @@ void registerAggregateFunctionCount(AggregateFunctionFactory &);
void registerAggregateFunctionDeltaSum(AggregateFunctionFactory &);
void registerAggregateFunctionDeltaSumTimestamp(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArray(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory);
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantile(AggregateFunctionFactory &);
@ -111,6 +112,7 @@ void registerAggregateFunctions()
registerAggregateFunctionDeltaSum(factory);
registerAggregateFunctionDeltaSumTimestamp(factory);
registerAggregateFunctionGroupArray(factory);
registerAggregateFunctionGroupArraySorted(factory);
registerAggregateFunctionGroupUniqArray(factory);
registerAggregateFunctionGroupArrayInsertAt(factory);
registerAggregateFunctionsQuantile(factory);

View File

@ -113,6 +113,11 @@ void FunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state
buffer << ", function_type: " << function_type;
if (nulls_action == NullsAction::RESPECT_NULLS)
buffer << ", nulls_action : RESPECT_NULLS";
else if (nulls_action == NullsAction::IGNORE_NULLS)
buffer << ", nulls_action : IGNORE_NULLS";
if (function)
buffer << ", result_type: " + getResultType()->getName();
@ -140,10 +145,9 @@ void FunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state
bool FunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const
{
const auto & rhs_typed = assert_cast<const FunctionNode &>(rhs);
if (function_name != rhs_typed.function_name ||
isAggregateFunction() != rhs_typed.isAggregateFunction() ||
isOrdinaryFunction() != rhs_typed.isOrdinaryFunction() ||
isWindowFunction() != rhs_typed.isWindowFunction())
if (function_name != rhs_typed.function_name || isAggregateFunction() != rhs_typed.isAggregateFunction()
|| isOrdinaryFunction() != rhs_typed.isOrdinaryFunction() || isWindowFunction() != rhs_typed.isWindowFunction()
|| nulls_action != rhs_typed.nulls_action)
return false;
if (isResolved() != rhs_typed.isResolved())
@ -171,6 +175,7 @@ void FunctionNode::updateTreeHashImpl(HashState & hash_state) const
hash_state.update(isOrdinaryFunction());
hash_state.update(isAggregateFunction());
hash_state.update(isWindowFunction());
hash_state.update(nulls_action);
if (!isResolved())
return;
@ -192,6 +197,7 @@ QueryTreeNodePtr FunctionNode::cloneImpl() const
*/
result_function->function = function;
result_function->kind = kind;
result_function->nulls_action = nulls_action;
result_function->wrap_with_nullable = wrap_with_nullable;
return result_function;
@ -202,6 +208,19 @@ ASTPtr FunctionNode::toASTImpl(const ConvertToASTOptions & options) const
auto function_ast = std::make_shared<ASTFunction>();
function_ast->name = function_name;
function_ast->nulls_action = nulls_action;
if (function_name == "nothing")
{
/** Inside AggregateFunctionCombinatorNull we may replace functions with `NULL` in arguments with `nothing`.
* Result type of `nothing` depends on `returns_default_when_only_null` property of nested function.
* If we convert `nothing` to AST, we will lose this information, so we use original function name instead.
*/
const auto & original_ast = getOriginalAST();
const auto & original_function_ast = original_ast ? original_ast->as<ASTFunction>() : nullptr;
if (original_function_ast)
function_ast->name = original_function_ast->name;
}
if (isWindowFunction())
{

View File

@ -5,11 +5,12 @@
#include <Analyzer/ConstantValue.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/ListNode.h>
#include <Common/typeid_cast.h>
#include <Core/ColumnsWithTypeAndName.h>
#include <Core/IResolvedFunction.h>
#include <DataTypes/DataTypeNullable.h>
#include <Functions/IFunction.h>
#include <Parsers/NullsAction.h>
#include <Common/typeid_cast.h>
namespace DB
{
@ -63,6 +64,10 @@ public:
/// Get function name
const String & getFunctionName() const { return function_name; }
/// Get / set the NullsAction modifier (e.g. RESPECT_NULLS or IGNORE_NULLS)
NullsAction getNullsAction() const { return nulls_action; }
void setNullsAction(NullsAction action) { nulls_action = action; }
/// Get parameters
const ListNode & getParameters() const { return children[parameters_child_index]->as<const ListNode &>(); }
@ -214,6 +219,7 @@ protected:
private:
String function_name;
FunctionKind kind = FunctionKind::UNKNOWN;
NullsAction nulls_action = NullsAction::EMPTY;
IResolvedFunctionPtr function;
bool wrap_with_nullable = false;

View File

@ -278,6 +278,7 @@ QueryTreeNodePtr IQueryTreeNode::cloneAndReplace(const ReplacementMap & replacem
if (it != replacement_map.end())
continue;
node_clone->original_ast = node_to_clone->original_ast;
node_clone->setAlias(node_to_clone->alias);
node_clone->children = node_to_clone->children;
node_clone->weak_pointers = node_to_clone->weak_pointers;
@ -318,6 +319,7 @@ QueryTreeNodePtr IQueryTreeNode::cloneAndReplace(const ReplacementMap & replacem
*weak_pointer_ptr = it->second;
}
result_cloned_node_place->original_ast = original_ast;
return result_cloned_node_place;
}

View File

@ -184,10 +184,9 @@ private:
auto function_aggregate_function = function_node.getAggregateFunction();
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name,
{ argument->getResultType() },
function_aggregate_function->getParameters(),
properties);
auto action = NullsAction::EMPTY;
auto aggregate_function = AggregateFunctionFactory::instance().get(
aggregate_function_name, action, {argument->getResultType()}, function_aggregate_function->getParameters(), properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}

View File

@ -76,7 +76,8 @@ public:
/// Replace `countDistinct` of initial query into `count`
auto result_type = function_node->getResultType();
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties);
auto action = NullsAction::EMPTY;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", action, {}, {}, properties);
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
function_node->getArguments().getNodes().clear();
}

View File

@ -78,9 +78,11 @@ QueryTreeNodePtr createResolvedFunction(const ContextPtr & context, const String
return function_node;
}
FunctionNodePtr createResolvedAggregateFunction(const String & name, const QueryTreeNodePtr & argument, const Array & parameters = {})
FunctionNodePtr createResolvedAggregateFunction(
const String & name, const QueryTreeNodePtr & argument, const Array & parameters = {}, NullsAction action = NullsAction::EMPTY)
{
auto function_node = std::make_shared<FunctionNode>(name);
function_node->setNullsAction(action);
if (!parameters.empty())
{
@ -92,11 +94,7 @@ FunctionNodePtr createResolvedAggregateFunction(const String & name, const Query
function_node->getArguments().getNodes() = { argument };
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(
name,
{ argument->getResultType() },
parameters,
properties);
auto aggregate_function = AggregateFunctionFactory::instance().get(name, action, {argument->getResultType()}, parameters, properties);
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
return function_node;

View File

@ -1,134 +0,0 @@
#include <Analyzer/Passes/MoveFunctionsOutOfAnyPass.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/LambdaNode.h>
#include <Analyzer/ConstantNode.h>
namespace DB
{
namespace
{
/** Pushes `any` / `anyLast` down into the arguments of the function they wrap:
  * `any(f(x, y))` is rewritten to `f(any(x), any(y))`.
  *
  * The rewrite is applied only when the `optimize_move_functions_out_of_any` setting is enabled,
  * the aggregate has exactly one argument, that argument is a function with at least one argument,
  * and no lambda or `arrayJoin` appears among its (nested) arguments.
  */
class AnyFunctionViMoveFunctionsOutOfAnyVisitor : public InDepthQueryTreeVisitorWithContext<AnyFunctionViMoveFunctionsOutOfAnyVisitor>
{
public:
    using Base = InDepthQueryTreeVisitorWithContext<AnyFunctionViMoveFunctionsOutOfAnyVisitor>;
    using Base::Base;

    /// Replaces `node` in place with the rewritten expression when all preconditions hold.
    void enterImpl(QueryTreeNodePtr & node)
    {
        if (!getSettings().optimize_move_functions_out_of_any)
            return;

        auto * function_node = node->as<FunctionNode>();
        if (!function_node)
            return;

        /// check function is any
        const auto & function_name = function_node->getFunctionName();
        if (function_name != "any" && function_name != "anyLast")
            return;

        auto & arguments = function_node->getArguments().getNodes();
        if (arguments.size() != 1)
            return;

        auto * inside_function_node = arguments[0]->as<FunctionNode>();

        /// check argument is a function
        if (!inside_function_node)
            return;

        /// check arguments can not contain arrayJoin or lambda
        if (!canRewrite(inside_function_node))
            return;

        auto & inside_function_node_arguments = inside_function_node->getArguments().getNodes();

        /// case any(f())
        if (inside_function_node_arguments.empty())
            return;

        /// The same node may be reached again through an alias: reuse the earlier rewrite.
        auto it = node_to_rewritten_node.find(node.get());
        if (it != node_to_rewritten_node.end())
        {
            node = it->second;
            return;
        }

        /// checking done, rewrite function
        bool changed_argument = false;
        for (auto & inside_argument : inside_function_node_arguments)
        {
            /// Constants need no aggregation, so they are left untouched. Keep going with the
            /// remaining arguments: stopping at the first constant would leave every argument
            /// after it outside of the aggregate function (any(f(x, 1, y)) -> f(any(x), 1, y)).
            if (inside_argument->as<ConstantNode>())
                continue;

            AggregateFunctionProperties properties;
            auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, {inside_argument->getResultType()}, {}, properties);

            auto any_function = std::make_shared<FunctionNode>(function_name);
            any_function->resolveAsAggregateFunction(std::move(aggregate_function));

            auto & any_function_arguments = any_function->getArguments().getNodes();
            any_function_arguments.push_back(std::move(inside_argument));

            inside_argument = std::move(any_function);
            changed_argument = true;
        }

        if (changed_argument)
        {
            /// NOTE(review): the map key is a raw pointer to the node being replaced; verify that
            /// the replaced node cannot be freed and its address reused while the visitor is alive.
            node_to_rewritten_node.emplace(node.get(), arguments[0]);
            node = arguments[0];
        }
    }

private:
    /// Returns false if any (nested) argument of `function_node` is a lambda or an `arrayJoin` call.
    bool canRewrite(const FunctionNode * function_node)
    {
        for (const auto & argument : function_node->getArguments().getNodes())
        {
            if (argument->as<LambdaNode>())
                return false;

            if (const auto * inside_function = argument->as<FunctionNode>())
            {
                /// Function arrayJoin is special and should be skipped (think about it as
                /// an aggregate function), otherwise wrong result will be produced.
                /// For example:
                /// SELECT *, any(arrayJoin([[], []])) FROM numbers(1) GROUP BY number
                /// ┌─number─┬─arrayJoin(array(array(), array()))─┐
                /// │ 0 │ [] │
                /// │ 0 │ [] │
                /// └────────┴────────────────────────────────────┘
                if (inside_function->getFunctionName() == "arrayJoin")
                    return false;

                if (!canRewrite(inside_function))
                    return false;
            }
        }

        return true;
    }

    /// After query analysis, alias identifier will be resolved to node whose memory address is same with the original one.
    /// So we can reuse the rewritten function.
    std::unordered_map<IQueryTreeNode *, QueryTreeNodePtr> node_to_rewritten_node;
};
}
/// Pass entry point: traverses the query tree with the any/anyLast rewriting visitor.
void MoveFunctionsOutOfAnyPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
    AnyFunctionViMoveFunctionsOutOfAnyVisitor rewriting_visitor(context);
    rewriting_visitor.visit(query_tree_node);
}
}

View File

@ -1,27 +0,0 @@
#pragma once
#include <Analyzer/IQueryTreePass.h>
namespace DB
{
/** Query tree pass that moves 'any' and 'anyLast' from the outside of a function call
  * onto the arguments of that call.
  *
  * Before: SELECT any(f(x, y, g(z)));
  * After:  SELECT f(any(x), any(y), g(any(z)));
  */
class MoveFunctionsOutOfAnyPass final : public IQueryTreePass
{
public:
    /// Name of the pass.
    String getName() override
    {
        return "MoveFunctionsOutOfAnyPass";
    }

    /// Human-readable summary of what the pass does.
    String getDescription() override { return "Rewrite 'any' and 'anyLast' functions pushing them inside original function."; }

    /// Applies the rewrite to the whole tree rooted at `query_tree_node`.
    void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}

View File

@ -56,7 +56,7 @@ private:
static inline void resolveAsCountAggregateFunction(FunctionNode & function_node)
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties);
auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}

View File

@ -118,6 +118,7 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
extern const int FUNCTION_CANNOT_HAVE_PARAMETERS;
extern const int SYNTAX_ERROR;
}
/** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h first.
@ -1208,7 +1209,8 @@ private:
static void expandGroupByAll(QueryNode & query_tree_node_typed);
static std::string rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, const ContextPtr & context);
static std::string
rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context);
static std::optional<JoinTableSide> getColumnSideFromJoinTree(const QueryTreeNodePtr & resolved_identifier, const JoinNode & join_node)
{
@ -2310,7 +2312,8 @@ void QueryAnalyzer::expandGroupByAll(QueryNode & query_tree_node_typed)
recursivelyCollectMaxOrdinaryExpressions(node, group_by_nodes);
}
std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, const ContextPtr & context)
std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(
const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context)
{
std::string result_aggregate_function_name = aggregate_function_name;
auto aggregate_function_name_lowercase = Poco::toLower(aggregate_function_name);
@ -2337,7 +2340,7 @@ std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(const std::strin
bool need_add_or_null = settings.aggregate_functions_null_for_empty && !result_aggregate_function_name.ends_with("OrNull");
if (need_add_or_null)
{
auto properties = AggregateFunctionFactory::instance().tryGetProperties(result_aggregate_function_name);
auto properties = AggregateFunctionFactory::instance().tryGetProperties(result_aggregate_function_name, action);
if (!properties->returns_default_when_only_null)
result_aggregate_function_name += "OrNull";
}
@ -2349,7 +2352,7 @@ std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(const std::strin
*/
if (result_aggregate_function_name.ends_with("OrNull"))
{
auto function_properies = AggregateFunctionFactory::instance().tryGetProperties(result_aggregate_function_name);
auto function_properies = AggregateFunctionFactory::instance().tryGetProperties(result_aggregate_function_name, action);
if (function_properies && !function_properies->returns_default_when_only_null)
{
size_t function_name_size = result_aggregate_function_name.size();
@ -4591,6 +4594,19 @@ ProjectionNames QueryAnalyzer::resolveLambda(const QueryTreeNodePtr & lambda_nod
return result_projection_names;
}
namespace
{
/// Rejects an IGNORE NULLS / RESPECT NULLS modifier on a function node: throws SYNTAX_ERROR
/// unless the node's nulls action is NullsAction::EMPTY.
void checkFunctionNodeHasEmptyNullsAction(FunctionNode const & node)
{
    const auto nulls_action = node.getNullsAction();
    if (nulls_action == NullsAction::EMPTY)
        return;

    /// Any non-empty action other than IGNORE_NULLS is reported as RESPECT.
    const char * modifier = "RESPECT";
    if (nulls_action == NullsAction::IGNORE_NULLS)
        modifier = "IGNORE";

    throw Exception(
        ErrorCodes::SYNTAX_ERROR,
        "Function with name '{}' cannot use {} NULLS",
        node.getFunctionName(),
        modifier);
}
}
/** Resolve function node in scope.
* During function node resolve, function node can be replaced with another expression (if it match lambda or sql user defined function),
* with constant (if it allow constant folding), or with expression list. It is caller responsibility to handle such cases appropriately.
@ -4749,6 +4765,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
if (is_special_function_exists)
{
checkFunctionNodeHasEmptyNullsAction(*function_node_ptr);
/// Rewrite EXISTS (subquery) into 1 IN (SELECT 1 FROM (subquery) LIMIT 1).
auto & exists_subquery_argument = function_node_ptr->getArguments().getNodes().at(0);
@ -4769,6 +4786,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
if (is_special_function_if && !function_node_ptr->getArguments().getNodes().empty())
{
checkFunctionNodeHasEmptyNullsAction(*function_node_ptr);
/** Handle special case with constant If function, even if some of the arguments are invalid.
*
* SELECT if(hasColumnInTable('system', 'numbers', 'not_existing_column'), not_existing_column, 5) FROM system.numbers;
@ -4834,6 +4852,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
/// Replace right IN function argument if it is table or table function with subquery that read ordinary columns
if (is_special_function_in)
{
checkFunctionNodeHasEmptyNullsAction(function_node);
if (scope.context->getSettingsRef().transform_null_in)
{
static constexpr std::array<std::pair<std::string_view, std::string_view>, 4> in_function_to_replace_null_in_function_map =
@ -5012,6 +5031,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
lambda_expression_untyped->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());
checkFunctionNodeHasEmptyNullsAction(function_node);
if (!parameters.empty())
{
throw Exception(
@ -5041,6 +5062,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
"Function 'untuple' must have 1 argument. In scope {}",
scope.scope_node->formatASTForErrorMessage());
checkFunctionNodeHasEmptyNullsAction(function_node);
const auto & untuple_argument = function_arguments[0];
auto result_type = untuple_argument->getResultType();
const auto * tuple_data_type = typeid_cast<const DataTypeTuple *>(result_type.get());
@ -5091,6 +5114,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION,
"Function GROUPING can have up to 64 arguments, but {} provided",
function_arguments_size);
checkFunctionNodeHasEmptyNullsAction(function_node);
bool force_grouping_standard_compatibility = scope.context->getSettingsRef().force_grouping_standard_compatibility;
auto grouping_function = std::make_shared<FunctionGrouping>(force_grouping_standard_compatibility);
@ -5115,10 +5139,12 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
"Window function '{}' does not support lambda arguments",
function_name);
std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, scope.context);
auto action = function_node_ptr->getNullsAction();
std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, action, scope.context);
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, parameters, properties);
auto aggregate_function
= AggregateFunctionFactory::instance().get(aggregate_function_name, action, argument_types, parameters, properties);
function_node.resolveAsWindowFunction(std::move(aggregate_function));
@ -5142,7 +5168,11 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
is_executable_udf = false;
}
if (!function)
if (function)
{
checkFunctionNodeHasEmptyNullsAction(function_node);
}
else
{
if (!AggregateFunctionFactory::instance().isAggregateFunctionName(function_name))
{
@ -5181,10 +5211,12 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
"Aggregate function '{}' does not support lambda arguments",
function_name);
std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, scope.context);
auto action = function_node_ptr->getNullsAction();
std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, action, scope.context);
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, parameters, properties);
auto aggregate_function
= AggregateFunctionFactory::instance().get(aggregate_function_name, action, argument_types, parameters, properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));

View File

@ -97,6 +97,7 @@ private:
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(
function_node.getFunctionName() + suffix,
function_node.getNullsAction(),
argument_types,
function_node.getAggregateFunction()->getParameters(),
properties);

View File

@ -157,10 +157,8 @@ private:
/// Re-resolves `function_node` as the `countIf` aggregate function taking a single argument of
/// type `argument_type`, reusing the parameters of the node's current aggregate function.
static inline void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type)
{
AggregateFunctionProperties properties;
/// NOTE(review): `aggregate_function` is declared twice in this span. The first declaration (without a
/// NullsAction argument) looks like the pre-change form of the call, shown next to its replacement — confirm.
auto aggregate_function = AggregateFunctionFactory::instance().get("countIf",
{argument_type},
function_node.getAggregateFunction()->getParameters(),
properties);
/// This form of the lookup passes NullsAction::EMPTY as the second argument.
auto aggregate_function = AggregateFunctionFactory::instance().get(
"countIf", NullsAction::EMPTY, {argument_type}, function_node.getAggregateFunction()->getParameters(), properties);
/// Hand the freshly looked-up function over to the node.
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}

View File

@ -76,7 +76,9 @@ public:
argument_types.emplace_back(function_node_argument->getResultType());
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(function_node->getFunctionName(),
auto aggregate_function = AggregateFunctionFactory::instance().get(
function_node->getFunctionName(),
NullsAction::EMPTY,
argument_types,
function_node->getAggregateFunction()->getParameters(),
properties);

View File

@ -176,7 +176,7 @@ public:
if (match_subquery_with_distinct() || match_subquery_with_group_by())
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties);
auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties);
function_node->getArguments().getNodes().clear();
function_node->resolveAsAggregateFunction(std::move(aggregate_function));

View File

@ -607,6 +607,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
else
{
auto function_node = std::make_shared<FunctionNode>(function->name);
function_node->setNullsAction(function->nulls_action);
if (function->parameters)
{

View File

@ -44,7 +44,6 @@
#include <Analyzer/Passes/CrossToInnerJoinPass.h>
#include <Analyzer/Passes/ShardNumColumnToFunctionPass.h>
#include <Analyzer/Passes/ConvertQueryToCNFPass.h>
#include <Analyzer/Passes/MoveFunctionsOutOfAnyPass.h>
#include <Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.h>
@ -284,7 +283,6 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<CrossToInnerJoinPass>());
manager.addPass(std::make_unique<ShardNumColumnToFunctionPass>());
manager.addPass(std::make_unique<MoveFunctionsOutOfAnyPass>());
manager.addPass(std::make_unique<OptimizeDateOrDateTimeConverterWithPreimagePass>());
}

View File

@ -544,11 +544,8 @@ inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_nod
argument_types.emplace_back(function_node_argument->getResultType());
AggregateFunctionProperties properties;
return AggregateFunctionFactory::instance().get(
function_node->getFunctionName(),
argument_types,
parameters,
properties);
auto action = NullsAction::EMPTY;
return AggregateFunctionFactory::instance().get(function_node->getFunctionName(), action, argument_types, parameters, properties);
}
}

View File

@ -55,6 +55,10 @@ void WithRetries::renewZooKeeper(FaultyKeeper my_faulty_zookeeper) const
callback(my_faulty_zookeeper);
}
else
{
my_faulty_zookeeper->setKeeper(zookeeper);
}
}
const WithRetries::KeeperSettings & WithRetries::getKeeperSettings() const

View File

@ -222,6 +222,7 @@ add_object_library(clickhouse_storages Storages)
add_object_library(clickhouse_storages_mysql Storages/MySQL)
add_object_library(clickhouse_storages_distributed Storages/Distributed)
add_object_library(clickhouse_storages_mergetree Storages/MergeTree)
add_object_library(clickhouse_storages_statistics Storages/Statistics)
add_object_library(clickhouse_storages_liveview Storages/LiveView)
add_object_library(clickhouse_storages_windowview Storages/WindowView)
add_object_library(clickhouse_storages_s3queue Storages/S3Queue)

View File

@ -2861,7 +2861,7 @@ void ClientBase::init(int argc, char ** argv)
("interactive", "Process queries-file or --query query and start interactive mode")
("pager", po::value<std::string>(), "Pipe all output into this command (less or similar)")
("max_memory_usage_in_client", po::value<int>(), "Set memory limit in client/local server")
("max_memory_usage_in_client", po::value<std::string>(), "Set memory limit in client/local server")
;
addOptions(options_description);
@ -2996,10 +2996,12 @@ void ClientBase::init(int argc, char ** argv)
clearPasswordFromCommandLine(argc, argv);
/// Limit on total memory usage
size_t max_client_memory_usage = config().getInt64("max_memory_usage_in_client", 0 /*default value*/);
if (max_client_memory_usage != 0)
std::string max_client_memory_usage = config().getString("max_memory_usage_in_client", "0" /*default value*/);
if (max_client_memory_usage != "0")
{
total_memory_tracker.setHardLimit(max_client_memory_usage);
UInt64 max_client_memory_usage_int = parseWithSizeSuffix<UInt64>(max_client_memory_usage.c_str(), max_client_memory_usage.length());
total_memory_tracker.setHardLimit(max_client_memory_usage_int);
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
}

View File

@ -46,6 +46,7 @@
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
namespace DB
{
@ -384,6 +385,39 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast)
// the generic recursion into IAST.children.
}
/// Randomly mutates `action` in place, driven by draws from fuzz_rand().
/// Most of the time the value is left untouched.
void QueryFuzzer::fuzzNullsAction(NullsAction & action)
{
    /// If no action is specified, there is a high chance the function does not support one to begin with,
    /// so an action is introduced only when fuzz_rand() % 100 == 0.
    /// The draw is made only when the action is currently EMPTY.
    const bool introduce_action = (action == NullsAction::EMPTY) && (fuzz_rand() % 100 == 0);
    if (introduce_action)
    {
        action = (fuzz_rand() % 2 == 0) ? NullsAction::RESPECT_NULLS : NullsAction::IGNORE_NULLS;
        return;
    }

    /// Reached both when an action is already set and when the draw above did not fire:
    /// the action is overwritten only when fuzz_rand() % 20 == 0.
    if (fuzz_rand() % 20 != 0)
        return;

    /// Pick the replacement value from fuzz_rand() % 3.
    const auto choice = fuzz_rand() % 3;
    if (choice == 0)
        action = NullsAction::EMPTY;
    else if (choice == 1)
        action = NullsAction::RESPECT_NULLS;
    else
        action = NullsAction::IGNORE_NULLS;
}
void QueryFuzzer::fuzzWindowFrame(ASTWindowDefinition & def)
{
switch (fuzz_rand() % 40)
@ -966,6 +1000,9 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
fuzzColumnLikeExpressionList(fn->arguments.get());
fuzzColumnLikeExpressionList(fn->parameters.get());
if (AggregateUtils::isAggregateFunction(*fn))
fuzzNullsAction(fn->nulls_action);
if (fn->is_window_function && fn->window_definition)
{
auto & def = fn->window_definition->as<ASTWindowDefinition &>();

Some files were not shown because too many files have changed in this diff Show More