Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-22 23:52:03 +00:00)

Commit 440dc66a5c: Merge remote-tracking branch 'upstream/master' into ncb/hostname-system-log-tables
@@ -33,10 +33,9 @@ curl https://clickhouse.com/ | sh

## Upcoming Events

* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/296334923/) - Nov 14
* [**ClickHouse Meetup in Singapore**](https://www.meetup.com/clickhouse-singapore-meetup-group/events/296334976/) - Nov 15
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/296488501/) - Nov 30
* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/296488779/) - Dec 11
* [**ClickHouse Meetup in Sydney**](https://www.meetup.com/clickhouse-sydney-user-group/events/297638812/) - Dec 12
* [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/296488840/) - Dec 12

Also, keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler <at> clickhouse <dot> com.
@@ -385,9 +385,25 @@ endif ()

include("${ClickHouse_SOURCE_DIR}/contrib/google-protobuf-cmake/protobuf_generate.cmake")

+# These files need to be installed so that users can use the well-known protobuf types
+set(google_proto_files
+    ${protobuf_source_dir}/src/google/protobuf/any.proto
+    ${protobuf_source_dir}/src/google/protobuf/api.proto
+    ${protobuf_source_dir}/src/google/protobuf/descriptor.proto
+    ${protobuf_source_dir}/src/google/protobuf/duration.proto
+    ${protobuf_source_dir}/src/google/protobuf/empty.proto
+    ${protobuf_source_dir}/src/google/protobuf/field_mask.proto
+    ${protobuf_source_dir}/src/google/protobuf/source_context.proto
+    ${protobuf_source_dir}/src/google/protobuf/struct.proto
+    ${protobuf_source_dir}/src/google/protobuf/timestamp.proto
+    ${protobuf_source_dir}/src/google/protobuf/type.proto
+    ${protobuf_source_dir}/src/google/protobuf/wrappers.proto
+)
+
add_library(_protobuf INTERFACE)
target_link_libraries(_protobuf INTERFACE _libprotobuf)
target_include_directories(_protobuf INTERFACE "${Protobuf_INCLUDE_DIR}")
+set_target_properties(_protobuf PROPERTIES google_proto_files "${google_proto_files}")
add_library(ch_contrib::protobuf ALIAS _protobuf)

add_library(_protoc INTERFACE)
@@ -33,7 +33,7 @@ target_include_directories(cxxabi SYSTEM BEFORE
    PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/include>
    PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/src>
)
-target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY)
+target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DHAS_THREAD_LOCAL)
target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast.
target_link_libraries(cxxabi PUBLIC unwind)
contrib/libpqxx (vendored submodule)
@@ -1 +1 @@
-Subproject commit 791d68fd89902835133c50435e380ec7a73271b7
+Subproject commit c995193a3a14d71f4711f1f421f65a1a1db64640
contrib/qpl (vendored submodule)
@@ -1 +1 @@
-Subproject commit faaf19350459c076e66bb5df11743c3fade59b73
+Subproject commit a61bdd845fd7ca363b2bcc55454aa520dfcd8298
@@ -47,6 +47,12 @@ SELECT * FROM test_table;
└──────┴───────┘
```

+## Virtual columns {#virtual-columns}
+
+- `_path` — Path to the file. Type: `LowCardinality(String)`.
+- `_file` — Name of the file. Type: `LowCardinality(String)`.
+- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
+
## See also

[Azure Blob Storage Table Function](/docs/en/sql-reference/table-functions/azureBlobStorage)
@@ -85,6 +85,10 @@ You can also change any [rocksdb options](https://github.com/facebook/rocksdb/wi
</rocksdb>
```

+By default, the trivial approximate count optimization is turned off, which might affect the performance of `count()` queries. To enable this optimization, set `optimize_trivial_approximate_count_query = 1`. This setting also affects `system.tables` for the EmbeddedRocksDB engine; turn it on to see approximate values for `total_rows` and `total_bytes`.
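For example, a minimal sketch (the table `t` here is a hypothetical EmbeddedRocksDB table, not one from the docs above):

```sql
-- Enable the approximate-count optimization for the current session.
SET optimize_trivial_approximate_count_query = 1;

-- count() may now be answered from RocksDB's approximate key estimate.
SELECT count() FROM t;
```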

## Supported operations {#supported-operations}

### Inserts
@@ -230,8 +230,9 @@ libhdfs3 support HDFS namenode HA.

## Virtual Columns {#virtual-columns}

-- `_path` — Path to the file.
-- `_file` — Name of the file.
+- `_path` — Path to the file. Type: `LowCardinality(String)`.
+- `_file` — Name of the file. Type: `LowCardinality(String)`.
+- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.

## Storage Settings {#storage-settings}
@@ -142,8 +142,9 @@ Code: 48. DB::Exception: Received from localhost:9000. DB::Exception: Reading fr

## Virtual columns {#virtual-columns}

-- `_path` — Path to the file.
-- `_file` — Name of the file.
+- `_path` — Path to the file. Type: `LowCardinality(String)`.
+- `_file` — Name of the file. Type: `LowCardinality(String)`.
+- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.

For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns).
@@ -39,8 +39,8 @@ If you need to update rows frequently, we recommend using the [`ReplacingMergeTr

``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
-    name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [TTL expr1] [PRIMARY KEY],
-    name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [TTL expr2] [PRIMARY KEY],
+    name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [STATISTIC(stat1)] [TTL expr1] [PRIMARY KEY],
+    name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [STATISTIC(stat2)] [TTL expr2] [PRIMARY KEY],
    ...
    INDEX index_name1 expr1 TYPE type1(...) [GRANULARITY value1],
    INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2],
@@ -1358,3 +1358,33 @@ In this sample configuration:

- `_partition_value` — Values (a tuple) of a `partition by` expression.
- `_sample_factor` — Sample factor (from the query).
- `_block_number` — Block number of the row; it is persisted on merges when `allow_experimental_block_number_column` is set to true.

+## Column Statistics (Experimental) {#column-statistics}
+
+Statistics can be declared in the columns section of the `CREATE` query for tables from the `*MergeTree*` family when `allow_experimental_statistic = 1` is enabled.
+
+``` sql
+CREATE TABLE example_table
+(
+    a Int64 STATISTIC(tdigest),
+    b Float64
+)
+ENGINE = MergeTree
+ORDER BY a
+```
+
+Statistics can also be manipulated with `ALTER` statements.
+
+```sql
+ALTER TABLE example_table ADD STATISTIC b TYPE tdigest;
+ALTER TABLE example_table DROP STATISTIC a TYPE tdigest;
+```
+
+These lightweight statistics aggregate information about the distribution of values in columns.
+They can be used for query optimization when `allow_statistic_optimize = 1` is enabled.
+
+#### Available Types of Column Statistics {#available-types-of-column-statistics}
+
+- `tdigest`
+
+    Stores the distribution of values from numeric columns in a [TDigest](https://github.com/tdunning/t-digest) sketch.
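A minimal usage sketch building on `example_table` above (the query and the exact benefit are illustrative assumptions, not part of the diff):

```sql
-- Both switches are experimental and off by default.
SET allow_experimental_statistic = 1;   -- permit STATISTIC in CREATE/ALTER
SET allow_statistic_optimize = 1;       -- let the optimizer consult the sketches

-- The tdigest sketch on `a` can help order the PREWHERE conditions.
SELECT count() FROM example_table PREWHERE a < 100 AND b > 0.5;
```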
@@ -87,12 +87,18 @@ $ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64

- Indices
- Replication

-## PARTITION BY
+## PARTITION BY {#partition-by}

`PARTITION BY` — Optional. It is possible to create separate files by partitioning the data on a partition key. In most cases, you don't need a partition key, and if one is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use partitioning that is too granular. Don't partition your data by client identifiers or names (instead, make the client identifier or name the first column in the ORDER BY expression).

For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.
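For instance, a minimal sketch of monthly partitioning (the table and column names are illustrative):

```sql
-- Hypothetical File-engine table partitioned by month of `d`.
CREATE TABLE file_by_month (d Date, v UInt32)
ENGINE = File(CSV)
PARTITION BY toYYYYMM(d);
```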

## Virtual Columns {#virtual-columns}

- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.

## Settings {#settings}

- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows selecting empty data from a file that doesn't exist. Disabled by default.
@@ -103,6 +103,12 @@ SELECT * FROM url_engine_table

For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.

+## Virtual Columns {#virtual-columns}
+
+- `_path` — Path to the `URL`. Type: `LowCardinality(String)`.
+- `_file` — Resource name of the `URL`. Type: `LowCardinality(String)`.
+- `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
+
## Storage Settings {#storage-settings}

- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows skipping empty files while reading. Disabled by default.
@@ -56,7 +56,7 @@ On Linux, macOS and FreeBSD:
./clickhouse client
ClickHouse client version 23.2.1.1501 (official build).
Connecting to localhost:9000 as user default.
-Connected to ClickHouse server version 23.2.1 revision 54461.
+Connected to ClickHouse server version 23.2.1.

local-host :)
```
@@ -16,7 +16,7 @@ ClickHouse provides a native command-line client: `clickhouse-client`. The clien
$ clickhouse-client
ClickHouse client version 20.13.1.5273 (official build).
Connecting to localhost:9000 as user default.
-Connected to ClickHouse server version 20.13.1 revision 54442.
+Connected to ClickHouse server version 20.13.1.

:)
```
@@ -16,9 +16,9 @@ More information about PGO in ClickHouse you can read in the corresponding GitHu

There are two major kinds of PGO: [Instrumentation](https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers) and [Sampling](https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers) (also known as AutoFDO). This guide describes Instrumentation PGO with ClickHouse.

-1. Build ClickHouse in Instrumented mode. In Clang it can be done via passing `-fprofile-instr-generate` option to `CXXFLAGS`.
+1. Build ClickHouse in Instrumented mode. In Clang it can be done by passing the `-fprofile-generate` option to `CXXFLAGS`.
2. Run instrumented ClickHouse on a sample workload. Here you need to use your usual workload. One approach could be using [ClickBench](https://github.com/ClickHouse/ClickBench) as a sample workload. ClickHouse in instrumented mode can run slowly, so be prepared for that and do not run instrumented ClickHouse in performance-critical environments.
-3. Recompile ClickHouse once again with `-fprofile-instr-use` compiler flags and profiles that are collected from the previous step.
+3. Recompile ClickHouse once again with the `-fprofile-use` compiler flag and the profiles collected in the previous step.

A more detailed guide on how to apply PGO is in the Clang [documentation](https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization).
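A condensed sketch of that cycle (build commands, file names, and the workload are illustrative assumptions, not an official procedure):

```bash
# 1. Instrumented build: pass the flag through CXXFLAGS (assumes a CMake/ninja checkout).
cmake -DCMAKE_CXX_FLAGS="-fprofile-generate" .. && ninja clickhouse

# 2. Run a representative workload against the instrumented binary,
#    then merge the raw profiles it writes on exit.
llvm-profdata merge -output=clickhouse.profdata default_*.profraw

# 3. Rebuild with the merged profile applied.
cmake -DCMAKE_CXX_FLAGS="-fprofile-use=clickhouse.profdata" .. && ninja clickhouse
```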
@@ -1835,9 +1835,10 @@ Settings:

- `endpoint` – HTTP endpoint for scraping metrics by the Prometheus server. Starts with ‘/’.
- `port` – Port for `endpoint`.
-- `metrics` – Flag that sets to expose metrics from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table.
-- `events` – Flag that sets to expose metrics from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
-- `asynchronous_metrics` – Flag that sets to expose current metrics values from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
+- `metrics` – Expose metrics from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table.
+- `events` – Expose metrics from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
+- `asynchronous_metrics` – Expose current metric values from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
+- `errors` - Expose the number of errors by error code that have occurred since the last server restart. This information can also be obtained from [system.errors](../../operations/system-tables/asynchronous_metrics.md#system_tables-errors).

**Example**
@@ -1853,6 +1854,7 @@ Settings:
    <metrics>true</metrics>
    <events>true</events>
    <asynchronous_metrics>true</asynchronous_metrics>
+    <errors>true</errors>
</prometheus>
<!-- highlight-end -->
</clickhouse>
@@ -2350,7 +2352,7 @@ Path on the local filesystem to store temporary data for processing large querie

## user_files_path {#user_files_path}

-The directory with user files. Used in the table function [file()](../../sql-reference/table-functions/file.md).
+The directory with user files. Used in the table functions [file()](../../sql-reference/table-functions/file.md) and [fileCluster()](../../sql-reference/table-functions/fileCluster.md).

**Example**
@@ -149,7 +149,7 @@ Possible values:

- Any positive integer.
- 0 (disable deduplication)

-Default value: 100.
+Default value: 1000.

The `Insert` command creates one or more blocks (parts). For [insert deduplication](../../engines/table-engines/mergetree-family/replication.md), when writing into replicated tables, ClickHouse writes the hash sums of the created parts into ClickHouse Keeper. Hash sums are stored only for the most recent `replicated_deduplication_window` blocks. The oldest hash sums are removed from ClickHouse Keeper.
A large `replicated_deduplication_window` value slows down `Insert`s because more entries need to be compared.
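A sketch of overriding the default per table (names and the Keeper path are illustrative and assume an already configured cluster):

```sql
-- Hypothetical replicated table with a larger deduplication window.
CREATE TABLE t (x UInt64)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/t', '{replica}')
ORDER BY x
SETTINGS replicated_deduplication_window = 10000;
```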
@@ -4801,6 +4801,14 @@ a Tuple(
)
```

+## allow_experimental_statistic {#allow_experimental_statistic}
+
+Allows defining columns with [statistics](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) and [manipulating statistics](../../engines/table-engines/mergetree-family/mergetree.md#column-statistics).
+
+## allow_statistic_optimize {#allow_statistic_optimize}
+
+Allows using statistics to optimize the order of [prewhere conditions](../../sql-reference/statements/select/prewhere.md).
+
## analyze_index_with_space_filling_curves

If a table has a space-filling curve in its index, e.g. `ORDER BY mortonEncode(x, y)`, and the query has conditions on its arguments, e.g. `x >= 10 AND x <= 20 AND y >= 20 AND y <= 30`, use the space-filling curve for index analysis.
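For illustration, a minimal sketch of such a table and query (hypothetical names):

```sql
-- Table ordered by a space-filling curve over (x, y).
CREATE TABLE points (x UInt32, y UInt32)
ENGINE = MergeTree
ORDER BY mortonEncode(x, y);

-- With the setting enabled, these range conditions can be mapped onto
-- ranges of the curve during index analysis.
SELECT count() FROM points
WHERE x >= 10 AND x <= 20 AND y >= 20 AND y <= 30;
```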
@@ -31,3 +31,26 @@ SELECT * FROM system.numbers LIMIT 10;

10 rows in set. Elapsed: 0.001 sec.
```

+You can also limit the output by predicates.
+
+```sql
+SELECT * FROM system.numbers WHERE number < 10;
+```
+
+```response
+┌─number─┐
+│      0 │
+│      1 │
+│      2 │
+│      3 │
+│      4 │
+│      5 │
+│      6 │
+│      7 │
+│      8 │
+│      9 │
+└────────┘
+
+10 rows in set. Elapsed: 0.001 sec.
+```
@@ -18,7 +18,9 @@ Columns:

- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Timestamp of the sampling moment with microseconds precision.
- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the sampling moment in nanoseconds.
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision.
-    When connecting to the server by `clickhouse-client`, you see the string similar to `Connected to ClickHouse server version 19.18.1 revision 54429.`. This field contains the `revision`, but not the `version` of a server.
+
+    When connecting to the server by `clickhouse-client`, you see a string similar to `Connected to ClickHouse server version 19.18.1.`. This field contains the `revision`, but not the `version` of a server.

- `trace_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Trace type:
    - `Real` represents collecting stack traces by wall-clock time.
    - `CPU` represents collecting stack traces by CPU time.
@@ -5,7 +5,12 @@ sidebar_position: 6

# any

-Selects the first encountered (non-NULL) value, unless all rows have NULL values in that column.
+Selects the first encountered value of a column.
+
+By default, it ignores NULL values and returns the first NOT NULL value found in the column. Like [`first_value`](../../../sql-reference/aggregate-functions/reference/first_value.md), it supports `RESPECT NULLS`, in which case it will select the first value passed, independently of whether it's NULL or not.
+
+The return type of the function is the same as the input, except for LowCardinality, which is discarded. This means that given no rows as input it will return the default value of that type (0 for integers, or Null for a Nullable() column). You might use the `-OrNull` [combinator](../../../sql-reference/aggregate-functions/combinators.md) to modify this behaviour.

The query can be executed in any order and even in a different order each time, so the result of this function is indeterminate.
To get a determinate result, you can use the ‘min’ or ‘max’ function instead of ‘any’.
@@ -13,4 +18,4 @@ In some cases, you can rely on the order of execution. This applies to cases whe

When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function.
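A minimal sketch of that MySQL-style pattern (the `visits` table is hypothetical):

```sql
-- `user_name` is neither a key nor aggregated, so wrap it in any().
SELECT user_id, any(user_name) AS user_name, count() AS c
FROM visits
GROUP BY user_id;
```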

-- Alias: `any_value`
+- Alias: `any_value`, `first_value`.
@@ -5,9 +5,12 @@ sidebar_position: 7

# first_value

-Selects the first encountered value, similar to `any`, but could accept NULL.
-Mostly it should be used with [Window Functions](../../window-functions/index.md).
-Without Window Functions the result will be random if the source stream is not ordered.
+It is an alias for [`any`](../../../sql-reference/aggregate-functions/reference/any.md), but it was introduced for compatibility with [Window Functions](../../window-functions/index.md), where sometimes it's necessary to process `NULL` values (by default all ClickHouse aggregate functions ignore NULL values).
+
+It supports declaring a modifier to respect nulls (`RESPECT NULLS`), both under [Window Functions](../../window-functions/index.md) and in normal aggregations.
+
+As with `any`, without Window Functions the result will be random if the source stream is not ordered, and the return type matches the input type (Null is only returned if the input is Nullable or the -OrNull combinator is added).

## Examples
@@ -23,15 +26,15 @@ INSERT INTO test_data (a, b) Values (1,null), (2,3), (4, 5), (6,null);
```

### example1
-The NULL value is ignored at default.
+By default, the NULL value is ignored.
```sql
select first_value(b) from test_data;
```

```text
-┌─first_value_ignore_nulls(b)─┐
-│                           3 │
-└─────────────────────────────┘
+┌─any(b)─┐
+│      3 │
+└────────┘
```

### example2
@@ -41,9 +44,9 @@ select first_value(b) ignore nulls from test_data
```

```text
-┌─first_value_ignore_nulls(b)─┐
-│                           3 │
-└─────────────────────────────┘
+┌─any(b) IGNORE NULLS ─┐
+│                    3 │
+└──────────────────────┘
```

### example3
@@ -53,9 +56,9 @@ select first_value(b) respect nulls from test_data
```

```text
-┌─first_value_respect_nulls(b)─┐
-│                         ᴺᵁᴸᴸ │
-└──────────────────────────────┘
+┌─any(b) RESPECT NULLS ─┐
+│                  ᴺᵁᴸᴸ │
+└───────────────────────┘
```

### example4
@@ -73,8 +76,8 @@ FROM
```

```text
-┌─first_value_respect_nulls(b)─┬─first_value(b)─┐
-│                         ᴺᵁᴸᴸ │              3 │
-└──────────────────────────────┴────────────────┘
+┌─any_respect_nulls(b)─┬─any(b)─┐
+│                 ᴺᵁᴸᴸ │      3 │
+└──────────────────────┴────────┘
```
@@ -0,0 +1,48 @@
---
toc_priority: 112
---

# groupArraySorted {#groupArraySorted}

Returns an array with the first N items in ascending order.

``` sql
groupArraySorted(N)(column)
```

**Arguments**

- `N` – The number of elements to return.

    If the parameter is omitted, the default value is the size of the input.

- `column` – The value (Integer, String, Float and other Generic types).

**Example**

Gets the first 10 numbers:

``` sql
SELECT groupArraySorted(10)(number) FROM numbers(100)
```

``` text
┌─groupArraySorted(10)(number)─┐
│ [0,1,2,3,4,5,6,7,8,9]        │
└──────────────────────────────┘
```

Gets the String representations of all numbers in the column:

``` sql
SELECT groupArraySorted(str) FROM (SELECT toString(number) as str FROM numbers(5));
```

``` text
┌─groupArraySorted(str)─┐
│ ['0','1','2','3','4'] │
└───────────────────────┘
```
@@ -54,6 +54,7 @@ ClickHouse-specific aggregate functions:

- [groupArrayMovingAvg](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
- [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
- [groupArraySample](./grouparraysample.md)
+- [groupArraySorted](/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md)
- [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md)
- [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md)
- [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md)
@@ -56,7 +56,7 @@ Functions:

## Related content

-- [Reducing ClickHouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/)
+- [Reducing ClickHouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://altinity.com/blog/2020-5-20-reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer)
- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf)
- Blog: [Optimizing ClickHouse with Schemas and Codecs](https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema)
- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse)
@@ -1083,7 +1083,7 @@ Result:

**See also**

-- [arrayFold](#arrayFold)
+- [arrayFold](#arrayfold)

## arrayReduceInRanges
@@ -1175,7 +1175,7 @@ FROM numbers(1,10);

**See also**

-- [arrayReduce](#arrayReduce)
+- [arrayReduce](#arrayreduce)

## arrayReverse(arr)
@@ -2533,13 +2533,14 @@ formatDateTime(Time, Format[, Timezone])

Returns time and date values according to the determined format.

**Replacement fields**

Using replacement fields, you can define a pattern for the resulting string. The “Example” column shows the formatting result for `2018-01-02 22:33:44`.

| Placeholder | Description                                             | Example    |
|-------------|---------------------------------------------------------|------------|
| %a          | abbreviated weekday name (Mon-Sun)                      | Mon        |
| %b          | abbreviated month name (Jan-Dec)                        | Jan        |
-| %c          | month as an integer number (01-12)                      | 01         |
+| %c          | month as an integer number (01-12), see 'Note 3' below  | 01         |
| %C          | year divided by 100 and truncated to integer (00-99)    | 20         |
| %d          | day of the month, zero-padded (01-31)                   | 02         |
| %D          | Short MM/DD/YY date, equivalent to %m/%d/%y             | 01/02/18   |
@@ -2553,8 +2554,8 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %i          | minute (00-59)                                          | 33         |
| %I          | hour in 12h format (01-12)                              | 10         |
| %j          | day of the year (001-366)                               | 002        |
-| %k          | hour in 24h format (00-23)                              | 22         |
-| %l          | hour in 12h format (01-12)                              | 09         |
+| %k          | hour in 24h format (00-23), see 'Note 3' below          | 14         |
+| %l          | hour in 12h format (01-12), see 'Note 3' below          | 09         |
| %m          | month as an integer number (01-12)                      | 01         |
| %M          | full month name (January-December), see 'Note 2' below  | January    |
| %n          | new-line character (‘’)                                 |            |
@@ -2579,6 +2580,8 @@ Note 1: In ClickHouse versions earlier than v23.4, `%f` prints a single zero (0)

Note 2: In ClickHouse versions earlier than v23.4, `%M` prints the minute (00-59) instead of the full month name (January-December). The previous behavior can be restored using setting `formatdatetime_parsedatetime_m_is_month_name = 0`.

+Note 3: In ClickHouse versions earlier than v23.11, function `parseDateTime()` required leading zeros for formatters `%c` (month) and `%l`/`%k` (hour), e.g. `07`. In later versions, the leading zero may be omitted, e.g. `7`. The previous behavior can be restored using setting `parsedatetime_parse_without_leading_zeros = 0`. Note that function `formatDateTime()` by default still prints leading zeros for `%c` and `%l`/`%k` to not break existing use cases. This behavior can be changed by setting `formatdatetime_format_without_leading_zeros = 1`.
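A short sketch of both directions (illustrative, assuming v23.11+ defaults):

```sql
-- Parsing: '7' is accepted for %c without a leading zero.
SELECT parseDateTime('2023-7-02 14:33:44', '%Y-%c-%d %k:%i:%s');

-- Formatting: %c is still zero-padded by default; opt out explicitly.
SET formatdatetime_format_without_leading_zeros = 1;
SELECT formatDateTime(toDateTime('2023-07-02 14:33:44'), '%c');  -- '7'
```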

**Example**

``` sql
@@ -164,7 +164,7 @@ Consider a list of contacts that may specify multiple ways to contact a customer
└──────────┴──────┴───────────┴───────────┘
```

-The `mail` and `phone` fields are of type String, but the `icq` field is `UInt32`, so it needs to be converted to `String`.
+The `mail` and `phone` fields are of type String, but the `telegram` field is `UInt32`, so it needs to be converted to `String`.

Get the first available contact method for the customer from the contact list:
@@ -67,7 +67,45 @@ WHERE macro = 'test';
│ test  │ Value        │
└───────┴──────────────┘
```

+## getClientHTTPHeader
+
+Returns the value of the specified HTTP header. If there is no such header or the request method is not HTTP, it throws an exception.
+
+**Syntax**
+
+```sql
+getClientHTTPHeader(name);
+```
+
+**Arguments**
+
+- `name` — HTTP header name. [String](../../sql-reference/data-types/string.md#string)
+
+**Returned value**
+
+Value of the specified header.
+Type: [String](../../sql-reference/data-types/string.md#string).
+
+When this function is executed via `clickhouse-client`, it always returns an empty string, because the client doesn't use the HTTP protocol.
+```sql
+SELECT getClientHTTPHeader('test')
+```
+
+Result:
+
+```text
+┌─getClientHTTPHeader('test')─┐
+│                             │
+└─────────────────────────────┘
+```
+
+Try it with an HTTP request:
+```shell
+echo "select getClientHTTPHeader('X-Clickhouse-User')" | curl -H 'X-ClickHouse-User: default' -H 'X-ClickHouse-Key: ' 'http://localhost:8123/' -d @-

+# result
+default
+```
+
## FQDN

Returns the fully qualified domain name of the ClickHouse server.
@@ -5,7 +5,7 @@ slug: /en/sql-reference/operators/exists

The `EXISTS` operator checks how many records are in the result of a subquery. If it is empty, then the operator returns `0`. Otherwise, it returns `1`.

-`EXISTS` can be used in a [WHERE](../../sql-reference/statements/select/where.md) clause.
+`EXISTS` can also be used in a [WHERE](../../sql-reference/statements/select/where.md) clause.

:::tip
References to main query tables and columns are not supported in a subquery.
@@ -13,12 +13,26 @@ References to main query tables and columns are not supported in a subquery.

**Syntax**

-```sql
-WHERE EXISTS(subquery)
-```
+``` sql
+EXISTS(subquery)
+```
+
+**Example**
+
+Query checking existence of values in a subquery:
+
+``` sql
+SELECT EXISTS(SELECT * FROM numbers(10) WHERE number > 8), EXISTS(SELECT * FROM numbers(10) WHERE number > 11)
+```
+
+Result:
+
+``` text
+┌─in(1, _subquery1)─┬─in(1, _subquery2)─┐
+│                 1 │                 0 │
+└───────────────────┴───────────────────┘
+```

Query with a subquery returning several rows:

``` sql
@@ -10,7 +10,7 @@ A set of queries that allow changing the table structure.

Syntax:

``` sql
-ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ...
+ALTER [TEMPORARY] TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ...
```

In the query, specify a list of one or more comma-separated actions.
@@ -16,6 +16,7 @@ Most `ALTER TABLE` queries modify table settings or data:

- [INDEX](/docs/en/sql-reference/statements/alter/skipping-index.md)
- [CONSTRAINT](/docs/en/sql-reference/statements/alter/constraint.md)
- [TTL](/docs/en/sql-reference/statements/alter/ttl.md)
+- [STATISTIC](/docs/en/sql-reference/statements/alter/statistic.md)

:::note
Most `ALTER TABLE` queries are supported only for [\*MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables, as well as [Merge](/docs/en/engines/table-engines/special/merge.md) and [Distributed](/docs/en/engines/table-engines/special/distributed.md).
docs/en/sql-reference/statements/alter/statistic.md (new file, 25 lines)
@@ -0,0 +1,25 @@
---
slug: /en/sql-reference/statements/alter/statistic
sidebar_position: 45
sidebar_label: STATISTIC
---

# Manipulating Column Statistics

The following operations are available:

- `ALTER TABLE [db].table ADD STATISTIC (columns list) TYPE type` - Adds a statistic description to the table's metadata.

- `ALTER TABLE [db].table DROP STATISTIC (columns list) TYPE type` - Removes a statistic description from the table's metadata and deletes the statistic files from disk.

- `ALTER TABLE [db].table CLEAR STATISTIC (columns list) TYPE type` - Deletes the statistic files from disk.

- `ALTER TABLE [db.]table MATERIALIZE STATISTIC (columns list) TYPE type` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).

The first two commands are lightweight in the sense that they only change metadata or remove files.

They are also replicated, syncing statistics metadata via ZooKeeper.
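A brief worked sketch of the commands above (hypothetical table `tab` with an Int64 column `a`; assumes `allow_experimental_statistic = 1` is set):

```sql
ALTER TABLE tab ADD STATISTIC a TYPE tdigest;
ALTER TABLE tab MATERIALIZE STATISTIC a TYPE tdigest;  -- rebuild runs as a mutation
ALTER TABLE tab DROP STATISTIC a TYPE tdigest;         -- removes metadata and files
```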

:::note
Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
:::
@@ -415,7 +415,7 @@ ExpressionTransform
ExpressionTransform × 2
(SettingQuotaAndLimits)
(ReadFromStorage)
-NumbersMt × 2 0 → 1
+NumbersRange × 2 0 → 1
```

### EXPLAIN ESTIMATE
@@ -67,6 +67,12 @@ SELECT count(*) FROM azureBlobStorage('DefaultEndpointsProtocol=https;AccountNam
└─────────┘
```

+## Virtual Columns {#virtual-columns}
+
+- `_path` — Path to the file. Type: `LowCardinality(String)`.
+- `_file` — Name of the file. Type: `LowCardinality(String)`.
+- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.
+
**See Also**

- [AzureBlobStorage Table Engine](/docs/en/engines/table-engines/integrations/azureBlobStorage.md)
@@ -191,12 +191,13 @@ Query the total number of rows from all files `file002` inside any folder in dir
SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt32');
```

-## Virtual Columns
+## Virtual Columns {#virtual-columns}

-- `_path` — Path to the file.
-- `_file` — Name of the file.
+- `_path` — Path to the file. Type: `LowCardinality(String)`.
+- `_file` — Name of the file. Type: `LowCardinality(String)`.
+- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.

-## Settings
+## Settings {#settings}

- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows selecting empty data from a file that doesn't exist. Disabled by default.
- [engine_file_truncate_on_insert](/docs/en/operations/settings/settings.md#engine-file-truncate-on-insert) - allows truncating a file before inserting into it. Disabled by default.
docs/en/sql-reference/table-functions/fileCluster.md (new file, 85 lines)
@@ -0,0 +1,85 @@
---
slug: /en/sql-reference/table-functions/fileCluster
sidebar_position: 61
sidebar_label: fileCluster
---

# fileCluster Table Function

Enables simultaneous processing of files matching a specified path across multiple nodes within a cluster. The initiator establishes connections to worker nodes, expands globs in the file path, and delegates file-reading tasks to worker nodes. Each worker node queries the initiator for the next file to process, repeating until all tasks are completed (all files are read).

:::note
This function will operate _correctly_ only if the set of files matching the initially specified path is identical across all nodes and their content is consistent among different nodes.
In case these files differ between nodes, the return value cannot be predetermined and depends on the order in which worker nodes request tasks from the initiator.
:::

**Syntax**

``` sql
fileCluster(cluster_name, path[, format, structure, compression_method])
```

**Arguments**

- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). The path to a file also supports [globs](#globs_in_path).
- `format` — [Format](../../interfaces/formats.md#formats) of the files. Type: [String](../../sql-reference/data-types/string.md).
- `structure` — Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md).
- `compression_method` — Compression method. Supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.

**Returned value**

A table with the specified format and structure and with data from files matching the specified path.

**Example**

Given a cluster named `my_cluster` and given the following value of setting `user_files_path`:

``` bash
$ grep user_files_path /etc/clickhouse-server/config.xml
    <user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
```
Also, given there are files `file1.csv` and `file2.csv` inside `user_files_path` of each cluster node, and their content is identical across different nodes:
```bash
$ cat /var/lib/clickhouse/user_files/file1.csv
1,"file1"
11,"file11"

$ cat /var/lib/clickhouse/user_files/file2.csv
2,"file2"
22,"file22"
```

For example, one can create these files by executing these two queries on every cluster node:
```sql
INSERT INTO TABLE FUNCTION file('file1.csv', 'CSV', 'i UInt32, s String') VALUES (1,'file1'), (11,'file11');
INSERT INTO TABLE FUNCTION file('file2.csv', 'CSV', 'i UInt32, s String') VALUES (2,'file2'), (22,'file22');
```

Now, read the data contents of `file1.csv` and `file2.csv` via the `fileCluster` table function:

```sql
SELECT * FROM fileCluster(
    'my_cluster', 'file{1,2}.csv', 'CSV', 'i UInt32, s String'
) ORDER BY (i, s)
```

```
┌──i─┬─s──────┐
│  1 │ file1  │
│ 11 │ file11 │
└────┴────────┘
┌──i─┬─s──────┐
│  2 │ file2  │
│ 22 │ file22 │
└────┴────────┘
```

## Globs in Path {#globs_in_path}

All patterns supported by the [File](../../sql-reference/table-functions/file.md#globs-in-path) table function are supported by fileCluster.

**See Also**

- [File table function](../../sql-reference/table-functions/file.md)
docs/en/sql-reference/table-functions/fuzzJSON.md (new file, 86 lines)
@@ -0,0 +1,86 @@
---
slug: /en/sql-reference/table-functions/fuzzJSON
sidebar_position: 75
sidebar_label: fuzzJSON
---

# fuzzJSON

Perturbs a JSON string with random variations.

``` sql
fuzzJSON({ named_collection [option=value [,..]] | json_str[, random_seed] })
```

**Arguments**

- `named_collection` - A [NAMED COLLECTION](/docs/en/sql-reference/statements/create/named-collection.md).
- `option=value` - Named collection optional parameters and their values.
- `json_str` (String) - The source string representing structured data in JSON format.
- `random_seed` (UInt64) - Manual random seed for producing stable results.
- `reuse_output` (boolean) - Reuse the output from a fuzzing process as input for the next fuzzer.
- `max_output_length` (UInt64) - Maximum allowable length of the generated or perturbed JSON string.
- `probability` (Float64) - The probability of fuzzing a JSON field (a key-value pair). Must be within the [0, 1] range.
- `max_nesting_level` (UInt64) - The maximum allowed depth of nested structures within the JSON data.
- `max_array_size` (UInt64) - The maximum allowed size of a JSON array.
- `max_object_size` (UInt64) - The maximum allowed number of fields on a single level of a JSON object.
- `max_string_value_length` (UInt64) - The maximum length of a String value.
- `min_key_length` (UInt64) - The minimum key length. Should be at least 1.
- `max_key_length` (UInt64) - The maximum key length. Should be greater than or equal to `min_key_length`, if specified.

**Returned Value**

A table object with a single column containing perturbed JSON strings.

## Usage Example

``` sql
CREATE NAMED COLLECTION json_fuzzer AS json_str='{}';
SELECT * FROM fuzzJSON(json_fuzzer) LIMIT 3;
```

``` text
{"52Xz2Zd4vKNcuP2":true}
{"UPbOhOQAdPKIg91":3405264103600403024}
{"X0QUWu8yT":[]}
```

``` sql
SELECT * FROM fuzzJSON(json_fuzzer, json_str='{"name" : "value"}', random_seed=1234) LIMIT 3;
```

``` text
{"key":"value", "mxPG0h1R5":"L-YQLv@9hcZbOIGrAn10%GA"}
{"BRE3":true}
{"key":"value", "SWzJdEJZ04nrpSfy":[{"3Q23y":[]}]}
```

``` sql
SELECT * FROM fuzzJSON(json_fuzzer, json_str='{"students" : ["Alice", "Bob"]}', reuse_output=true) LIMIT 3;
```

``` text
{"students":["Alice", "Bob"], "nwALnRMc4pyKD9Krv":[]}
{"students":["1rNY5ZNs0wU&82t_P", "Bob"], "wLNRGzwDiMKdw":[{}]}
{"xeEk":["1rNY5ZNs0wU&82t_P", "Bob"], "wLNRGzwDiMKdw":[{}, {}]}
```

``` sql
SELECT * FROM fuzzJSON(json_fuzzer, json_str='{"students" : ["Alice", "Bob"]}', max_output_length=512) LIMIT 3;
```

``` text
{"students":["Alice", "Bob"], "BREhhXj5":true}
{"NyEsSWzJdeJZ04s":["Alice", 5737924650575683711, 5346334167565345826], "BjVO2X9L":true}
{"NyEsSWzJdeJZ04s":["Alice", 5737924650575683711, 5346334167565345826], "BjVO2X9L":true, "k1SXzbSIz":[{}]}
```

``` sql
SELECT * FROM fuzzJSON('{"id":1}', 1234) LIMIT 3;
```

``` text
{"id":1, "mxPG0h1R5":"L-YQLv@9hcZbOIGrAn10%GA"}
{"BRjE":16137826149911306846}
{"XjKE":15076727133550123563}
```
@@ -94,8 +94,9 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin

## Virtual Columns

-- `_path` — Path to the file.
-- `_file` — Name of the file.
+- `_path` — Path to the file. Type: `LowCardinality(String)`.
+- `_file` — Name of the file. Type: `LowCardinality(String)`.
+- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.

## Storage Settings {#storage-settings}
@@ -17,6 +17,8 @@ The following queries are equivalent:

SELECT * FROM numbers(10);
SELECT * FROM numbers(0, 10);
SELECT * FROM system.numbers LIMIT 10;
+SELECT * FROM system.numbers WHERE number BETWEEN 0 AND 9;
+SELECT * FROM system.numbers WHERE number IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
```

Examples:
@@ -228,6 +228,12 @@ FROM s3(
LIMIT 5;
```

+## Virtual Columns {#virtual-columns}
+
+- `_path` — Path to the file. Type: `LowCardinality(String)`.
+- `_file` — Name of the file. Type: `LowCardinality(String)`.
+- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.
+
## Storage Settings {#storage-settings}

- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3-truncate-on-insert) - allows truncating a file before inserting into it. Disabled by default.
@@ -50,8 +50,9 @@ Character `|` inside patterns is used to specify failover addresses. They are it

## Virtual Columns

-- `_path` — Path to the `URL`.
-- `_file` — Resource name of the `URL`.
+- `_path` — Path to the `URL`. Type: `LowCardinality(String)`.
+- `_file` — Resource name of the `URL`. Type: `LowCardinality(String)`.
+- `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.

## Storage Settings {#storage-settings}
@@ -14,7 +14,7 @@ ClickHouse provides its own command-line client
$ clickhouse-client
ClickHouse client version 20.13.1.5273 (official build).
Connecting to localhost:9000 as user default.
-Connected to ClickHouse server version 20.13.1 revision 54442.
+Connected to ClickHouse server version 20.13.1.

:)
```
@@ -1215,6 +1215,7 @@ ClickHouse uses threads from the global thread pool
- `metrics` – flag to export current metric values from the [system.metrics](../system-tables/metrics.md#system_tables-metrics) table.
- `events` – flag to export current metric values from the [system.events](../system-tables/events.md#system_tables-events) table.
- `asynchronous_metrics` – flag to export current metric values from the [system.asynchronous_metrics](../system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
+- `errors` - flag to export the number of errors (by error code) that have occurred since the last server restart. This information can also be obtained from the [system.errors](../system-tables/asynchronous_metrics.md#system_tables-errors) table.

**Example**
@@ -1225,6 +1226,7 @@ ClickHouse uses threads from the global thread pool
    <metrics>true</metrics>
    <events>true</events>
    <asynchronous_metrics>true</asynchronous_metrics>
+    <errors>true</errors>
</prometheus>
```
@@ -1676,7 +1678,7 @@ TCP port for secure communication with clients

## user_files_path {#server_configuration_parameters-user_files_path}

-The directory with user files. Used in the table function [file()](../../operations/server-configuration-parameters/settings.md).
+The directory with user files. Used in the table functions [file()](../../sql-reference/table-functions/fileCluster.md) and [fileCluster()](../../sql-reference/table-functions/fileCluster.md).

**Example**
@@ -119,7 +119,7 @@ If the total number of active parts in all

- A positive integer.
- 0 (unlimited).

-Default value: 100.
+Default value: 1000.

The `Insert` command creates one or more blocks (parts). When inserting into Replicated tables, ClickHouse writes the hash sums of the created parts to ZooKeeper for [insert deduplication](../../engines/table-engines/mergetree-family/replication.md). Only the most recent `replicated_deduplication_window` hash sums are stored; the oldest ones are removed from ZooKeeper.
A large `replicated_deduplication_window` value slows down `Insert`, because more hash sums have to be compared.
@@ -19,7 +19,7 @@ ClickHouse creates this table when the corresponding server setting is specified

- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — the build revision of the ClickHouse server.

-    When connecting to the server via `clickhouse-client`, you see a string similar to `Connected to ClickHouse server version 19.18.1 revision 54429.`. This field contains the number after `revision`, but not the string after `version`.
+    When connecting to the server via `clickhouse-client`, you see a string similar to `Connected to ClickHouse server version 19.18.1.`. This field contains the number after `revision`, but not the string after `version`.

- `trace_type` ([Enum8](../../sql-reference/data-types/enum.md)) — the trace type:
@@ -11,7 +11,7 @@ sidebar_label: "Column manipulations"

Syntax:

``` sql
-ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ...
+ALTER [TEMPORARY] TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ...
```

In the query, you can specify several comma-separated actions on one table.
@@ -371,7 +371,7 @@ ExpressionTransform
ExpressionTransform × 2
(SettingQuotaAndLimits)
(ReadFromStorage)
-NumbersMt × 2 0 → 1
+NumbersRange × 2 0 → 1
```

### EXPLAIN ESTIMATE {#explain-estimate}
@@ -13,7 +13,7 @@ sidebar_label: file

**Syntax**

``` sql
-file(path [,format] [,structure])
+file(path [,format] [,structure] [,compression])
```

**Parameters**

@@ -21,6 +21,7 @@ file(path [,format] [,structure])
- `path` — the relative path to the file from [user_files_path](../../sql-reference/table-functions/file.md#server_configuration_parameters-user_files_path). In read-only mode the path supports the following patterns: `*`, `?`, `{abc,def}` and `{N..M}`, where `N`, `M` are numbers and `'abc', 'def'` are strings.
- `format` — the [format](../../interfaces/formats.md#formats) of the file.
- `structure` — the table structure. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
+- `compression` — the compression type used for a SELECT query, or the desired compression type for an INSERT query. Supported compression types: `gz`, `br`, `xz`, `zst`, `lz4` and `bz2`.

**Returned value**
docs/ru/sql-reference/table-functions/fileCluster.md (new file, 84 lines)
@@ -0,0 +1,84 @@
---
slug: /ru/sql-reference/table-functions/fileCluster
sidebar_position: 38
sidebar_label: fileCluster
---

# fileCluster

Enables simultaneous processing of files matching a specified path across multiple nodes within a cluster. The initiator establishes connections to the worker nodes, expands the globs in the file path, and delegates the file-reading tasks to the worker nodes. A worker node asks the initiator for the next file to process, repeating until all tasks are completed (that is, until all files have been read).

:::note
This table function will operate _correctly_ only if the set of files matching the initially specified path is identical on all nodes and the content of these files is identical across the nodes. If these files differ between nodes, the result is not predetermined and depends on the order in which the worker nodes request tasks from the initiator.
:::

**Syntax**

``` sql
fileCluster(cluster_name, path[, format, structure, compression_method])
```

**Arguments**

- `cluster_name` — the name of the cluster used to build a set of addresses and connection parameters for the remote and local servers.
- `path` — the relative path to the file from [user_files_path](../../sql-reference/table-functions/file.md#server_configuration_parameters-user_files_path). The path supports [globs](#globs_in_path).
- `format` — the [format](../../interfaces/formats.md#formats) of the file.
- `structure` — the table structure. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression_method` — the compression type used. Supported types: `gz`, `br`, `xz`, `zst`, `lz4` and `bz2`.

**Returned value**

A table with the specified format and structure, containing the data from the files matching the specified path.

**Example**
Suppose there is a cluster named `my_cluster`, and the `user_files_path` setting has the following value:

``` bash
$ grep user_files_path /etc/clickhouse-server/config.xml
    <user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
```

Suppose also that the `user_files_path` directory of every cluster node contains the files `file1.csv` and `file2.csv`, and their content is identical on different nodes:
```bash
$ cat /var/lib/clickhouse/user_files/file1.csv
1,"file1"
11,"file11"

$ cat /var/lib/clickhouse/user_files/file2.csv
2,"file2"
22,"file22"
```

For example, these files can be created by running two queries on every node:
```sql
INSERT INTO TABLE FUNCTION file('file1.csv', 'CSV', 'i UInt32, s String') VALUES (1,'file1'), (11,'file11');
INSERT INTO TABLE FUNCTION file('file2.csv', 'CSV', 'i UInt32, s String') VALUES (2,'file2'), (22,'file22');
```

Let's read the contents of `file1.csv` and `file2.csv` with the `fileCluster` table function:

```sql
SELECT * FROM fileCluster(
    'my_cluster', 'file{1,2}.csv', 'CSV', 'i UInt32, s String'
) ORDER BY (i, s)
```

```
┌──i─┬─s──────┐
│  1 │ file1  │
│ 11 │ file11 │
└────┴────────┘
┌──i─┬─s──────┐
│  2 │ file2  │
│ 22 │ file22 │
└────┴────────┘
```

## Globs in path components {#globs_in_path}

All glob patterns supported by the [File](../../sql-reference/table-functions/file.md#globs-in-path) table function are supported.

**See also**

- [File (table function)](../../sql-reference/table-functions/file.md)
@@ -14,7 +14,7 @@ ClickHouse provides a native command-line client `clickhouse-client`
$ clickhouse-client
ClickHouse client version 19.17.1.1579 (official build).
Connecting to localhost:9000 as user default.
-Connected to ClickHouse server version 19.17.1 revision 54428.
+Connected to ClickHouse server version 19.17.1.

:)
```
@@ -22,7 +22,7 @@ ClickHouse creates this table when the [trace_log](../../operations/server-configuration-para

- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision.

-    When connecting to the server via `clickhouse-client`, you see a string similar to `Connected to ClickHouse server version 19.18.1 revision 54429.`. This field contains the `revision`, but not the `version` of the server.
+    When connecting to the server via `clickhouse-client`, you see a string similar to `Connected to ClickHouse server version 19.18.1.`. This field contains the `revision`, but not the `version` of the server.

- `timer_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Timer type:
@@ -44,6 +44,8 @@ contents:
    dst: /usr/bin/clickhouse-odbc-bridge
  - src: root/usr/share/bash-completion/completions
    dst: /usr/share/bash-completion/completions
+  - src: root/usr/share/clickhouse
+    dst: /usr/share/clickhouse
  # docs
  - src: ../AUTHORS
    dst: /usr/share/doc/clickhouse-common-static/AUTHORS
@ -457,3 +457,10 @@ endif()
if (ENABLE_FUZZING)
    add_compile_definitions(FUZZING_MODE=1)
endif ()

if (TARGET ch_contrib::protobuf)
    get_property(google_proto_files TARGET ch_contrib::protobuf PROPERTY google_proto_files)
    foreach (proto_file IN LISTS google_proto_files)
        install(FILES ${proto_file} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/clickhouse/protos/google/protobuf)
    endforeach()
endif ()
@ -306,6 +306,10 @@ void Client::initialize(Poco::Util::Application & self)
    /// Set path for format schema files
    if (config().has("format_schema_path"))
        global_context->setFormatSchemaPath(fs::weakly_canonical(config().getString("format_schema_path")));

    /// Set the path for google proto files
    if (config().has("google_protos_path"))
        global_context->setGoogleProtosPath(fs::weakly_canonical(config().getString("google_protos_path")));
}


@ -489,8 +493,7 @@ void Client::connect()

    if (is_interactive)
    {
        std::cout << "Connected to " << server_name << " server version " << server_version << " revision " << server_revision << "."
                  << std::endl << std::endl;
        std::cout << "Connected to " << server_name << " server version " << server_version << "." << std::endl << std::endl;

        auto client_version_tuple = std::make_tuple(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH);
        auto server_version_tuple = std::make_tuple(server_version_major, server_version_minor, server_version_patch);
@ -37,7 +37,7 @@
        <production>{display_name} \e[1;31m:)\e[0m </production> <!-- if it matched to the substring "production" in the server display name -->
    </prompt_by_server_display_name>

    <!--
    <!--
        Settings adjustable via command-line parameters
        can take their defaults from that config file, see examples:

@ -58,6 +58,9 @@
        The same can be done on user-level configuration, just create & adjust: ~/.clickhouse-client/config.xml
    -->

    <!-- Directory containing the proto files for the well-known Protobuf types.
    -->
    <google_protos_path>/usr/share/clickhouse/protos/</google_protos_path>

    <!-- Analog of .netrc -->
    <![CDATA[
@ -41,6 +41,7 @@
            <min_session_timeout_ms>10000</min_session_timeout_ms>
            <session_timeout_ms>100000</session_timeout_ms>
            <raft_logs_level>information</raft_logs_level>
            <compress_logs>false</compress_logs>
            <!-- All settings listed in https://github.com/ClickHouse/ClickHouse/blob/master/src/Coordination/CoordinationSettings.h -->
        </coordination_settings>

@ -1279,6 +1279,8 @@ try
            global_context->setHTTPHeaderFilter(*config);

            global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop);
            global_context->setClientHTTPHeaderForbiddenHeaders(server_settings_.get_client_http_header_forbidden_headers);
            global_context->setAllowGetHTTPHeaderFunction(server_settings_.allow_get_client_http_header);
            global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop);

            ConcurrencyControl::SlotCount concurrent_threads_soft_limit = ConcurrencyControl::Unlimited;
@ -1575,6 +1577,10 @@ try
    global_context->setFormatSchemaPath(format_schema_path);
    fs::create_directories(format_schema_path);

    /// Set the path for google proto files
    if (config().has("google_protos_path"))
        global_context->setGoogleProtosPath(fs::weakly_canonical(config().getString("google_protos_path")));

    /// Set path for filesystem caches
    fs::path filesystem_caches_path(config().getString("filesystem_caches_path", ""));
    if (!filesystem_caches_path.empty())
@ -3,6 +3,7 @@
    <tmp_path replace="replace">./tmp/</tmp_path>
    <user_files_path replace="replace">./user_files/</user_files_path>
    <format_schema_path replace="replace">./format_schemas/</format_schema_path>
    <google_protos_path replace="replace">../../contrib/google-protobuf/src/</google_protos_path>
    <access_control_path replace="replace">./access/</access_control_path>
    <top_level_domains_path replace="replace">./top_level_domains/</top_level_domains_path>
</clickhouse>
@ -1428,6 +1428,10 @@
    -->
    <format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>

    <!-- Directory containing the proto files for the well-known Protobuf types.
    -->
    <google_protos_path>/usr/share/clickhouse/protos/</google_protos_path>

    <!-- Default query masking rules, matching lines would be replaced with something else in the logs
        (both text logs and system.query_log).
        name - name for the rule (optional)
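A hedged sketch of how this setting comes into play: a schema under `format_schema_path` can import one of the well-known types, and the server resolves that import against `google_protos_path`. The schema file `event.proto` and message `Event` below are made up for illustration:

```sql
-- event.proto (under format_schema_path) would contain:
--   import "google/protobuf/timestamp.proto";
--   message Event { google.protobuf.Timestamp ts = 1; }
SELECT * FROM file('events.bin', 'ProtobufSingle')
SETTINGS format_schema = 'event:Event'
```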
@ -51,6 +51,11 @@ enum class AccessType
    M(ALTER_CLEAR_INDEX, "CLEAR INDEX", TABLE, ALTER_INDEX) \
    M(ALTER_INDEX, "INDEX", GROUP, ALTER_TABLE) /* allows to execute ALTER ORDER BY or ALTER {ADD|DROP...} INDEX */\
    \
    M(ALTER_ADD_STATISTIC, "ALTER ADD STATISTIC", TABLE, ALTER_STATISTIC) \
    M(ALTER_DROP_STATISTIC, "ALTER DROP STATISTIC", TABLE, ALTER_STATISTIC) \
    M(ALTER_MATERIALIZE_STATISTIC, "ALTER MATERIALIZE STATISTIC", TABLE, ALTER_STATISTIC) \
    M(ALTER_STATISTIC, "STATISTIC", GROUP, ALTER_TABLE) /* allows to execute ALTER STATISTIC */\
    \
    M(ALTER_ADD_PROJECTION, "ADD PROJECTION", TABLE, ALTER_PROJECTION) \
    M(ALTER_DROP_PROJECTION, "DROP PROJECTION", TABLE, ALTER_PROJECTION) \
    M(ALTER_MATERIALIZE_PROJECTION, "MATERIALIZE PROJECTION", TABLE, ALTER_PROJECTION) \
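The new access types gate the statistics DDL; a hedged sketch of the statements they correspond to (table and column names are placeholders):

```sql
ALTER TABLE tab ADD STATISTIC a TYPE tdigest;
ALTER TABLE tab MATERIALIZE STATISTIC a;
ALTER TABLE tab DROP STATISTIC a;
```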
@ -1,26 +1,213 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/HelpersMinMaxAny.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <base/defines.h>


namespace DB
{
struct Settings;

namespace ErrorCodes
{
    extern const int INCORRECT_DATA;
    extern const int LOGICAL_ERROR;
}

namespace
{
struct AggregateFunctionAnyRespectNullsData
{
    enum Status : UInt8
    {
        NotSet = 1,
        SetNull = 2,
        SetOther = 3
    };

    Status status = Status::NotSet;
    Field value;

    bool isSet() const { return status != Status::NotSet; }
    void setNull() { status = Status::SetNull; }
    void setOther() { status = Status::SetOther; }
};

template <bool First>
class AggregateFunctionAnyRespectNulls final
    : public IAggregateFunctionDataHelper<AggregateFunctionAnyRespectNullsData, AggregateFunctionAnyRespectNulls<First>>
{
public:
    using Data = AggregateFunctionAnyRespectNullsData;

    SerializationPtr serialization;
    const bool returns_nullable_type = false;

    explicit AggregateFunctionAnyRespectNulls(const DataTypePtr & type)
        : IAggregateFunctionDataHelper<Data, AggregateFunctionAnyRespectNulls<First>>({type}, {}, type)
        , serialization(type->getDefaultSerialization())
        , returns_nullable_type(type->isNullable())
    {
    }

    String getName() const override
    {
        if constexpr (First)
            return "any_respect_nulls";
        else
            return "anyLast_respect_nulls";
    }

    bool allocatesMemoryInArena() const override { return false; }

    void addNull(AggregateDataPtr __restrict place) const
    {
        chassert(returns_nullable_type);
        auto & d = this->data(place);
        if (First && d.isSet())
            return;
        d.setNull();
    }

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
    {
        if (columns[0]->isNullable())
        {
            if (columns[0]->isNullAt(row_num))
                return addNull(place);
        }
        auto & d = this->data(place);
        if (First && d.isSet())
            return;
        d.setOther();
        columns[0]->get(row_num, d.value);
    }

    void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override
    {
        if (columns[0]->isNullable())
            addNull(place);
        else
            add(place, columns, 0, arena);
    }

    void addBatchSinglePlace(
        size_t row_begin, size_t row_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos)
        const override
    {
        if (if_argument_pos >= 0)
        {
            const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
            size_t size = row_end - row_begin;
            for (size_t i = 0; i < size; ++i)
            {
                size_t pos = First ? row_begin + i : row_end - 1 - i;
                if (flags[pos])
                {
                    add(place, columns, pos, arena);
                    break;
                }
            }
        }
        else
        {
            size_t pos = First ? row_begin : row_end - 1;
            add(place, columns, pos, arena);
        }
    }

    void addBatchSinglePlaceNotNull(
        size_t, size_t, AggregateDataPtr __restrict, const IColumn **, const UInt8 *, Arena *, ssize_t) const override
    {
        /// This should not happen since it means somebody else has preprocessed the data (NULLs or IFs) and might
        /// have discarded values that we need (NULLs)
        throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionAnyRespectNulls::addBatchSinglePlaceNotNull called");
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
    {
        auto & d = this->data(place);
        if (First && d.isSet())
            return;

        auto & other = this->data(rhs);
        if (other.isSet())
        {
            d.status = other.status;
            d.value = other.value;
        }
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        auto & d = this->data(place);
        UInt8 k = d.status;

        writeBinaryLittleEndian<UInt8>(k, buf);
        if (k == Data::Status::SetOther)
            serialization->serializeBinary(d.value, buf, {});
    }

    void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
    {
        auto & d = this->data(place);
        UInt8 k = Data::Status::NotSet;
        readBinaryLittleEndian<UInt8>(k, buf);
        d.status = static_cast<Data::Status>(k);
        if (d.status == Data::Status::NotSet)
            return;
        else if (d.status == Data::Status::SetNull)
        {
            if (!returns_nullable_type)
                throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type (NULL) in non-nullable {}State", getName());
            return;
        }
        else if (d.status == Data::Status::SetOther)
            serialization->deserializeBinary(d.value, buf, {});
        else
            throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type ({}) in {}State", static_cast<Int8>(k), getName());
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        auto & d = this->data(place);
        if (d.status == Data::Status::SetOther)
            to.insert(d.value);
        else
            to.insertDefault();
    }

    AggregateFunctionPtr getOwnNullAdapter(
        const AggregateFunctionPtr & original_function,
        const DataTypes & /*arguments*/,
        const Array & /*params*/,
        const AggregateFunctionProperties & /*properties*/) const override
    {
        return original_function;
    }
};


template <bool First>
IAggregateFunction * createAggregateFunctionSingleValueRespectNulls(
    const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
    assertNoParameters(name, parameters);
    assertUnary(name, argument_types);

    return new AggregateFunctionAnyRespectNulls<First>(argument_types[0]);
}

AggregateFunctionPtr createAggregateFunctionAny(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
    return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData>(name, argument_types, parameters, settings));
}

template <bool RespectNulls = false>
AggregateFunctionPtr createAggregateFunctionNullableAny(
AggregateFunctionPtr createAggregateFunctionAnyRespectNulls(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
    return AggregateFunctionPtr(
        createAggregateFunctionSingleNullableValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData, RespectNulls>(
            name, argument_types, parameters, settings));
    return AggregateFunctionPtr(createAggregateFunctionSingleValueRespectNulls<true>(name, argument_types, parameters, settings));
}

AggregateFunctionPtr createAggregateFunctionAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
@ -28,13 +215,10 @@ AggregateFunctionPtr createAggregateFunctionAnyLast(const std::string & name, co
    return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyLastData>(name, argument_types, parameters, settings));
}

template <bool RespectNulls = false>
AggregateFunctionPtr createAggregateFunctionNullableAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
AggregateFunctionPtr createAggregateFunctionAnyLastRespectNulls(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
    return AggregateFunctionPtr(createAggregateFunctionSingleNullableValue<
        AggregateFunctionsSingleValue,
        AggregateFunctionAnyLastData,
        RespectNulls>(name, argument_types, parameters, settings));
    return AggregateFunctionPtr(createAggregateFunctionSingleValueRespectNulls<false>(name, argument_types, parameters, settings));
}

AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
@ -46,26 +230,28 @@ AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, c

void registerAggregateFunctionsAny(AggregateFunctionFactory & factory)
{
    AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
    AggregateFunctionProperties default_properties = {.returns_default_when_only_null = false, .is_order_dependent = true};
    AggregateFunctionProperties default_properties_for_respect_nulls
        = {.returns_default_when_only_null = false, .is_order_dependent = true, .is_window_function = true};

    factory.registerFunction("any", { createAggregateFunctionAny, properties });
    factory.registerFunction("any", {createAggregateFunctionAny, default_properties});
    factory.registerAlias("any_value", "any", AggregateFunctionFactory::CaseInsensitive);
    factory.registerFunction("anyLast", { createAggregateFunctionAnyLast, properties });
    factory.registerFunction("anyHeavy", { createAggregateFunctionAnyHeavy, properties });
    factory.registerAlias("first_value", "any", AggregateFunctionFactory::CaseInsensitive);

    // Synonyms for use as window functions.
    factory.registerFunction("first_value",
        { createAggregateFunctionAny, properties },
        AggregateFunctionFactory::CaseInsensitive);
    factory.registerFunction("first_value_respect_nulls",
        { createAggregateFunctionNullableAny<true>, properties },
        AggregateFunctionFactory::CaseInsensitive);
    factory.registerFunction("last_value",
        { createAggregateFunctionAnyLast, properties },
        AggregateFunctionFactory::CaseInsensitive);
    factory.registerFunction("last_value_respect_nulls",
        { createAggregateFunctionNullableAnyLast<true>, properties },
        AggregateFunctionFactory::CaseInsensitive);
    factory.registerFunction("any_respect_nulls", {createAggregateFunctionAnyRespectNulls, default_properties_for_respect_nulls});
    factory.registerAlias("any_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
    factory.registerAlias("first_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive);

    factory.registerFunction("anyLast", {createAggregateFunctionAnyLast, default_properties});
    factory.registerAlias("last_value", "anyLast", AggregateFunctionFactory::CaseInsensitive);

    factory.registerFunction("anyLast_respect_nulls", {createAggregateFunctionAnyLastRespectNulls, default_properties_for_respect_nulls});
    factory.registerAlias("last_value_respect_nulls", "anyLast_respect_nulls", AggregateFunctionFactory::CaseInsensitive);

    factory.registerFunction("anyHeavy", {createAggregateFunctionAnyHeavy, default_properties});

    factory.registerNullsActionTransformation("any", "any_respect_nulls");
    factory.registerNullsActionTransformation("anyLast", "anyLast_respect_nulls");
}

}
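With the transformations registered above, the parser-level modifier maps onto the dedicated functions in both directions; a usage sketch (the `values` table function call is illustrative):

```sql
-- `any(x) RESPECT NULLS` executes as `any_respect_nulls(x)`;
-- `any_respect_nulls(x) IGNORE NULLS` falls back to `any(x)`.
SELECT any(x) RESPECT NULLS, anyLast(x) RESPECT NULLS
FROM values('x Nullable(Int32)', NULL, 1, NULL);
```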
@ -77,7 +77,7 @@ public:
        if (if_argument_pos >= 0)
        {
            const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
            data(place).count += countBytesInFilter(flags);
            data(place).count += countBytesInFilter(flags.data(), row_begin, row_end);
        }
        else
        {
@ -116,7 +116,7 @@ public:
        /// Return normalized state type: count()
        AggregateFunctionProperties properties;
        return std::make_shared<DataTypeAggregateFunction>(
            AggregateFunctionFactory::instance().get(getName(), {}, {}, properties), DataTypes{}, Array{});
            AggregateFunctionFactory::instance().get(getName(), NullsAction::EMPTY, {}, {}, properties), DataTypes{}, Array{});
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
@ -267,7 +267,7 @@ public:
        /// Return normalized state type: count()
        AggregateFunctionProperties properties;
        return std::make_shared<DataTypeAggregateFunction>(
            AggregateFunctionFactory::instance().get(getName(), {}, {}, properties), DataTypes{}, Array{});
            AggregateFunctionFactory::instance().get(getName(), NullsAction::EMPTY, {}, {}, properties), DataTypes{}, Array{});
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
@ -1,23 +1,11 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.h>

#include <DataTypes/DataTypeAggregateFunction.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeLowCardinality.h>

#include <IO/WriteHelpers.h>

#include <Interpreters/Context.h>

#include <Common/StringUtils/StringUtils.h>
#include <Common/typeid_cast.h>
#include <Common/CurrentThread.h>

#include <Poco/String.h>

#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>

#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>

static constexpr size_t MAX_AGGREGATE_FUNCTION_NAME_LENGTH = 1000;

@ -28,10 +16,11 @@ struct Settings;

namespace ErrorCodes
{
    extern const int UNKNOWN_AGGREGATE_FUNCTION;
    extern const int LOGICAL_ERROR;
    extern const int ILLEGAL_AGGREGATION;
    extern const int LOGICAL_ERROR;
    extern const int NOT_IMPLEMENTED;
    extern const int TOO_LARGE_STRING_SIZE;
    extern const int UNKNOWN_AGGREGATE_FUNCTION;
}

const String & getAggregateFunctionCanonicalNameIfAny(const String & name)
@ -59,6 +48,23 @@ void AggregateFunctionFactory::registerFunction(const String & name, Value creat
    }
}

void AggregateFunctionFactory::registerNullsActionTransformation(const String & source_ignores_nulls, const String & target_respect_nulls)
{
    if (!aggregate_functions.contains(source_ignores_nulls))
        throw Exception(ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Source aggregation '{}' not found", source_ignores_nulls);

    if (!aggregate_functions.contains(target_respect_nulls))
        throw Exception(ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Target aggregation '{}' not found", target_respect_nulls);

    if (!respect_nulls.emplace(source_ignores_nulls, target_respect_nulls).second)
        throw Exception(
            ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Assignment from '{}' is not unique", source_ignores_nulls);

    if (!ignore_nulls.emplace(target_respect_nulls, source_ignores_nulls).second)
        throw Exception(
            ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Assignment from '{}' is not unique", target_respect_nulls);
}

static DataTypes convertLowCardinalityTypesToNested(const DataTypes & types)
{
    DataTypes res_types;
@ -70,7 +76,11 @@ static DataTypes convertLowCardinalityTypesToNested(const DataTypes & types)
}

AggregateFunctionPtr AggregateFunctionFactory::get(
    const String & name, const DataTypes & argument_types, const Array & parameters, AggregateFunctionProperties & out_properties) const
    const String & name,
    NullsAction action,
    const DataTypes & argument_types,
    const Array & parameters,
    AggregateFunctionProperties & out_properties) const
{
    /// This to prevent costly string manipulation in parsing the aggregate function combinators.
    /// Example: avgArrayArrayArrayArray...(1000 times)...Array
@ -81,8 +91,9 @@ AggregateFunctionPtr AggregateFunctionFactory::get(

    /// If one of the types is Nullable, we apply aggregate function combinator "Null" if it's not window function.
    /// Window functions are not real aggregate functions. Applying combinators doesn't make sense for them,
    /// they must handle the nullability themselves
    auto properties = tryGetProperties(name);
    /// they must handle the nullability themselves.
    /// Aggregate functions such as any_value_respect_nulls are considered window functions in that sense
    auto properties = tryGetProperties(name, action);
    bool is_window_function = properties.has_value() && properties->is_window_function;
    if (!is_window_function && std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(),
        [](const auto & type) { return type->isNullable(); }))
@ -98,8 +109,7 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
    bool has_null_arguments = std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(),
        [](const auto & type) { return type->onlyNull(); });

    AggregateFunctionPtr nested_function = getImpl(
        name, nested_types, nested_parameters, out_properties, has_null_arguments);
    AggregateFunctionPtr nested_function = getImpl(name, action, nested_types, nested_parameters, out_properties, has_null_arguments);

    // Pure window functions are not real aggregate functions. Applying
    // combinators doesn't make sense for them, they must handle the
@ -110,22 +120,54 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
        return combinator->transformAggregateFunction(nested_function, out_properties, types_without_low_cardinality, parameters);
    }

    auto with_original_arguments = getImpl(name, types_without_low_cardinality, parameters, out_properties, false);
    auto with_original_arguments = getImpl(name, action, types_without_low_cardinality, parameters, out_properties, false);

    if (!with_original_arguments)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: AggregateFunctionFactory returned nullptr");
    return with_original_arguments;
}

std::optional<AggregateFunctionWithProperties>
AggregateFunctionFactory::getAssociatedFunctionByNullsAction(const String & name, NullsAction action) const
{
    if (action == NullsAction::RESPECT_NULLS)
    {
        if (auto it = respect_nulls.find(name); it == respect_nulls.end())
            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} does not support RESPECT NULLS", name);
        else if (auto associated_it = aggregate_functions.find(it->second); associated_it != aggregate_functions.end())
            return {associated_it->second};
        else
            throw Exception(
                ErrorCodes::LOGICAL_ERROR, "Unable to find the function {} (equivalent to '{} RESPECT NULLS')", it->second, name);
    }

    if (action == NullsAction::IGNORE_NULLS)
    {
        if (auto it = ignore_nulls.find(name); it != ignore_nulls.end())
        {
            if (auto associated_it = aggregate_functions.find(it->second); associated_it != aggregate_functions.end())
                return {associated_it->second};
            else
                throw Exception(
                    ErrorCodes::LOGICAL_ERROR, "Unable to find the function {} (equivalent to '{} IGNORE NULLS')", it->second, name);
        }
        /// We don't throw for IGNORE NULLS of other functions because that's the default in CH
    }

    return {};
}


AggregateFunctionPtr AggregateFunctionFactory::getImpl(
    const String & name_param,
    NullsAction action,
    const DataTypes & argument_types,
    const Array & parameters,
    AggregateFunctionProperties & out_properties,
    bool has_null_arguments) const
{
    String name = getAliasToOrName(name_param);
    String case_insensitive_name;
    bool is_case_insensitive = false;
    Value found;

@ -135,10 +177,14 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
        found = it->second;
    }

    if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end())
    if (!found.creator)
    {
        found = jt->second;
        is_case_insensitive = true;
        case_insensitive_name = Poco::toLower(name);
        if (auto jt = case_insensitive_aggregate_functions.find(case_insensitive_name); jt != case_insensitive_aggregate_functions.end())
        {
            found = jt->second;
            is_case_insensitive = true;
        }
    }

    ContextPtr query_context;
@ -147,11 +193,14 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(

    if (found.creator)
    {
        out_properties = found.properties;
        auto opt = getAssociatedFunctionByNullsAction(is_case_insensitive ? case_insensitive_name : name, action);
        if (opt)
            found = *opt;

        out_properties = found.properties;
        if (query_context && query_context->getSettingsRef().log_queries)
            query_context->addQueryFactoriesInfo(
                Context::QueryLogFactories::AggregateFunction, is_case_insensitive ? Poco::toLower(name) : name);
                Context::QueryLogFactories::AggregateFunction, is_case_insensitive ? case_insensitive_name : name);

        /// The case when aggregate function should return NULL on NULL arguments. This case is handled in "get" method.
        if (!out_properties.returns_default_when_only_null && has_null_arguments)
@ -196,7 +245,7 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
        DataTypes nested_types = combinator->transformArguments(argument_types);
        Array nested_parameters = combinator->transformParameters(parameters);

        AggregateFunctionPtr nested_function = get(nested_name, nested_types, nested_parameters, out_properties);
        AggregateFunctionPtr nested_function = get(nested_name, action, nested_types, nested_parameters, out_properties);
        return combinator->transformAggregateFunction(nested_function, out_properties, argument_types, parameters);
    }

@ -213,16 +262,7 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
    throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION, "Unknown aggregate function {}{}", name, extra_info);
}


AggregateFunctionPtr AggregateFunctionFactory::tryGet(
    const String & name, const DataTypes & argument_types, const Array & parameters, AggregateFunctionProperties & out_properties) const
{
    return isAggregateFunctionName(name)
        ? get(name, argument_types, parameters, out_properties)
        : nullptr;
}

std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetProperties(String name) const
std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetProperties(String name, NullsAction action) const
{
    if (name.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH)
        throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too long name of aggregate function, maximum: {}", MAX_AGGREGATE_FUNCTION_NAME_LENGTH);
@ -231,6 +271,8 @@ std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetPrope
    {
        name = getAliasToOrName(name);
        Value found;
        String lower_case_name;
        bool is_case_insensitive = false;

        /// Find by exact match.
        if (auto it = aggregate_functions.find(name); it != aggregate_functions.end())
@ -238,11 +280,23 @@ std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetPrope
            found = it->second;
        }

        if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end())
            found = jt->second;
        if (!found.creator)
        {
            lower_case_name = Poco::toLower(name);
            if (auto jt = case_insensitive_aggregate_functions.find(lower_case_name); jt != case_insensitive_aggregate_functions.end())
            {
                is_case_insensitive = true;
                found = jt->second;
            }
        }

        if (found.creator)
        {
            auto opt = getAssociatedFunctionByNullsAction(is_case_insensitive ? lower_case_name : name, action);
            if (opt)
                return opt->properties;
            return found.properties;
        }

        /// Combinators of aggregate functions.
        /// For every aggregate function 'agg' and combiner '-Comb' there is a combined aggregate function with the name 'aggComb',
@ -262,27 +316,29 @@ std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetPrope
}


bool AggregateFunctionFactory::isAggregateFunctionName(String name) const
bool AggregateFunctionFactory::isAggregateFunctionName(const String & name_) const
{
    if (name.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH)
    if (name_.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH)
        throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too long name of aggregate function, maximum: {}", MAX_AGGREGATE_FUNCTION_NAME_LENGTH);

    while (true)
    if (aggregate_functions.contains(name_) || isAlias(name_))
        return true;

    String name_lowercase = Poco::toLower(name_);
    if (case_insensitive_aggregate_functions.contains(name_lowercase) || isAlias(name_lowercase))
        return true;

    String name = name_;
    while (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name))
    {
        if (aggregate_functions.contains(name) || isAlias(name))
            return true;
        name = name.substr(0, name.size() - combinator->getName().size());
        name_lowercase = name_lowercase.substr(0, name_lowercase.size() - combinator->getName().size());

        String name_lowercase = Poco::toLower(name);
        if (case_insensitive_aggregate_functions.contains(name_lowercase) || isAlias(name_lowercase))
        if (aggregate_functions.contains(name) || isAlias(name) || case_insensitive_aggregate_functions.contains(name_lowercase)
            || isAlias(name_lowercase))
            return true;

        if (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name))
        {
            name = name.substr(0, name.size() - combinator->getName().size());
        }
        else
            return false;
    }
    return false;
}

AggregateFunctionFactory & AggregateFunctionFactory::instance()
@ -1,9 +1,9 @@
#pragma once

#include <AggregateFunctions/IAggregateFunction.h>
#include <Common/IFactoryWithAliases.h>
#include <Parsers/ASTFunction.h>

#include <Parsers/NullsAction.h>
#include <Common/IFactoryWithAliases.h>

#include <functional>
#include <memory>
@ -62,36 +62,44 @@ public:
        Value creator,
        CaseSensitiveness case_sensitiveness = CaseSensitive);

    /// Register how to transform from one aggregate function to another based on NullsAction
    /// Registers them both ways:
    /// SOURCE + RESPECT NULLS will be transformed to TARGET
    /// TARGET + IGNORE NULLS will be transformed to SOURCE
    void registerNullsActionTransformation(const String & source_ignores_nulls, const String & target_respect_nulls);

    /// Throws an exception if not found.
    AggregateFunctionPtr
    get(const String & name,
        const DataTypes & argument_types,
        const Array & parameters,
        AggregateFunctionProperties & out_properties) const;

    /// Returns nullptr if not found.
    AggregateFunctionPtr tryGet(
        const String & name,
        NullsAction action,
        const DataTypes & argument_types,
        const Array & parameters,
        AggregateFunctionProperties & out_properties) const;

    /// Get properties if the aggregate function exists.
    std::optional<AggregateFunctionProperties> tryGetProperties(String name) const;
    std::optional<AggregateFunctionProperties> tryGetProperties(String name, NullsAction action) const;

    bool isAggregateFunctionName(String name) const;
    bool isAggregateFunctionName(const String & name) const;

private:
    AggregateFunctionPtr getImpl(
        const String & name,
        NullsAction action,
        const DataTypes & argument_types,
        const Array & parameters,
        AggregateFunctionProperties & out_properties,
        bool has_null_arguments) const;

    using AggregateFunctions = std::unordered_map<String, Value>;
    using ActionMap = std::unordered_map<String, String>;

    AggregateFunctions aggregate_functions;
    /// Mapping from functions with `RESPECT NULLS` modifier to actual aggregate function names
    /// Example: `any(x) RESPECT NULLS` should be executed as function `any_respect_nulls`
    ActionMap respect_nulls;
    /// Same as above for `IGNORE NULLS` modifier
    ActionMap ignore_nulls;
    std::optional<AggregateFunctionWithProperties> getAssociatedFunctionByNullsAction(const String & name, NullsAction action) const;

    /// Case insensitive aggregate functions will be additionally added here with lowercased name.
    AggregateFunctions case_insensitive_aggregate_functions;
82
src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp
Normal file
@ -0,0 +1,82 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionGroupArraySorted.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Common/Exception.h>

namespace DB
{
struct Settings;

namespace ErrorCodes
{
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int BAD_ARGUMENTS;
}

namespace
{

template <template <typename> class AggregateFunctionTemplate, typename ... TArgs>
AggregateFunctionPtr createWithNumericOrTimeType(const IDataType & argument_type, TArgs && ... args)
{
    WhichDataType which(argument_type);
    if (which.idx == TypeIndex::Date) return std::make_shared<AggregateFunctionTemplate<UInt16>>(std::forward<TArgs>(args)...);
    if (which.idx == TypeIndex::DateTime) return std::make_shared<AggregateFunctionTemplate<UInt32>>(std::forward<TArgs>(args)...);
    if (which.idx == TypeIndex::IPv4) return std::make_shared<AggregateFunctionTemplate<IPv4>>(std::forward<TArgs>(args)...);
    return AggregateFunctionPtr(createWithNumericType<AggregateFunctionTemplate, TArgs...>(argument_type, std::forward<TArgs>(args)...));
}

template <typename ... TArgs>
inline AggregateFunctionPtr createAggregateFunctionGroupArraySortedImpl(const DataTypePtr & argument_type, const Array & parameters, TArgs ... args)
{
    if (auto res = createWithNumericOrTimeType<GroupArraySortedNumericImpl>(*argument_type, argument_type, parameters, std::forward<TArgs>(args)...))
        return AggregateFunctionPtr(res);

    WhichDataType which(argument_type);
    return std::make_shared<GroupArraySortedGeneralImpl<GroupArraySortedNodeGeneral>>(argument_type, parameters, std::forward<TArgs>(args)...);
}

AggregateFunctionPtr createAggregateFunctionGroupArraySorted(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
    assertUnary(name, argument_types);

    UInt64 max_elems = std::numeric_limits<UInt64>::max();

    if (parameters.empty())
    {
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should have limit argument", name);
    }
    else if (parameters.size() == 1)
    {
        auto type = parameters[0].getType();
        if (type != Field::Types::Int64 && type != Field::Types::UInt64)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);

        if ((type == Field::Types::Int64 && parameters[0].get<Int64>() < 0) ||
            (type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);

        max_elems = parameters[0].get<UInt64>();
    }
    else
        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
            "Function {} does not support this number of arguments", name);

    return createAggregateFunctionGroupArraySortedImpl(argument_types[0], parameters, max_elems);
}

}


void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory)
{
    AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = false };

    factory.registerFunction("groupArraySorted", { createAggregateFunctionGroupArraySorted, properties });
}

}
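A usage sketch of the function registered above (the single parameter caps how many of the smallest elements are kept; the result is sorted ascending):

```sql
SELECT groupArraySorted(5)(number) FROM numbers(100);
-- expected: [0, 1, 2, 3, 4]
```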
355
src/AggregateFunctions/AggregateFunctionGroupArraySorted.h
Normal file
@ -0,0 +1,355 @@
#pragma once

#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>

#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>

#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Functions/array/arraySort.h>

#include <Common/Exception.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <Columns/ColumnConst.h>
#include <DataTypes/IDataType.h>
#include <base/sort.h>
#include <Columns/IColumn.h>

#include <AggregateFunctions/IAggregateFunction.h>

#include <Common/RadixSort.h>
#include <algorithm>
#include <type_traits>
#include <utility>

#define AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE 0xFFFFFF

namespace DB
{
struct Settings;

namespace ErrorCodes
{
    extern const int TOO_LARGE_ARRAY_SIZE;
}

template <typename T>
struct GroupArraySortedData;

template <typename T>
struct GroupArraySortedData
{
    /// For easy serialization.
    static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);

    // Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
    using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
    using Array = PODArray<T, 32, Allocator>;

    Array value;
};

template <typename T>
class GroupArraySortedNumericImpl final
    : public IAggregateFunctionDataHelper<GroupArraySortedData<T>, GroupArraySortedNumericImpl<T>>
{
    using Data = GroupArraySortedData<T>;
    UInt64 max_elems;
    SerializationPtr serialization;

public:
    explicit GroupArraySortedNumericImpl(
        const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
        : IAggregateFunctionDataHelper<GroupArraySortedData<T>, GroupArraySortedNumericImpl<T>>(
            {data_type_}, parameters_, std::make_shared<DataTypeArray>(data_type_))
        , max_elems(max_elems_)
        , serialization(data_type_->getDefaultSerialization())
    {
    }

    String getName() const override { return "groupArraySorted"; }

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        const auto & row_value = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
        auto & cur_elems = this->data(place);

        cur_elems.value.push_back(row_value, arena);

        /// To optimize, we sort (2 * max_size) elements of input array over and over again
        /// and after each loop we delete the last half of sorted array
        if (cur_elems.value.size() >= max_elems * 2)
        {
            RadixSort<RadixSortNumTraits<T>>::executeLSD(cur_elems.value.data(), cur_elems.value.size());
            cur_elems.value.resize(max_elems, arena);
        }
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        auto & cur_elems = this->data(place);
        auto & rhs_elems = this->data(rhs);

        if (rhs_elems.value.empty())
            return;

        if (rhs_elems.value.size())
            cur_elems.value.insertByOffsets(rhs_elems.value, 0, rhs_elems.value.size(), arena);

        RadixSort<RadixSortNumTraits<T>>::executeLSD(cur_elems.value.data(), cur_elems.value.size());

        size_t elems_size = cur_elems.value.size() < max_elems ? cur_elems.value.size() : max_elems;
        cur_elems.value.resize(elems_size, arena);
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        auto & value = this->data(place).value;
        size_t size = value.size();
        writeVarUInt(size, buf);

        for (const auto & elem : value)
            writeBinaryLittleEndian(elem, buf);
    }

    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        size_t size = 0;
        readVarUInt(size, buf);

        if (unlikely(size > max_elems))
            throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size, it should not exceed {}", max_elems);

        auto & value = this->data(place).value;

        value.resize(size, arena);
        for (auto & element : value)
            readBinaryLittleEndian(element, buf);
    }

    static void checkArraySize(size_t elems, size_t max_elems)
    {
        if (unlikely(elems > max_elems))
            throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
                "Too large array size {} (maximum: {})", elems, max_elems);
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
    {
        auto & value = this->data(place).value;

        RadixSort<RadixSortNumTraits<T>>::executeLSD(value.data(), value.size());
        size_t elems_size = value.size() < max_elems ? value.size() : max_elems;
        value.resize(elems_size, arena);
        size_t size = value.size();

        ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
        ColumnArray::Offsets & offsets_to = arr_to.getOffsets();

        offsets_to.push_back(offsets_to.back() + size);

        if (size)
        {
            typename ColumnVector<T>::Container & data_to = assert_cast<ColumnVector<T> &>(arr_to.getData()).getData();
            data_to.insert(this->data(place).value.begin(), this->data(place).value.end());
            RadixSort<RadixSortNumTraits<T>>::executeLSD(value.data(), value.size());
            value.resize(elems_size, arena);
        }
    }

    bool allocatesMemoryInArena() const override { return true; }
};


template <typename Node, bool has_sampler>
struct GroupArraySortedGeneralData;

template <typename Node>
struct GroupArraySortedGeneralData<Node, false>
{
    // Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
    using Allocator = MixedAlignedArenaAllocator<alignof(Node *), 4096>;
    using Array = PODArray<Field, 32, Allocator>;

    Array value;
};

template <typename Node>
struct GroupArraySortedNodeBase
{
    UInt64 size; // size of payload

    /// Returns pointer to actual payload
    char * data() { return reinterpret_cast<char *>(this) + sizeof(Node); }

    const char * data() const { return reinterpret_cast<const char *>(this) + sizeof(Node); }
};

struct GroupArraySortedNodeString : public GroupArraySortedNodeBase<GroupArraySortedNodeString>
{
    using Node = GroupArraySortedNodeString;
};

struct GroupArraySortedNodeGeneral : public GroupArraySortedNodeBase<GroupArraySortedNodeGeneral>
{
    using Node = GroupArraySortedNodeGeneral;
};

/// Implementation of groupArraySorted for Generic data via Array
template <typename Node>
class GroupArraySortedGeneralImpl final
    : public IAggregateFunctionDataHelper<GroupArraySortedGeneralData<Node, false>, GroupArraySortedGeneralImpl<Node>>
{
    using Data = GroupArraySortedGeneralData<Node, false>;
    static Data & data(AggregateDataPtr __restrict place) { return *reinterpret_cast<Data *>(place); }
    static const Data & data(ConstAggregateDataPtr __restrict place) { return *reinterpret_cast<const Data *>(place); }

    DataTypePtr & data_type;
    UInt64 max_elems;
    SerializationPtr serialization;


public:
    GroupArraySortedGeneralImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
        : IAggregateFunctionDataHelper<GroupArraySortedGeneralData<Node, false>, GroupArraySortedGeneralImpl<Node>>(
            {data_type_}, parameters_, std::make_shared<DataTypeArray>(data_type_))
        , data_type(this->argument_types[0])
        , max_elems(max_elems_)
        , serialization(data_type->getDefaultSerialization())
    {
    }

    String getName() const override { return "groupArraySorted"; }

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        auto & cur_elems = data(place);

        cur_elems.value.push_back(columns[0][0][row_num], arena);

        /// To optimize, we sort (2 * max_size) elements of input array over and over again and
        /// after each loop we delete the last half of sorted array

        if (cur_elems.value.size() >= max_elems * 2)
        {
            std::sort(cur_elems.value.begin(), cur_elems.value.begin() + (max_elems * 2));
            cur_elems.value.erase(cur_elems.value.begin() + max_elems, cur_elems.value.begin() + (max_elems * 2));
        }
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        auto & cur_elems = data(place);
        auto & rhs_elems = data(rhs);

        if (rhs_elems.value.empty())
            return;

        UInt64 new_elems = rhs_elems.value.size();

        for (UInt64 i = 0; i < new_elems; ++i)
            cur_elems.value.push_back(rhs_elems.value[i], arena);

        checkArraySize(cur_elems.value.size(), AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE);

        if (!cur_elems.value.empty())
        {
            std::sort(cur_elems.value.begin(), cur_elems.value.end());

            if (cur_elems.value.size() > max_elems)
                cur_elems.value.resize(max_elems, arena);
        }
    }

    static void checkArraySize(size_t elems, size_t max_elems)
    {
        if (unlikely(elems > max_elems))
            throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
                "Too large array size {} (maximum: {})", elems, max_elems);
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        auto & value = data(place).value;
        size_t size = value.size();
        checkArraySize(size, AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE);
        writeVarUInt(size, buf);

        for (const Field & elem : value)
        {
            if (elem.isNull())
            {
                writeBinary(false, buf);
            }
            else
            {
                writeBinary(true, buf);
                serialization->serializeBinary(elem, buf, {});
            }
        }
    }

    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        size_t size = 0;
        readVarUInt(size, buf);

        if (unlikely(size > max_elems))
            throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size, it should not exceed {}", max_elems);

        checkArraySize(size, AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE);
        auto & value = data(place).value;

        value.resize(size, arena);
        for (Field & elem : value)
        {
            UInt8 is_null = 0;
            readBinary(is_null, buf);
            if (!is_null)
                serialization->deserializeBinary(elem, buf, {});
        }
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
    {
        auto & column_array = assert_cast<ColumnArray &>(to);
        auto & value = data(place).value;

        if (!value.empty())
        {
            std::sort(value.begin(), value.end());

            if (value.size() > max_elems)
                value.resize_exact(max_elems, arena);
        }
        auto & offsets = column_array.getOffsets();
        offsets.push_back(offsets.back() + value.size());

        auto & column_data = column_array.getData();

        if (std::is_same_v<Node, GroupArraySortedNodeString>)
        {
            auto & string_offsets = assert_cast<ColumnString &>(column_data).getOffsets();
            string_offsets.reserve(string_offsets.size() + value.size());
        }

        for (const Field & field : value)
            column_data.insert(field);
    }

    bool allocatesMemoryInArena() const override { return true; }
};

#undef AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE

}
@ -771,26 +771,18 @@ static_assert(

/// For any other value types.
template <bool RESULT_IS_NULLABLE = false>
struct SingleValueDataGeneric
{
private:
using Self = SingleValueDataGeneric;

Field value;
bool has_value = false;

public:
static constexpr bool result_is_nullable = RESULT_IS_NULLABLE;
static constexpr bool should_skip_null_arguments = !RESULT_IS_NULLABLE;
static constexpr bool result_is_nullable = false;
static constexpr bool should_skip_null_arguments = true;
static constexpr bool is_any = false;

bool has() const
{
if constexpr (result_is_nullable)
return has_value;
return !value.isNull();
}
bool has() const { return !value.isNull(); }

void insertResultInto(IColumn & to) const
{
@ -820,19 +812,9 @@ public:
serialization.deserializeBinary(value, buf, {});
}

void change(const IColumn & column, size_t row_num, Arena *)
{
column.get(row_num, value);
if constexpr (result_is_nullable)
has_value = true;
}
void change(const IColumn & column, size_t row_num, Arena *) { column.get(row_num, value); }

void change(const Self & to, Arena *)
{
value = to.value;
if constexpr (result_is_nullable)
has_value = true;
}
void change(const Self & to, Arena *) { value = to.value; }

bool changeFirstTime(const IColumn & column, size_t row_num, Arena * arena)
{
@ -847,7 +829,7 @@ public:

bool changeFirstTime(const Self & to, Arena * arena)
{
if (!has() && (result_is_nullable || to.has()))
if (!has() && to.has())
{
change(to, arena);
return true;
@ -882,30 +864,15 @@ public:
}
else
{
if constexpr (result_is_nullable)
Field new_value;
column.get(row_num, new_value);
if (new_value < value)
{
Field new_value;
column.get(row_num, new_value);
if (!value.isNull() && (new_value.isNull() || new_value < value))
{
value = new_value;
return true;
}
else
return false;
value = new_value;
return true;
}
else
{
Field new_value;
column.get(row_num, new_value);
if (new_value < value)
{
value = new_value;
return true;
}
else
return false;
}
return false;
}
}

@ -913,30 +880,13 @@ public:
{
if (!to.has())
return false;
if constexpr (result_is_nullable)
if (!has() || to.value < value)
{
if (!has())
{
change(to, arena);
return true;
}
if (to.value.isNull() || (!value.isNull() && to.value < value))
{
value = to.value;
return true;
}
return false;
change(to, arena);
return true;
}
else
{
if (!has() || to.value < value)
{
change(to, arena);
return true;
}
else
return false;
}
return false;
}

bool changeIfGreater(const IColumn & column, size_t row_num, Arena * arena)
@ -948,29 +898,15 @@ public:
}
else
{
if constexpr (result_is_nullable)
Field new_value;
column.get(row_num, new_value);
if (new_value > value)
{
Field new_value;
column.get(row_num, new_value);
if (!value.isNull() && (new_value.isNull() || value < new_value))
{
value = new_value;
return true;
}
return false;
value = new_value;
return true;
}
else
{
Field new_value;
column.get(row_num, new_value);
if (new_value > value)
{
value = new_value;
return true;
}
else
return false;
}
return false;
}
}

@ -978,36 +914,18 @@ public:
{
if (!to.has())
return false;
if constexpr (result_is_nullable)
if (!has() || to.value > value)
{
if (!value.isNull() && (to.value.isNull() || value < to.value))
{
value = to.value;
return true;
}
return false;
change(to, arena);
return true;
}
else
{
if (!has() || to.value > value)
{
change(to, arena);
return true;
}
else
return false;
}
return false;
}

bool isEqualTo(const IColumn & column, size_t row_num) const
{
return has() && value == column[row_num];
}
bool isEqualTo(const IColumn & column, size_t row_num) const { return has() && value == column[row_num]; }

bool isEqualTo(const Self & to) const
{
return has() && to.value == value;
}
bool isEqualTo(const Self & to) const { return has() && to.value == value; }

static bool allocatesMemoryInArena()
{
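The hunks above drop the RESULT_IS_NULLABLE template parameter from SingleValueDataGeneric: the empty state is now always encoded by a null Field, so has() needs no separate has_value flag, and each changeIf* method keeps only the non-nullable branch. A minimal self-contained sketch of the resulting contract, with std::optional<int> standing in for DB::Field (an illustration, not the ClickHouse implementation):

```cpp
#include <iostream>
#include <optional>

struct SingleValueMinSketch
{
    std::optional<int> value; // empty == "no value seen yet", like a null Field

    bool has() const { return value.has_value(); }

    /// Mirrors the simplified changeIfLess: accept the candidate only when the
    /// state is empty or the candidate is strictly smaller.
    bool changeIfLess(int candidate)
    {
        if (!has() || candidate < *value)
        {
            value = candidate;
            return true;
        }
        return false;
    }
};

int main()
{
    SingleValueMinSketch min_state;
    for (int x : {3, 1, 2})
        min_state.changeIfLess(x);
    std::cout << *min_state.value << '\n'; // prints 1
}
```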
@ -150,7 +150,7 @@ public:
AggregateFunctionProperties properties;
return std::make_shared<DataTypeAggregateFunction>(
AggregateFunctionFactory::instance().get(
GatherFunctionQuantileData::toFusedNameOrSelf(getName()), this->argument_types, params, properties),
GatherFunctionQuantileData::toFusedNameOrSelf(getName()), NullsAction::EMPTY, this->argument_types, params, properties),
this->argument_types,
params);
}

@ -142,6 +142,7 @@ struct AggregateFunctionSumData
), addManyConditionalInternalImpl, MULTITARGET_FUNCTION_BODY((const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end) /// NOLINT
{
ptr += start;
condition_map += start;
size_t count = end - start;
const auto * end_ptr = ptr + count;

@ -20,7 +20,7 @@ template <template <typename> class Data>
class AggregateFunctionCombinatorArgMinMax final : public IAggregateFunctionCombinator
{
public:
String getName() const override { return Data<SingleValueDataGeneric<>>::name(); }
String getName() const override { return Data<SingleValueDataGeneric>::name(); }

DataTypes transformArguments(const DataTypes & arguments) const override
{
@ -66,7 +66,7 @@ public:
if (which.idx == TypeIndex::String)
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataString>>>(nested_function, arguments, params);

return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataGeneric<>>>>(nested_function, arguments, params);
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataGeneric>>>(nested_function, arguments, params);
}
};
@ -33,6 +33,8 @@ class AggregateFunctionIf final : public IAggregateFunctionHelper<AggregateFunct
private:
AggregateFunctionPtr nested_func;
size_t num_arguments;
/// We accept Nullable(Nothing) as condition, but callees always expect UInt8 so we need to avoid calling them
bool only_null_condition = false;

public:
AggregateFunctionIf(AggregateFunctionPtr nested, const DataTypes & types, const Array & params_)
@ -42,7 +44,9 @@ public:
if (num_arguments == 0)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require at least one argument", getName());

if (!isUInt8(types.back()) && !types.back()->onlyNull())
only_null_condition = types.back()->onlyNull();

if (!isUInt8(types.back()) && !only_null_condition)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Last argument for aggregate function {} must be UInt8", getName());
}

@ -108,6 +112,8 @@ public:

void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
if (only_null_condition)
return;
if (assert_cast<const ColumnUInt8 &>(*columns[num_arguments - 1]).getData()[row_num])
nested_func->add(place, columns, row_num, arena);
}
@ -121,6 +127,8 @@ public:
Arena * arena,
ssize_t) const override
{
if (only_null_condition)
return;
nested_func->addBatch(row_begin, row_end, places, place_offset, columns, arena, num_arguments - 1);
}

@ -132,6 +140,8 @@ public:
Arena * arena,
ssize_t) const override
{
if (only_null_condition)
return;
nested_func->addBatchSinglePlace(row_begin, row_end, place, columns, arena, num_arguments - 1);
}

@ -144,6 +154,8 @@ public:
Arena * arena,
ssize_t) const override
{
if (only_null_condition)
return;
nested_func->addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, num_arguments - 1);
}
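The -If combinator hunks add an only_null_condition fast path: a condition of type Nullable(Nothing) can only ever contain NULLs, so no row can pass the filter, and the callees, which expect a UInt8 column, must never be reached. A toy sketch of that control flow; the types are hypothetical and only the guard mirrors the patch:

```cpp
#include <cstdint>
#include <iostream>

struct SumIfSketch
{
    bool only_null_condition = false; // would be set once from the condition's type
    int64_t sum = 0;

    void add(int64_t value, uint8_t condition)
    {
        if (only_null_condition)
            return; // never touch the condition column: it is not UInt8 data
        if (condition)
            sum += value;
    }
};

int main()
{
    SumIfSketch s;
    s.add(10, 1);
    s.add(5, 0);
    std::cout << s.sum << '\n'; // 10
}
```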
@ -447,7 +447,8 @@ public:
{
AggregateFunctionProperties out_properties;
auto & aggr_func_factory = AggregateFunctionFactory::instance();
return aggr_func_factory.get(nested_func_name + "MappedArrays", arguments, params, out_properties);
auto action = NullsAction::EMPTY;
return aggr_func_factory.get(nested_func_name + "MappedArrays", action, arguments, params, out_properties);
}
else
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregation '{}Map' is not implemented for mapped arrays",

@ -100,7 +100,16 @@ public:

if (has_null_types)
{
/// Currently the only functions that returns not-NULL on all NULL arguments are count and uniq, and they returns UInt64.
/** Some functions, such as `count`, `uniq`, and others, return 0 :: UInt64 instead of NULL for a NULL argument.
* These functions have the `returns_default_when_only_null` property, so we explicitly specify the result type
* when replacing the function with `nothing`.
*
* Note: It's a bit dangerous to have the function result type depend on properties because we do not serialize properties in AST,
* and we can lose this information. For example, when we have `count(NULL)` replaced with `nothing(NULL) as "count(NULL)"` and send it
* to the remote server, the remote server will execute `nothing(NULL)` and return `NULL` while `0` is expected.
*
* To address this, we handle `nothing` in a special way in `FunctionNode::toASTImpl`.
*/
if (properties.returns_default_when_only_null)
return std::make_shared<AggregateFunctionNothing>(arguments, params, std::make_shared<DataTypeUInt64>());
else
@ -144,11 +153,18 @@ public:
}
else
{
return std::make_shared<AggregateFunctionNullVariadic<false, true>>(nested_function, arguments, params);
#if 0
if (serialize_flag)
return std::make_shared<AggregateFunctionNullVariadic<false, true>>(nested_function, arguments, params);
else
/// This should be <false, false> (no serialize flag) but it was initially added incorrectly and
/// changing it would break the binary compatibility of aggregation states using this method
// (such as AggregateFunction(argMaxOrNull, Nullable(Int64), UInt64)). The extra flag is harmless
return std::make_shared<AggregateFunctionNullVariadic<false, true>>(nested_function, arguments, params);
}
#endif
}
}
}
};
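To make the rewritten comment concrete: count(NULL) returns 0 :: UInt64 while sum(NULL) returns NULL, and the difference is carried by the returns_default_when_only_null property when the function is replaced with nothing. A minimal sketch of that property-driven choice (illustrative names, not the ClickHouse API):

```cpp
#include <iostream>
#include <string>

struct Properties { bool returns_default_when_only_null = false; };

/// Chooses the result type used when an aggregate is replaced by a "nothing"
/// placeholder because all of its arguments are literally NULL.
std::string resultTypeForOnlyNullArgs(const Properties & properties)
{
    return properties.returns_default_when_only_null ? "UInt64" : "Nullable(Nothing)";
}

int main()
{
    Properties count_props;
    count_props.returns_default_when_only_null = true; // like count() or uniq()
    Properties sum_props;                               // like sum()

    std::cout << resultTypeForOnlyNullArgs(count_props) << '\n'; // UInt64: count(NULL) -> 0
    std::cout << resultTypeForOnlyNullArgs(sum_props) << '\n';   // Nullable(Nothing): sum(NULL) -> NULL
}
```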
@ -35,8 +35,8 @@ public:
auto storage_type_out = DataTypeFactory::instance().get(nested_->getResultType()->getName());
// Need to make a new function with promoted argument types because SimpleAggregates requires arg_type = return_type.
AggregateFunctionProperties properties;
auto function
= AggregateFunctionFactory::instance().get(nested_->getName(), {storage_type_out}, nested_->getParameters(), properties);
auto function = AggregateFunctionFactory::instance().get(
nested_->getName(), NullsAction::EMPTY, {storage_type_out}, nested_->getParameters(), properties);

// Need to make a clone because it'll be customized.
auto storage_type_arg = DataTypeFactory::instance().get(nested_->getResultType()->getName());

@ -14,8 +14,9 @@ namespace DB
struct Settings;

/// min, max, any, anyLast, anyHeavy, etc...
template <template <typename> class AggregateFunctionTemplate, template <typename> class Data>
static IAggregateFunction * createAggregateFunctionSingleValue(const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
template <template <typename> class AggregateFunctionTemplate, template <typename, bool...> class Data>
static IAggregateFunction *
createAggregateFunctionSingleValue(const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertNoParameters(name, parameters);
assertUnary(name, argument_types);
@ -44,31 +45,9 @@ static IAggregateFunction * createAggregateFunctionSingleValue(const String & na
if (which.idx == TypeIndex::String)
return new AggregateFunctionTemplate<Data<SingleValueDataString>>(argument_type);

return new AggregateFunctionTemplate<Data<SingleValueDataGeneric<>>>(argument_type);
return new AggregateFunctionTemplate<Data<SingleValueDataGeneric>>(argument_type);
}

template <template <typename> class AggregateFunctionTemplate, template <typename> class Data, bool RespectNulls = false>
static IAggregateFunction * createAggregateFunctionSingleNullableValue(const String & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
assertNoParameters(name, parameters);
assertUnary(name, argument_types);

const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);
// If the result value could be null (excluding the case that no row is matched),
// use SingleValueDataGeneric.
if constexpr (!RespectNulls)
{
return createAggregateFunctionSingleValue<AggregateFunctionTemplate, Data>(name, argument_types, Array(), settings);
}
else
{
return new AggregateFunctionTemplate<Data<SingleValueDataGeneric<true>>>(argument_type);
}
UNREACHABLE();
}

/// argMin, argMax
template <template <typename> class MinMaxData, typename ResData>
static IAggregateFunction * createAggregateFunctionArgMinMaxSecond(const DataTypePtr & res_type, const DataTypePtr & val_type)
@ -98,7 +77,7 @@ static IAggregateFunction * createAggregateFunctionArgMinMaxSecond(const DataTyp
if (which.idx == TypeIndex::String)
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataString>>>(res_type, val_type);

return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataGeneric<>>>>(res_type, val_type);
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataGeneric>>>(res_type, val_type);
}

template <template <typename> class MinMaxData>
@ -134,7 +113,7 @@ static IAggregateFunction * createAggregateFunctionArgMinMax(const String & name
if (which.idx == TypeIndex::String)
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataString>(res_type, val_type);

return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataGeneric<>>(res_type, val_type);
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataGeneric>(res_type, val_type);
}

}
@ -289,15 +289,6 @@ public:
Arena * arena,
ssize_t if_argument_pos = -1) const = 0;

virtual void addBatchSinglePlaceFromInterval( /// NOLINT
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** columns,
Arena * arena,
ssize_t if_argument_pos = -1)
const = 0;

/** In addition to addBatch, this method collects multiple rows of arguments into array "places"
* as long as they are between offsets[i-1] and offsets[i]. This is used for arrayReduce and
* -Array combinator. It might also be used generally to break data dependency when array
@ -586,31 +577,6 @@ public:
}
}

void addBatchSinglePlaceFromInterval( /// NOLINT
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** columns,
Arena * arena,
ssize_t if_argument_pos = -1)
const override
{
if (if_argument_pos >= 0)
{
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
for (size_t i = row_begin; i < row_end; ++i)
{
if (flags[i])
static_cast<const Derived *>(this)->add(place, columns, i, arena);
}
}
else
{
for (size_t i = row_begin; i < row_end; ++i)
static_cast<const Derived *>(this)->add(place, columns, i, arena);
}
}

void addBatchArray(
size_t row_begin,
size_t row_end,
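The removed addBatchSinglePlaceFromInterval used the same CRTP dispatch as the surviving batch methods: the helper base calls Derived::add directly, so the per-row call is resolved at compile time instead of through a virtual table. A self-contained sketch of the pattern:

```cpp
#include <cstddef>
#include <iostream>

template <typename Derived>
struct AggregateHelperSketch
{
    /// The base calls Derived::add without a virtual call per row.
    void addBatchSinglePlace(size_t row_begin, size_t row_end, const int * column)
    {
        for (size_t i = row_begin; i < row_end; ++i)
            static_cast<Derived *>(this)->add(column[i]);
    }
};

struct SumSketch : AggregateHelperSketch<SumSketch>
{
    long long sum = 0;
    void add(int v) { sum += v; }
};

int main()
{
    int data[] = {1, 2, 3, 4};
    SumSketch s;
    s.addBatchSinglePlace(0, 4, data);
    std::cout << s.sum << '\n'; // 10
}
```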
@ -43,6 +43,7 @@ namespace ErrorCodes
template <typename T>
class QuantileTDigest
{
friend class TDigestStatistic;
using Value = Float32;
using Count = Float32;
using BetterFloat = Float64; // For intermediate results and sum(Count). Must have better precision, than Count
@ -334,6 +335,44 @@ public:
compress(); // Allows reading/writing TDigests with different epsilon/max_centroids params
}

Float64 getCountLessThan(Float64 value) const
{
bool first = true;
Count sum = 0;
Count prev_count = 0;
Float64 prev_x = 0;
Value prev_mean = 0;

for (const auto & c : centroids)
{
/// std::cerr << "c "<< c.mean << " "<< c.count << std::endl;
Float64 current_x = sum + c.count * 0.5;
if (c.mean >= value)
{
/// value is smaller than any value.
if (first)
return 0;

Float64 left = prev_x + 0.5 * (prev_count == 1);
Float64 right = current_x - 0.5 * (c.count == 1);
Float64 result = checkOverflow<Float64>(interpolate(
static_cast<Value>(value),
prev_mean,
static_cast<Value>(left),
c.mean,
static_cast<Value>(right)));
return result;
}
sum += c.count;
prev_mean = c.mean;
prev_count = c.count;
prev_x = current_x;
first = false;
}
/// count is larger than any value.
return count;
}

/** Calculates the quantile q [0, 1] based on the digest.
* For an empty digest returns NaN.
*/
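getCountLessThan walks the centroids in mean order; each centroid contributes half of its count on either side of its mean, and the rank of value is interpolated linearly between neighbouring centroids. A standalone sketch of that interpolation, omitting the singleton (count == 1) edge adjustments of the real code:

```cpp
#include <iostream>
#include <vector>

struct Centroid { double mean; double count; };

/// Approximate number of values strictly less than `value`, assuming
/// `centroids` is sorted by mean (a toy version of getCountLessThan).
double countLessThan(const std::vector<Centroid> & centroids, double value)
{
    double sum = 0, prev_x = 0, prev_mean = 0;
    bool first = true;
    for (const auto & c : centroids)
    {
        double current_x = sum + c.count * 0.5; // rank of this centroid's mean
        if (c.mean >= value)
        {
            if (first)
                return 0; // value lies below every centroid
            double k = (value - prev_mean) / (c.mean - prev_mean);
            return prev_x + k * (current_x - prev_x); // linear interpolation
        }
        sum += c.count;
        prev_x = current_x;
        prev_mean = c.mean;
        first = false;
    }
    return sum; // value lies above every centroid
}

int main()
{
    std::vector<Centroid> digest{{1.0, 2}, {3.0, 2}, {5.0, 2}};
    std::cout << countLessThan(digest, 3.0) << '\n'; // 3, i.e. half of the 6 values
}
```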
@ -15,6 +15,7 @@ void registerAggregateFunctionCount(AggregateFunctionFactory &);
void registerAggregateFunctionDeltaSum(AggregateFunctionFactory &);
void registerAggregateFunctionDeltaSumTimestamp(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArray(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory);
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantile(AggregateFunctionFactory &);
@ -111,6 +112,7 @@ void registerAggregateFunctions()
registerAggregateFunctionDeltaSum(factory);
registerAggregateFunctionDeltaSumTimestamp(factory);
registerAggregateFunctionGroupArray(factory);
registerAggregateFunctionGroupArraySorted(factory);
registerAggregateFunctionGroupUniqArray(factory);
registerAggregateFunctionGroupArrayInsertAt(factory);
registerAggregateFunctionsQuantile(factory);
@ -113,6 +113,11 @@ void FunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state

buffer << ", function_type: " << function_type;

if (nulls_action == NullsAction::RESPECT_NULLS)
buffer << ", nulls_action : RESPECT_NULLS";
else if (nulls_action == NullsAction::IGNORE_NULLS)
buffer << ", nulls_action : IGNORE_NULLS";

if (function)
buffer << ", result_type: " + getResultType()->getName();

@ -140,10 +145,9 @@ void FunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state
bool FunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const
{
const auto & rhs_typed = assert_cast<const FunctionNode &>(rhs);
if (function_name != rhs_typed.function_name ||
isAggregateFunction() != rhs_typed.isAggregateFunction() ||
isOrdinaryFunction() != rhs_typed.isOrdinaryFunction() ||
isWindowFunction() != rhs_typed.isWindowFunction())
if (function_name != rhs_typed.function_name || isAggregateFunction() != rhs_typed.isAggregateFunction()
|| isOrdinaryFunction() != rhs_typed.isOrdinaryFunction() || isWindowFunction() != rhs_typed.isWindowFunction()
|| nulls_action != rhs_typed.nulls_action)
return false;

if (isResolved() != rhs_typed.isResolved())
@ -171,6 +175,7 @@ void FunctionNode::updateTreeHashImpl(HashState & hash_state) const
hash_state.update(isOrdinaryFunction());
hash_state.update(isAggregateFunction());
hash_state.update(isWindowFunction());
hash_state.update(nulls_action);

if (!isResolved())
return;
@ -192,6 +197,7 @@ QueryTreeNodePtr FunctionNode::cloneImpl() const
*/
result_function->function = function;
result_function->kind = kind;
result_function->nulls_action = nulls_action;
result_function->wrap_with_nullable = wrap_with_nullable;

return result_function;
@ -202,6 +208,19 @@ ASTPtr FunctionNode::toASTImpl(const ConvertToASTOptions & options) const
auto function_ast = std::make_shared<ASTFunction>();

function_ast->name = function_name;
function_ast->nulls_action = nulls_action;

if (function_name == "nothing")
{
/** Inside AggregateFunctionCombinatorNull we may replace functions with `NULL` in arguments with `nothing`.
* Result type of `nothing` depends on `returns_default_when_only_null` property of nested function.
* If we convert `nothing` to AST, we will lose this information, so we use original function name instead.
*/
const auto & original_ast = getOriginalAST();
const auto & original_function_ast = original_ast ? original_ast->as<ASTFunction>() : nullptr;
if (original_function_ast)
function_ast->name = original_function_ast->name;
}

if (isWindowFunction())
{

@ -5,11 +5,12 @@
#include <Analyzer/ConstantValue.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/ListNode.h>
#include <Common/typeid_cast.h>
#include <Core/ColumnsWithTypeAndName.h>
#include <Core/IResolvedFunction.h>
#include <DataTypes/DataTypeNullable.h>
#include <Functions/IFunction.h>
#include <Parsers/NullsAction.h>
#include <Common/typeid_cast.h>

namespace DB
{
@ -63,6 +64,10 @@ public:
/// Get function name
const String & getFunctionName() const { return function_name; }

/// Get NullsAction modifier
NullsAction getNullsAction() const { return nulls_action; }
void setNullsAction(NullsAction action) { nulls_action = action; }

/// Get parameters
const ListNode & getParameters() const { return children[parameters_child_index]->as<const ListNode &>(); }

@ -214,6 +219,7 @@ protected:
private:
String function_name;
FunctionKind kind = FunctionKind::UNKNOWN;
NullsAction nulls_action = NullsAction::EMPTY;
IResolvedFunctionPtr function;
bool wrap_with_nullable = false;
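Note that nulls_action now feeds both isEqualImpl and updateTreeHashImpl: without that, two calls differing only in the modifier (for example first_value(x) versus first_value(x) RESPECT NULLS) could be folded together as identical query tree nodes. A miniature sketch of why equality and the hash must both include the field (hypothetical node type):

```cpp
#include <cstddef>
#include <functional>
#include <iostream>
#include <string>

enum class NullsAction { EMPTY, RESPECT_NULLS, IGNORE_NULLS };

struct FunctionNodeSketch
{
    std::string name;
    NullsAction nulls_action = NullsAction::EMPTY;

    bool operator==(const FunctionNodeSketch & rhs) const
    {
        /// Equality must see the modifier, or deduplication merges distinct calls.
        return name == rhs.name && nulls_action == rhs.nulls_action;
    }

    size_t hash() const
    {
        /// The hash must stay consistent with operator==.
        return std::hash<std::string>{}(name) ^ (static_cast<size_t>(nulls_action) << 1);
    }
};

int main()
{
    FunctionNodeSketch a{"first_value", NullsAction::EMPTY};
    FunctionNodeSketch b{"first_value", NullsAction::RESPECT_NULLS};
    std::cout << (a == b) << ' ' << (a.hash() == b.hash()) << '\n'; // 0 0
}
```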
@ -278,6 +278,7 @@ QueryTreeNodePtr IQueryTreeNode::cloneAndReplace(const ReplacementMap & replacem
if (it != replacement_map.end())
continue;

node_clone->original_ast = node_to_clone->original_ast;
node_clone->setAlias(node_to_clone->alias);
node_clone->children = node_to_clone->children;
node_clone->weak_pointers = node_to_clone->weak_pointers;
@ -318,6 +319,7 @@ QueryTreeNodePtr IQueryTreeNode::cloneAndReplace(const ReplacementMap & replacem

*weak_pointer_ptr = it->second;
}
result_cloned_node_place->original_ast = original_ast;

return result_cloned_node_place;
}

@ -184,10 +184,9 @@ private:
auto function_aggregate_function = function_node.getAggregateFunction();

AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name,
{ argument->getResultType() },
function_aggregate_function->getParameters(),
properties);
auto action = NullsAction::EMPTY;
auto aggregate_function = AggregateFunctionFactory::instance().get(
aggregate_function_name, action, {argument->getResultType()}, function_aggregate_function->getParameters(), properties);

function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}

@ -76,7 +76,8 @@ public:
/// Replace `countDistinct` of initial query into `count`
auto result_type = function_node->getResultType();
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties);
auto action = NullsAction::EMPTY;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", action, {}, {}, properties);
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
function_node->getArguments().getNodes().clear();
}

@ -78,9 +78,11 @@ QueryTreeNodePtr createResolvedFunction(const ContextPtr & context, const String
return function_node;
}

FunctionNodePtr createResolvedAggregateFunction(const String & name, const QueryTreeNodePtr & argument, const Array & parameters = {})
FunctionNodePtr createResolvedAggregateFunction(
const String & name, const QueryTreeNodePtr & argument, const Array & parameters = {}, NullsAction action = NullsAction::EMPTY)
{
auto function_node = std::make_shared<FunctionNode>(name);
function_node->setNullsAction(action);

if (!parameters.empty())
{
@ -92,11 +94,7 @@ FunctionNodePtr createResolvedAggregateFunction(const String & name, const Query
function_node->getArguments().getNodes() = { argument };

AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(
name,
{ argument->getResultType() },
parameters,
properties);
auto aggregate_function = AggregateFunctionFactory::instance().get(name, action, {argument->getResultType()}, parameters, properties);
function_node->resolveAsAggregateFunction(std::move(aggregate_function));

return function_node;
@ -1,134 +0,0 @@
#include <Analyzer/Passes/MoveFunctionsOutOfAnyPass.h>

#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>

#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/LambdaNode.h>
#include <Analyzer/ConstantNode.h>

namespace DB
{

namespace
{

class AnyFunctionViMoveFunctionsOutOfAnyVisitor : public InDepthQueryTreeVisitorWithContext<AnyFunctionViMoveFunctionsOutOfAnyVisitor>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<AnyFunctionViMoveFunctionsOutOfAnyVisitor>;
using Base::Base;

void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_move_functions_out_of_any)
return;

auto * function_node = node->as<FunctionNode>();
if (!function_node)
return;

/// check function is any
const auto & function_name = function_node->getFunctionName();
if (function_name != "any" && function_name != "anyLast")
return;

auto & arguments = function_node->getArguments().getNodes();
if (arguments.size() != 1)
return;

auto * inside_function_node = arguments[0]->as<FunctionNode>();

/// check argument is a function
if (!inside_function_node)
return;

/// check arguments can not contain arrayJoin or lambda
if (!canRewrite(inside_function_node))
return;

auto & inside_function_node_arguments = inside_function_node->getArguments().getNodes();

/// case any(f())
if (inside_function_node_arguments.empty())
return;

auto it = node_to_rewritten_node.find(node.get());
if (it != node_to_rewritten_node.end())
{
node = it->second;
return;
}

/// checking done, rewrite function
bool changed_argument = false;
for (auto & inside_argument : inside_function_node_arguments)
{
if (inside_argument->as<ConstantNode>()) /// skip constant node
break;

AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, {inside_argument->getResultType()}, {}, properties);

auto any_function = std::make_shared<FunctionNode>(function_name);
any_function->resolveAsAggregateFunction(std::move(aggregate_function));

auto & any_function_arguments = any_function->getArguments().getNodes();
any_function_arguments.push_back(std::move(inside_argument));

inside_argument = std::move(any_function);
changed_argument = true;
}

if (changed_argument)
{
node_to_rewritten_node.emplace(node.get(), arguments[0]);
node = arguments[0];
}
}

private:
bool canRewrite(const FunctionNode * function_node)
{
for (const auto & argument : function_node->getArguments().getNodes())
{
if (argument->as<LambdaNode>())
return false;

if (const auto * inside_function = argument->as<FunctionNode>())
{
/// Function arrayJoin is special and should be skipped (think about it as
/// an aggregate function), otherwise wrong result will be produced.
/// For example:
/// SELECT *, any(arrayJoin([[], []])) FROM numbers(1) GROUP BY number
/// ┌─number─┬─arrayJoin(array(array(), array()))─┐
/// │ 0 │ [] │
/// │ 0 │ [] │
/// └────────┴────────────────────────────────────┘
if (inside_function->getFunctionName() == "arrayJoin")
return false;

if (!canRewrite(inside_function))
return false;
}
}

return true;
}

/// After query analysis, alias identifier will be resolved to node whose memory address is same with the original one.
/// So we can reuse the rewritten function.
std::unordered_map<IQueryTreeNode *, QueryTreeNodePtr> node_to_rewritten_node;

};

}

void MoveFunctionsOutOfAnyPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
AnyFunctionViMoveFunctionsOutOfAnyVisitor visitor(context);
visitor.visit(query_tree_node);
}

}
@ -1,27 +0,0 @@
#pragma once

#include <Analyzer/IQueryTreePass.h>

namespace DB
{

/** Rewrite 'any' and 'anyLast' functions pushing them inside original function.
*
* Example: SELECT any(f(x, y, g(z)));
* Result: SELECT f(any(x), any(y), g(any(z)));
*/
class MoveFunctionsOutOfAnyPass final : public IQueryTreePass
{
public:
String getName() override { return "MoveFunctionsOutOfAnyPass"; }

String getDescription() override
{
return "Rewrite 'any' and 'anyLast' functions pushing them inside original function.";
}

void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;

};

}
@ -56,7 +56,7 @@ private:
static inline void resolveAsCountAggregateFunction(FunctionNode & function_node)
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties);
auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties);

function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}

@ -118,6 +118,7 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
extern const int FUNCTION_CANNOT_HAVE_PARAMETERS;
extern const int SYNTAX_ERROR;
}

/** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h first.
@ -1208,7 +1209,8 @@ private:

static void expandGroupByAll(QueryNode & query_tree_node_typed);

static std::string rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, const ContextPtr & context);
static std::string
rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context);

static std::optional<JoinTableSide> getColumnSideFromJoinTree(const QueryTreeNodePtr & resolved_identifier, const JoinNode & join_node)
{
@ -2310,7 +2312,8 @@ void QueryAnalyzer::expandGroupByAll(QueryNode & query_tree_node_typed)
recursivelyCollectMaxOrdinaryExpressions(node, group_by_nodes);
}

std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, const ContextPtr & context)
std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(
const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context)
{
std::string result_aggregate_function_name = aggregate_function_name;
auto aggregate_function_name_lowercase = Poco::toLower(aggregate_function_name);
@ -2337,7 +2340,7 @@ std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(const std::strin
bool need_add_or_null = settings.aggregate_functions_null_for_empty && !result_aggregate_function_name.ends_with("OrNull");
if (need_add_or_null)
{
auto properties = AggregateFunctionFactory::instance().tryGetProperties(result_aggregate_function_name);
auto properties = AggregateFunctionFactory::instance().tryGetProperties(result_aggregate_function_name, action);
if (!properties->returns_default_when_only_null)
result_aggregate_function_name += "OrNull";
}
@ -2349,7 +2352,7 @@ std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(const std::strin
*/
if (result_aggregate_function_name.ends_with("OrNull"))
{
auto function_properies = AggregateFunctionFactory::instance().tryGetProperties(result_aggregate_function_name);
auto function_properies = AggregateFunctionFactory::instance().tryGetProperties(result_aggregate_function_name, action);
if (function_properies && !function_properies->returns_default_when_only_null)
{
size_t function_name_size = result_aggregate_function_name.size();
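The common thread of these hunks is the new NullsAction argument: the parser records RESPECT/IGNORE NULLS on the node, and both name rewriting and the factory lookup now receive the action. A toy sketch of that dispatch; the _respect_nulls name mapping below is purely hypothetical, only the shape of the flow follows the patch:

```cpp
#include <iostream>
#include <stdexcept>
#include <string>

enum class NullsAction { EMPTY, RESPECT_NULLS, IGNORE_NULLS };

/// Map a function name plus modifier to the name of an implementation,
/// rejecting the modifier for functions that do not support it.
std::string resolveName(const std::string & name, NullsAction action)
{
    if (action == NullsAction::EMPTY)
        return name;
    if (name != "first_value" && name != "last_value" && name != "any" && name != "anyLast")
        throw std::runtime_error("Function '" + name + "' cannot use RESPECT/IGNORE NULLS");
    /// Hypothetical convention: RESPECT NULLS selects a dedicated variant,
    /// IGNORE NULLS keeps the default null-skipping behaviour.
    return action == NullsAction::RESPECT_NULLS ? name + "_respect_nulls" : name;
}

int main()
{
    std::cout << resolveName("first_value", NullsAction::RESPECT_NULLS) << '\n';
    std::cout << resolveName("count", NullsAction::EMPTY) << '\n';
}
```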
@ -4591,6 +4594,19 @@ ProjectionNames QueryAnalyzer::resolveLambda(const QueryTreeNodePtr & lambda_nod
return result_projection_names;
}

namespace
{
void checkFunctionNodeHasEmptyNullsAction(FunctionNode const & node)
{
if (node.getNullsAction() != NullsAction::EMPTY)
throw Exception(
ErrorCodes::SYNTAX_ERROR,
"Function with name '{}' cannot use {} NULLS",
node.getFunctionName(),
node.getNullsAction() == NullsAction::IGNORE_NULLS ? "IGNORE" : "RESPECT");
}
}

/** Resolve function node in scope.
* During function node resolve, function node can be replaced with another expression (if it match lambda or sql user defined function),
* with constant (if it allow constant folding), or with expression list. It is caller responsibility to handle such cases appropriately.
@ -4749,6 +4765,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi

if (is_special_function_exists)
{
checkFunctionNodeHasEmptyNullsAction(*function_node_ptr);
/// Rewrite EXISTS (subquery) into 1 IN (SELECT 1 FROM (subquery) LIMIT 1).
auto & exists_subquery_argument = function_node_ptr->getArguments().getNodes().at(0);

@ -4769,6 +4786,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi

if (is_special_function_if && !function_node_ptr->getArguments().getNodes().empty())
{
checkFunctionNodeHasEmptyNullsAction(*function_node_ptr);
/** Handle special case with constant If function, even if some of the arguments are invalid.
*
* SELECT if(hasColumnInTable('system', 'numbers', 'not_existing_column'), not_existing_column, 5) FROM system.numbers;
@ -4834,6 +4852,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
/// Replace right IN function argument if it is table or table function with subquery that read ordinary columns
if (is_special_function_in)
{
checkFunctionNodeHasEmptyNullsAction(function_node);
if (scope.context->getSettingsRef().transform_null_in)
{
static constexpr std::array<std::pair<std::string_view, std::string_view>, 4> in_function_to_replace_null_in_function_map =
@ -5012,6 +5031,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
lambda_expression_untyped->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());

checkFunctionNodeHasEmptyNullsAction(function_node);

if (!parameters.empty())
{
throw Exception(
@ -5041,6 +5062,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
"Function 'untuple' must have 1 argument. In scope {}",
scope.scope_node->formatASTForErrorMessage());

checkFunctionNodeHasEmptyNullsAction(function_node);

const auto & untuple_argument = function_arguments[0];
auto result_type = untuple_argument->getResultType();
const auto * tuple_data_type = typeid_cast<const DataTypeTuple *>(result_type.get());
@ -5091,6 +5114,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION,
"Function GROUPING can have up to 64 arguments, but {} provided",
function_arguments_size);
checkFunctionNodeHasEmptyNullsAction(function_node);

bool force_grouping_standard_compatibility = scope.context->getSettingsRef().force_grouping_standard_compatibility;
auto grouping_function = std::make_shared<FunctionGrouping>(force_grouping_standard_compatibility);
@ -5115,10 +5139,12 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
"Window function '{}' does not support lambda arguments",
function_name);

std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, scope.context);
auto action = function_node_ptr->getNullsAction();
std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, action, scope.context);

AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, parameters, properties);
auto aggregate_function
= AggregateFunctionFactory::instance().get(aggregate_function_name, action, argument_types, parameters, properties);

function_node.resolveAsWindowFunction(std::move(aggregate_function));

@ -5142,7 +5168,11 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
is_executable_udf = false;
}

if (!function)
if (function)
{
checkFunctionNodeHasEmptyNullsAction(function_node);
}
else
{
if (!AggregateFunctionFactory::instance().isAggregateFunctionName(function_name))
{
@ -5181,10 +5211,12 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
"Aggregate function '{}' does not support lambda arguments",
function_name);

std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, scope.context);
auto action = function_node_ptr->getNullsAction();
std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, action, scope.context);

AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, parameters, properties);
auto aggregate_function
= AggregateFunctionFactory::instance().get(aggregate_function_name, action, argument_types, parameters, properties);

function_node.resolveAsAggregateFunction(std::move(aggregate_function));
@ -97,6 +97,7 @@ private:
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(
function_node.getFunctionName() + suffix,
function_node.getNullsAction(),
argument_types,
function_node.getAggregateFunction()->getParameters(),
properties);

@ -157,10 +157,8 @@ private:
static inline void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type)
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("countIf",
{argument_type},
function_node.getAggregateFunction()->getParameters(),
properties);
auto aggregate_function = AggregateFunctionFactory::instance().get(
"countIf", NullsAction::EMPTY, {argument_type}, function_node.getAggregateFunction()->getParameters(), properties);

function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}

@ -76,7 +76,9 @@ public:
argument_types.emplace_back(function_node_argument->getResultType());

AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(function_node->getFunctionName(),
auto aggregate_function = AggregateFunctionFactory::instance().get(
function_node->getFunctionName(),
NullsAction::EMPTY,
argument_types,
function_node->getAggregateFunction()->getParameters(),
properties);

@ -176,7 +176,7 @@ public:
if (match_subquery_with_distinct() || match_subquery_with_group_by())
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties);
auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties);

function_node->getArguments().getNodes().clear();
function_node->resolveAsAggregateFunction(std::move(aggregate_function));

@ -607,6 +607,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
else
{
auto function_node = std::make_shared<FunctionNode>(function->name);
function_node->setNullsAction(function->nulls_action);

if (function->parameters)
{

@ -44,7 +44,6 @@
#include <Analyzer/Passes/CrossToInnerJoinPass.h>
#include <Analyzer/Passes/ShardNumColumnToFunctionPass.h>
#include <Analyzer/Passes/ConvertQueryToCNFPass.h>
#include <Analyzer/Passes/MoveFunctionsOutOfAnyPass.h>
#include <Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.h>

@ -284,7 +283,6 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<CrossToInnerJoinPass>());
manager.addPass(std::make_unique<ShardNumColumnToFunctionPass>());

manager.addPass(std::make_unique<MoveFunctionsOutOfAnyPass>());
manager.addPass(std::make_unique<OptimizeDateOrDateTimeConverterWithPreimagePass>());

}
@ -544,11 +544,8 @@ inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_nod
argument_types.emplace_back(function_node_argument->getResultType());

AggregateFunctionProperties properties;
return AggregateFunctionFactory::instance().get(
function_node->getFunctionName(),
argument_types,
parameters,
properties);
auto action = NullsAction::EMPTY;
return AggregateFunctionFactory::instance().get(function_node->getFunctionName(), action, argument_types, parameters, properties);
}

}

@ -55,6 +55,10 @@ void WithRetries::renewZooKeeper(FaultyKeeper my_faulty_zookeeper) const

callback(my_faulty_zookeeper);
}
else
{
my_faulty_zookeeper->setKeeper(zookeeper);
}
}

const WithRetries::KeeperSettings & WithRetries::getKeeperSettings() const
@ -222,6 +222,7 @@ add_object_library(clickhouse_storages Storages)
add_object_library(clickhouse_storages_mysql Storages/MySQL)
add_object_library(clickhouse_storages_distributed Storages/Distributed)
add_object_library(clickhouse_storages_mergetree Storages/MergeTree)
add_object_library(clickhouse_storages_statistics Storages/Statistics)
add_object_library(clickhouse_storages_liveview Storages/LiveView)
add_object_library(clickhouse_storages_windowview Storages/WindowView)
add_object_library(clickhouse_storages_s3queue Storages/S3Queue)
@ -2861,7 +2861,7 @@ void ClientBase::init(int argc, char ** argv)

("interactive", "Process queries-file or --query query and start interactive mode")
("pager", po::value<std::string>(), "Pipe all output into this command (less or similar)")
("max_memory_usage_in_client", po::value<int>(), "Set memory limit in client/local server")
("max_memory_usage_in_client", po::value<std::string>(), "Set memory limit in client/local server")
;

addOptions(options_description);
@ -2996,10 +2996,12 @@ void ClientBase::init(int argc, char ** argv)
clearPasswordFromCommandLine(argc, argv);

/// Limit on total memory usage
size_t max_client_memory_usage = config().getInt64("max_memory_usage_in_client", 0 /*default value*/);
if (max_client_memory_usage != 0)
std::string max_client_memory_usage = config().getString("max_memory_usage_in_client", "0" /*default value*/);
if (max_client_memory_usage != "0")
{
total_memory_tracker.setHardLimit(max_client_memory_usage);
UInt64 max_client_memory_usage_int = parseWithSizeSuffix<UInt64>(max_client_memory_usage.c_str(), max_client_memory_usage.length());

total_memory_tracker.setHardLimit(max_client_memory_usage_int);
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
}
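The client option switches from int to string because a plain integer cannot carry a size suffix such as 10G; the real parsing is done by parseWithSizeSuffix. A standalone stand-in that handles only a few binary suffixes (assumed semantics, not ClickHouse's parser):

```cpp
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>

uint64_t parseMemoryLimit(const std::string & s)
{
    size_t pos = 0;
    uint64_t value = std::stoull(s, &pos);
    if (pos == s.size())
        return value; // plain number, no suffix
    switch (s[pos])
    {
        case 'K': return value << 10;
        case 'M': return value << 20;
        case 'G': return value << 30;
        case 'T': return value << 40;
        default: throw std::invalid_argument("Unknown size suffix in '" + s + "'");
    }
}

int main()
{
    std::cout << parseMemoryLimit("10G") << '\n'; // 10737418240
}
```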
@ -46,6 +46,7 @@
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>

#include <AggregateFunctions/AggregateFunctionFactory.h>

namespace DB
{
@ -384,6 +385,39 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast)
// the generic recursion into IAST.children.
}

void QueryFuzzer::fuzzNullsAction(NullsAction & action)
{
/// If it's not using actions, then it's a high change it doesn't support it to begin with
if ((action == NullsAction::EMPTY) && (fuzz_rand() % 100 == 0))
{
if (fuzz_rand() % 2 == 0)
action = NullsAction::RESPECT_NULLS;
else
action = NullsAction::IGNORE_NULLS;
}
else if (fuzz_rand() % 20 == 0)
{
switch (fuzz_rand() % 3)
{
case 0:
{
action = NullsAction::EMPTY;
break;
}
case 1:
{
action = NullsAction::RESPECT_NULLS;
break;
}
default:
{
action = NullsAction::IGNORE_NULLS;
break;
}
}
}
}

void QueryFuzzer::fuzzWindowFrame(ASTWindowDefinition & def)
{
switch (fuzz_rand() % 40)
@ -966,6 +1000,9 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
fuzzColumnLikeExpressionList(fn->arguments.get());
fuzzColumnLikeExpressionList(fn->parameters.get());

if (AggregateUtils::isAggregateFunction(*fn))
fuzzNullsAction(fn->nulls_action);

if (fn->is_window_function && fn->window_definition)
{
auto & def = fn->window_definition->as<ASTWindowDefinition &>();
Some files were not shown because too many files have changed in this diff.