Merge branch 'master' into sync-with-private-2

This commit is contained in:
Alexey Milovidov 2024-03-03 04:42:14 +01:00
commit c411149364
377 changed files with 10139 additions and 4750 deletions

View File

@ -67,8 +67,6 @@ jobs:
test_name: Compatibility check (amd64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
CompatibilityCheckAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
@ -77,8 +75,6 @@ jobs:
test_name: Compatibility check (aarch64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
#########################################################################################
#################################### ORDINARY BUILDS ####################################
#########################################################################################

View File

@ -73,8 +73,6 @@ jobs:
test_name: Compatibility check (amd64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
CompatibilityCheckAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
@ -83,8 +81,6 @@ jobs:
test_name: Compatibility check (aarch64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
#########################################################################################
#################################### ORDINARY BUILDS ####################################
#########################################################################################

View File

@ -117,8 +117,6 @@ jobs:
test_name: Compatibility check (amd64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
CompatibilityCheckAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
@ -127,8 +125,6 @@ jobs:
test_name: Compatibility check (aarch64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
#########################################################################################
#################################### ORDINARY BUILDS ####################################
#########################################################################################

View File

@ -68,8 +68,6 @@ jobs:
test_name: Compatibility check (amd64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
CompatibilityCheckAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
@ -78,8 +76,6 @@ jobs:
test_name: Compatibility check (aarch64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
#########################################################################################
#################################### ORDINARY BUILDS ####################################
#########################################################################################

View File

@ -16,6 +16,7 @@
#ci_set_reduced
#ci_set_arm
#ci_set_integration
#ci_set_analyzer
## To run specified job in CI:
#job_<JOB NAME>

View File

@ -1,9 +1,165 @@
### Table of Contents
**[ClickHouse release v24.2, 2024-02-29](#242)**<br/>
**[ClickHouse release v24.1, 2024-01-30](#241)**<br/>
**[Changelog for 2023](https://clickhouse.com/docs/en/whats-new/changelog/2023/)**<br/>
# 2024 Changelog
### <a id="242"></a> ClickHouse release 24.2, 2024-02-29
#### Backward Incompatible Change
* Validate suspicious/experimental types in nested types. Previously we didn't validate such types (except JSON) in nested types like Array/Tuple/Map. [#59385](https://github.com/ClickHouse/ClickHouse/pull/59385) ([Kruglov Pavel](https://github.com/Avogar)).
* Add sanity check for number of threads and block sizes. [#60138](https://github.com/ClickHouse/ClickHouse/pull/60138) ([Raúl Marín](https://github.com/Algunenano)).
* Don't infer floats in exponential notation by default. Add a setting `input_format_try_infer_exponent_floats` that will restore previous behaviour (disabled by default). Closes [#59476](https://github.com/ClickHouse/ClickHouse/issues/59476). [#59500](https://github.com/ClickHouse/ClickHouse/pull/59500) ([Kruglov Pavel](https://github.com/Avogar)).
* Allow alter operations to be surrounded by parentheses. The emission of parentheses can be controlled by the `format_alter_operations_with_parentheses` config. By default, in formatted queries the parentheses are emitted as we store the formatted alter operations in some places as metadata (e.g.: mutations). The new syntax clarifies some of the queries where alter operations end in a list. E.g.: `ALTER TABLE x MODIFY TTL date GROUP BY a, b, DROP COLUMN c` cannot be parsed properly with the old syntax. In the new syntax the query `ALTER TABLE x (MODIFY TTL date GROUP BY a, b), (DROP COLUMN c)` is obvious. Older versions are not able to read the new syntax, therefore using the new syntax might cause issues if newer and older versions of ClickHouse are mixed in a single cluster. [#59532](https://github.com/ClickHouse/ClickHouse/pull/59532) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
#### New Feature
* Added new syntax which allows to specify definer user in View/Materialized View. This allows to execute selects/inserts from views without explicit grants for underlying tables. So, a View will encapsulate the grants. [#54901](https://github.com/ClickHouse/ClickHouse/pull/54901) [#60439](https://github.com/ClickHouse/ClickHouse/pull/60439) ([pufit](https://github.com/pufit)).
* Try to detect file format automatically during schema inference if it's unknown in `file/s3/hdfs/url/azureBlobStorage` engines. Closes [#50576](https://github.com/ClickHouse/ClickHouse/issues/50576). [#59092](https://github.com/ClickHouse/ClickHouse/pull/59092) ([Kruglov Pavel](https://github.com/Avogar)).
* Implement auto-adjustment for asynchronous insert timeouts. The following settings are introduced: async_insert_poll_timeout_ms, async_insert_use_adaptive_busy_timeout, async_insert_busy_timeout_min_ms, async_insert_busy_timeout_max_ms, async_insert_busy_timeout_increase_rate, async_insert_busy_timeout_decrease_rate. [#58486](https://github.com/ClickHouse/ClickHouse/pull/58486) ([Julia Kartseva](https://github.com/jkartseva)).
* Allow to set up a quota for maximum sequential login failures. [#54737](https://github.com/ClickHouse/ClickHouse/pull/54737) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
* A new aggregate function `groupArrayIntersect`. Follows up: [#49862](https://github.com/ClickHouse/ClickHouse/issues/49862). [#59598](https://github.com/ClickHouse/ClickHouse/pull/59598) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Backup & Restore support for `AzureBlobStorage`. Resolves [#50747](https://github.com/ClickHouse/ClickHouse/issues/50747). [#56988](https://github.com/ClickHouse/ClickHouse/pull/56988) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* The user can now specify the template string directly in the query using `format_schema_rows_template` as an alternative to `format_template_row`. Closes [#31363](https://github.com/ClickHouse/ClickHouse/issues/31363). [#59088](https://github.com/ClickHouse/ClickHouse/pull/59088) ([Shaun Struwig](https://github.com/Blargian)).
* Implemented automatic conversion of merge tree tables of different kinds to replicated engine. Create empty `convert_to_replicated` file in table's data directory (`/clickhouse/store/xxx/xxxyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy/`) and that table will be converted automatically on next server start. [#57798](https://github.com/ClickHouse/ClickHouse/pull/57798) ([Kirill](https://github.com/kirillgarbar)).
* Added query `ALTER TABLE table FORGET PARTITION partition` that removes ZooKeeper nodes, related to an empty partition. [#59507](https://github.com/ClickHouse/ClickHouse/pull/59507) ([Sergei Trifonov](https://github.com/serxa)). This is an expert-level feature.
* Support JWT credentials file for the NATS table engine. [#59543](https://github.com/ClickHouse/ClickHouse/pull/59543) ([Nickolaj Jepsen](https://github.com/nickolaj-jepsen)).
* Implemented system.dns_cache table, which can be useful for debugging DNS issues. [#59856](https://github.com/ClickHouse/ClickHouse/pull/59856) ([Kirill Nikiforov](https://github.com/allmazz)).
* The codec `LZ4HC` will accept a new level 2, which is faster than the previous minimum level 3, at the expense of less compression. In previous versions, `LZ4HC(2)` and less was the same as `LZ4HC(3)`. Author: [Cyan4973](https://github.com/Cyan4973). [#60090](https://github.com/ClickHouse/ClickHouse/pull/60090) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Implemented `system.dns_cache` table, which can be useful for debugging DNS issues. New server setting dns_cache_max_size. [#60257](https://github.com/ClickHouse/ClickHouse/pull/60257) ([Kirill Nikiforov](https://github.com/allmazz)).
* Support single-argument version for the `merge` table function, as `merge(['db_name', ] 'tables_regexp')`. [#60372](https://github.com/ClickHouse/ClickHouse/pull/60372) ([豪肥肥](https://github.com/HowePa)).
* Support negative positional arguments. Closes [#57736](https://github.com/ClickHouse/ClickHouse/issues/57736). [#58292](https://github.com/ClickHouse/ClickHouse/pull/58292) ([flynn](https://github.com/ucasfl)).
* Support specifying a set of permitted users for specific S3 settings in config using `user` key. [#60144](https://github.com/ClickHouse/ClickHouse/pull/60144) ([Antonio Andelic](https://github.com/antonio2368)).
* Added table function `mergeTreeIndex`. It represents the contents of index and marks files of `MergeTree` tables. It can be used for introspection. Syntax: `mergeTreeIndex(database, table, [with_marks = true])` where `database.table` is an existing table with `MergeTree` engine. [#58140](https://github.com/ClickHouse/ClickHouse/pull/58140) ([Anton Popov](https://github.com/CurtizJ)).
#### Experimental Feature
* Added function `seriesOutliersTukey` to detect outliers in series data using Tukey's fences algorithm. [#58632](https://github.com/ClickHouse/ClickHouse/pull/58632) ([Bhavna Jindal](https://github.com/bhavnajindal)). Keep in mind that the behavior will be changed in the next patch release.
* Add function `variantType` that returns Enum with variant type name for each row. [#59398](https://github.com/ClickHouse/ClickHouse/pull/59398) ([Kruglov Pavel](https://github.com/Avogar)).
* Support `LEFT JOIN`, `ALL INNER JOIN`, and simple subqueries for parallel replicas (only with analyzer). New setting `parallel_replicas_prefer_local_join` chooses local `JOIN` execution (by default) vs `GLOBAL JOIN`. All tables should exist on every replica from `cluster_for_parallel_replicas`. New settings `min_external_table_block_size_rows` and `min_external_table_block_size_bytes` are used to squash small blocks that are sent for temporary tables (only with analyzer). [#58916](https://github.com/ClickHouse/ClickHouse/pull/58916) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Allow concurrent table creation in the `Replicated` database during adding or recovering a new replica. [#59277](https://github.com/ClickHouse/ClickHouse/pull/59277) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Implement comparison operator for `Variant` values and proper Field inserting into `Variant` column. Don't allow creating `Variant` type with similar variant types by default (allow under a setting `allow_suspicious_variant_types`). Closes [#59996](https://github.com/ClickHouse/ClickHouse/issues/59996). Closes [#59850](https://github.com/ClickHouse/ClickHouse/issues/59850). [#60198](https://github.com/ClickHouse/ClickHouse/pull/60198) ([Kruglov Pavel](https://github.com/Avogar)).
* Disable parallel replicas JOIN with CTE (not analyzer) [#59239](https://github.com/ClickHouse/ClickHouse/pull/59239) ([Raúl Marín](https://github.com/Algunenano)).
#### Performance Improvement
* Primary key will use less amount of memory. [#60049](https://github.com/ClickHouse/ClickHouse/pull/60049) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve memory usage for primary key and some other operations. [#60050](https://github.com/ClickHouse/ClickHouse/pull/60050) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The tables' primary keys will be loaded in memory lazily on first access. This is controlled by the new MergeTree setting `primary_key_lazy_load`, which is on by default. This provides several advantages: - it will not be loaded for tables that are not used; - if there is not enough memory, an exception will be thrown on first use instead of at server startup. This provides several disadvantages: - the latency of loading the primary key will be paid on the first query rather than before accepting connections; this theoretically may introduce a thundering-herd problem. This closes [#11188](https://github.com/ClickHouse/ClickHouse/issues/11188). [#60093](https://github.com/ClickHouse/ClickHouse/pull/60093) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Vectorized distance functions used in vector search. [#58866](https://github.com/ClickHouse/ClickHouse/pull/58866) ([Robert Schulze](https://github.com/rschu1ze)).
* Vectorized function `dotProduct` which is useful for vector search. [#60202](https://github.com/ClickHouse/ClickHouse/pull/60202) ([Robert Schulze](https://github.com/rschu1ze)).
* Add short-circuit ability for `dictGetOrDefault` function. Closes [#52098](https://github.com/ClickHouse/ClickHouse/issues/52098). [#57767](https://github.com/ClickHouse/ClickHouse/pull/57767) ([jsc0218](https://github.com/jsc0218)).
* Keeper improvement: cache only a certain amount of logs in-memory controlled by `latest_logs_cache_size_threshold` and `commit_logs_cache_size_threshold`. [#59460](https://github.com/ClickHouse/ClickHouse/pull/59460) ([Antonio Andelic](https://github.com/antonio2368)).
* Keeper improvement: reduce size of data node even more. [#59592](https://github.com/ClickHouse/ClickHouse/pull/59592) ([Antonio Andelic](https://github.com/antonio2368)).
* Continue optimizing branch miss of `if` function when result type is `Float*/Decimal*/*Int*`, follow up of https://github.com/ClickHouse/ClickHouse/pull/57885. [#59148](https://github.com/ClickHouse/ClickHouse/pull/59148) ([李扬](https://github.com/taiyang-li)).
* Optimize `if` function when the input type is `Map`, the speed-up is up to ~10x. [#59413](https://github.com/ClickHouse/ClickHouse/pull/59413) ([李扬](https://github.com/taiyang-li)).
* Improve performance of the `Int8` type by implementing strict aliasing (we already have it for `UInt8` and all other integer types). [#59485](https://github.com/ClickHouse/ClickHouse/pull/59485) ([Raúl Marín](https://github.com/Algunenano)).
* Optimize performance of sum/avg conditionally for bigint and big decimal types by reducing branch miss. [#59504](https://github.com/ClickHouse/ClickHouse/pull/59504) ([李扬](https://github.com/taiyang-li)).
* Improve performance of SELECTs with active mutations. [#59531](https://github.com/ClickHouse/ClickHouse/pull/59531) ([Azat Khuzhin](https://github.com/azat)).
* Optimized function `isNotNull` with AVX2. [#59621](https://github.com/ClickHouse/ClickHouse/pull/59621) ([李扬](https://github.com/taiyang-li)).
* Improve ASOF JOIN performance for sorted or almost sorted data. [#59731](https://github.com/ClickHouse/ClickHouse/pull/59731) ([Maksim Kita](https://github.com/kitaisreal)).
* The previous default value of 1 MB for `async_insert_max_data_size` appeared to be too small. The new default is 10 MiB. [#59536](https://github.com/ClickHouse/ClickHouse/pull/59536) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Use multiple threads while reading the metadata of tables from a backup while executing the RESTORE command. [#60040](https://github.com/ClickHouse/ClickHouse/pull/60040) ([Vitaly Baranov](https://github.com/vitlibar)).
* Now if `StorageBuffer` has more than 1 shard (`num_layers` > 1) background flush will happen simultaneously for all shards in multiple threads. [#60111](https://github.com/ClickHouse/ClickHouse/pull/60111) ([alesapin](https://github.com/alesapin)).
#### Improvement
* When the output format is a Pretty format and a block consists of a single numeric value which exceeds one million, a readable number will be printed to the right of the table. [#60379](https://github.com/ClickHouse/ClickHouse/pull/60379) ([rogeryk](https://github.com/rogeryk)).
* Added settings `split_parts_ranges_into_intersecting_and_non_intersecting_final` and `split_intersecting_parts_ranges_into_layers_final`. These settings are needed to disable optimizations for queries with `FINAL` and needed for debug only. [#59705](https://github.com/ClickHouse/ClickHouse/pull/59705) ([Maksim Kita](https://github.com/kitaisreal)). Actually not only for that - they can also lower memory usage at the expense of performance.
* Rename the setting `extract_kvp_max_pairs_per_row` to `extract_key_value_pairs_max_pairs_per_row`. The issue (unnecessary abbreviation in the setting name) was introduced in https://github.com/ClickHouse/ClickHouse/pull/43606. Fix the documentation of this setting. [#59683](https://github.com/ClickHouse/ClickHouse/pull/59683) ([Alexey Milovidov](https://github.com/alexey-milovidov)). [#59960](https://github.com/ClickHouse/ClickHouse/pull/59960) ([jsc0218](https://github.com/jsc0218)).
* Running `ALTER COLUMN MATERIALIZE` on a column with `DEFAULT` or `MATERIALIZED` expression now precisely follows the semantics. [#58023](https://github.com/ClickHouse/ClickHouse/pull/58023) ([Duc Canh Le](https://github.com/canhld94)).
* Enabled an exponential backoff logic for errors during mutations. It will reduce the CPU usage, memory usage and log file sizes. [#58036](https://github.com/ClickHouse/ClickHouse/pull/58036) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Add improvement to count the `InitialQuery` Profile Event. [#58195](https://github.com/ClickHouse/ClickHouse/pull/58195) ([Unalian](https://github.com/Unalian)).
* Allow to define `volume_priority` in `storage_configuration`. [#58533](https://github.com/ClickHouse/ClickHouse/pull/58533) ([Andrey Zvonov](https://github.com/zvonand)).
* Add support for the `Date32` type in the `T64` codec. [#58738](https://github.com/ClickHouse/ClickHouse/pull/58738) ([Hongbin Ma](https://github.com/binmahone)).
* Allow trailing commas in types with several items. [#59119](https://github.com/ClickHouse/ClickHouse/pull/59119) ([Aleksandr Musorin](https://github.com/AVMusorin)).
* Settings for the Distributed table engine can now be specified in the server configuration file (similar to MergeTree settings), e.g. `<distributed> <flush_on_detach>false</flush_on_detach> </distributed>`. [#59291](https://github.com/ClickHouse/ClickHouse/pull/59291) ([Azat Khuzhin](https://github.com/azat)).
* Retry disconnects and expired sessions when reading `system.zookeeper`. This is helpful when reading many rows from `system.zookeeper` table especially in the presence of fault-injected disconnects. [#59388](https://github.com/ClickHouse/ClickHouse/pull/59388) ([Alexander Gololobov](https://github.com/davenger)).
* Do not interpret numbers with leading zeroes as octals when `input_format_values_interpret_expressions=0`. [#59403](https://github.com/ClickHouse/ClickHouse/pull/59403) ([Joanna Hulboj](https://github.com/jh0x)).
* At startup and whenever config files are changed, ClickHouse updates the hard memory limits of its total memory tracker. These limits are computed based on various server settings and cgroups limits (on Linux). Previously, setting `/sys/fs/cgroup/memory.max` (for cgroups v2) was hard-coded. As a result, cgroup v2 memory limits configured for nested groups (hierarchies), e.g. `/sys/fs/cgroup/my/nested/group/memory.max` were ignored. This is now fixed. The behavior of v1 memory limits remains unchanged. [#59435](https://github.com/ClickHouse/ClickHouse/pull/59435) ([Robert Schulze](https://github.com/rschu1ze)).
* New profile events added to observe the time spent on calculating PK/projections/secondary indices during `INSERT`-s. [#59436](https://github.com/ClickHouse/ClickHouse/pull/59436) ([Nikita Taranov](https://github.com/nickitat)).
* Allow to define a starting point for S3Queue with Ordered mode at the creation using a setting `s3queue_last_processed_path`. [#59446](https://github.com/ClickHouse/ClickHouse/pull/59446) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Made comments for system tables also available in `system.tables` in `clickhouse-local`. [#59493](https://github.com/ClickHouse/ClickHouse/pull/59493) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* `system.zookeeper` table: previously the whole result was accumulated in memory and returned as one big chunk. This change should help to reduce memory consumption when reading many rows from `system.zookeeper`, allow showing intermediate progress (how many rows have been read so far) and avoid hitting connection timeout when result set is big. [#59545](https://github.com/ClickHouse/ClickHouse/pull/59545) ([Alexander Gololobov](https://github.com/davenger)).
* Now the dashboard understands both compressed and uncompressed state of URL's #hash (backward compatibility). Continuation of [#59124](https://github.com/ClickHouse/ClickHouse/issues/59124). [#59548](https://github.com/ClickHouse/ClickHouse/pull/59548) ([Amos Bird](https://github.com/amosbird)).
* Bumped Intel QPL (used by codec `DEFLATE_QPL`) from v1.3.1 to v1.4.0. Also fixed a bug in the polling timeout mechanism: as we observed, in some cases the timeout won't work properly, and if a timeout happens, IAA and CPU may process the buffer concurrently. So far, we'd better make sure the IAA codec status is not QPL_STS_BEING_PROCESSED, then fall back to the SW codec. [#59551](https://github.com/ClickHouse/ClickHouse/pull/59551) ([jasperzhu](https://github.com/jinjunzh)).
* Do not show a warning about the server version in ClickHouse Cloud because ClickHouse Cloud handles seamless upgrades automatically. [#59657](https://github.com/ClickHouse/ClickHouse/pull/59657) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* After self-extraction, the temporary binary is moved instead of copied. [#59661](https://github.com/ClickHouse/ClickHouse/pull/59661) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix stack unwinding on Apple macOS. This closes [#53653](https://github.com/ClickHouse/ClickHouse/issues/53653). [#59690](https://github.com/ClickHouse/ClickHouse/pull/59690) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Check for stack overflow in parsers even if the user misconfigured the `max_parser_depth` setting to a very high value. This closes [#59622](https://github.com/ClickHouse/ClickHouse/issues/59622). [#59697](https://github.com/ClickHouse/ClickHouse/pull/59697) ([Alexey Milovidov](https://github.com/alexey-milovidov)). [#60434](https://github.com/ClickHouse/ClickHouse/pull/60434)
* Unify XML and SQL created named collection behaviour in Kafka storage. [#59710](https://github.com/ClickHouse/ClickHouse/pull/59710) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)).
* In case when `merge_max_block_size_bytes` is small enough and tables contain wide rows (strings or tuples) background merges may get stuck in an endless loop. This behaviour is fixed. Follow-up for https://github.com/ClickHouse/ClickHouse/pull/59340. [#59812](https://github.com/ClickHouse/ClickHouse/pull/59812) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Allow uuid in replica_path if CREATE TABLE explicitly has it. [#59908](https://github.com/ClickHouse/ClickHouse/pull/59908) ([Azat Khuzhin](https://github.com/azat)).
* Add column `metadata_version` of ReplicatedMergeTree table in `system.tables` system table. [#59942](https://github.com/ClickHouse/ClickHouse/pull/59942) ([Maksim Kita](https://github.com/kitaisreal)).
* Keeper improvement: send only Keeper related metrics/events for Prometheus. [#59945](https://github.com/ClickHouse/ClickHouse/pull/59945) ([Antonio Andelic](https://github.com/antonio2368)).
* The dashboard will display metrics across different ClickHouse versions even if the structure of system tables has changed after the upgrade. [#59967](https://github.com/ClickHouse/ClickHouse/pull/59967) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allow loading AZ info from a file. [#59976](https://github.com/ClickHouse/ClickHouse/pull/59976) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Keeper improvement: add retries on failures for Disk related operations. [#59980](https://github.com/ClickHouse/ClickHouse/pull/59980) ([Antonio Andelic](https://github.com/antonio2368)).
* Add new config setting `backups.remove_backup_files_after_failure`: `<clickhouse> <backups> <remove_backup_files_after_failure>true</remove_backup_files_after_failure> </backups> </clickhouse>`. [#60002](https://github.com/ClickHouse/ClickHouse/pull/60002) ([Vitaly Baranov](https://github.com/vitlibar)).
* Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)).
* Short circuit execution for `ULIDStringToDateTime`. [#60211](https://github.com/ClickHouse/ClickHouse/pull/60211) ([Juan Madurga](https://github.com/jlmadurga)).
* Added `query_id` column for tables `system.backups` and `system.backup_log`. Added error stacktrace to `error` column. [#60220](https://github.com/ClickHouse/ClickHouse/pull/60220) ([Maksim Kita](https://github.com/kitaisreal)).
* Connections through the MySQL port now automatically run with setting `prefer_column_name_to_alias = 1` to support QuickSight out-of-the-box. Also, settings `mysql_map_string_to_text_in_show_columns` and `mysql_map_fixed_string_to_text_in_show_columns` are now enabled by default, affecting also only MySQL connections. This increases compatibility with more BI tools. [#60365](https://github.com/ClickHouse/ClickHouse/pull/60365) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix a race condition in JavaScript code leading to duplicate charts on top of each other. [#60392](https://github.com/ClickHouse/ClickHouse/pull/60392) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Build/Testing/Packaging Improvement
* Added builds and tests with coverage collection with introspection. Continuation of [#56102](https://github.com/ClickHouse/ClickHouse/issues/56102). [#58792](https://github.com/ClickHouse/ClickHouse/pull/58792) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update the Rust toolchain in `corrosion-cmake` when the CMake cross-compilation toolchain variable is set. [#59309](https://github.com/ClickHouse/ClickHouse/pull/59309) ([Aris Tritas](https://github.com/aris-aiven)).
* Add some fuzzing to ASTLiterals. [#59383](https://github.com/ClickHouse/ClickHouse/pull/59383) ([Raúl Marín](https://github.com/Algunenano)).
* If you want to run initdb scripts every time the ClickHouse container starts, you should set the environment variable CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)).
* Remove ability to disable generic clickhouse components (like server/client/...), but keep some that require extra libraries (like ODBC or keeper). [#59857](https://github.com/ClickHouse/ClickHouse/pull/59857) ([Azat Khuzhin](https://github.com/azat)).
* Query fuzzer will fuzz SETTINGS inside queries. [#60087](https://github.com/ClickHouse/ClickHouse/pull/60087) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add support for building ClickHouse with clang-19 (master). [#60448](https://github.com/ClickHouse/ClickHouse/pull/60448) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix a "Non-ready set" error in TTL WHERE. [#57430](https://github.com/ClickHouse/ClickHouse/pull/57430) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix a bug in the `quantilesGK` function [#58216](https://github.com/ClickHouse/ClickHouse/pull/58216) ([李扬](https://github.com/taiyang-li)).
* Fix a wrong behavior with `intDiv` for Decimal arguments [#59243](https://github.com/ClickHouse/ClickHouse/pull/59243) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix `translate` with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)).
* Fix digest calculation in Keeper [#59439](https://github.com/ClickHouse/ClickHouse/pull/59439) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix stacktraces for binaries without debug symbols [#59444](https://github.com/ClickHouse/ClickHouse/pull/59444) ([Azat Khuzhin](https://github.com/azat)).
* Fix `ASTAlterCommand::formatImpl` in case of column specific settings… [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix `SELECT * FROM [...] ORDER BY ALL` with Analyzer [#59462](https://github.com/ClickHouse/ClickHouse/pull/59462) ([zhongyuankai](https://github.com/zhongyuankai)).
* Fix possible uncaught exception during distributed query cancellation [#59487](https://github.com/ClickHouse/ClickHouse/pull/59487) ([Azat Khuzhin](https://github.com/azat)).
* Make MAX use the same rules as permutation for complex types [#59498](https://github.com/ClickHouse/ClickHouse/pull/59498) ([Raúl Marín](https://github.com/Algunenano)).
* Fix corner case when passing `update_insert_deduplication_token_in_dependent_materialized_views` [#59544](https://github.com/ClickHouse/ClickHouse/pull/59544) ([Jordi Villar](https://github.com/jrdi)).
* Fix incorrect result of arrayElement / map on empty value [#59594](https://github.com/ClickHouse/ClickHouse/pull/59594) ([Raúl Marín](https://github.com/Algunenano)).
* Fix crash in topK when merging empty states [#59603](https://github.com/ClickHouse/ClickHouse/pull/59603) ([Raúl Marín](https://github.com/Algunenano)).
* Fix distributed table with a constant sharding key [#59606](https://github.com/ClickHouse/ClickHouse/pull/59606) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix KQL issue found by WingFuzz [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)).
* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)).
* Maintain function alias in RewriteSumFunctionWithSumAndCountVisitor [#59658](https://github.com/ClickHouse/ClickHouse/pull/59658) ([Raúl Marín](https://github.com/Algunenano)).
* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)).
* Validate types of arguments for `minmax` skipping index [#59733](https://github.com/ClickHouse/ClickHouse/pull/59733) ([Anton Popov](https://github.com/CurtizJ)).
* Fix leftPad / rightPad function with FixedString input [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)).
* Fix AST fuzzer issue in function `countMatches` [#59752](https://github.com/ClickHouse/ClickHouse/pull/59752) ([Robert Schulze](https://github.com/rschu1ze)).
* RabbitMQ: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix StorageURL doing some of the query execution in single thread [#59833](https://github.com/ClickHouse/ClickHouse/pull/59833) ([Michael Kolupaev](https://github.com/al13n321)).
* S3Queue: fix uninitialized value [#59897](https://github.com/ClickHouse/ClickHouse/pull/59897) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix parsing of partition expressions surrounded by parens [#59901](https://github.com/ClickHouse/ClickHouse/pull/59901) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix crash in JSONColumnsWithMetadata format over HTTP [#59925](https://github.com/ClickHouse/ClickHouse/pull/59925) ([Kruglov Pavel](https://github.com/Avogar)).
* Do not rewrite sum to count if the return value differs in Analyzer [#59926](https://github.com/ClickHouse/ClickHouse/pull/59926) ([Azat Khuzhin](https://github.com/azat)).
* UniqExactSet read crash fix [#59928](https://github.com/ClickHouse/ClickHouse/pull/59928) ([Maksim Kita](https://github.com/kitaisreal)).
* ReplicatedMergeTree invalid metadata_version fix [#59946](https://github.com/ClickHouse/ClickHouse/pull/59946) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix data race in `StorageDistributed` [#59987](https://github.com/ClickHouse/ClickHouse/pull/59987) ([Nikita Taranov](https://github.com/nickitat)).
* Docker: run init scripts when option is enabled rather than disabled [#59991](https://github.com/ClickHouse/ClickHouse/pull/59991) ([jktng](https://github.com/jktng)).
* Fix INSERT into `SQLite` with single quote (by escaping single quotes with a quote instead of backslash) [#60015](https://github.com/ClickHouse/ClickHouse/pull/60015) ([Azat Khuzhin](https://github.com/azat)).
* Fix several logical errors in `arrayFold` [#60022](https://github.com/ClickHouse/ClickHouse/pull/60022) ([Raúl Marín](https://github.com/Algunenano)).
* Fix optimize_uniq_to_count removing the column alias [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)).
* Fix possible exception from S3Queue table on drop [#60036](https://github.com/ClickHouse/ClickHouse/pull/60036) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix formatting of NOT with single literals [#60042](https://github.com/ClickHouse/ClickHouse/pull/60042) ([Raúl Marín](https://github.com/Algunenano)).
* Use max_query_size from context in DDLLogEntry instead of hardcoded 4096 [#60083](https://github.com/ClickHouse/ClickHouse/pull/60083) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix inconsistent formatting of queries containing tables named `table`. Fix wrong formatting of queries with `UNION ALL`, `INTERSECT`, and `EXCEPT` when their structure wasn't linear. This closes #52349. Fix wrong formatting of `SYSTEM` queries, including `SYSTEM ... DROP FILESYSTEM CACHE`, `SYSTEM ... REFRESH/START/STOP/CANCEL/TEST VIEW`, `SYSTEM ENABLE/DISABLE FAILPOINT`. Fix formatting of parameterized DDL queries. Fix the formatting of the `DESCRIBE FILESYSTEM CACHE` query. Fix incorrect formatting of the `SET param_...` (a query setting a parameter). Fix incorrect formatting of `CREATE INDEX` queries. Fix inconsistent formatting of `CREATE USER` and similar queries. Fix inconsistent formatting of `CREATE SETTINGS PROFILE`. Fix incorrect formatting of `ALTER ... MODIFY REFRESH`. Fix inconsistent formatting of window functions if frame offsets were expressions. Fix inconsistent formatting of `RESPECT NULLS` and `IGNORE NULLS` if they were used after a function that implements an operator (such as `plus`). Fix idiotic formatting of `SYSTEM SYNC REPLICA ... LIGHTWEIGHT FROM ...`. Fix inconsistent formatting of invalid queries with `GROUP BY GROUPING SETS ... WITH ROLLUP/CUBE/TOTALS`. Fix inconsistent formatting of `GRANT CURRENT GRANTS`. Fix inconsistent formatting of `CREATE TABLE (... COLLATE)`. Additionally, I fixed the incorrect formatting of `EXPLAIN` in subqueries (#60102). Fixed incorrect formatting of lambda functions (#60012). Added a check so there is no way to miss these abominations in the future. [#60095](https://github.com/ClickHouse/ClickHouse/pull/60095) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix inconsistent formatting of explain in subqueries [#60102](https://github.com/ClickHouse/ClickHouse/pull/60102) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)).
* Allow casting of bools in string representation to true bools [#60160](https://github.com/ClickHouse/ClickHouse/pull/60160) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix `system.s3queue_log` [#60166](https://github.com/ClickHouse/ClickHouse/pull/60166) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix arrayReduce with nullable aggregate function name [#60188](https://github.com/ClickHouse/ClickHouse/pull/60188) ([Raúl Marín](https://github.com/Algunenano)).
* Hide sensitive info for `S3Queue` [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix http exception codes. [#60252](https://github.com/ClickHouse/ClickHouse/pull/60252) ([Austin Kothig](https://github.com/kothiga)).
* S3Queue: fix a bug (also fixes flaky test_storage_s3_queue/test.py::test_shards_distributed) [#60282](https://github.com/ClickHouse/ClickHouse/pull/60282) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix use-of-uninitialized-value and invalid result in hashing functions with IPv6 [#60359](https://github.com/ClickHouse/ClickHouse/pull/60359) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix OptimizeDateOrDateTimeConverterWithPreimageVisitor with null arguments [#60453](https://github.com/ClickHouse/ClickHouse/pull/60453) ([Raúl Marín](https://github.com/Algunenano)).
* Fixed a minor bug that prevented distributed table queries sent from either KQL or PRQL dialect clients to be executed on replicas. [#59674](https://github.com/ClickHouse/ClickHouse/issues/59674). [#60470](https://github.com/ClickHouse/ClickHouse/pull/60470) ([Alexey Milovidov](https://github.com/alexey-milovidov)) [#59674](https://github.com/ClickHouse/ClickHouse/pull/59674) ([Austin Kothig](https://github.com/kothiga)).
### <a id="241"></a> ClickHouse release 24.1, 2024-01-30
#### Backward Incompatible Change

View File

@ -2,11 +2,11 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54483)
SET(VERSION_REVISION 54484)
SET(VERSION_MAJOR 24)
SET(VERSION_MINOR 2)
SET(VERSION_MINOR 3)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 5a024dfc0936e062770d0cfaad0805b57c1fba17)
SET(VERSION_DESCRIBE v24.2.1.1-testing)
SET(VERSION_STRING 24.2.1.1)
SET(VERSION_GITHASH 891689a41506d00aa169548f5b4a8774351242c4)
SET(VERSION_DESCRIBE v24.3.1.1-testing)
SET(VERSION_STRING 24.3.1.1)
# end of autochange

View File

@ -157,7 +157,7 @@ if (TARGET ch_contrib::zlib)
endif()
if (TARGET ch_contrib::zstd)
target_compile_definitions(_libarchive PUBLIC HAVE_ZSTD_H=1 HAVE_LIBZSTD=1)
target_compile_definitions(_libarchive PUBLIC HAVE_ZSTD_H=1 HAVE_LIBZSTD=1 HAVE_LIBZSTD_COMPRESSOR=1)
target_link_libraries(_libarchive PRIVATE ch_contrib::zstd)
endif()

View File

@ -25,7 +25,7 @@ public:
static const uint32_t bits = 128;
// Constructor initializes the same as Initialize()
MetroHash128(const uint64_t seed=0);
explicit MetroHash128(const uint64_t seed=0);
// Initializes internal state for new hash with optional seed
void Initialize(const uint64_t seed=0);

View File

@ -25,7 +25,7 @@ public:
static const uint32_t bits = 64;
// Constructor initializes the same as Initialize()
MetroHash64(const uint64_t seed=0);
explicit MetroHash64(const uint64_t seed=0);
// Initializes internal state for new hash with optional seed
void Initialize(const uint64_t seed=0);

View File

@ -19,6 +19,8 @@ CREATE TABLE azure_blob_storage_table (name String, value UInt32)
### Engine parameters
- `endpoint` — AzureBlobStorage endpoint URL with container & prefix. Optionally can contain account_name if the authentication method used needs it. (http://azurite1:{port}/[account_name]{container_name}/{data_prefix}) or these parameters can be provided separately using storage_account_url, account_name & container. For specifying prefix, endpoint should be used.
- `endpoint_contains_account_name` - This flag is used to specify if endpoint contains account_name as it is only needed for certain authentication methods. (Default : true)
- `connection_string|storage_account_url` — connection_string includes account name & key ([Create connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&bc=%2Fazure%2Fstorage%2Fblobs%2Fbreadcrumb%2Ftoc.json#configure-a-connection-string-for-an-azure-storage-account)) or you could also provide the storage account url here and account name & account key as separate parameters (see parameters account_name & account_key)
- `container_name` - Container name
- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings.

View File

@ -1242,7 +1242,9 @@ Configuration markup:
```
Connection parameters:
* `storage_account_url` - **Required**, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
* `endpoint` — AzureBlobStorage endpoint URL with container & prefix. Optionally can contain account_name if the authentication method used needs it. (`http://account.blob.core.windows.net:{port}/[account_name]{container_name}/{data_prefix}`) or these parameters can be provided separately using storage_account_url, account_name & container. For specifying prefix, endpoint should be used.
* `endpoint_contains_account_name` - This flag is used to specify if endpoint contains account_name as it is only needed for certain authentication methods. (Default : true)
* `storage_account_url` - Required if endpoint is not specified, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
* `container_name` - Target container name, defaults to `default-container`.
* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account, if set to `true`, disk connects to the container directly, and if left unset, disk connects to the account, checks if the container `container_name` exists, and creates it if it doesn't exist yet.

View File

@ -23,7 +23,6 @@ As of November 8th, 2022, each TSV is approximately the following size and numbe
# Table of Contents
- [ClickHouse GitHub data](#clickhouse-github-data)
- [Table of Contents](#table-of-contents)
- [Generating the data](#generating-the-data)
- [Downloading and inserting the data](#downloading-and-inserting-the-data)

View File

@ -168,6 +168,28 @@ RESTORE TABLE test.table PARTITIONS '2', '3'
FROM Disk('backups', 'filename.zip')
```
### Backups as tar archives
Backups can also be stored as tar archives. The functionality is the same as for zip, except that a password is not supported.
Write a backup as a tar:
```
BACKUP TABLE test.table TO Disk('backups', '1.tar')
```
Corresponding restore:
```
RESTORE TABLE test.table FROM Disk('backups', '1.tar')
```
To change the compression method, the correct file suffix should be appended to the backup name. For example, to compress the tar archive using gzip:
```
BACKUP TABLE test.table TO Disk('backups', '1.tar.gz')
```
The supported compression file suffixes are `.tar.gz`, `.tgz`, `.tar.bz2`, `.tar.lzma`, `.tar.zst`, `.tzst` and `.tar.xz`.
### Check the status of backups
The backup command returns an `id` and `status`, and that `id` can be used to get the status of the backup. This is very useful to check the progress of long ASYNC backups. The example below shows a failure that happened when trying to overwrite an existing backup file:

View File

@ -200,17 +200,13 @@ Type: Bool
Default: 0
## dns_cache_max_size
## dns_cache_max_entries
Internal DNS cache max size in bytes.
:::note
ClickHouse also has a reverse cache, so the actual memory usage could be twice as much.
:::
Internal DNS cache max entries.
Type: UInt64
Default: 1024
Default: 10000
## dns_cache_update_period

View File

@ -1656,6 +1656,33 @@ Result:
└─────────────────────────┴─────────┘
```
### output_format_pretty_single_large_number_tip_threshold {#output_format_pretty_single_large_number_tip_threshold}
Print a readable number tip on the right side of the table if the block consists of a single number which exceeds
this value (except 0).
Possible values:
- 0 — The readable number tip will not be printed.
- Positive integer — The readable number tip will be printed if the single number exceeds this value.
Default value: `1000000`.
**Example**
Query:
```sql
SELECT 1000000000 as a;
```
Result:
```text
┌──────────a─┐
│ 1000000000 │ -- 1.00 billion
└────────────┘
```
## Template format settings {#template-format-settings}
### format_template_resultset {#format_template_resultset}

View File

@ -33,6 +33,6 @@ Result:
**See also**
- [disable_internal_dns_cache setting](../../operations/server-configuration-parameters/settings.md#disable_internal_dns_cache)
- [dns_cache_max_size setting](../../operations/server-configuration-parameters/settings.md#dns_cache_max_size)
- [dns_cache_max_entries setting](../../operations/server-configuration-parameters/settings.md#dns_cache_max_entries)
- [dns_cache_update_period setting](../../operations/server-configuration-parameters/settings.md#dns_cache_update_period)
- [dns_max_consecutive_failures setting](../../operations/server-configuration-parameters/settings.md#dns_max_consecutive_failures)

View File

@ -21,7 +21,7 @@ Columns:
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query.
- `event_time_microsecinds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query with microseconds precision.
- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query with microseconds precision.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution.
- `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Start time of query execution with microsecond precision.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution.
@ -32,8 +32,7 @@ Columns:
- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in context of this thread.
- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread.
- `thread_name` ([String](../../sql-reference/data-types/string.md)) — Name of the thread.
- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Internal thread ID.
- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — thread ID.
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — OS thread ID.
- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS ID of the initial thread.
- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values:

View File

@ -0,0 +1,55 @@
---
slug: /en/sql-reference/aggregate-functions/reference/approxtopk
sidebar_position: 212
---
# approx_top_k
Returns an array of the approximately most frequent values and their counts in the specified column. The resulting array is sorted in descending order of approximate frequency of values (not by the values themselves).
``` sql
approx_top_k(N)(column)
approx_top_k(N, reserved)(column)
```
This function does not provide a guaranteed result. In certain situations, errors might occur and it might return frequent values that aren't the most frequent values.
We recommend using the `N < 10` value; performance is reduced with large `N` values. Maximum value of `N = 65536`.
**Parameters**
- `N` — The number of elements to return. Optional. Default value: 10.
- `reserved` — Defines how many cells are reserved for values. If uniq(column) > reserved, the result will be approximate. Optional. Default value: N * 3.
**Arguments**
- `column` — The value to calculate frequency.
**Example**
Query:
``` sql
SELECT approx_top_k(2)(k)
FROM VALUES('k Char, w UInt64', ('y', 1), ('y', 1), ('x', 5), ('y', 1), ('z', 10));
```
Result:
``` text
┌─approx_top_k(2)(k)────┐
│ [('y',3,0),('x',1,0)] │
└───────────────────────┘
```
# approx_top_count
Is an alias to the `approx_top_k` function.
**See Also**
- [topK](../../../sql-reference/aggregate-functions/reference/topk.md)
- [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md)
- [approx_top_sum](../../../sql-reference/aggregate-functions/reference/approxtopsum.md)

View File

@ -0,0 +1,51 @@
---
slug: /en/sql-reference/aggregate-functions/reference/approxtopsum
sidebar_position: 212
---
# approx_top_sum
Returns an array of the approximately most frequent values and their counts in the specified column. The resulting array is sorted in descending order of approximate frequency of values (not by the values themselves). Additionally, the weight of the value is taken into account.
``` sql
approx_top_sum(N)(column, weight)
approx_top_sum(N, reserved)(column, weight)
```
This function does not provide a guaranteed result. In certain situations, errors might occur and it might return frequent values that aren't the most frequent values.
We recommend using the `N < 10` value; performance is reduced with large `N` values. Maximum value of `N = 65536`.
**Parameters**
- `N` — The number of elements to return. Optional. Default value: 10.
- `reserved` — Defines how many cells are reserved for values. If uniq(column) > reserved, the result will be approximate. Optional. Default value: N * 3.
**Arguments**
- `column` — The value to calculate frequency.
- `weight` — The weight. Every value is accounted `weight` times for frequency calculation. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT approx_top_sum(2)(k, w)
FROM VALUES('k Char, w UInt64', ('y', 1), ('y', 1), ('x', 5), ('y', 1), ('z', 10))
```
Result:
``` text
┌─approx_top_sum(2)(k, w)─┐
│ [('z',10,0),('x',5,0)] │
└─────────────────────────┘
```
**See Also**
- [topK](../../../sql-reference/aggregate-functions/reference/topk.md)
- [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md)
- [approx_top_k](../../../sql-reference/aggregate-functions/reference/approxtopk.md)

View File

@ -11,21 +11,23 @@ Implements the [Filtered Space-Saving](https://doi.org/10.1016/j.ins.2010.08.024
``` sql
topK(N)(column)
topK(N, load_factor)(column)
topK(N, load_factor, 'counts')(column)
```
This function does not provide a guaranteed result. In certain situations, errors might occur and it might return frequent values that aren't the most frequent values.
We recommend using the `N < 10` value; performance is reduced with large `N` values. Maximum value of `N = 65536`.
**Arguments**
**Parameters**
- `N` The number of elements to return.
If the parameter is omitted, default value 10 is used.
- `N` — The number of elements to return. Optional. Default value: 10.
- `load_factor` — Defines how many cells are reserved for values. If uniq(column) > N * load_factor, the result of the topK function will be approximate. Optional. Default value: 3.
- `counts` — Defines whether the result should contain the approximate count and error value.
**Arguments**
- `x` The value to calculate frequency.
- `column` — The value to calculate frequency.
**Example**
@ -41,3 +43,9 @@ FROM ontime
│ [19393,19790,19805] │
└─────────────────────┘
```
**See Also**
- [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md)
- [approx_top_k](../../../sql-reference/aggregate-functions/reference/approxtopk.md)
- [approx_top_sum](../../../sql-reference/aggregate-functions/reference/approxtopsum.md)

View File

@ -10,13 +10,20 @@ Returns an array of the approximately most frequent values in the specified colu
**Syntax**
``` sql
topKWeighted(N)(x, weight)
topKWeighted(N)(column, weight)
topKWeighted(N, load_factor)(column, weight)
topKWeighted(N, load_factor, 'counts')(column, weight)
```
**Parameters**
- `N` — The number of elements to return. Optional. Default value: 10.
- `load_factor` — Defines how many cells are reserved for values. If uniq(column) > N * load_factor, the result of the topKWeighted function will be approximate. Optional. Default value: 3.
- `counts` — Defines whether the result should contain the approximate count and error value.
**Arguments**
- `N` — The number of elements to return.
- `x` — The value.
- `column` — The value.
- `weight` — The weight. Every value is accounted `weight` times for frequency calculation. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned value**
@ -40,6 +47,23 @@ Result:
└────────────────────────┘
```
Query:
``` sql
SELECT topKWeighted(2, 10, 'counts')(k, w)
FROM VALUES('k Char, w UInt64', ('y', 1), ('y', 1), ('x', 5), ('y', 1), ('z', 10))
```
Result:
``` text
┌─topKWeighted(2, 10, 'counts')(k, w)─┐
│ [('z',10,0),('x',5,0)] │
└─────────────────────────────────────┘
```
**See Also**
- [topK](../../../sql-reference/aggregate-functions/reference/topk.md)
- [approx_top_k](../../../sql-reference/aggregate-functions/reference/approxtopk.md)
- [approx_top_sum](../../../sql-reference/aggregate-functions/reference/approxtopsum.md)

View File

@ -12,6 +12,11 @@ has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` v
The order of nested types doesn't matter: Variant(T1, T2) = Variant(T2, T1).
Nested types can be arbitrary types except Nullable(...), LowCardinality(Nullable(...)) and Variant(...) types.
:::note
It's not recommended to use similar types as variants (for example different numeric types like `Variant(UInt32, Int64)` or different date types like `Variant(Date, DateTime)`),
because working with values of such types can lead to ambiguity. By default, creating such a `Variant` type will lead to an exception, but it can be enabled using the setting `allow_suspicious_variant_types`.
:::
:::note
The Variant data type is an experimental feature. To use it, set `allow_experimental_variant_type = 1`.
:::
@ -272,3 +277,121 @@ $$)
│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │
└─────────────────────┴───────────────┴──────┴───────┴─────────────────────┴─────────┘
```
## Comparing values of Variant type
Values of a `Variant` type can be compared only with values with the same `Variant` type.
The result of operator `<` for values `v1` with underlying type `T1` and `v2` with underlying type `T2` of a type `Variant(..., T1, ... T2, ...)` is defined as follows:
- If `T1 = T2 = T`, the result will be `v1.T < v2.T` (underlying values will be compared).
- If `T1 != T2`, the result will be `T1 < T2` (type names will be compared).
Examples:
```sql
CREATE TABLE test (v1 Variant(String, UInt64, Array(UInt32)), v2 Variant(String, UInt64, Array(UInt32))) ENGINE=Memory;
INSERT INTO test VALUES (42, 42), (42, 43), (42, 'abc'), (42, [1, 2, 3]), (42, []), (42, NULL);
```
```sql
SELECT v2, variantType(v2) as v2_type from test order by v2;
```
```text
┌─v2──────┬─v2_type───────┐
│ [] │ Array(UInt32) │
│ [1,2,3] │ Array(UInt32) │
│ abc │ String │
│ 42 │ UInt64 │
│ 43 │ UInt64 │
│ ᴺᵁᴸᴸ │ None │
└─────────┴───────────────┘
```
```sql
SELECT v1, variantType(v1) as v1_type, v2, variantType(v2) as v2_type, v1 = v2, v1 < v2, v1 > v2 from test;
```
```text
┌─v1─┬─v1_type─┬─v2──────┬─v2_type───────┬─equals(v1, v2)─┬─less(v1, v2)─┬─greater(v1, v2)─┐
│ 42 │ UInt64 │ 42 │ UInt64 │ 1 │ 0 │ 0 │
│ 42 │ UInt64 │ 43 │ UInt64 │ 0 │ 1 │ 0 │
│ 42 │ UInt64 │ abc │ String │ 0 │ 0 │ 1 │
│ 42 │ UInt64 │ [1,2,3] │ Array(UInt32) │ 0 │ 0 │ 1 │
│ 42 │ UInt64 │ [] │ Array(UInt32) │ 0 │ 0 │ 1 │
│ 42 │ UInt64 │ ᴺᵁᴸᴸ │ None │ 0 │ 1 │ 0 │
└────┴─────────┴─────────┴───────────────┴────────────────┴──────────────┴─────────────────┘
```
If you need to find the row with specific `Variant` value, you can do one of the following:
- Cast value to the corresponding `Variant` type:
```sql
SELECT * FROM test WHERE v2 == [1,2,3]::Array(UInt32)::Variant(String, UInt64, Array(UInt32));
```
```text
┌─v1─┬─v2──────┐
│ 42 │ [1,2,3] │
└────┴─────────┘
```
- Compare `Variant` subcolumn with required type:
```sql
SELECT * FROM test WHERE v2.`Array(UInt32)` == [1,2,3] -- or using variantElement(v2, 'Array(UInt32)')
```
```text
┌─v1─┬─v2──────┐
│ 42 │ [1,2,3] │
└────┴─────────┘
```
Sometimes it can be useful to make additional check on variant type as subcolumns with complex types like `Array/Map/Tuple` cannot be inside `Nullable` and will have default values instead of `NULL` on rows with different types:
```sql
SELECT v2, v2.`Array(UInt32)`, variantType(v2) FROM test WHERE v2.`Array(UInt32)` == [];
```
```text
┌─v2───┬─v2.Array(UInt32)─┬─variantType(v2)─┐
│ 42 │ [] │ UInt64 │
│ 43 │ [] │ UInt64 │
│ abc │ [] │ String │
│ [] │ [] │ Array(UInt32) │
│ ᴺᵁᴸᴸ │ [] │ None │
└──────┴──────────────────┴─────────────────┘
```
```sql
SELECT v2, v2.`Array(UInt32)`, variantType(v2) FROM test WHERE variantType(v2) == 'Array(UInt32)' AND v2.`Array(UInt32)` == [];
```
```text
┌─v2─┬─v2.Array(UInt32)─┬─variantType(v2)─┐
│ [] │ [] │ Array(UInt32) │
└────┴──────────────────┴─────────────────┘
```
**Note:** values of variants with different numeric types are considered as different variants and not compared between each other, their type names are compared instead.
Example:
```sql
SET allow_suspicious_variant_types = 1;
CREATE TABLE test (v Variant(UInt32, Int64)) ENGINE=Memory;
INSERT INTO test VALUES (1::UInt32), (1::Int64), (100::UInt32), (100::Int64);
SELECT v, variantType(v) FROM test ORDER by v;
```
```text
┌─v───┬─variantType(v)─┐
│ 1 │ Int64 │
│ 100 │ Int64 │
│ 1 │ UInt32 │
│ 100 │ UInt32 │
└─────┴────────────────┘
```

View File

@ -167,6 +167,10 @@ Result:
└──────────────────────────────────────────┴───────────────────────────────┘
```
## byteSlice(s, offset, length)
See function [substring](string-functions.md#substring).
## bitTest
Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. The countdown starts from 0 from the right to the left.

View File

@ -558,6 +558,7 @@ substring(s, offset[, length])
Alias:
- `substr`
- `mid`
- `byteSlice`
**Arguments**

View File

@ -68,7 +68,7 @@ RELOAD FUNCTION [ON CLUSTER cluster_name] function_name
Clears ClickHouse's internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries).
For more convenient (automatic) cache management, see disable_internal_dns_cache, dns_cache_max_size, dns_cache_update_period parameters.
For more convenient (automatic) cache management, see disable_internal_dns_cache, dns_cache_max_entries, dns_cache_update_period parameters.
## DROP MARK CACHE

View File

@ -4,6 +4,7 @@
#include <Coordination/CoordinationSettings.h>
#include <Coordination/KeeperSnapshotManager.h>
#include <Coordination/ZooKeeperDataReader.h>
#include <Coordination/KeeperContext.h>
#include <Common/TerminalSize.h>
#include <Poco/ConsoleChannel.h>
#include <Poco/AutoPtr.h>

View File

@ -1774,7 +1774,7 @@ try
}
else
{
DNSResolver::instance().setCacheMaxSize(server_settings.dns_cache_max_size);
DNSResolver::instance().setCacheMaxEntries(server_settings.dns_cache_max_entries);
/// Initialize a watcher periodically updating DNS cache
dns_cache_updater = std::make_unique<DNSCacheUpdater>(

View File

@ -0,0 +1 @@
../../../tests/config/config.d/handlers.yaml

View File

@ -752,7 +752,7 @@
<!-- Comma-separated list of prefixes for user-defined settings.
The server will allow to set these settings, and retrieve them with the getSetting function.
They are also logged in the query_log, similarly to other settings, but have no special effect.
The "SQL_" prefix is introduced for compatibility with MySQL - these settings are being set be Tableau.
The "SQL_" prefix is introduced for compatibility with MySQL - these settings are being set by Tableau.
-->
<custom_settings_prefixes>SQL_</custom_settings_prefixes>
@ -1477,17 +1477,23 @@
</query_masking_rules> -->
<!-- Uncomment to use custom http handlers.
rules are checked from top to bottom, first match runs the handler
url - to match request URL, you can use 'regex:' prefix to use regex match(optional)
empty_query_string - check that there is no query string in the URL
methods - to match request method, you can use commas to separate multiple method matches(optional)
headers - to match request headers, match each child element(child element name is header name), you can use 'regex:' prefix to use regex match(optional)
handler is request handler
type - supported types: static, dynamic_query_handler, predefined_query_handler
type - supported types: static, dynamic_query_handler, predefined_query_handler, redirect
query - use with predefined_query_handler type, executes query when the handler is called
query_param_name - use with dynamic_query_handler type, extracts and executes the value corresponding to the <query_param_name> value in HTTP request params
status - use with static type, response status code
content_type - use with static type, response content-type
response_content - use with static type, response content sent to the client; when using the prefix 'file://' or 'config://', the content is read from the file or configuration and sent to the client.
url - a location for redirect
Along with a list of rules, you can specify <defaults/> which means - enable all the default handlers.
<http_handlers>
<rule>

View File

@ -523,49 +523,14 @@ let default_params = {
let params = default_params;
/// Palette generation for charts
function generatePalette(baseColor, numColors) {
const baseHSL = hexToHsl(baseColor);
const hueStep = 360 / numColors;
const palette = [];
function generatePalette(numColors) {
palette = [];
for (let i = 0; i < numColors; i++) {
const hue = Math.round((baseHSL.h + i * hueStep) % 360);
const color = `hsl(${hue}, ${baseHSL.s}%, ${baseHSL.l}%)`;
palette.push(color);
palette.push(`oklch(${theme != 'dark' ? 0.75 : 0.5}, 0.15, ${360 * i / numColors})`);
}
return palette;
}
/// Converts a hex color string (with or without a leading '#') to HSL.
/// Returns { h, s, l }: hue in degrees, saturation and lightness as integer percentages.
function hexToHsl(hex) {
    const packed = parseInt(hex.replace(/^#/, ''), 16);

    /// Unpack the three 8-bit channels and scale each of them to [0, 1].
    const red = ((packed >> 16) & 255) / 255;
    const green = ((packed >> 8) & 255) / 255;
    const blue = (packed & 255) / 255;

    const hi = Math.max(red, green, blue);
    const lo = Math.min(red, green, blue);
    const lightness = (hi + lo) / 2;

    /// Achromatic colors (all channels equal) have neither hue nor saturation.
    if (hi === lo) {
        return { h: 0, s: 0, l: Math.round(lightness * 100) };
    }

    const spread = hi - lo;
    const saturation = lightness > 0.5 ? spread / (2 - hi - lo) : spread / (hi + lo);

    /// Position on the color wheel in units of 60 degrees; depends on the dominant channel.
    let sextant;
    switch (hi) {
        case red:
            sextant = (green - blue) / spread + (green < blue ? 6 : 0);
            break;
        case green:
            sextant = (blue - red) / spread + 2;
            break;
        default:
            sextant = (red - green) / spread + 4;
    }

    return {
        h: Math.round(sextant * 60),
        s: Math.round(saturation * 100),
        l: Math.round(lightness * 100),
    };
}
let theme = 'light';
function setTheme(new_theme) {
@ -1207,7 +1172,7 @@ async function draw(idx, chart, url_params, query) {
// Treat every column as series
const series_count = reply.meta.length;
const fill = series_count == 2 ? fill_color : undefined;
const palette = generatePalette(line_color, series_count);
const palette = series_count == 2 ? [line_color] : generatePalette(series_count);
let max_value = Number.NEGATIVE_INFINITY;
for (let i = 1; i < series_count; i++) {
let label = reply.meta[i].name;

230
rust/Cargo.lock generated
View File

@ -6,7 +6,7 @@ version = 3
name = "_ch_rust_prql"
version = "0.1.0"
dependencies = [
"prql-compiler",
"prqlc",
"serde_json",
]
@ -79,16 +79,15 @@ dependencies = [
[[package]]
name = "anstream"
version = "0.3.2"
version = "0.6.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163"
checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is-terminal",
"utf8parse",
]
@ -113,33 +112,33 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648"
dependencies = [
"windows-sys 0.52.0",
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "1.0.2"
version = "3.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c677ab05e09154296dd37acecd46420c17b9713e8366facafa8fc0885167cf4c"
checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7"
dependencies = [
"anstyle",
"windows-sys 0.48.0",
"windows-sys",
]
[[package]]
name = "anyhow"
version = "1.0.75"
version = "1.0.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1"
dependencies = [
"backtrace",
]
[[package]]
name = "ariadne"
version = "0.3.0"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72fe02fc62033df9ba41cba57ee19acf5e742511a140c7dbc3a873e19a19a1bd"
checksum = "dd002a6223f12c7a95cdd4b1cb3a0149d22d37f7a9ecdb2cb691a071fe236c29"
dependencies = [
"unicode-width",
"yansi",
@ -213,16 +212,16 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.31"
version = "0.4.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38"
checksum = "5bc015644b92d5890fab7489e49d21f879d5c990186827d42ec511919404f38b"
dependencies = [
"android-tzdata",
"iana-time-zone",
"js-sys",
"num-traits",
"wasm-bindgen",
"windows-targets 0.48.5",
"windows-targets 0.52.0",
]
[[package]]
@ -368,7 +367,7 @@ dependencies = [
"proc-macro2",
"quote",
"scratch",
"syn 2.0.41",
"syn 2.0.52",
]
[[package]]
@ -385,7 +384,7 @@ checksum = "5c6888cd161769d65134846d4d4981d5a6654307cc46ec83fb917e530aea5f84"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.41",
"syn 2.0.52",
]
[[package]]
@ -509,18 +508,14 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.41",
"syn 2.0.52",
]
[[package]]
name = "errno"
version = "0.3.8"
name = "equivalent"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245"
dependencies = [
"libc",
"windows-sys 0.52.0",
]
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "fnv"
@ -570,12 +565,6 @@ version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "hermit-abi"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7"
[[package]]
name = "iana-time-zone"
version = "0.1.58"
@ -606,23 +595,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "is-terminal"
version = "0.4.9"
name = "indexmap"
version = "2.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4"
dependencies = [
"hermit-abi",
"rustix",
"windows-sys 0.48.0",
]
[[package]]
name = "itertools"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
dependencies = [
"either",
"equivalent",
"hashbrown",
]
[[package]]
@ -681,12 +660,6 @@ dependencies = [
"cc",
]
[[package]]
name = "linux-raw-sys"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456"
[[package]]
name = "log"
version = "0.4.20"
@ -805,45 +778,35 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "proc-macro2"
version = "1.0.70"
version = "1.0.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b"
checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae"
dependencies = [
"unicode-ident",
]
[[package]]
name = "prql-ast"
version = "0.9.5"
name = "prqlc"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9d91522f9f16d055409b9ffec55693a96e3424fe5d8e7c8331adcf6d7ee363a"
dependencies = [
"enum-as-inner",
"semver",
"serde",
"strum",
]
[[package]]
name = "prql-compiler"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4d56865532fcf1abaa31fbb6da6fd9e90edc441c5c78bfe2870ee75187c7a3c"
checksum = "4beb05b6b71ce096fa56d73006ab1c42a8d11bf190d193fa511a134f7730ec43"
dependencies = [
"anstream",
"anyhow",
"ariadne",
"chrono",
"csv",
"enum-as-inner",
"itertools 0.11.0",
"itertools",
"log",
"once_cell",
"prql-ast",
"prql-parser",
"prqlc-ast",
"prqlc-parser",
"regex",
"semver",
"serde",
"serde_json",
"serde_yaml",
"sqlformat",
"sqlparser",
"strum",
@ -851,15 +814,29 @@ dependencies = [
]
[[package]]
name = "prql-parser"
version = "0.9.5"
name = "prqlc-ast"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9360352e413390cfd26345f49279622b87581a3b748340d3f42d4d616c2a1ec1"
checksum = "c98923b046bc48046e3846b14a5fde5a059f681c7c367bd0ab96ebd3ecc33a71"
dependencies = [
"anyhow",
"enum-as-inner",
"semver",
"serde",
"strum",
]
[[package]]
name = "prqlc-parser"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "855ad9aba599ef608efc88a30ebd731155997d9bbe780639eb175de060b6cddc"
dependencies = [
"chumsky",
"itertools 0.11.0",
"prql-ast",
"itertools",
"prqlc-ast",
"semver",
"stacker",
]
[[package]]
@ -873,9 +850,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.33"
version = "1.0.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
dependencies = [
"proc-macro2",
]
@ -922,9 +899,9 @@ dependencies = [
[[package]]
name = "regex"
version = "1.10.2"
version = "1.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15"
dependencies = [
"aho-corasick",
"memchr",
@ -934,9 +911,9 @@ dependencies = [
[[package]]
name = "regex-automata"
version = "0.4.3"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd"
dependencies = [
"aho-corasick",
"memchr",
@ -955,19 +932,6 @@ version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "rustix"
version = "0.38.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316"
dependencies = [
"bitflags 2.4.1",
"errno",
"libc",
"linux-raw-sys",
"windows-sys 0.52.0",
]
[[package]]
name = "rustversion"
version = "1.0.14"
@ -988,44 +952,57 @@ checksum = "a3cf7c11c38cb994f3d40e8a8cde3bbd1f72a435e4c49e85d6553d8312306152"
[[package]]
name = "semver"
version = "1.0.20"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090"
checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca"
dependencies = [
"serde",
]
[[package]]
name = "serde"
version = "1.0.193"
version = "1.0.197"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89"
checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.193"
version = "1.0.197"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3"
checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.41",
"syn 2.0.52",
]
[[package]]
name = "serde_json"
version = "1.0.108"
version = "1.0.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b"
checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "serde_yaml"
version = "0.9.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fd075d994154d4a774f95b51fb96bdc2832b0ea48425c92546073816cda1f2f"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",
"unsafe-libyaml",
]
[[package]]
name = "skim"
version = "0.10.4"
@ -1057,16 +1034,16 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce81b7bd7c4493975347ef60d8c7e8b742d4694f4c49f93e0a12ea263938176c"
dependencies = [
"itertools 0.12.0",
"itertools",
"nom",
"unicode_categories",
]
[[package]]
name = "sqlparser"
version = "0.37.0"
version = "0.43.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37ae05a8250b968a3f7db93155a84d68b2e6cea1583949af5ca5b5170c76c075"
checksum = "f95c4bae5aba7cd30bd506f7140026ade63cff5afd778af8854026f9606bf5d4"
dependencies = [
"log",
"serde",
@ -1093,24 +1070,24 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "strum"
version = "0.25.0"
version = "0.26.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125"
checksum = "723b93e8addf9aa965ebe2d11da6d7540fa2283fcea14b3371ff055f7ba13f5f"
dependencies = [
"strum_macros",
]
[[package]]
name = "strum_macros"
version = "0.25.3"
version = "0.26.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0"
checksum = "7a3417fc93d76740d974a01654a09777cb500428cc874ca9f45edfe0c4d4cd18"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.41",
"syn 2.0.52",
]
[[package]]
@ -1126,9 +1103,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.41"
version = "2.0.52"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269"
checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07"
dependencies = [
"proc-macro2",
"quote",
@ -1172,7 +1149,7 @@ checksum = "01742297787513b79cf8e29d1056ede1313e2420b7b3b15d0a768b4921f549df"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.41",
"syn 2.0.52",
]
[[package]]
@ -1244,6 +1221,12 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
[[package]]
name = "unsafe-libyaml"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab4c90930b95a82d00dc9e9ac071b4991924390d46cbd0dfe566148667605e4b"
[[package]]
name = "utf8parse"
version = "0.2.1"
@ -1304,7 +1287,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.41",
"syn 2.0.52",
"wasm-bindgen-shared",
]
@ -1326,7 +1309,7 @@ checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.41",
"syn 2.0.52",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@ -1377,15 +1360,6 @@ dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "windows-sys"
version = "0.52.0"
@ -1532,5 +1506,5 @@ checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.41",
"syn 2.0.52",
]

View File

@ -3,10 +3,8 @@ edition = "2021"
name = "_ch_rust_prql"
version = "0.1.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
prql-compiler = "0.9.3"
prqlc = {version = "0.11.3", default-features = false}
serde_json = "1.0"
[lib]

View File

@ -1,8 +1,8 @@
use prql_compiler::sql::Dialect;
use prql_compiler::{Options, Target};
use prqlc::sql::Dialect;
use prqlc::{Options, Target};
use std::ffi::{c_char, CString};
use std::slice;
use std::panic;
use std::slice;
fn set_output(result: String, out: *mut *mut u8, out_size: *mut u64) {
assert!(!out_size.is_null());
@ -37,7 +37,7 @@ pub unsafe extern "C" fn prql_to_sql_impl(
signature_comment: false,
color: false,
};
let (is_err, res) = match prql_compiler::compile(&prql_query, &opts) {
let (is_err, res) = match prqlc::compile(&prql_query, &opts) {
Ok(sql_str) => (false, sql_str),
Err(err) => (true, err.to_string()),
};

View File

@ -24,7 +24,7 @@ class HTTPAuthClient
public:
using Result = TResponseParser::Result;
HTTPAuthClient(const HTTPAuthClientParams & params, const TResponseParser & parser_ = TResponseParser{})
explicit HTTPAuthClient(const HTTPAuthClientParams & params, const TResponseParser & parser_ = TResponseParser{})
: timeouts{params.timeouts}
, max_tries{params.max_tries}
, retry_initial_backoff_ms{params.retry_initial_backoff_ms}

View File

@ -1,5 +1,5 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/HelpersMinMaxAny.h>
#include <AggregateFunctions/SingleValueData.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <base/defines.h>
@ -11,219 +11,347 @@ struct Settings;
namespace ErrorCodes
{
extern const int INCORRECT_DATA;
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
}
namespace
{
struct AggregateFunctionAnyRespectNullsData
template <typename Data>
class AggregateFunctionAny final : public IAggregateFunctionDataHelper<Data, AggregateFunctionAny<Data>>
{
enum Status : UInt8
{
NotSet = 1,
SetNull = 2,
SetOther = 3
};
Status status = Status::NotSet;
Field value;
bool isSet() const { return status != Status::NotSet; }
void setNull() { status = Status::SetNull; }
void setOther() { status = Status::SetOther; }
};
template <bool First>
class AggregateFunctionAnyRespectNulls final
: public IAggregateFunctionDataHelper<AggregateFunctionAnyRespectNullsData, AggregateFunctionAnyRespectNulls<First>>
{
public:
using Data = AggregateFunctionAnyRespectNullsData;
private:
SerializationPtr serialization;
const bool returns_nullable_type = false;
explicit AggregateFunctionAnyRespectNulls(const DataTypePtr & type)
: IAggregateFunctionDataHelper<Data, AggregateFunctionAnyRespectNulls<First>>({type}, {}, type)
, serialization(type->getDefaultSerialization())
, returns_nullable_type(type->isNullable())
public:
/// Builds the function from its argument types. The base helper receives the argument types,
/// an empty parameter list and argument_types_[0] as its third argument
/// (presumably the result type - confirm against IAggregateFunctionDataHelper).
explicit AggregateFunctionAny(const DataTypes & argument_types_)
: IAggregateFunctionDataHelper<Data, AggregateFunctionAny<Data>>(argument_types_, {}, argument_types_[0])
/// Cache the default serialization of the result type.
, serialization(this->result_type->getDefaultSerialization())
{
}
String getName() const override
{
if constexpr (First)
return "any_respect_nulls";
else
return "anyLast_respect_nulls";
}
String getName() const override { return "any"; }
bool allocatesMemoryInArena() const override { return false; }
void addNull(AggregateDataPtr __restrict place) const
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
chassert(returns_nullable_type);
auto & d = this->data(place);
if (First && d.isSet())
return;
d.setNull();
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
if (columns[0]->isNullable())
{
if (columns[0]->isNullAt(row_num))
return addNull(place);
}
auto & d = this->data(place);
if (First && d.isSet())
return;
d.setOther();
columns[0]->get(row_num, d.value);
}
void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override
{
if (columns[0]->isNullable())
addNull(place);
else
add(place, columns, 0, arena);
if (!this->data(place).has())
this->data(place).set(*columns[0], row_num, arena);
}
void addBatchSinglePlace(
size_t row_begin, size_t row_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos)
const override
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
Arena * arena,
ssize_t if_argument_pos) const override
{
if (this->data(place).has() || row_begin >= row_end)
return;
if (if_argument_pos >= 0)
{
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
size_t size = row_end - row_begin;
for (size_t i = 0; i < size; ++i)
const auto & if_map = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
for (size_t i = row_begin; i < row_end; i++)
{
size_t pos = First ? row_begin + i : row_end - 1 - i;
if (flags[pos])
if (if_map.data()[i] != 0)
{
add(place, columns, pos, arena);
break;
this->data(place).set(*columns[0], i, arena);
return;
}
}
}
else if (row_begin < row_end)
else
{
size_t pos = First ? row_begin : row_end - 1;
add(place, columns, pos, arena);
this->data(place).set(*columns[0], row_begin, arena);
}
}
void addBatchSinglePlaceNotNull(
size_t, size_t, AggregateDataPtr __restrict, const IColumn **, const UInt8 *, Arena *, ssize_t) const override
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
const UInt8 * __restrict null_map,
Arena * arena,
ssize_t if_argument_pos) const override
{
/// This should not happen since it means somebody else has preprocessed the data (NULLs or IFs) and might
/// have discarded values that we need (NULLs)
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionAnyRespectNulls::addBatchSinglePlaceNotNull called");
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
auto & d = this->data(place);
if (First && d.isSet())
if (this->data(place).has() || row_begin >= row_end)
return;
auto & other = this->data(rhs);
if (other.isSet())
if (if_argument_pos >= 0)
{
d.status = other.status;
d.value = other.value;
const auto & if_map = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
for (size_t i = row_begin; i < row_end; i++)
{
if (if_map.data()[i] != 0 && null_map[i] == 0)
{
this->data(place).set(*columns[0], i, arena);
return;
}
}
}
else
{
for (size_t i = row_begin; i < row_end; i++)
{
if (null_map[i] == 0)
{
this->data(place).set(*columns[0], i, arena);
return;
}
}
}
}
/// Feeds row 0 of the first argument column into the state, but only while the state has no value yet.
/// The row-count argument is ignored.
void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override
{
    auto & state = this->data(place);
    if (state.has())
        return;

    state.set(*columns[0], 0, arena);
}
/// Takes the state from `rhs` only when the destination state has no value yet;
/// a destination that already has a value is left untouched.
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
    auto & destination = this->data(place);
    if (destination.has())
        return;

    destination.set(this->data(rhs), arena);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
auto & d = this->data(place);
UInt8 k = d.status;
writeBinaryLittleEndian<UInt8>(k, buf);
if (k == Data::Status::SetOther)
serialization->serializeBinary(d.value, buf, {});
this->data(place).write(buf, *serialization);
}
void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
auto & d = this->data(place);
UInt8 k = Data::Status::NotSet;
readBinaryLittleEndian<UInt8>(k, buf);
d.status = static_cast<Data::Status>(k);
if (d.status == Data::Status::NotSet)
return;
else if (d.status == Data::Status::SetNull)
{
if (!returns_nullable_type)
throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type (NULL) in non-nullable {}State", getName());
return;
}
else if (d.status == Data::Status::SetOther)
serialization->deserializeBinary(d.value, buf, {});
else
throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type ({}) in {}State", static_cast<Int8>(k), getName());
this->data(place).read(buf, *serialization, arena);
}
/// Delegates the answer to the state type.
bool allocatesMemoryInArena() const override
{
    return Data::allocatesMemoryInArena();
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
auto & d = this->data(place);
if (d.status == Data::Status::SetOther)
to.insert(d.value);
else
to.insertDefault();
this->data(place).insertResultInto(to);
}
AggregateFunctionPtr getOwnNullAdapter(
const AggregateFunctionPtr & original_function,
const DataTypes & /*arguments*/,
const Array & /*params*/,
const AggregateFunctionProperties & /*properties*/) const override
#if USE_EMBEDDED_COMPILER
bool isCompilable() const override
{
return original_function;
if constexpr (!Data::is_compilable)
return false;
else
return Data::isCompilable(*this->argument_types[0]);
}
/// JIT: forwards state creation to the state type.
/// Throws NOT_IMPLEMENTED when Data::is_compilable is false.
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
    /// The `else` branch is required: it keeps Data::compileCreate from being instantiated for non-compilable states.
    if constexpr (!Data::is_compilable)
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
    else
        Data::compileCreate(builder, aggregate_data_ptr);
}
/// JIT: forwards the first argument's value to Data::compileAny.
/// Throws NOT_IMPLEMENTED when Data::is_compilable is false.
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
{
    if constexpr (!Data::is_compilable)
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
    else
        Data::compileAny(builder, aggregate_data_ptr, arguments[0].value);
}
/// JIT: forwards the merge of the source state into the destination state to Data::compileAnyMerge.
/// Throws NOT_IMPLEMENTED when Data::is_compilable is false.
void compileMerge(
    llvm::IRBuilderBase & builder,
    llvm::Value * aggregate_data_dst_ptr,
    llvm::Value * aggregate_data_src_ptr) const override
{
    if constexpr (!Data::is_compilable)
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
    else
        Data::compileAnyMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
/// JIT: returns whatever Data::compileGetResult produces for the state.
/// Throws NOT_IMPLEMENTED when Data::is_compilable is false.
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
    if constexpr (!Data::is_compilable)
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
    else
        return Data::compileGetResult(builder, aggregate_data_ptr);
}
#endif
};
template <bool First>
IAggregateFunction * createAggregateFunctionSingleValueRespectNulls(
const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
AggregateFunctionPtr
createAggregateFunctionAny(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
assertNoParameters(name, parameters);
assertUnary(name, argument_types);
return new AggregateFunctionAnyRespectNulls<First>(argument_types[0]);
return AggregateFunctionPtr(
createAggregateFunctionSingleValue<AggregateFunctionAny, /* unary */ true>(name, argument_types, parameters, settings));
}
AggregateFunctionPtr createAggregateFunctionAny(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
template <typename Data>
class AggregateFunctionAnyLast final : public IAggregateFunctionDataHelper<Data, AggregateFunctionAnyLast<Data>>
{
private:
SerializationPtr serialization;
public:
explicit AggregateFunctionAnyLast(const DataTypes & argument_types_)
: IAggregateFunctionDataHelper<Data, AggregateFunctionAnyLast<Data>>(argument_types_, {}, argument_types_[0])
, serialization(this->result_type->getDefaultSerialization())
{
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData>(name, argument_types, parameters, settings));
}
AggregateFunctionPtr createAggregateFunctionAnyRespectNulls(
String getName() const override { return "anyLast"; }
/// Passes row `row_num` of the first argument column to the state's set().
/// There is no has() check here, so every call reaches set().
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
    const IColumn & source = *columns[0];
    this->data(place).set(source, row_num, arena);
}
/// Processes the rows [row_begin, row_end) for a single state.
/// Without a filter column (if_argument_pos < 0) the last row of the range is used.
/// With a filter column, the last row whose flag is non-zero is used; if there is none, the state is not touched.
void addBatchSinglePlace(
    size_t row_begin,
    size_t row_end,
    AggregateDataPtr __restrict place,
    const IColumn ** __restrict columns,
    Arena * arena,
    ssize_t if_argument_pos) const override
{
    /// Empty range: nothing to do.
    if (row_begin >= row_end)
        return;

    if (if_argument_pos < 0)
    {
        this->data(place).set(*columns[0], row_end - 1, arena);
        return;
    }

    /// Walk the filter column backwards and stop at the first row that passes.
    const auto & filter = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
    for (size_t row = row_end; row > row_begin; --row)
    {
        const size_t pos = row - 1;
        if (filter.data()[pos] == 0)
            continue;

        this->data(place).set(*columns[0], pos, arena);
        return;
    }
}
/// Same backwards scan as addBatchSinglePlace, but rows whose `null_map` entry is non-zero are skipped.
/// The last row of [row_begin, row_end) with null_map == 0 (and a non-zero filter flag, when
/// if_argument_pos >= 0) is passed to set(); if no row qualifies, the state is not touched.
void addBatchSinglePlaceNotNull(
    size_t row_begin,
    size_t row_end,
    AggregateDataPtr __restrict place,
    const IColumn ** __restrict columns,
    const UInt8 * __restrict null_map,
    Arena * arena,
    ssize_t if_argument_pos) const override
{
    /// Empty range: nothing to do.
    if (row_begin >= row_end)
        return;

    if (if_argument_pos >= 0)
    {
        const auto & filter = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
        for (size_t row = row_end; row > row_begin; --row)
        {
            const size_t pos = row - 1;
            /// A row is rejected if it is filtered out or marked in null_map.
            if (filter.data()[pos] == 0 || null_map[pos] != 0)
                continue;

            this->data(place).set(*columns[0], pos, arena);
            return;
        }
        return;
    }

    for (size_t row = row_end; row > row_begin; --row)
    {
        const size_t pos = row - 1;
        if (null_map[pos] != 0)
            continue;

        this->data(place).set(*columns[0], pos, arena);
        return;
    }
}
/// Passes row 0 of the first argument column to the state's set(). The row-count argument is ignored.
void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override
{
    const IColumn & source = *columns[0];
    this->data(place).set(source, 0, arena);
}
/// Passes the `rhs` state to the destination state's set(); there is no has() check on the destination.
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
    auto & destination = this->data(place);
    destination.set(this->data(rhs), arena);
}
/// Writes the state to `buf` using the serialization cached in the constructor. The version argument is unused.
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
    auto & state = this->data(place);
    state.write(buf, *serialization);
}
/// Reads the state from `buf` using the cached serialization. The version argument is unused.
void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
    auto & state = this->data(place);
    state.read(buf, *serialization, arena);
}
/// Delegates the answer to the state type.
bool allocatesMemoryInArena() const override
{
    return Data::allocatesMemoryInArena();
}
/// Lets the state insert its result into the column `to`. The arena argument is unused.
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
    auto & state = this->data(place);
    state.insertResultInto(to);
}
#if USE_EMBEDDED_COMPILER
/// JIT: false when Data::is_compilable is false; otherwise the state type decides based on the first argument type.
bool isCompilable() const override
{
    /// The `else` branch is required: it keeps Data::isCompilable from being instantiated for non-compilable states.
    if constexpr (Data::is_compilable)
        return Data::isCompilable(*this->argument_types[0]);
    else
        return false;
}
/// JIT: forwards state creation to the state type.
/// Throws NOT_IMPLEMENTED when Data::is_compilable is false.
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
    if constexpr (!Data::is_compilable)
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
    else
        Data::compileCreate(builder, aggregate_data_ptr);
}
/// JIT: forwards the first argument's value to Data::compileAnyLast.
/// Throws NOT_IMPLEMENTED when Data::is_compilable is false.
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
{
    if constexpr (!Data::is_compilable)
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
    else
        Data::compileAnyLast(builder, aggregate_data_ptr, arguments[0].value);
}
/// JIT: forwards the merge of the source state into the destination state to Data::compileAnyLastMerge.
/// Throws NOT_IMPLEMENTED when Data::is_compilable is false.
void compileMerge(
    llvm::IRBuilderBase & builder,
    llvm::Value * aggregate_data_dst_ptr,
    llvm::Value * aggregate_data_src_ptr) const override
{
    if constexpr (!Data::is_compilable)
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
    else
        Data::compileAnyLastMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
/// JIT: returns whatever Data::compileGetResult produces for the state.
/// Throws NOT_IMPLEMENTED when Data::is_compilable is false.
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
    if constexpr (!Data::is_compilable)
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
    else
        return Data::compileGetResult(builder, aggregate_data_ptr);
}
#endif
};
AggregateFunctionPtr createAggregateFunctionAnyLast(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValueRespectNulls<true>(name, argument_types, parameters, settings));
}
AggregateFunctionPtr createAggregateFunctionAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyLastData>(name, argument_types, parameters, settings));
}
AggregateFunctionPtr createAggregateFunctionAnyLastRespectNulls(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValueRespectNulls<false>(name, argument_types, parameters, settings));
}
AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyHeavyData>(name, argument_types, parameters, settings));
return AggregateFunctionPtr(
createAggregateFunctionSingleValue<AggregateFunctionAnyLast, /* unary */ true>(name, argument_types, parameters, settings));
}
}
@ -231,27 +359,11 @@ AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, c
/// Registers `any`, `anyLast`, `anyHeavy`, their `*_respect_nulls` variants and the case-insensitive aliases.
void registerAggregateFunctionsAny(AggregateFunctionFactory & factory)
{
/// Properties used for the plain variants: marked as order-dependent.
AggregateFunctionProperties default_properties = {.returns_default_when_only_null = false, .is_order_dependent = true};
/// Same flags, with is_window_function additionally set for the respect_nulls variants.
AggregateFunctionProperties default_properties_for_respect_nulls
= {.returns_default_when_only_null = false, .is_order_dependent = true, .is_window_function = true};
factory.registerFunction("any", {createAggregateFunctionAny, default_properties});
factory.registerAlias("any_value", "any", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("first_value", "any", AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("any_respect_nulls", {createAggregateFunctionAnyRespectNulls, default_properties_for_respect_nulls});
factory.registerAlias("any_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("first_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("anyLast", {createAggregateFunctionAnyLast, default_properties});
factory.registerAlias("last_value", "anyLast", AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("anyLast_respect_nulls", {createAggregateFunctionAnyLastRespectNulls, default_properties_for_respect_nulls});
factory.registerAlias("last_value_respect_nulls", "anyLast_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("anyHeavy", {createAggregateFunctionAnyHeavy, default_properties});
/// Pair each plain function with its respect_nulls counterpart; both names are registered above.
factory.registerNullsActionTransformation("any", "any_respect_nulls");
factory.registerNullsActionTransformation("anyLast", "anyLast_respect_nulls");
}
}

View File

@ -0,0 +1,168 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/SingleValueData.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <base/defines.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
/** Implement 'heavy hitters' algorithm.
* Selects most frequent value if its frequency is more than 50% in each thread of execution.
* Otherwise, selects some arbitrary value.
* http://www.cs.umd.edu/~samir/498/karp.pdf
*/
struct AggregateFunctionAnyHeavyData
{
using Self = AggregateFunctionAnyHeavyData;
private:
SingleValueDataBaseMemoryBlock v_data;
UInt64 counter = 0;
public:
[[noreturn]] explicit AggregateFunctionAnyHeavyData()
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionAnyHeavyData initialized empty");
}
explicit AggregateFunctionAnyHeavyData(TypeIndex value_type) { generateSingleValueFromTypeIndex(value_type, v_data); }
~AggregateFunctionAnyHeavyData() { data().~SingleValueDataBase(); }
SingleValueDataBase & data() { return v_data.get(); }
const SingleValueDataBase & data() const { return v_data.get(); }
void add(const IColumn & column, size_t row_num, Arena * arena)
{
if (data().isEqualTo(column, row_num))
{
++counter;
}
else if (counter == 0)
{
data().set(column, row_num, arena);
++counter;
}
else
{
--counter;
}
}
void add(const Self & to, Arena * arena)
{
if (!to.data().has())
return;
if (data().isEqualTo(to.data()))
counter += to.counter;
else if (!data().has() || counter < to.counter)
data().set(to.data(), arena);
else
counter -= to.counter;
}
void addManyDefaults(const IColumn & column, size_t length, Arena * arena)
{
for (size_t i = 0; i < length; ++i)
add(column, 0, arena);
}
void write(WriteBuffer & buf, const ISerialization & serialization) const
{
data().write(buf, serialization);
writeBinaryLittleEndian(counter, buf);
}
void read(ReadBuffer & buf, const ISerialization & serialization, Arena * arena)
{
data().read(buf, serialization, arena);
readBinaryLittleEndian(counter, buf);
}
void insertResultInto(IColumn & to) const { data().insertResultInto(to); }
};
/// Aggregate function `anyHeavy`: a thin adapter that forwards every call to AggregateFunctionAnyHeavyData.
/// The result type is the argument type.
class AggregateFunctionAnyHeavy final : public IAggregateFunctionDataHelper<AggregateFunctionAnyHeavyData, AggregateFunctionAnyHeavy>
{
private:
    /// Default serialization of the argument type; used for reading and writing states.
    SerializationPtr serialization;
    /// Type index of the argument; every new state is constructed from it.
    const TypeIndex value_type_index;

public:
    explicit AggregateFunctionAnyHeavy(const DataTypePtr & type)
        : IAggregateFunctionDataHelper<AggregateFunctionAnyHeavyData, AggregateFunctionAnyHeavy>({type}, {}, type)
        , serialization(type->getDefaultSerialization())
        , value_type_index(WhichDataType(type).idx)
    {
    }

    /// States must be built with the value type, hence the explicit placement construction.
    void create(AggregateDataPtr __restrict place) const override { new (place) AggregateFunctionAnyHeavyData(value_type_index); }

    String getName() const override { return "anyHeavy"; }

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        this->data(place).add(*columns[0], row_num, arena);
    }

    /// Adds the default row `length` times.
    /// The count has to be forwarded: passing 0 here would make the state ignore every default row.
    void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t length, Arena * arena) const override
    {
        this->data(place).addManyDefaults(*columns[0], length, arena);
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        this->data(place).add(this->data(rhs), arena);
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        this->data(place).write(buf, *serialization);
    }

    void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        this->data(place).read(buf, *serialization, arena);
    }

    bool allocatesMemoryInArena() const override { return singleValueTypeAllocatesMemoryInArena(value_type_index); }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        this->data(place).insertResultInto(to);
    }
};
/// Factory callback for `anyHeavy`: rejects parameters, requires exactly one argument
/// and builds the function for that argument's type.
AggregateFunctionPtr
createAggregateFunctionAnyHeavy(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
    assertNoParameters(name, parameters);
    assertUnary(name, argument_types);

    return AggregateFunctionPtr(new AggregateFunctionAnyHeavy(argument_types[0]));
}
}
/// Registers `anyHeavy` in the factory, marked as order-dependent.
void registerAggregateFunctionAnyHeavy(AggregateFunctionFactory & factory)
{
    AggregateFunctionProperties order_dependent_properties
        = {.returns_default_when_only_null = false, .is_order_dependent = true};

    factory.registerFunction("anyHeavy", {createAggregateFunctionAnyHeavy, order_dependent_properties});
}
}

View File

@ -0,0 +1,235 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/SingleValueData.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <base/defines.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int INCORRECT_DATA;
extern const int LOGICAL_ERROR;
}
namespace
{
/// State of any_respect_nulls / anyLast_respect_nulls: a three-way status plus the stored value.
struct AggregateFunctionAnyRespectNullsData
{
/// What the state currently holds. The numeric value is what the function writes as the first byte of a serialized state.
enum class Status : UInt8
{
NotSet = 1, /// nothing stored yet
SetNull = 2, /// a NULL was stored
SetOther = 3 /// a non-NULL value was stored in `value`
};
Status status = Status::NotSet;
/// The stored value; the function only reads it when status is SetOther.
Field value;
bool isSet() const { return status != Status::NotSet; }
void setNull() { status = Status::SetNull; }
void setOther() { status = Status::SetOther; }
};
/** Implementation of any_respect_nulls (First = true) and anyLast_respect_nulls (First = false).
  * A NULL in the input is remembered as a value of its own instead of being skipped.
  * The result type is the argument type.
  */
template <bool First>
class AggregateFunctionAnyRespectNulls final
    : public IAggregateFunctionDataHelper<AggregateFunctionAnyRespectNullsData, AggregateFunctionAnyRespectNulls<First>>
{
public:
    using Data = AggregateFunctionAnyRespectNullsData;

    /// Default serialization of the argument type; used for the stored value in serialized states.
    SerializationPtr serialization;
    /// Whether the argument (and therefore the result) type is Nullable.
    const bool returns_nullable_type = false;

    explicit AggregateFunctionAnyRespectNulls(const DataTypePtr & type)
        : IAggregateFunctionDataHelper<Data, AggregateFunctionAnyRespectNulls<First>>({type}, {}, type)
        , serialization(type->getDefaultSerialization())
        , returns_nullable_type(type->isNullable())
    {
    }

    String getName() const override
    {
        if constexpr (First)
            return "any_respect_nulls";
        else
            return "anyLast_respect_nulls";
    }

    bool allocatesMemoryInArena() const override { return false; }

    /// Records a NULL. The 'first' variant keeps whatever is already stored.
    void addNull(AggregateDataPtr __restrict place) const
    {
        chassert(returns_nullable_type);
        auto & d = this->data(place);
        if (First && d.isSet())
            return;
        d.setNull();
    }

    /// Records the value at row_num (NULL included). The 'first' variant keeps whatever is already stored.
    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
    {
        if (columns[0]->isNullable())
        {
            if (columns[0]->isNullAt(row_num))
                return addNull(place);
        }
        auto & d = this->data(place);
        if (First && d.isSet())
            return;
        d.setOther();
        columns[0]->get(row_num, d.value);
    }

    /// Repeating the same value changes nothing for first/last semantics, so one addition is enough.
    void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override
    {
        if (columns[0]->isNullable())
            addNull(place);
        else
            add(place, columns, 0, arena);
    }

    /// Only one row of the batch can matter: the first selected one for 'first', the last selected one for 'last'.
    /// A non-negative if_argument_pos is the index of the UInt8 column that selects rows.
    void addBatchSinglePlace(
        size_t row_begin, size_t row_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos)
        const override
    {
        /// For the 'first' variant a stored value is never replaced (add() and addNull() would return at once),
        /// so there is no point in scanning the flags.
        if (First && this->data(place).isSet())
            return;

        if (if_argument_pos >= 0)
        {
            const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
            size_t size = row_end - row_begin;
            for (size_t i = 0; i < size; ++i)
            {
                /// Walk forward for 'first' and backward for 'last'.
                size_t pos = First ? row_begin + i : row_end - 1 - i;
                if (flags[pos])
                {
                    add(place, columns, pos, arena);
                    break;
                }
            }
        }
        else if (row_begin < row_end)
        {
            size_t pos = First ? row_begin : row_end - 1;
            add(place, columns, pos, arena);
        }
    }

    void addBatchSinglePlaceNotNull(
        size_t, size_t, AggregateDataPtr __restrict, const IColumn **, const UInt8 *, Arena *, ssize_t) const override
    {
        /// This should not happen since it means somebody else has preprocessed the data (NULLs or IFs) and might
        /// have discarded values that we need (NULLs)
        throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionAnyRespectNulls::addBatchSinglePlaceNotNull called");
    }

    /// 'first' keeps its own value when it has one; otherwise a set value from rhs is taken over.
    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
    {
        auto & d = this->data(place);
        if (First && d.isSet())
            return;
        auto & other = this->data(rhs);
        if (other.isSet())
        {
            d.status = other.status;
            d.value = other.value;
        }
    }

    /// Layout: one status byte, followed by the value only when the status is SetOther.
    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        auto & d = this->data(place);
        UInt8 k = static_cast<UInt8>(d.status);
        writeBinaryLittleEndian<UInt8>(k, buf);
        if (d.status == Data::Status::SetOther)
            serialization->serializeBinary(d.value, buf, {});
    }

    /// Reads a state written by serialize().
    /// Throws INCORRECT_DATA for an unknown status byte or for a NULL status in a non-nullable function.
    void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
    {
        auto & d = this->data(place);
        UInt8 k = 0;
        readBinaryLittleEndian<UInt8>(k, buf);
        d.status = static_cast<Data::Status>(k);
        if (d.status == Data::Status::NotSet)
            return;
        else if (d.status == Data::Status::SetNull)
        {
            if (!returns_nullable_type)
                throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type (NULL) in non-nullable {}State", getName());
            return;
        }
        else if (d.status == Data::Status::SetOther)
        {
            serialization->deserializeBinary(d.value, buf, {});
            return;
        }
        /// Report the byte as read: a cast to a signed 8-bit type would print values above 127 as negative numbers.
        throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type ({}) in {}State", static_cast<int>(k), getName());
    }

    /// Inserts the stored value, or the column default when nothing or a NULL was stored.
    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        auto & d = this->data(place);
        if (d.status == Data::Status::SetOther)
            to.insert(d.value);
        else
            to.insertDefault();
    }

    /// NULLs are handled by this function itself, so the original function is returned as its own adapter.
    AggregateFunctionPtr getOwnNullAdapter(
        const AggregateFunctionPtr & original_function,
        const DataTypes & /*arguments*/,
        const Array & /*params*/,
        const AggregateFunctionProperties & /*properties*/) const override
    {
        return original_function;
    }
};
/// Checks that there are no parameters and exactly one argument, then allocates the function
/// for that argument's type. The caller receives a raw pointer and takes ownership of it.
template <bool First>
IAggregateFunction * createAggregateFunctionSingleValueRespectNulls(
    const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
    assertNoParameters(name, parameters);
    assertUnary(name, argument_types);

    const DataTypePtr & value_type = argument_types[0];
    return new AggregateFunctionAnyRespectNulls<First>(value_type);
}
/// Factory callback for the 'first' variant: wraps createAggregateFunctionSingleValueRespectNulls<true> in an AggregateFunctionPtr.
AggregateFunctionPtr createAggregateFunctionAnyRespectNulls(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValueRespectNulls<true>(name, argument_types, parameters, settings));
}
/// Factory callback for the 'last' variant: wraps createAggregateFunctionSingleValueRespectNulls<false> in an AggregateFunctionPtr.
AggregateFunctionPtr createAggregateFunctionAnyLastRespectNulls(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValueRespectNulls<false>(name, argument_types, parameters, settings));
}
}
/// Registers the respect_nulls variants of any / anyLast with their aliases and links them
/// to the plain functions through registerNullsActionTransformation.
void registerAggregateFunctionsAnyRespectNulls(AggregateFunctionFactory & factory)
{
/// Both functions are registered as order-dependent and with is_window_function set.
AggregateFunctionProperties default_properties_for_respect_nulls
= {.returns_default_when_only_null = false, .is_order_dependent = true, .is_window_function = true};
factory.registerFunction("any_respect_nulls", {createAggregateFunctionAnyRespectNulls, default_properties_for_respect_nulls});
factory.registerAlias("any_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("first_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("anyLast_respect_nulls", {createAggregateFunctionAnyLastRespectNulls, default_properties_for_respect_nulls});
factory.registerAlias("last_value_respect_nulls", "anyLast_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
/// Must happen after registering any and anyLast
factory.registerNullsActionTransformation("any", "any_respect_nulls");
factory.registerNullsActionTransformation("anyLast", "anyLast_respect_nulls");
}
}

View File

@ -1,107 +0,0 @@
#pragma once
#include <base/StringRef.h>
#include <DataTypes/IDataType.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionMinMaxAny.h> // SingleValueDataString used in embedded compiler
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int CORRUPTED_DATA;
}
/// For possible values for template parameters, see 'AggregateFunctionMinMaxAny.h'.
/// State of argMin / argMax: the tracked extreme value together with the argument seen alongside it.
template <typename ResultData, typename ValueData>
struct AggregateFunctionArgMinMaxData
{
/// Aliases so that users of the state can name its component types.
using ResultData_t = ResultData;
using ValueData_t = ValueData;
ResultData result; // the argument at which the minimum/maximum value is reached.
ValueData value; // value for which the minimum/maximum is calculated.
/// True when either of the two components reports that it allocates memory in the arena.
static bool allocatesMemoryInArena()
{
return ResultData::allocatesMemoryInArena() || ValueData::allocatesMemoryInArena();
}
};
/// Returns the first arg value found for the minimum/maximum value. Example: argMax(arg, value).
template <typename Data>
class AggregateFunctionArgMinMax final : public IAggregateFunctionDataHelper<Data, AggregateFunctionArgMinMax<Data>>
{
private:
    using Base = IAggregateFunctionDataHelper<Data, AggregateFunctionArgMinMax<Data>>;

    /// Refers to the copy of the second argument type kept in argument_types.
    const DataTypePtr & type_val;
    const SerializationPtr serialization_res;
    const SerializationPtr serialization_val;

public:
    /// type_res_ is the type of the returned argument, type_val_ the type that is compared.
    /// Throws ILLEGAL_TYPE_OF_ARGUMENT when type_val_ is not comparable.
    AggregateFunctionArgMinMax(const DataTypePtr & type_res_, const DataTypePtr & type_val_)
        : Base({type_res_, type_val_}, {}, type_res_)
        , type_val(this->argument_types[1])
        , serialization_res(type_res_->getDefaultSerialization())
        , serialization_val(type_val->getDefaultSerialization())
    {
        if (!type_val->isComparable())
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of "
                "aggregate function {} because the values of that data type are not comparable",
                type_val->getName(), getName());
    }

    /// The name is derived from the name reported by the value part of the state.
    String getName() const override
    {
        if (StringRef(Data::ValueData_t::name()) == StringRef("min"))
            return "argMin";
        return "argMax";
    }

    /// The stored argument is replaced only when the value part accepts the new value.
    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        auto & state = this->data(place);
        if (state.value.changeIfBetter(*columns[1], row_num, arena))
            state.result.change(*columns[0], row_num, arena);
    }

    /// Same rule as add(), with the other state as the source.
    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        auto & target = this->data(place);
        const auto & source = this->data(rhs);
        if (target.value.changeIfBetter(source.value, arena))
            target.result.change(source.result, arena);
    }

    /// Layout: the result first, then the value.
    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        const auto & state = this->data(place);
        state.result.write(buf, *serialization_res);
        state.value.write(buf, *serialization_val);
    }

    /// Reads both parts and rejects states in which only one of them is set.
    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        auto & state = this->data(place);
        state.result.read(buf, *serialization_res, arena);
        state.value.read(buf, *serialization_val, arena);

        const auto has_value = state.value.has();
        const auto has_result = state.result.has();
        if (unlikely(has_value != has_result))
            throw Exception(
                ErrorCodes::CORRUPTED_DATA,
                "Invalid state of the aggregate function {}: has_value ({}) != has_result ({})",
                getName(),
                has_value,
                has_result);
    }

    bool allocatesMemoryInArena() const override
    {
        return Data::allocatesMemoryInArena();
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        this->data(place).result.insertResultInto(to);
    }
};
}

View File

@ -204,7 +204,7 @@ private:
class Adam : public IWeightsUpdater
{
public:
Adam(size_t num_params)
explicit Adam(size_t num_params)
{
beta1_powered = beta1;
beta2_powered = beta2;

View File

@ -1,238 +0,0 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/HelpersMinMaxAny.h>
#include <Common/Concepts.h>
#include <Common/findExtreme.h>
namespace DB
{
struct Settings;
namespace
{
/// `max` on top of AggregateFunctionsSingleValue<Data>; only the two single-place batch entry points are overridden.
template <typename Data>
class AggregateFunctionsSingleValueMax final : public AggregateFunctionsSingleValue<Data>
{
using Parent = AggregateFunctionsSingleValue<Data>;
public:
explicit AggregateFunctionsSingleValueMax(const DataTypePtr & type) : Parent(type) { }
/// Specializations for native numeric types
/// Folds rows [row_begin, row_end) of columns[0] into the state at `place`.
/// A non-negative if_argument_pos is the index of the UInt8 column that selects rows.
void addBatchSinglePlace(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
Arena * arena,
ssize_t if_argument_pos) const override;
/// Same as addBatchSinglePlace, but rows with a non-zero null_map entry are skipped as well.
void addBatchSinglePlaceNotNull(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
const UInt8 * __restrict null_map,
Arena * arena,
ssize_t if_argument_pos) const override;
};
/// SPECIALIZE(TYPE) defines the explicit specialization of addBatchSinglePlace for SingleValueDataFixed<TYPE>:
/// the maximum of the batch is computed directly on the column data with findExtremeMax / findExtremeMaxIf
/// and, when one is found, applied to the state with changeIfGreater.
/// The macro is expanded through FOR_BASIC_NUMERIC_TYPES and undefined again right after.
// NOLINTBEGIN(bugprone-macro-parentheses)
#define SPECIALIZE(TYPE) \
template <> \
void AggregateFunctionsSingleValueMax<typename DB::AggregateFunctionMaxData<SingleValueDataFixed<TYPE>>>::addBatchSinglePlace( \
size_t row_begin, \
size_t row_end, \
AggregateDataPtr __restrict place, \
const IColumn ** __restrict columns, \
Arena *, \
ssize_t if_argument_pos) const \
{ \
const auto & column = assert_cast<const DB::AggregateFunctionMaxData<SingleValueDataFixed<TYPE>>::ColVecType &>(*columns[0]); \
std::optional<TYPE> opt; \
if (if_argument_pos >= 0) \
{ \
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData(); \
opt = findExtremeMaxIf(column.getData().data(), flags.data(), row_begin, row_end); \
} \
else \
opt = findExtremeMax(column.getData().data(), row_begin, row_end); \
if (opt.has_value()) \
this->data(place).changeIfGreater(opt.value()); \
}
// NOLINTEND(bugprone-macro-parentheses)
FOR_BASIC_NUMERIC_TYPES(SPECIALIZE)
#undef SPECIALIZE
/// Generic batch path of `max` for a single state.
/// Finds the largest row in [row_begin, row_end) of columns[0] (restricted to rows whose flag is non-zero
/// when if_argument_pos >= 0) and offers it to the state with changeIfGreater.
/// Does nothing when no row qualifies.
template <typename Data>
void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlace(
    size_t row_begin,
    size_t row_end,
    AggregateDataPtr __restrict place,
    const IColumn ** __restrict columns,
    Arena * arena,
    ssize_t if_argument_pos) const
{
    if constexpr (!is_any_of<typename Data::Impl, SingleValueDataString, SingleValueDataGeneric>)
    {
        /// Leave other numeric types (large integers, decimals, etc) to keep doing the comparison as it's
        /// faster than doing a permutation
        return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
    }

    constexpr int nan_null_direction_hint = -1;
    auto const & column = *columns[0];
    if (if_argument_pos >= 0)
    {
        size_t index = row_begin;
        const auto & if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
        /// Skip to the first selected row. The bound is tested first so that if_flags[row_end] is never read.
        while (index < row_end && if_flags[index] == 0)
            index++;
        if (index >= row_end)
            return;

        for (size_t i = index + 1; i < row_end; i++)
        {
            if ((if_flags[i] != 0) && (column.compareAt(i, index, column, nan_null_direction_hint) > 0))
                index = i;
        }
        this->data(place).changeIfGreater(column, index, arena);
    }
    else
    {
        if (row_begin >= row_end)
            return;

        /// TODO: Introduce row_begin and row_end to getPermutation
        if (row_begin != 0 || row_end != column.size())
        {
            size_t index = row_begin;
            for (size_t i = index + 1; i < row_end; i++)
            {
                if (column.compareAt(i, index, column, nan_null_direction_hint) > 0)
                    index = i;
            }
            this->data(place).changeIfGreater(column, index, arena);
        }
        else
        {
            /// The whole column is covered: ask for a descending permutation limited to one element.
            constexpr IColumn::PermutationSortDirection direction = IColumn::PermutationSortDirection::Descending;
            constexpr IColumn::PermutationSortStability stability = IColumn::PermutationSortStability::Unstable;
            IColumn::Permutation permutation;
            constexpr UInt64 limit = 1;
            column.getPermutation(direction, stability, limit, nan_null_direction_hint, permutation);
            this->data(place).changeIfGreater(column, permutation[0], arena);
        }
    }
}
/// SPECIALIZE(TYPE) defines the explicit specialization of addBatchSinglePlaceNotNull for SingleValueDataFixed<TYPE>.
/// With an if-column, the null map and the if flags are first merged into one temporary flag array
/// (a row is selected when it is not null and its flag is non-zero) and findExtremeMaxIf is used;
/// without one, findExtremeMaxNotNull works on the null map directly.
/// The macro is expanded through FOR_BASIC_NUMERIC_TYPES and undefined again right after.
// NOLINTBEGIN(bugprone-macro-parentheses)
#define SPECIALIZE(TYPE) \
template <> \
void AggregateFunctionsSingleValueMax<typename DB::AggregateFunctionMaxData<SingleValueDataFixed<TYPE>>>::addBatchSinglePlaceNotNull( \
size_t row_begin, \
size_t row_end, \
AggregateDataPtr __restrict place, \
const IColumn ** __restrict columns, \
const UInt8 * __restrict null_map, \
Arena *, \
ssize_t if_argument_pos) const \
{ \
const auto & column = assert_cast<const DB::AggregateFunctionMaxData<SingleValueDataFixed<TYPE>>::ColVecType &>(*columns[0]); \
std::optional<TYPE> opt; \
if (if_argument_pos >= 0) \
{ \
const auto * if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData().data(); \
auto final_flags = std::make_unique<UInt8[]>(row_end); \
for (size_t i = row_begin; i < row_end; ++i) \
final_flags[i] = (!null_map[i]) & !!if_flags[i]; \
opt = findExtremeMaxIf(column.getData().data(), final_flags.get(), row_begin, row_end); \
} \
else \
opt = findExtremeMaxNotNull(column.getData().data(), null_map, row_begin, row_end); \
if (opt.has_value()) \
this->data(place).changeIfGreater(opt.value()); \
}
// NOLINTEND(bugprone-macro-parentheses)
FOR_BASIC_NUMERIC_TYPES(SPECIALIZE)
#undef SPECIALIZE
/// Generic batch path of `max` for a single state when a null map is supplied.
/// Finds the largest row in [row_begin, row_end) of columns[0] among the rows whose null_map entry is zero
/// (and whose flag is non-zero when if_argument_pos >= 0) and offers it to the state with changeIfGreater.
/// Does nothing when no row qualifies.
template <typename Data>
void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlaceNotNull(
    size_t row_begin,
    size_t row_end,
    AggregateDataPtr __restrict place,
    const IColumn ** __restrict columns,
    const UInt8 * __restrict null_map,
    Arena * arena,
    ssize_t if_argument_pos) const
{
    if constexpr (!is_any_of<typename Data::Impl, SingleValueDataString, SingleValueDataGeneric>)
    {
        /// Leave other numeric types (large integers, decimals, etc) to keep doing the comparison as it's
        /// faster than doing a permutation
        return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
    }

    constexpr int nan_null_direction_hint = -1;
    auto const & column = *columns[0];
    if (if_argument_pos >= 0)
    {
        size_t index = row_begin;
        const auto & if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
        /// Skip to the first usable row. The bound is tested first so that neither array is read at row_end.
        while ((index < row_end) && (if_flags[index] == 0 || null_map[index] != 0))
            index++;
        if (index >= row_end)
            return;

        for (size_t i = index + 1; i < row_end; i++)
        {
            if ((if_flags[i] != 0) && (null_map[i] == 0) && (column.compareAt(i, index, column, nan_null_direction_hint) > 0))
                index = i;
        }
        this->data(place).changeIfGreater(column, index, arena);
    }
    else
    {
        size_t index = row_begin;
        /// Same ordering rule: check the bound before touching null_map[index].
        while ((index < row_end) && (null_map[index] != 0))
            index++;
        if (index >= row_end)
            return;

        for (size_t i = index + 1; i < row_end; i++)
        {
            if ((null_map[i] == 0) && (column.compareAt(i, index, column, nan_null_direction_hint) > 0))
                index = i;
        }
        this->data(place).changeIfGreater(column, index, arena);
    }
}
/// Factory callback for `max`: builds the function through createAggregateFunctionSingleValue
/// instantiated with AggregateFunctionsSingleValueMax and AggregateFunctionMaxData.
AggregateFunctionPtr createAggregateFunctionMax(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValueMax, AggregateFunctionMaxData>(name, argument_types, parameters, settings));
}
/// Factory callback for `argMax`: builds the function through createAggregateFunctionArgMinMax
/// instantiated with AggregateFunctionMaxData.
AggregateFunctionPtr createAggregateFunctionArgMax(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionArgMinMax<AggregateFunctionMaxData>(name, argument_types, parameters, settings));
}
}
/// Registers `max` (case-insensitive name) and `argMax` in the factory.
void registerAggregateFunctionsMax(AggregateFunctionFactory & factory)
{
factory.registerFunction("max", createAggregateFunctionMax, AggregateFunctionFactory::CaseInsensitive);
/// The functions below depend on the order of data.
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
factory.registerFunction("argMax", { createAggregateFunctionArgMax, properties });
}
}

View File

@ -1,240 +0,0 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/HelpersMinMaxAny.h>
#include <Common/Concepts.h>
#include <Common/findExtreme.h>
namespace DB
{
struct Settings;
namespace
{
/// `min` on top of AggregateFunctionsSingleValue<Data>; only the two single-place batch entry points are overridden.
template <typename Data>
class AggregateFunctionsSingleValueMin final : public AggregateFunctionsSingleValue<Data>
{
using Parent = AggregateFunctionsSingleValue<Data>;
public:
explicit AggregateFunctionsSingleValueMin(const DataTypePtr & type) : Parent(type) { }
/// Specializations for native numeric types
/// Folds rows [row_begin, row_end) of columns[0] into the state at `place`.
/// A non-negative if_argument_pos is the index of the UInt8 column that selects rows.
void addBatchSinglePlace(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
Arena * arena,
ssize_t if_argument_pos) const override;
/// Same as addBatchSinglePlace, but rows with a non-zero null_map entry are skipped as well.
void addBatchSinglePlaceNotNull(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
const UInt8 * __restrict null_map,
Arena * arena,
ssize_t if_argument_pos) const override;
};
/// SPECIALIZE(TYPE) defines the explicit specialization of addBatchSinglePlace for SingleValueDataFixed<TYPE>:
/// the minimum of the batch is computed directly on the column data with findExtremeMin / findExtremeMinIf
/// and, when one is found, applied to the state with changeIfLess.
/// The macro is expanded through FOR_BASIC_NUMERIC_TYPES and undefined again right after.
// NOLINTBEGIN(bugprone-macro-parentheses)
#define SPECIALIZE(TYPE) \
template <> \
void AggregateFunctionsSingleValueMin<typename DB::AggregateFunctionMinData<SingleValueDataFixed<TYPE>>>::addBatchSinglePlace( \
size_t row_begin, \
size_t row_end, \
AggregateDataPtr __restrict place, \
const IColumn ** __restrict columns, \
Arena *, \
ssize_t if_argument_pos) const \
{ \
const auto & column = assert_cast<const DB::AggregateFunctionMinData<SingleValueDataFixed<TYPE>>::ColVecType &>(*columns[0]); \
std::optional<TYPE> opt; \
if (if_argument_pos >= 0) \
{ \
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData(); \
opt = findExtremeMinIf(column.getData().data(), flags.data(), row_begin, row_end); \
} \
else \
opt = findExtremeMin(column.getData().data(), row_begin, row_end); \
if (opt.has_value()) \
this->data(place).changeIfLess(opt.value()); \
}
// NOLINTEND(bugprone-macro-parentheses)
FOR_BASIC_NUMERIC_TYPES(SPECIALIZE)
#undef SPECIALIZE
/// Generic batch path of `min` for a single state.
/// Finds the smallest row in [row_begin, row_end) of columns[0] (restricted to rows whose flag is non-zero
/// when if_argument_pos >= 0) and offers it to the state with changeIfLess.
/// Does nothing when no row qualifies.
template <typename Data>
void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(
    size_t row_begin,
    size_t row_end,
    AggregateDataPtr __restrict place,
    const IColumn ** __restrict columns,
    Arena * arena,
    ssize_t if_argument_pos) const
{
    if constexpr (!is_any_of<typename Data::Impl, SingleValueDataString, SingleValueDataGeneric>)
    {
        /// Leave other numeric types (large integers, decimals, etc) to keep doing the comparison as it's
        /// faster than doing a permutation
        return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
    }

    constexpr int nan_null_direction_hint = 1;
    auto const & column = *columns[0];
    if (if_argument_pos >= 0)
    {
        size_t index = row_begin;
        const auto & if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
        /// Skip to the first selected row. The bound is tested first so that if_flags[row_end] is never read.
        while (index < row_end && if_flags[index] == 0)
            index++;
        if (index >= row_end)
            return;

        for (size_t i = index + 1; i < row_end; i++)
        {
            if ((if_flags[i] != 0) && (column.compareAt(i, index, column, nan_null_direction_hint) < 0))
                index = i;
        }
        this->data(place).changeIfLess(column, index, arena);
    }
    else
    {
        if (row_begin >= row_end)
            return;

        /// TODO: Introduce row_begin and row_end to getPermutation
        if (row_begin != 0 || row_end != column.size())
        {
            size_t index = row_begin;
            for (size_t i = index + 1; i < row_end; i++)
            {
                if (column.compareAt(i, index, column, nan_null_direction_hint) < 0)
                    index = i;
            }
            this->data(place).changeIfLess(column, index, arena);
        }
        else
        {
            /// The whole column is covered: ask for an ascending permutation limited to one element.
            constexpr IColumn::PermutationSortDirection direction = IColumn::PermutationSortDirection::Ascending;
            constexpr IColumn::PermutationSortStability stability = IColumn::PermutationSortStability::Unstable;
            IColumn::Permutation permutation;
            constexpr UInt64 limit = 1;
            column.getPermutation(direction, stability, limit, nan_null_direction_hint, permutation);
            this->data(place).changeIfLess(column, permutation[0], arena);
        }
    }
}
/// SPECIALIZE(TYPE) defines the explicit specialization of addBatchSinglePlaceNotNull for SingleValueDataFixed<TYPE>.
/// With an if-column, the null map and the if flags are first merged into one temporary flag array
/// (a row is selected when it is not null and its flag is non-zero) and findExtremeMinIf is used;
/// without one, findExtremeMinNotNull works on the null map directly.
/// The macro is expanded through FOR_BASIC_NUMERIC_TYPES and undefined again right after.
// NOLINTBEGIN(bugprone-macro-parentheses)
#define SPECIALIZE(TYPE) \
template <> \
void AggregateFunctionsSingleValueMin<typename DB::AggregateFunctionMinData<SingleValueDataFixed<TYPE>>>::addBatchSinglePlaceNotNull( \
size_t row_begin, \
size_t row_end, \
AggregateDataPtr __restrict place, \
const IColumn ** __restrict columns, \
const UInt8 * __restrict null_map, \
Arena *, \
ssize_t if_argument_pos) const \
{ \
const auto & column = assert_cast<const DB::AggregateFunctionMinData<SingleValueDataFixed<TYPE>>::ColVecType &>(*columns[0]); \
std::optional<TYPE> opt; \
if (if_argument_pos >= 0) \
{ \
const auto * if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData().data(); \
auto final_flags = std::make_unique<UInt8[]>(row_end); \
for (size_t i = row_begin; i < row_end; ++i) \
final_flags[i] = (!null_map[i]) & !!if_flags[i]; \
opt = findExtremeMinIf(column.getData().data(), final_flags.get(), row_begin, row_end); \
} \
else \
opt = findExtremeMinNotNull(column.getData().data(), null_map, row_begin, row_end); \
if (opt.has_value()) \
this->data(place).changeIfLess(opt.value()); \
}
// NOLINTEND(bugprone-macro-parentheses)
FOR_BASIC_NUMERIC_TYPES(SPECIALIZE)
#undef SPECIALIZE
/// Generic batch path of `min` for a single state when a null map is supplied.
/// Finds the smallest row in [row_begin, row_end) of columns[0] among the rows whose null_map entry is zero
/// (and whose flag is non-zero when if_argument_pos >= 0) and offers it to the state with changeIfLess.
/// Does nothing when no row qualifies.
template <typename Data>
void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlaceNotNull(
    size_t row_begin,
    size_t row_end,
    AggregateDataPtr __restrict place,
    const IColumn ** __restrict columns,
    const UInt8 * __restrict null_map,
    Arena * arena,
    ssize_t if_argument_pos) const
{
    if constexpr (!is_any_of<typename Data::Impl, SingleValueDataString, SingleValueDataGeneric>)
    {
        /// Leave other numeric types (large integers, decimals, etc) to keep doing the comparison as it's
        /// faster than doing a permutation
        return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
    }

    constexpr int nan_null_direction_hint = 1;
    auto const & column = *columns[0];
    if (if_argument_pos >= 0)
    {
        size_t index = row_begin;
        const auto & if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
        /// Skip to the first usable row. The bound is tested first so that neither array is read at row_end.
        while ((index < row_end) && (if_flags[index] == 0 || null_map[index] != 0))
            index++;
        if (index >= row_end)
            return;

        for (size_t i = index + 1; i < row_end; i++)
        {
            /// The null check must look at the candidate row `i`; `index` always refers to a non-null row here,
            /// so testing null_map[index] would let NULL rows take part in the comparison.
            if ((if_flags[i] != 0) && (null_map[i] == 0) && (column.compareAt(i, index, column, nan_null_direction_hint) < 0))
                index = i;
        }
        this->data(place).changeIfLess(column, index, arena);
    }
    else
    {
        size_t index = row_begin;
        /// Same ordering rule: check the bound before touching null_map[index].
        while ((index < row_end) && (null_map[index] != 0))
            index++;
        if (index >= row_end)
            return;

        for (size_t i = index + 1; i < row_end; i++)
        {
            if ((null_map[i] == 0) && (column.compareAt(i, index, column, nan_null_direction_hint) < 0))
                index = i;
        }
        this->data(place).changeIfLess(column, index, arena);
    }
}
/// Factory callback for `min`: builds the function through createAggregateFunctionSingleValue
/// instantiated with AggregateFunctionsSingleValueMin and AggregateFunctionMinData.
AggregateFunctionPtr createAggregateFunctionMin(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValueMin, AggregateFunctionMinData>(
name, argument_types, parameters, settings));
}
/// Factory callback for `argMin`: builds the function through createAggregateFunctionArgMinMax
/// instantiated with AggregateFunctionMinData.
AggregateFunctionPtr createAggregateFunctionArgMin(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionArgMinMax<AggregateFunctionMinData>(name, argument_types, parameters, settings));
}
}
/// Registers `min` (case-insensitive name) and `argMin` in the aggregate function factory.
void registerAggregateFunctionsMin(AggregateFunctionFactory & factory)
{
    factory.registerFunction("min", createAggregateFunctionMin, AggregateFunctionFactory::CaseInsensitive);

    /// The functions below depend on the order of data.
    AggregateFunctionProperties order_dependent = { .returns_default_when_only_null = false, .is_order_dependent = true };
    factory.registerFunction("argMin", { createAggregateFunctionArgMin, order_dependent });
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,19 +1,193 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/HelpersMinMaxAny.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include "registerAggregateFunctions.h"
#include <AggregateFunctions/SingleValueData.h>
#include <Columns/ColumnNullable.h>
#include <DataTypes/DataTypeNullable.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
/** The aggregate function 'singleValueOrNull' is used to implement subquery operators,
* such as x = ALL (SELECT ...)
* It checks if there is only one unique non-NULL value in the data.
* If there is only one unique value - returns it.
* If there are zero or at least two distinct values - returns NULL.
*/
AggregateFunctionPtr createAggregateFunctionSingleValueOrNull(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
struct AggregateFunctionSingleValueOrNullData
{
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionSingleValueOrNullData>(name, argument_types, parameters, settings));
/// Shorthand for this state type (used as the parameter type when merging two states).
using Self = AggregateFunctionSingleValueOrNullData;
private:
/// Storage for the value object; it is always accessed through data() as a SingleValueDataBase.
SingleValueDataBaseMemoryBlock v_data;
/// True until the first value has been stored by add().
bool first_value = true;
/// Set by add() once a value different from the stored one is seen; the result is then NULL.
bool is_null = false;
public:
/// A state cannot be created without a value type: this constructor always throws LOGICAL_ERROR.
[[noreturn]] explicit AggregateFunctionSingleValueOrNullData()
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionSingleValueOrNullData initialized empty");
}
/// Initializes v_data for the given value type via generateSingleValueFromTypeIndex.
explicit AggregateFunctionSingleValueOrNullData(TypeIndex value_type) { generateSingleValueFromTypeIndex(value_type, v_data); }
/// Explicitly runs the destructor of the value object held in v_data.
~AggregateFunctionSingleValueOrNullData() { data().~SingleValueDataBase(); }
/// Access to the stored value object.
SingleValueDataBase & data() { return v_data.get(); }
const SingleValueDataBase & data() const { return v_data.get(); }
/// True once two distinct values have been observed.
bool isNull() const { return is_null; }
/// Feeds one row into the state.
/// The first row ever seen is stored; every later row is only compared against the stored value,
/// and a mismatch marks the state as NULL.
void add(const IColumn & column, size_t row_num, Arena * arena)
{
    if (!first_value)
    {
        if (!data().isEqualTo(column, row_num))
            is_null = true;
        return;
    }

    first_value = false;
    data().set(column, row_num, arena);
}
void add(const Self & to, Arena * arena)
{
if (!to.data().has())
return;
if (first_value && !to.first_value)
{
first_value = false;
data().set(to.data(), arena);
}
else if (!data().isEqualTo(to.data()))
{
is_null = true;
}
}
/// TODO: Methods write and read lose data (first_value and is_null)
/// Fixing it requires a breaking change (but it's probably necessary)
///
/// Serializes only the stored value object, by delegating to it.
void write(WriteBuffer & buf, const ISerialization & serialization) const
{
    data().write(buf, serialization);
}
/// Deserializes only the stored value object, by delegating to it.
/// The flags first_value and is_null are not touched here.
void read(ReadBuffer & buf, const ISerialization & serialization, Arena * arena)
{
    data().read(buf, serialization, arena);
}
/// Appends the aggregation result to `to`.
/// When no value was seen, or two distinct values were seen, a default is inserted into `to`
/// (presumably NULL, as the result column is expected to be Nullable - see the cast below).
/// Otherwise the stored value goes into the nested column and a default entry into the null map.
void insertResultInto(IColumn & to) const
{
    const bool has_single_value = !is_null && !first_value;
    if (!has_single_value)
    {
        to.insertDefault();
        return;
    }

    auto & nullable_column = typeid_cast<ColumnNullable &>(to);
    nullable_column.getNullMapColumn().insertDefault();
    data().insertResultInto(nullable_column.getNestedColumn());
}
};
/// Aggregate function `singleValueOrNull`.
/// Takes one argument of type T and returns Nullable(T); all state handling is delegated to
/// AggregateFunctionSingleValueOrNullData.
class AggregateFunctionSingleValueOrNull final
    : public IAggregateFunctionDataHelper<AggregateFunctionSingleValueOrNullData, AggregateFunctionSingleValueOrNull>
{
private:
    using Base = IAggregateFunctionDataHelper<AggregateFunctionSingleValueOrNullData, AggregateFunctionSingleValueOrNull>;

    /// Default serialization of the argument type, used by serialize()/deserialize().
    SerializationPtr serialization;
    /// Type index of the argument, needed to construct the state in create().
    const TypeIndex value_type_index;

public:
    explicit AggregateFunctionSingleValueOrNull(const DataTypePtr & type)
        : Base({type}, {}, makeNullable(type))
        , serialization(type->getDefaultSerialization())
        , value_type_index(WhichDataType(type).idx)
    {
    }

    /// Constructs the state in place for the argument's type.
    void create(AggregateDataPtr __restrict place) const override
    {
        new (place) AggregateFunctionSingleValueOrNullData(value_type_index);
    }

    String getName() const override { return "singleValueOrNull"; }

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        auto & state = this->data(place);
        state.add(*columns[0], row_num, arena);
    }

    /// Once the state is already NULL no further row can change the result, so the batch is skipped.
    void addBatchSinglePlace(
        size_t row_begin,
        size_t row_end,
        AggregateDataPtr __restrict place,
        const IColumn ** __restrict columns,
        Arena * arena,
        ssize_t if_argument_pos) const override
    {
        if (!this->data(place).isNull())
            Base::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
    }

    /// Same shortcut as addBatchSinglePlace, for the variant that receives a null map.
    void addBatchSinglePlaceNotNull(
        size_t row_begin,
        size_t row_end,
        AggregateDataPtr __restrict place,
        const IColumn ** __restrict columns,
        const UInt8 * __restrict null_map,
        Arena * arena,
        ssize_t if_argument_pos) const override
    {
        if (!this->data(place).isNull())
            Base::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
    }

    /// The count is ignored: row 0 of the column is added once.
    void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override
    {
        auto & state = this->data(place);
        state.add(*columns[0], 0, arena);
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        const auto & other = this->data(rhs);
        this->data(place).add(other, arena);
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        const auto & state = this->data(place);
        state.write(buf, *serialization);
    }

    void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        auto & state = this->data(place);
        state.read(buf, *serialization, arena);
    }

    bool allocatesMemoryInArena() const override
    {
        return singleValueTypeAllocatesMemoryInArena(value_type_index);
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        const auto & state = this->data(place);
        state.insertResultInto(to);
    }
};
/// Factory callback for `singleValueOrNull`.
/// Validates that there are no parameters and exactly one argument, then builds the function
/// for that argument's type.
AggregateFunctionPtr createAggregateFunctionSingleValueOrNull(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
    assertNoParameters(name, parameters);
    assertUnary(name, argument_types);

    return AggregateFunctionPtr(new AggregateFunctionSingleValueOrNull(argument_types[0]));
}
}
@ -22,6 +196,4 @@ void registerAggregateFunctionSingleValueOrNull(AggregateFunctionFactory & facto
{
factory.registerFunction("singleValueOrNull", createAggregateFunctionSingleValueOrNull);
}
}

View File

@ -5,12 +5,14 @@
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeIPv4andIPv6.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadHelpersArena.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnArray.h>
@ -31,6 +33,7 @@ namespace ErrorCodes
{
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
@ -58,22 +61,52 @@ protected:
using State = AggregateFunctionTopKData<T>;
UInt64 threshold;
UInt64 reserved;
bool include_counts;
bool is_approx_top_k;
public:
AggregateFunctionTopK(UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params)
: IAggregateFunctionDataHelper<AggregateFunctionTopKData<T>, AggregateFunctionTopK<T, is_weighted>>(argument_types_, params, createResultType(argument_types_))
, threshold(threshold_), reserved(load_factor * threshold)
AggregateFunctionTopK(UInt64 threshold_, UInt64 reserved_, bool include_counts_, bool is_approx_top_k_, const DataTypes & argument_types_, const Array & params)
: IAggregateFunctionDataHelper<AggregateFunctionTopKData<T>, AggregateFunctionTopK<T, is_weighted>>(argument_types_, params, createResultType(argument_types_, include_counts_))
, threshold(threshold_), reserved(reserved_), include_counts(include_counts_), is_approx_top_k(is_approx_top_k_)
{}
AggregateFunctionTopK(UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params, const DataTypePtr & result_type_)
AggregateFunctionTopK(UInt64 threshold_, UInt64 reserved_, bool include_counts_, bool is_approx_top_k_, const DataTypes & argument_types_, const Array & params, const DataTypePtr & result_type_)
: IAggregateFunctionDataHelper<AggregateFunctionTopKData<T>, AggregateFunctionTopK<T, is_weighted>>(argument_types_, params, result_type_)
, threshold(threshold_), reserved(load_factor * threshold)
, threshold(threshold_), reserved(reserved_), include_counts(include_counts_), is_approx_top_k(is_approx_top_k_)
{}
String getName() const override { return is_weighted ? "topKWeighted" : "topK"; }
static DataTypePtr createResultType(const DataTypes & argument_types_)
/// Name under which this instance was created: the approx_top_* family when is_approx_top_k
/// is set, the topK family otherwise; the weighted variant is chosen by is_weighted.
String getName() const override
{
    if (!is_approx_top_k)
        return is_weighted ? "topKWeighted" : "topK";

    return is_weighted ? "approx_top_sum" : "approx_top_k";
}
/// Result type of the function.
/// Without counts: Array(T), where T is the type of the first argument.
/// With counts: Array(Tuple(item T, count UInt64, error UInt64)).
static DataTypePtr createResultType(const DataTypes & argument_types_, bool include_counts_)
{
    if (!include_counts_)
        return std::make_shared<DataTypeArray>(argument_types_[0]);

    DataTypes element_types
    {
        argument_types_[0],
        std::make_shared<DataTypeUInt64>(),
        std::make_shared<DataTypeUInt64>(),
    };

    Strings element_names
    {
        "item",
        "count",
        "error",
    };

    auto tuple_type = std::make_shared<DataTypeTuple>(std::move(element_types), std::move(element_names));
    return std::make_shared<DataTypeArray>(tuple_type);
}
@ -122,13 +155,40 @@ public:
offsets_to.push_back(offsets_to.back() + size);
typename ColumnVector<T>::Container & data_to = assert_cast<ColumnVector<T> &>(arr_to.getData()).getData();
size_t old_size = data_to.size();
data_to.resize(old_size + size);
IColumn & data_to = arr_to.getData();
if (include_counts)
{
auto & column_tuple = assert_cast<ColumnTuple &>(data_to);
auto & column_key = assert_cast<ColumnVector<T> &>(column_tuple.getColumn(0)).getData();
auto & column_count = assert_cast<ColumnVector<UInt64> &>(column_tuple.getColumn(1)).getData();
auto & column_error = assert_cast<ColumnVector<UInt64> &>(column_tuple.getColumn(2)).getData();
size_t old_size = column_key.size();
column_key.resize(old_size + size);
column_count.resize(old_size + size);
column_error.resize(old_size + size);
size_t i = 0;
for (auto it = result_vec.begin(); it != result_vec.end(); ++it, ++i)
data_to[old_size + i] = it->key;
{
column_key[old_size + i] = it->key;
column_count[old_size + i] = it->count;
column_error[old_size + i] = it->error;
}
} else
{
auto & column_key = assert_cast<ColumnVector<T> &>(data_to).getData();
size_t old_size = column_key.size();
column_key.resize(old_size + size);
size_t i = 0;
for (auto it = result_vec.begin(); it != result_vec.end(); ++it, ++i)
{
column_key[old_size + i] = it->key;
}
}
}
};
@ -153,19 +213,51 @@ private:
UInt64 threshold;
UInt64 reserved;
bool include_counts;
bool is_approx_top_k;
public:
AggregateFunctionTopKGeneric(
UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params)
: IAggregateFunctionDataHelper<AggregateFunctionTopKGenericData, AggregateFunctionTopKGeneric<is_plain_column, is_weighted>>(argument_types_, params, createResultType(argument_types_))
, threshold(threshold_), reserved(load_factor * threshold) {}
UInt64 threshold_, UInt64 reserved_, bool include_counts_, bool is_approx_top_k_, const DataTypes & argument_types_, const Array & params)
: IAggregateFunctionDataHelper<AggregateFunctionTopKGenericData, AggregateFunctionTopKGeneric<is_plain_column, is_weighted>>(argument_types_, params, createResultType(argument_types_, include_counts_))
, threshold(threshold_), reserved(reserved_), include_counts(include_counts_), is_approx_top_k(is_approx_top_k_) {}
String getName() const override { return is_weighted ? "topKWeighted" : "topK"; }
/// Name under which this instance was created: the approx_top_* family when is_approx_top_k
/// is set, the topK family otherwise; the weighted variant is chosen by is_weighted.
String getName() const override
{
    if (!is_approx_top_k)
        return is_weighted ? "topKWeighted" : "topK";

    return is_weighted ? "approx_top_sum" : "approx_top_k";
}
static DataTypePtr createResultType(const DataTypes & argument_types_)
/// Result type of the function.
/// Without counts: Array(T), where T is the type of the first argument.
/// With counts: Array(Tuple(item T, count UInt64, error UInt64)).
static DataTypePtr createResultType(const DataTypes & argument_types_, bool include_counts_)
{
    if (!include_counts_)
        return std::make_shared<DataTypeArray>(argument_types_[0]);

    DataTypes element_types
    {
        argument_types_[0],
        std::make_shared<DataTypeUInt64>(),
        std::make_shared<DataTypeUInt64>(),
    };

    Strings element_names
    {
        "item",
        "count",
        "error",
    };

    auto tuple_type = std::make_shared<DataTypeTuple>(std::move(element_types), std::move(element_names));
    return std::make_shared<DataTypeArray>(tuple_type);
}
bool allocatesMemoryInArena() const override
{
@ -247,14 +339,34 @@ public:
{
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
const typename State::Set & set = this->data(place).value;
auto result_vec = set.topK(threshold);
size_t size = result_vec.size();
offsets_to.push_back(offsets_to.back() + size);
IColumn & data_to = arr_to.getData();
auto result_vec = this->data(place).value.topK(threshold);
offsets_to.push_back(offsets_to.back() + result_vec.size());
if (include_counts)
{
auto & column_tuple = assert_cast<ColumnTuple &>(data_to);
IColumn & column_key = column_tuple.getColumn(0);
IColumn & column_count = column_tuple.getColumn(1);
IColumn & column_error = column_tuple.getColumn(2);
for (auto &elem : result_vec)
{
column_count.insert(elem.count);
column_error.insert(elem.error);
deserializeAndInsert<is_plain_column>(elem.key, column_key);
}
} else
{
for (auto & elem : result_vec)
{
deserializeAndInsert<is_plain_column>(elem.key, data_to);
}
}
}
};
@ -265,13 +377,14 @@ class AggregateFunctionTopKDate : public AggregateFunctionTopK<DataTypeDate::Fie
public:
using AggregateFunctionTopK<DataTypeDate::FieldType, is_weighted>::AggregateFunctionTopK;
AggregateFunctionTopKDate(UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params)
AggregateFunctionTopKDate(UInt64 threshold_, UInt64 reserved_, bool include_counts_, bool is_approx_top_k_, const DataTypes & argument_types_, const Array & params)
: AggregateFunctionTopK<DataTypeDate::FieldType, is_weighted>(
threshold_,
load_factor,
reserved_,
include_counts_,
is_approx_top_k_,
argument_types_,
params,
std::make_shared<DataTypeArray>(std::make_shared<DataTypeDate>()))
params)
{}
};
@ -281,13 +394,14 @@ class AggregateFunctionTopKDateTime : public AggregateFunctionTopK<DataTypeDateT
public:
using AggregateFunctionTopK<DataTypeDateTime::FieldType, is_weighted>::AggregateFunctionTopK;
AggregateFunctionTopKDateTime(UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params)
AggregateFunctionTopKDateTime(UInt64 threshold_, UInt64 reserved_, bool include_counts_, bool is_approx_top_k_, const DataTypes & argument_types_, const Array & params)
: AggregateFunctionTopK<DataTypeDateTime::FieldType, is_weighted>(
threshold_,
load_factor,
reserved_,
include_counts_,
is_approx_top_k_,
argument_types_,
params,
std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>()))
params)
{}
};
@ -297,40 +411,41 @@ class AggregateFunctionTopKIPv4 : public AggregateFunctionTopK<DataTypeIPv4::Fie
public:
using AggregateFunctionTopK<DataTypeIPv4::FieldType, is_weighted>::AggregateFunctionTopK;
AggregateFunctionTopKIPv4(UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params)
AggregateFunctionTopKIPv4(UInt64 threshold_, UInt64 reserved_, bool include_counts_, bool is_approx_top_k_, const DataTypes & argument_types_, const Array & params)
: AggregateFunctionTopK<DataTypeIPv4::FieldType, is_weighted>(
threshold_,
load_factor,
reserved_,
include_counts_,
is_approx_top_k_,
argument_types_,
params,
std::make_shared<DataTypeArray>(std::make_shared<DataTypeIPv4>()))
params)
{}
};
template <bool is_weighted>
IAggregateFunction * createWithExtraTypes(const DataTypes & argument_types, UInt64 threshold, UInt64 load_factor, const Array & params)
IAggregateFunction * createWithExtraTypes(const DataTypes & argument_types, UInt64 threshold, UInt64 reserved, bool include_counts, bool is_approx_top_k, const Array & params)
{
if (argument_types.empty())
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Got empty arguments list");
WhichDataType which(argument_types[0]);
if (which.idx == TypeIndex::Date)
return new AggregateFunctionTopKDate<is_weighted>(threshold, load_factor, argument_types, params);
return new AggregateFunctionTopKDate<is_weighted>(threshold, reserved, include_counts, is_approx_top_k, argument_types, params);
if (which.idx == TypeIndex::DateTime)
return new AggregateFunctionTopKDateTime<is_weighted>(threshold, load_factor, argument_types, params);
return new AggregateFunctionTopKDateTime<is_weighted>(threshold, reserved, include_counts, is_approx_top_k, argument_types, params);
if (which.idx == TypeIndex::IPv4)
return new AggregateFunctionTopKIPv4<is_weighted>(threshold, load_factor, argument_types, params);
return new AggregateFunctionTopKIPv4<is_weighted>(threshold, reserved, include_counts, is_approx_top_k, argument_types, params);
/// Check that we can use plain version of AggregateFunctionTopKGeneric
if (argument_types[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
return new AggregateFunctionTopKGeneric<true, is_weighted>(threshold, load_factor, argument_types, params);
return new AggregateFunctionTopKGeneric<true, is_weighted>(threshold, reserved, include_counts, is_approx_top_k, argument_types, params);
else
return new AggregateFunctionTopKGeneric<false, is_weighted>(threshold, load_factor, argument_types, params);
return new AggregateFunctionTopKGeneric<false, is_weighted>(threshold, reserved, include_counts, is_approx_top_k, argument_types, params);
}
template <bool is_weighted>
template <bool is_weighted, bool is_approx_top_k>
AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
if (!is_weighted)
@ -346,14 +461,27 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const
UInt64 threshold = 10; /// default values
UInt64 load_factor = 3;
bool include_counts = is_approx_top_k;
UInt64 reserved = threshold * load_factor;
if (!params.empty())
{
if (params.size() > 2)
if (params.size() > 3)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Aggregate function '{}' requires two parameters or less", name);
"Aggregate function '{}' requires three parameters or less", name);
if (params.size() == 2)
threshold = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);
if (params.size() >= 2)
{
if (is_approx_top_k)
{
reserved = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[1]);
if (reserved < 1)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"Too small parameter 'reserved' for aggregate function '{}' (got {}, minimum is 1)", name, reserved);
} else
{
load_factor = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[1]);
@ -361,25 +489,37 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"Too small parameter 'load_factor' for aggregate function '{}' (got {}, minimum is 1)", name, load_factor);
}
}
threshold = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);
if (params.size() == 3)
{
String option = params.at(2).safeGet<String>();
if (threshold > DB::TOP_K_MAX_SIZE || load_factor > DB::TOP_K_MAX_SIZE || threshold * load_factor > DB::TOP_K_MAX_SIZE)
throw Exception(
ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"Too large parameter(s) for aggregate function '{}' (maximum is {})",
name,
toString(DB::TOP_K_MAX_SIZE));
if (option == "counts")
include_counts = true;
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} doesn't support a parameter: {}", name, option);
if (threshold == 0)
}
if (!is_approx_top_k)
{
reserved = threshold * load_factor;
}
if (reserved > DB::TOP_K_MAX_SIZE || load_factor > DB::TOP_K_MAX_SIZE || threshold > DB::TOP_K_MAX_SIZE)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"Too large parameter(s) for aggregate function '{}' (maximum is {})", name, toString(TOP_K_MAX_SIZE));
if (threshold == 0 || reserved == 0)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Parameter 0 is illegal for aggregate function '{}'", name);
}
AggregateFunctionPtr res(createWithNumericType<AggregateFunctionTopK, is_weighted>(
*argument_types[0], threshold, load_factor, argument_types, params));
*argument_types[0], threshold, reserved, include_counts, is_approx_top_k, argument_types, params));
if (!res)
res = AggregateFunctionPtr(createWithExtraTypes<is_weighted>(argument_types, threshold, load_factor, params));
res = AggregateFunctionPtr(createWithExtraTypes<is_weighted>(argument_types, threshold, reserved, include_counts, is_approx_top_k, params));
if (!res)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
@ -393,8 +533,11 @@ void registerAggregateFunctionTopK(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
factory.registerFunction("topK", { createAggregateFunctionTopK<false>, properties });
factory.registerFunction("topKWeighted", { createAggregateFunctionTopK<true>, properties });
factory.registerFunction("topK", { createAggregateFunctionTopK<false, false>, properties });
factory.registerFunction("topKWeighted", { createAggregateFunctionTopK<true, false>, properties });
factory.registerFunction("approx_top_k", { createAggregateFunctionTopK<false, true>, properties }, AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("approx_top_sum", { createAggregateFunctionTopK<true, true>, properties }, AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("approx_top_count", "approx_top_k", AggregateFunctionFactory::CaseInsensitive);
}
}

View File

@ -0,0 +1,236 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/SingleValueData.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/IDataType.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int CORRUPTED_DATA;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int LOGICAL_ERROR;
}
namespace
{
/// State of argMin/argMax: the tracked extreme value (`value`, of the statically known
/// ValueType) together with the argument to return for it (`result`, whose concrete type is
/// chosen at runtime from a TypeIndex).
template <class ValueType>
struct AggregateFunctionArgMinMaxData
{
private:
    SingleValueDataBaseMemoryBlock result_data;
    ValueType value_data;

public:
    /// A state cannot be created without a result type: this constructor always throws LOGICAL_ERROR.
    [[noreturn]] explicit AggregateFunctionArgMinMaxData()
    {
        throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionArgMinMaxData initialized empty");
    }

    /// Initializes result_data for the given result type via generateSingleValueFromTypeIndex.
    explicit AggregateFunctionArgMinMaxData(TypeIndex result_type) : value_data()
    {
        generateSingleValueFromTypeIndex(result_type, result_data);
    }

    /// Explicitly runs the destructor of the object held in result_data.
    ~AggregateFunctionArgMinMaxData()
    {
        result().~SingleValueDataBase();
    }

    SingleValueDataBase & result() { return result_data.get(); }
    const SingleValueDataBase & result() const { return result_data.get(); }

    ValueType & value() { return value_data; }
    const ValueType & value() const { return value_data; }
};
/// Compile-time guard: the state instantiated for an Int8 value must not be larger than
/// twice SingleValueDataBase::MAX_STORAGE_SIZE.
static_assert(
sizeof(AggregateFunctionArgMinMaxData<Int8>) <= 2 * SingleValueDataBase::MAX_STORAGE_SIZE,
"Incorrect size of AggregateFunctionArgMinMaxData struct");
/// Returns the first arg value found for the minimum/maximum value. Example: argMin(arg, value).
///
/// columns[0] is the argument that is returned, columns[1] is the value being minimized
/// (isMin == true) or maximized (isMin == false).
template <typename ValueData, bool isMin>
class AggregateFunctionArgMinMax final
    : public IAggregateFunctionDataHelper<AggregateFunctionArgMinMaxData<ValueData>, AggregateFunctionArgMinMax<ValueData, isMin>>
{
private:
    using Base = IAggregateFunctionDataHelper<AggregateFunctionArgMinMaxData<ValueData>, AggregateFunctionArgMinMax<ValueData, isMin>>;

    /// Refers to the second entry of this object's own argument_types.
    const DataTypePtr & type_val;
    const SerializationPtr serialization_res;
    const SerializationPtr serialization_val;
    /// Type index of the first argument, needed to construct the state in create().
    const TypeIndex result_type_index;

public:
    /// Throws ILLEGAL_TYPE_OF_ARGUMENT when the type of the second argument is not comparable.
    explicit AggregateFunctionArgMinMax(const DataTypes & argument_types_)
        : Base(argument_types_, {}, argument_types_[0])
        , type_val(this->argument_types[1])
        , serialization_res(this->argument_types[0]->getDefaultSerialization())
        , serialization_val(this->argument_types[1]->getDefaultSerialization())
        , result_type_index(WhichDataType(this->argument_types[0]).idx)
    {
        if (type_val->isComparable())
            return;

        throw Exception(
            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
            "Illegal type {} of second argument of aggregate function {} because the values of that data type are not comparable",
            type_val->getName(),
            getName());
    }

    /// Constructs the state in place for the result's type.
    void create(AggregateDataPtr __restrict place) const override /// NOLINT
    {
        new (place) AggregateFunctionArgMinMaxData<ValueData>(result_type_index);
    }

    String getName() const override
    {
        return isMin ? "argMin" : "argMax";
    }

    /// Updates the tracked value from row `row_num`; the result is replaced only when the
    /// value actually changed.
    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        auto & state = this->data(place);

        bool value_changed = false;
        if constexpr (isMin)
            value_changed = state.value().setIfSmaller(*columns[1], row_num, arena);
        else
            value_changed = state.value().setIfGreater(*columns[1], row_num, arena);

        if (value_changed)
            state.result().set(*columns[0], row_num, arena);
    }

    /// The count is ignored: row 0 is added once.
    void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override
    {
        add(place, columns, 0, arena);
    }

    /// Finds the candidate row of the batch first and then adds only that single row.
    void addBatchSinglePlace(
        size_t row_begin,
        size_t row_end,
        AggregateDataPtr __restrict place,
        const IColumn ** __restrict columns,
        Arena * arena,
        ssize_t if_argument_pos) const override
    {
        auto & tracked_value = this->data(place).value();
        std::optional<size_t> best_row;

        if (if_argument_pos < 0)
        {
            if constexpr (isMin)
                best_row = tracked_value.getSmallestIndex(*columns[1], row_begin, row_end);
            else
                best_row = tracked_value.getGreatestIndex(*columns[1], row_begin, row_end);
        }
        else
        {
            const auto & if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
            if constexpr (isMin)
                best_row = tracked_value.getSmallestIndexNotNullIf(*columns[1], nullptr, if_flags.data(), row_begin, row_end);
            else
                best_row = tracked_value.getGreatestIndexNotNullIf(*columns[1], nullptr, if_flags.data(), row_begin, row_end);
        }

        if (best_row.has_value())
            add(place, columns, *best_row, arena);
    }

    /// Same as addBatchSinglePlace, but the null map is passed on to the index search.
    void addBatchSinglePlaceNotNull(
        size_t row_begin,
        size_t row_end,
        AggregateDataPtr __restrict place,
        const IColumn ** __restrict columns,
        const UInt8 * __restrict null_map,
        Arena * arena,
        ssize_t if_argument_pos) const override
    {
        auto & tracked_value = this->data(place).value();
        std::optional<size_t> best_row;

        if (if_argument_pos < 0)
        {
            if constexpr (isMin)
                best_row = tracked_value.getSmallestIndexNotNullIf(*columns[1], null_map, nullptr, row_begin, row_end);
            else
                best_row = tracked_value.getGreatestIndexNotNullIf(*columns[1], null_map, nullptr, row_begin, row_end);
        }
        else
        {
            const auto & if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
            if constexpr (isMin)
                best_row = tracked_value.getSmallestIndexNotNullIf(*columns[1], null_map, if_flags.data(), row_begin, row_end);
            else
                best_row = tracked_value.getGreatestIndexNotNullIf(*columns[1], null_map, if_flags.data(), row_begin, row_end);
        }

        if (best_row.has_value())
            add(place, columns, *best_row, arena);
    }

    /// Takes over the other state's result only when its value replaced ours.
    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        auto & state = this->data(place);
        const auto & other = this->data(rhs);

        bool value_changed = false;
        if constexpr (isMin)
            value_changed = state.value().setIfSmaller(other.value(), arena);
        else
            value_changed = state.value().setIfGreater(other.value(), arena);

        if (value_changed)
            state.result().set(other.result(), arena);
    }

    /// Written order: result first, then value.
    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        const auto & state = this->data(place);
        state.result().write(buf, *serialization_res);
        state.value().write(buf, *serialization_val);
    }

    /// Reads result and value in the order written by serialize() and rejects a state in which
    /// only one of the two is present.
    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        auto & state = this->data(place);
        state.result().read(buf, *serialization_res, arena);
        state.value().read(buf, *serialization_val, arena);

        if (unlikely(state.value().has() != state.result().has()))
            throw Exception(
                ErrorCodes::CORRUPTED_DATA,
                "Invalid state of the aggregate function {}: has_value ({}) != has_result ({})",
                getName(),
                state.value().has(),
                state.result().has());
    }

    bool allocatesMemoryInArena() const override
    {
        return singleValueTypeAllocatesMemoryInArena(result_type_index) || ValueData::allocatesMemoryInArena();
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        const auto & state = this->data(place);
        state.result().insertResultInto(to);
    }
};
/// Factory callback shared by `argMin` (isMin == true) and `argMax` (isMin == false).
/// Delegates construction to createAggregateFunctionSingleValue in its non-unary form and wraps
/// the result in AggregateFunctionPtr.
template <bool isMin>
AggregateFunctionPtr createAggregateFunctionArgMinMax(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
    AggregateFunctionPtr function(
        createAggregateFunctionSingleValue<AggregateFunctionArgMinMax, /* unary */ false, isMin>(
            name, argument_types, parameters, settings));
    return function;
}
}
/// Registers `argMin` and `argMax` in the aggregate function factory.
/// Both are flagged as order dependent and as not returning a default when only NULLs are seen.
void registerAggregateFunctionsArgMinArgMax(AggregateFunctionFactory & factory)
{
    AggregateFunctionProperties order_dependent = {.returns_default_when_only_null = false, .is_order_dependent = true};

    factory.registerFunction("argMin", {createAggregateFunctionArgMinMax<true>, order_dependent});
    factory.registerFunction("argMax", {createAggregateFunctionArgMinMax<false>, order_dependent});
}
}

View File

@ -0,0 +1,202 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/SingleValueData.h>
#include <Common/Concepts.h>
#include <Common/findExtreme.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NOT_IMPLEMENTED;
}
namespace
{
/// Implementation of the `min` and `max` aggregate functions.
/// `Data` is the state type holding the current extreme value; `isMin` selects `min` (true) or `max` (false).
template <typename Data, bool isMin>
class AggregateFunctionMinMax final : public IAggregateFunctionDataHelper<Data, AggregateFunctionMinMax<Data, isMin>>
{
private:
    /// Default serialization of the result type, handed to the state when it is written or read.
    SerializationPtr serialization;

public:
    /// The result type is the type of the first (only) argument; it has to be comparable.
    explicit AggregateFunctionMinMax(const DataTypes & argument_types_)
        : IAggregateFunctionDataHelper<Data, AggregateFunctionMinMax<Data, isMin>>(argument_types_, {}, argument_types_[0])
        , serialization(this->result_type->getDefaultSerialization())
    {
        if (this->result_type->isComparable())
            return;

        throw Exception(
            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
            "Illegal type {} of argument of aggregate function {} because the values of that data type are not comparable",
            this->result_type->getName(),
            getName());
    }

    String getName() const override { return isMin ? "min" : "max"; }

    /// Offers a single row to the state; the state keeps it only if it improves the current extreme.
    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        auto & state = this->data(place);
        const IColumn & values = *columns[0];

        if constexpr (isMin)
        {
            state.setIfSmaller(values, row_num, arena);
        }
        else
        {
            state.setIfGreater(values, row_num, arena);
        }
    }

    /// The number of default rows is ignored: row 0 is added once.
    void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override
    {
        add(place, columns, 0, arena);
    }

    /// Batch update over rows [row_begin, row_end), optionally filtered by the UInt8 column at `if_argument_pos`.
    void addBatchSinglePlace(
        size_t row_begin,
        size_t row_end,
        AggregateDataPtr __restrict place,
        const IColumn ** __restrict columns,
        Arena * arena,
        ssize_t if_argument_pos) const override
    {
        auto & state = this->data(place);
        const IColumn & values = *columns[0];

        /// No filter column: take the plain batch path.
        if (if_argument_pos < 0)
        {
            if constexpr (isMin)
                state.setSmallest(values, row_begin, row_end, arena);
            else
                state.setGreatest(values, row_begin, row_end, arena);
            return;
        }

        const auto & if_map = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
        if constexpr (isMin)
            state.setSmallestNotNullIf(values, nullptr, if_map.data(), row_begin, row_end, arena);
        else
            state.setGreatestNotNullIf(values, nullptr, if_map.data(), row_begin, row_end, arena);
    }

    /// Same as addBatchSinglePlace, but additionally receives a null map for the argument column.
    void addBatchSinglePlaceNotNull(
        size_t row_begin,
        size_t row_end,
        AggregateDataPtr __restrict place,
        const IColumn ** __restrict columns,
        const UInt8 * __restrict null_map,
        Arena * arena,
        ssize_t if_argument_pos) const override
    {
        /// The filter map is passed as nullptr when there is no `if` argument.
        const UInt8 * if_flags = nullptr;
        if (if_argument_pos >= 0)
            if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData().data();

        auto & state = this->data(place);
        const IColumn & values = *columns[0];

        if constexpr (isMin)
            state.setSmallestNotNullIf(values, null_map, if_flags, row_begin, row_end, arena);
        else
            state.setGreatestNotNullIf(values, null_map, if_flags, row_begin, row_end, arena);
    }

    /// Folds the state at `rhs` into the state at `place`.
    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        auto & destination = this->data(place);
        const auto & source = this->data(rhs);

        if constexpr (isMin)
        {
            destination.setIfSmaller(source, arena);
        }
        else
        {
            destination.setIfGreater(source, arena);
        }
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        const auto & state = this->data(place);
        state.write(buf, *serialization);
    }

    void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        auto & state = this->data(place);
        state.read(buf, *serialization, arena);
    }

    bool allocatesMemoryInArena() const override
    {
        return Data::allocatesMemoryInArena();
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        auto & state = this->data(place);
        state.insertResultInto(to);
    }

#if USE_EMBEDDED_COMPILER
    /// JIT support is delegated to `Data`; the `Data::compile*` members are only referenced when `Data::is_compilable` is true.
    bool isCompilable() const override
    {
        if constexpr (Data::is_compilable)
            return Data::isCompilable(*this->argument_types[0]);
        else
            return false;
    }

    void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
    {
        if constexpr (!Data::is_compilable)
            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
        else
            Data::compileCreate(builder, aggregate_data_ptr);
    }

    void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
    {
        if constexpr (!Data::is_compilable)
            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
        else if constexpr (isMin)
            Data::compileMin(builder, aggregate_data_ptr, arguments[0].value);
        else
            Data::compileMax(builder, aggregate_data_ptr, arguments[0].value);
    }

    void
    compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
    {
        if constexpr (!Data::is_compilable)
            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
        else if constexpr (isMin)
            Data::compileMinMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
        else
            Data::compileMaxMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
    }

    llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
    {
        if constexpr (!Data::is_compilable)
            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
        else
            return Data::compileGetResult(builder, aggregate_data_ptr);
    }
#endif
};
/// Factory callback for `min` (isMin = true) and `max` (isMin = false):
/// forwards to createAggregateFunctionSingleValue and wraps what it returns into an AggregateFunctionPtr.
template <bool isMin>
AggregateFunctionPtr createAggregateFunctionMinMax(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
    AggregateFunctionPtr function(
        createAggregateFunctionSingleValue<AggregateFunctionMinMax, /* unary */ true, isMin>(name, argument_types, parameters, settings));
    return function;
}
}
/// Registers the `min` and `max` aggregate functions; both names are registered as case-insensitive.
void registerAggregateFunctionsMinMax(AggregateFunctionFactory & factory)
{
    const auto case_sensitiveness = AggregateFunctionFactory::CaseInsensitive;

    factory.registerFunction("min", createAggregateFunctionMinMax<true>, case_sensitiveness);
    factory.registerFunction("max", createAggregateFunctionMinMax<false>, case_sensitiveness);
}
}

View File

@ -1,93 +0,0 @@
#include "AggregateFunctionArgMinMax.h"
#include "AggregateFunctionCombinatorFactory.h"
#include <AggregateFunctions/AggregateFunctionMinMaxAny.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeString.h>
namespace DB
{
/// Error codes used in this file; they are only declared here (`extern`), the definitions live elsewhere.
namespace ErrorCodes
{
/// Thrown by transformArguments when the combinator is applied to an empty argument list.
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
namespace
{
/// Combinator that wraps a nested aggregate function into AggregateFunctionArgMinMax.
/// `Data` is a template that wraps a single-value state and provides the combinator name via `name()`.
/// The LAST argument is treated as the key; the remaining arguments go to the nested function.
template <template <typename> class Data>
class AggregateFunctionCombinatorArgMinMax final : public IAggregateFunctionCombinator
{
public:
/// The name does not depend on the wrapped state type, so any instantiation of `Data` can be asked for it.
String getName() const override { return Data<SingleValueDataGeneric>::name(); }
/// Returns the argument types of the nested function: all arguments except the last one (the key).
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH if there are no arguments at all.
DataTypes transformArguments(const DataTypes & arguments) const override
{
if (arguments.empty())
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Incorrect number of arguments for aggregate function with {} suffix",
getName());
return DataTypes(arguments.begin(), arguments.end() - 1);
}
/// Picks the key state by the type of the last argument and creates the combined function:
/// SingleValueDataFixed<T> for the types listed below, SingleValueDataString for String
/// and SingleValueDataGeneric for everything else.
AggregateFunctionPtr transformAggregateFunction(
const AggregateFunctionPtr & nested_function,
const AggregateFunctionProperties &,
const DataTypes & arguments,
const Array & params) const override
{
const DataTypePtr & argument_type = arguments.back();
WhichDataType which(argument_type);
/// One `if (...) return ...` per type enumerated by FOR_NUMERIC_TYPES (the macro is defined outside of this file).
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) \
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataFixed<TYPE>>>>(nested_function, arguments, params); /// NOLINT
FOR_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
/// Date and DateTime are stored using the field type of their data type.
if (which.idx == TypeIndex::Date)
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataFixed<DataTypeDate::FieldType>>>>(
nested_function, arguments, params);
if (which.idx == TypeIndex::DateTime)
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataFixed<DataTypeDateTime::FieldType>>>>(
nested_function, arguments, params);
if (which.idx == TypeIndex::DateTime64)
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataFixed<DateTime64>>>>(nested_function, arguments, params);
if (which.idx == TypeIndex::Decimal32)
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataFixed<Decimal32>>>>(nested_function, arguments, params);
if (which.idx == TypeIndex::Decimal64)
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataFixed<Decimal64>>>>(nested_function, arguments, params);
if (which.idx == TypeIndex::Decimal128)
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataFixed<Decimal128>>>>(nested_function, arguments, params);
if (which.idx == TypeIndex::Decimal256)
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataFixed<Decimal256>>>>(nested_function, arguments, params);
if (which.idx == TypeIndex::String)
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataString>>>(nested_function, arguments, params);
/// Fallback for every type not matched above.
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataGeneric>>>(nested_function, arguments, params);
}
};
/// Key state of the `ArgMin` combinator: AggregateFunctionMinData with the capitalized
/// name that is used as the combinator suffix.
template <typename Data>
struct AggregateFunctionArgMinDataCapitalized : AggregateFunctionMinData<Data>
{
    static const char * name()
    {
        return "ArgMin";
    }
};
/// Key state of the `ArgMax` combinator: AggregateFunctionMaxData with the capitalized
/// name that is used as the combinator suffix.
template <typename Data>
struct AggregateFunctionArgMaxDataCapitalized : AggregateFunctionMaxData<Data>
{
    static const char * name()
    {
        return "ArgMax";
    }
};
}
/// Registers the `ArgMin` and `ArgMax` combinators in the combinator factory.
void registerAggregateFunctionCombinatorMinMax(AggregateFunctionCombinatorFactory & factory)
{
    using ArgMinCombinator = AggregateFunctionCombinatorArgMinMax<AggregateFunctionArgMinDataCapitalized>;
    using ArgMaxCombinator = AggregateFunctionCombinatorArgMinMax<AggregateFunctionArgMaxDataCapitalized>;

    factory.registerCombinator(std::make_shared<ArgMinCombinator>());
    factory.registerCombinator(std::make_shared<ArgMaxCombinator>());
}
}

View File

@ -1,111 +0,0 @@
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
#include <type_traits>
namespace DB
{
/// Aggregate function produced by the `-ArgMin` / `-ArgMax` combinator.
///
/// The state consists of the nested function's state followed by a `Key` state that tracks the best
/// key seen so far (the key is the last argument). Rows whose key is better than the current one
/// restart the nested state; rows whose key is equal to it are accumulated into the nested state.
///
/// `Key` must provide: name(), allocatesMemoryInArena(), changeIfBetter(), isEqualTo(), write(), read().
template <typename Key>
class AggregateFunctionArgMinMax final : public IAggregateFunctionHelper<AggregateFunctionArgMinMax<Key>>
{
private:
    AggregateFunctionPtr nested_function;
    /// Default serialization of the key argument's type, used to write/read the key state.
    SerializationPtr serialization;
    /// Index of the key column among the arguments (always the last one).
    size_t key_col;
    /// Offset of the key state inside the combined state: size of the nested state rounded up to alignof(Key).
    size_t key_offset;

    Key & key(AggregateDataPtr __restrict place) const { return *reinterpret_cast<Key *>(place + key_offset); }
    const Key & key(ConstAggregateDataPtr __restrict place) const { return *reinterpret_cast<const Key *>(place + key_offset); }

public:
    AggregateFunctionArgMinMax(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params)
        : IAggregateFunctionHelper<AggregateFunctionArgMinMax<Key>>{arguments, params, nested_function_->getResultType()}
        , nested_function{nested_function_}
        , serialization(arguments.back()->getDefaultSerialization())
        , key_col{arguments.size() - 1}
        , key_offset{(nested_function->sizeOfData() + alignof(Key) - 1) / alignof(Key) * alignof(Key)}
    {
    }

    String getName() const override { return nested_function->getName() + Key::name(); }

    bool isState() const override { return nested_function->isState(); }

    bool isVersioned() const override { return nested_function->isVersioned(); }

    size_t getVersionFromRevision(size_t revision) const override { return nested_function->getVersionFromRevision(revision); }

    size_t getDefaultVersion() const override { return nested_function->getDefaultVersion(); }

    bool allocatesMemoryInArena() const override { return nested_function->allocatesMemoryInArena() || Key::allocatesMemoryInArena(); }

    /// The combined state is trivially destructible only if BOTH parts are:
    /// the key state is constructed in create() and therefore has to be taken into account too.
    bool hasTrivialDestructor() const override
    {
        return nested_function->hasTrivialDestructor() && std::is_trivially_destructible_v<Key>;
    }

    size_t sizeOfData() const override { return key_offset + sizeof(Key); }

    /// The key state lives at a multiple of alignof(Key) from the start of the state,
    /// so the state itself must be aligned at least as strictly as Key.
    size_t alignOfData() const override
    {
        const size_t nested_alignment = nested_function->alignOfData();
        return nested_alignment > alignof(Key) ? nested_alignment : alignof(Key);
    }

    void create(AggregateDataPtr __restrict place) const override
    {
        nested_function->create(place);
        new (place + key_offset) Key;
    }

    /// Destroys the key state (constructed by placement new in create()) together with the nested state.
    void destroy(AggregateDataPtr __restrict place) const noexcept override
    {
        key(place).~Key();
        nested_function->destroy(place);
    }

    void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override
    {
        key(place).~Key();
        nested_function->destroyUpToState(place);
    }

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        if (key(place).changeIfBetter(*columns[key_col], row_num, arena))
        {
            /// A strictly better key: everything accumulated for the previous key is discarded.
            nested_function->destroy(place);
            nested_function->create(place);
            nested_function->add(place, columns, row_num, arena);
        }
        else if (key(place).isEqualTo(*columns[key_col], row_num))
        {
            /// Same key as the current best one: keep accumulating.
            nested_function->add(place, columns, row_num, arena);
        }
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        if (key(place).changeIfBetter(key(rhs), arena))
        {
            /// rhs has a strictly better key: restart the nested state from rhs.
            nested_function->destroy(place);
            nested_function->create(place);
            nested_function->merge(place, rhs, arena);
        }
        else if (key(place).isEqualTo(key(rhs)))
        {
            nested_function->merge(place, rhs, arena);
        }
    }

    /// Serialized form: nested state first, then the key state.
    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> version) const override
    {
        nested_function->serialize(place, buf, version);
        key(place).write(buf, *serialization);
    }

    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> version, Arena * arena) const override
    {
        nested_function->deserialize(place, buf, version, arena);
        key(place).read(buf, *serialization, arena);
    }

    /// The result is the result of the nested function; the key itself is not returned.
    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
    {
        nested_function->insertResultInto(place, to, arena);
    }

    void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
    {
        nested_function->insertMergeResultInto(place, to, arena);
    }

    AggregateFunctionPtr getNestedFunction() const override { return nested_function; }
};
}

View File

@ -0,0 +1,212 @@
#include <AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.h>
#include <AggregateFunctions/SingleValueData.h>
namespace DB
{
/// Error codes used in this file; they are only declared here (`extern`), the definitions live elsewhere.
namespace ErrorCodes
{
/// Thrown when the key argument of the combinator has a non-comparable type.
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
/// Thrown by transformArguments when the combinator is applied to an empty argument list.
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
namespace
{
/// Key state of the `-ArgMin` / `-ArgMax` combinator.
/// Owns a SingleValueDataBase object that lives inside `v_data`: the constructor asks
/// generateSingleValueFromTypeIndex to set it up for the given type, the destructor destroys it explicitly.
struct AggregateFunctionCombinatorArgMinArgMaxData
{
private:
    SingleValueDataBaseMemoryBlock v_data;

public:
    explicit AggregateFunctionCombinatorArgMinArgMaxData(TypeIndex value_type) { generateSingleValueFromTypeIndex(value_type, v_data); }

    ~AggregateFunctionCombinatorArgMinArgMaxData() { data().~SingleValueDataBase(); }

    /// The object inside `v_data` is destroyed manually, so a copy of the raw block would lead to
    /// two destructor calls for what is logically one object. The state is only ever constructed
    /// in place, hence copying and moving are forbidden.
    AggregateFunctionCombinatorArgMinArgMaxData(const AggregateFunctionCombinatorArgMinArgMaxData &) = delete;
    AggregateFunctionCombinatorArgMinArgMaxData & operator=(const AggregateFunctionCombinatorArgMinArgMaxData &) = delete;
    AggregateFunctionCombinatorArgMinArgMaxData(AggregateFunctionCombinatorArgMinArgMaxData &&) = delete;
    AggregateFunctionCombinatorArgMinArgMaxData & operator=(AggregateFunctionCombinatorArgMinArgMaxData &&) = delete;

    SingleValueDataBase & data() { return v_data.get(); }
    const SingleValueDataBase & data() const { return v_data.get(); }
};
/// Aggregate function produced by the `-ArgMin` (isMin = true) / `-ArgMax` (isMin = false) combinator.
///
/// The state consists of the nested function's state followed by a key state that tracks the best
/// key seen so far (the key is the last argument). Rows whose key is better than the current one
/// restart the nested state; rows whose key is equal to it are accumulated into the nested state.
template <bool isMin>
class AggregateFunctionCombinatorArgMinArgMax final : public IAggregateFunctionHelper<AggregateFunctionCombinatorArgMinArgMax<isMin>>
{
    using Key = AggregateFunctionCombinatorArgMinArgMaxData;

private:
    AggregateFunctionPtr nested_function;
    /// Default serialization of the key argument's type, used to write/read the key state.
    SerializationPtr serialization;
    /// Index of the key column among the arguments (always the last one).
    const size_t key_col;
    /// Offset of the key state inside the combined state: size of the nested state rounded up to alignof(Key).
    const size_t key_offset;
    /// Type index of the key argument; selects the concrete single-value implementation in create().
    const TypeIndex key_type_index;

    /// Suffix that this combinator appends to the name of the nested function.
    static const char * combinatorSuffix()
    {
        if constexpr (isMin)
            return "ArgMin";
        else
            return "ArgMax";
    }

    AggregateFunctionCombinatorArgMinArgMaxData & data(AggregateDataPtr __restrict place) const /// NOLINT
    {
        return *reinterpret_cast<Key *>(place + key_offset);
    }

    const AggregateFunctionCombinatorArgMinArgMaxData & data(ConstAggregateDataPtr __restrict place) const
    {
        return *reinterpret_cast<const Key *>(place + key_offset);
    }

public:
    /// Throws ILLEGAL_TYPE_OF_ARGUMENT if the key (last) argument is not comparable.
    AggregateFunctionCombinatorArgMinArgMax(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params)
        : IAggregateFunctionHelper<AggregateFunctionCombinatorArgMinArgMax<isMin>>{arguments, params, nested_function_->getResultType()}
        , nested_function{nested_function_}
        , serialization(arguments.back()->getDefaultSerialization())
        , key_col{arguments.size() - 1}
        , key_offset{((nested_function->sizeOfData() + alignof(Key) - 1) / alignof(Key)) * alignof(Key)}
        , key_type_index(WhichDataType(arguments[key_col]).idx)
    {
        if (!arguments[key_col]->isComparable())
            throw Exception(
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Illegal type {} for combinator {} because the values of that data type are not comparable",
                arguments[key_col]->getName(),
                combinatorSuffix());
    }

    /// Full name of the combined function: the nested function's name followed by the combinator suffix
    /// (for example `sumArgMin`). Returning the bare suffix would make differently nested functions
    /// indistinguishable by name.
    String getName() const override { return nested_function->getName() + combinatorSuffix(); }

    bool isState() const override { return nested_function->isState(); }

    bool isVersioned() const override { return nested_function->isVersioned(); }

    size_t getVersionFromRevision(size_t revision) const override { return nested_function->getVersionFromRevision(revision); }

    size_t getDefaultVersion() const override { return nested_function->getDefaultVersion(); }

    bool allocatesMemoryInArena() const override
    {
        return nested_function->allocatesMemoryInArena() || singleValueTypeAllocatesMemoryInArena(key_type_index);
    }

    bool hasTrivialDestructor() const override
    {
        return nested_function->hasTrivialDestructor() && /*false*/ std::is_trivially_destructible_v<SingleValueDataBase>;
    }

    size_t sizeOfData() const override { return key_offset + sizeof(Key); }

    size_t alignOfData() const override { return std::max(nested_function->alignOfData(), alignof(SingleValueDataBaseMemoryBlock)); }

    void create(AggregateDataPtr __restrict place) const override
    {
        nested_function->create(place);
        new (place + key_offset) Key(key_type_index);
    }

    /// Destroys the key state (constructed by placement new in create()) together with the nested state.
    void destroy(AggregateDataPtr __restrict place) const noexcept override
    {
        data(place).~Key();
        nested_function->destroy(place);
    }

    void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override
    {
        data(place).~Key();
        nested_function->destroyUpToState(place);
    }

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        if ((isMin && data(place).data().setIfSmaller(*columns[key_col], row_num, arena))
            || (!isMin && data(place).data().setIfGreater(*columns[key_col], row_num, arena)))
        {
            /// A strictly better key: everything accumulated for the previous key is discarded.
            nested_function->destroy(place);
            nested_function->create(place);
            nested_function->add(place, columns, row_num, arena);
        }
        else if (data(place).data().isEqualTo(*columns[key_col], row_num))
        {
            /// Same key as the current best one: keep accumulating.
            nested_function->add(place, columns, row_num, arena);
        }
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        if ((isMin && data(place).data().setIfSmaller(data(rhs).data(), arena))
            || (!isMin && data(place).data().setIfGreater(data(rhs).data(), arena)))
        {
            /// rhs has a strictly better key: restart the nested state from rhs.
            nested_function->destroy(place);
            nested_function->create(place);
            nested_function->merge(place, rhs, arena);
        }
        else if (data(place).data().isEqualTo(data(rhs).data()))
        {
            nested_function->merge(place, rhs, arena);
        }
    }

    /// Serialized form: nested state first, then the key state.
    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> version) const override
    {
        nested_function->serialize(place, buf, version);
        data(place).data().write(buf, *serialization);
    }

    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> version, Arena * arena) const override
    {
        nested_function->deserialize(place, buf, version, arena);
        data(place).data().read(buf, *serialization, arena);
    }

    /// The result is the result of the nested function; the key itself is not returned.
    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
    {
        nested_function->insertResultInto(place, to, arena);
    }

    void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
    {
        nested_function->insertMergeResultInto(place, to, arena);
    }

    AggregateFunctionPtr getNestedFunction() const override { return nested_function; }
};
/// Combinator for the `ArgMin` (isMin = true) and `ArgMax` (isMin = false) suffixes.
/// The last argument is the key; all preceding arguments are passed to the nested function.
template <bool isMin>
class CombinatorArgMinArgMax final : public IAggregateFunctionCombinator
{
public:
    String getName() const override { return isMin ? "ArgMin" : "ArgMax"; }

    /// Returns the argument types of the nested function, i.e. everything except the trailing key argument.
    DataTypes transformArguments(const DataTypes & arguments) const override
    {
        if (!arguments.empty())
            return DataTypes(arguments.begin(), arguments.end() - 1);

        throw Exception(
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
            "Incorrect number of arguments for aggregate function with {} suffix",
            getName());
    }

    /// Wraps the nested function; the properties argument is not used.
    AggregateFunctionPtr transformAggregateFunction(
        const AggregateFunctionPtr & nested_function,
        const AggregateFunctionProperties &,
        const DataTypes & arguments,
        const Array & params) const override
    {
        using CombinedFunction = AggregateFunctionCombinatorArgMinArgMax<isMin>;
        return std::make_shared<CombinedFunction>(nested_function, arguments, params);
    }
};
}
/// Registers the `ArgMin` and `ArgMax` combinators in the combinator factory.
void registerAggregateFunctionCombinatorsArgMinArgMax(AggregateFunctionCombinatorFactory & factory)
{
    using ArgMinCombinator = CombinatorArgMinArgMax<true>;
    using ArgMaxCombinator = CombinatorArgMinArgMax<false>;

    factory.registerCombinator(std::make_shared<ArgMinCombinator>());
    factory.registerCombinator(std::make_shared<ArgMaxCombinator>());
}
}

View File

@ -43,8 +43,8 @@ template <bool result_is_nullable, bool serialize_flag, typename Derived>
class AggregateFunctionNullBase : public IAggregateFunctionHelper<Derived>
{
protected:
AggregateFunctionPtr nested_function;
size_t prefix_size;
const AggregateFunctionPtr nested_function;
const size_t prefix_size;
/** In addition to data for nested aggregate function, we keep a flag
* indicating - was there at least one non-NULL value accumulated.
@ -55,12 +55,18 @@ protected:
/// Returns the address of the nested function's state inside `place`.
/// When the result is nullable, the nested state starts `prefix_size` bytes after `place`;
/// otherwise it starts at `place` itself.
AggregateDataPtr nestedPlace(AggregateDataPtr __restrict place) const noexcept
{
if constexpr (result_is_nullable)
return place + prefix_size;
else
return place;
}
/// Const overload: returns the address of the nested function's state inside `place`.
/// When the result is nullable, the nested state starts `prefix_size` bytes after `place`;
/// otherwise it starts at `place` itself.
ConstAggregateDataPtr nestedPlace(ConstAggregateDataPtr __restrict place) const noexcept
{
if constexpr (result_is_nullable)
return place + prefix_size;
else
return place;
}
static void initFlag(AggregateDataPtr __restrict place) noexcept
@ -87,11 +93,8 @@ public:
AggregateFunctionNullBase(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params)
: IAggregateFunctionHelper<Derived>(arguments, params, createResultType(nested_function_))
, nested_function{nested_function_}
, prefix_size(result_is_nullable ? nested_function->alignOfData() : 0)
{
if constexpr (result_is_nullable)
prefix_size = nested_function->alignOfData();
else
prefix_size = 0;
}
String getName() const override

View File

@ -1,119 +0,0 @@
#pragma once
#include <AggregateFunctions/AggregateFunctionMinMaxAny.h>
#include <AggregateFunctions/AggregateFunctionArgMinMax.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeString.h>
namespace DB
{
struct Settings;
/// min, max, any, anyLast, anyHeavy, etc...
/// Creates a single-argument aggregate function `AggregateFunctionTemplate<Data<State>>`, where `State`
/// is chosen by the type of the argument: SingleValueDataFixed<T> for the types handled below,
/// SingleValueDataString for String and SingleValueDataGeneric for everything else.
/// `name` and `parameters` are only used for the checks: the function takes no parameters and exactly
/// one argument (see assertNoParameters / assertUnary).
/// Returns a raw pointer to an object allocated with `new`; the caller is responsible for taking ownership.
template <template <typename> class AggregateFunctionTemplate, template <typename, bool...> class Data>
static IAggregateFunction *
createAggregateFunctionSingleValue(const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertNoParameters(name, parameters);
assertUnary(name, argument_types);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);
/// One `if (...) return ...` per type enumerated by FOR_NUMERIC_TYPES (the macro is defined outside of this file).
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) return new AggregateFunctionTemplate<Data<SingleValueDataFixed<TYPE>>>(argument_type); /// NOLINT
FOR_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
/// Date and DateTime are stored using the field type of their data type.
if (which.idx == TypeIndex::Date)
return new AggregateFunctionTemplate<Data<SingleValueDataFixed<DataTypeDate::FieldType>>>(argument_type);
if (which.idx == TypeIndex::DateTime)
return new AggregateFunctionTemplate<Data<SingleValueDataFixed<DataTypeDateTime::FieldType>>>(argument_type);
if (which.idx == TypeIndex::DateTime64)
return new AggregateFunctionTemplate<Data<SingleValueDataFixed<DateTime64>>>(argument_type);
if (which.idx == TypeIndex::Decimal32)
return new AggregateFunctionTemplate<Data<SingleValueDataFixed<Decimal32>>>(argument_type);
if (which.idx == TypeIndex::Decimal64)
return new AggregateFunctionTemplate<Data<SingleValueDataFixed<Decimal64>>>(argument_type);
if (which.idx == TypeIndex::Decimal128)
return new AggregateFunctionTemplate<Data<SingleValueDataFixed<Decimal128>>>(argument_type);
if (which.idx == TypeIndex::Decimal256)
return new AggregateFunctionTemplate<Data<SingleValueDataFixed<Decimal256>>>(argument_type);
if (which.idx == TypeIndex::String)
return new AggregateFunctionTemplate<Data<SingleValueDataString>>(argument_type);
/// Fallback for every type not matched above.
return new AggregateFunctionTemplate<Data<SingleValueDataGeneric>>(argument_type);
}
/// argMin, argMax
/// Second dispatch step of createAggregateFunctionArgMinMax: the state of the result (`ResData`) is
/// already fixed, here the state of the compared value is chosen by `val_type`:
/// SingleValueDataFixed<T> for the types handled below, SingleValueDataString for String and
/// SingleValueDataGeneric for everything else. The value state is wrapped into `MinMaxData`.
/// Returns a raw pointer to an object allocated with `new`; the caller is responsible for taking ownership.
template <template <typename> class MinMaxData, typename ResData>
static IAggregateFunction * createAggregateFunctionArgMinMaxSecond(const DataTypePtr & res_type, const DataTypePtr & val_type)
{
WhichDataType which(val_type);
/// One `if (...) return ...` per type enumerated by FOR_NUMERIC_TYPES (the macro is defined outside of this file).
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) \
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataFixed<TYPE>>>>(res_type, val_type); /// NOLINT
FOR_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
/// Date and DateTime are stored using the field type of their data type.
if (which.idx == TypeIndex::Date)
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataFixed<DataTypeDate::FieldType>>>>(res_type, val_type);
if (which.idx == TypeIndex::DateTime)
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataFixed<DataTypeDateTime::FieldType>>>>(res_type, val_type);
if (which.idx == TypeIndex::DateTime64)
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataFixed<DateTime64>>>>(res_type, val_type);
if (which.idx == TypeIndex::Decimal32)
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataFixed<Decimal32>>>>(res_type, val_type);
if (which.idx == TypeIndex::Decimal64)
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataFixed<Decimal64>>>>(res_type, val_type);
if (which.idx == TypeIndex::Decimal128)
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataFixed<Decimal128>>>>(res_type, val_type);
if (which.idx == TypeIndex::Decimal256)
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataFixed<Decimal256>>>>(res_type, val_type);
if (which.idx == TypeIndex::String)
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataString>>>(res_type, val_type);
/// Fallback for every type not matched above.
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataGeneric>>>(res_type, val_type);
}
/// Creates an argMin / argMax style function of two arguments: the first argument is the result,
/// the second one is the value that is compared.
/// First dispatch step: chooses the state of the result by the type of the first argument
/// (SingleValueDataFixed<T> for the types handled below, SingleValueDataString for String,
/// SingleValueDataGeneric otherwise) and delegates the choice of the value state to
/// createAggregateFunctionArgMinMaxSecond.
/// `name` and `parameters` are only used for the checks: the function takes no parameters and exactly
/// two arguments (see assertNoParameters / assertBinary).
template <template <typename> class MinMaxData>
static IAggregateFunction * createAggregateFunctionArgMinMax(const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertNoParameters(name, parameters);
assertBinary(name, argument_types);
const DataTypePtr & res_type = argument_types[0];
const DataTypePtr & val_type = argument_types[1];
WhichDataType which(res_type);
/// One `if (...) return ...` per type enumerated by FOR_NUMERIC_TYPES (the macro is defined outside of this file).
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) \
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataFixed<TYPE>>(res_type, val_type); /// NOLINT
FOR_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
/// Date and DateTime are stored using the field type of their data type.
if (which.idx == TypeIndex::Date)
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataFixed<DataTypeDate::FieldType>>(res_type, val_type);
if (which.idx == TypeIndex::DateTime)
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataFixed<DataTypeDateTime::FieldType>>(res_type, val_type);
if (which.idx == TypeIndex::DateTime64)
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataFixed<DateTime64>>(res_type, val_type);
if (which.idx == TypeIndex::Decimal32)
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataFixed<Decimal32>>(res_type, val_type);
if (which.idx == TypeIndex::Decimal64)
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataFixed<Decimal64>>(res_type, val_type);
if (which.idx == TypeIndex::Decimal128)
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataFixed<Decimal128>>(res_type, val_type);
if (which.idx == TypeIndex::Decimal256)
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataFixed<Decimal256>>(res_type, val_type);
if (which.idx == TypeIndex::String)
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataString>(res_type, val_type);
/// Fallback for every type not matched above.
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataGeneric>(res_type, val_type);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,394 @@
#pragma once
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Columns/ColumnDecimal.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <base/StringRef.h>
namespace DB
{
class Arena;
class ReadBuffer;
struct Settings;
class WriteBuffer;
/// Common interface of aggregation states that keep a single value out of all the values
/// they were given (min, any, argMax, ...).
/// It is a virtual class on purpose: code that builds on top of it (argMax, SingleValueOrNull)
/// can work with the interface instead of being templated on the concrete state.
struct SingleValueDataBase
{
    /// Upper bound for the size of any subclass (numeric, string, generic).
    /// Composite states rely on it to embed a subclass directly into a reserved 'memory_block':
    /// for example, argMin holds one such block (for the result) while the value stays a template.
    static constexpr UInt32 MAX_STORAGE_SIZE = 64;

    virtual ~SingleValueDataBase() = default;

    /// State inspection, result output and (de)serialization.
    virtual bool has() const = 0;
    virtual void insertResultInto(IColumn &) const = 0;
    virtual void write(WriteBuffer &, const ISerialization &) const = 0;
    virtual void read(ReadBuffer &, const ISerialization &, Arena *) = 0;

    /// Comparison of the stored value with a row of a column or with another state.
    virtual bool isEqualTo(const IColumn & column, size_t row_num) const = 0;
    virtual bool isEqualTo(const SingleValueDataBase &) const = 0;

    /// Unconditional and conditional assignment from a row of a column or from another state.
    virtual void set(const IColumn &, size_t row_num, Arena *) = 0;
    virtual void set(const SingleValueDataBase &, Arena *) = 0;
    virtual bool setIfSmaller(const IColumn &, size_t row_num, Arena *) = 0;
    virtual bool setIfSmaller(const SingleValueDataBase &, Arena *) = 0;
    virtual bool setIfGreater(const IColumn &, size_t row_num, Arena *) = 0;
    virtual bool setIfGreater(const SingleValueDataBase &, Arena *) = 0;

    /// Given a column, sets the internal value to the smallest or greatest value from the column.
    /// Used to implement batch min/max.
    virtual void setSmallest(const IColumn & column, size_t row_begin, size_t row_end, Arena * arena);
    virtual void setGreatest(const IColumn & column, size_t row_begin, size_t row_end, Arena * arena);
    virtual void setSmallestNotNullIf(const IColumn &, const UInt8 * __restrict, const UInt8 * __restrict, size_t, size_t, Arena *);
    virtual void setGreatestNotNullIf(const IColumn &, const UInt8 * __restrict, const UInt8 * __restrict, size_t, size_t, Arena *);

    /// Given a column, returns the index of the smallest or greatest value in it.
    /// Doesn't return anything if the column is empty.
    /// These are used to implement argMin / argMax.
    virtual std::optional<size_t> getSmallestIndex(const IColumn & column, size_t row_begin, size_t row_end) const;
    virtual std::optional<size_t> getGreatestIndex(const IColumn & column, size_t row_begin, size_t row_end) const;
    virtual std::optional<size_t> getSmallestIndexNotNullIf(
        const IColumn & column, const UInt8 * __restrict null_map, const UInt8 * __restrict if_map, size_t row_begin, size_t row_end) const;
    virtual std::optional<size_t> getGreatestIndexNotNullIf(
        const IColumn & column, const UInt8 * __restrict null_map, const UInt8 * __restrict if_map, size_t row_begin, size_t row_end) const;
};
/// X-macro: applies `M` to every type in the list below, one invocation per type.
/// The list covers integers, floats, decimals and DateTime64.
#define FOR_SINGLE_VALUE_NUMERIC_TYPES(M) \
M(UInt8) \
M(UInt16) \
M(UInt32) \
M(UInt64) \
M(UInt128) \
M(UInt256) \
M(Int8) \
M(Int16) \
M(Int32) \
M(Int64) \
M(Int128) \
M(Int256) \
M(Float32) \
M(Float64) \
M(Decimal32) \
M(Decimal64) \
M(Decimal128) \
M(Decimal256) \
M(DateTime64)
/// For numeric values (without inheritance, for performance sensitive functions and JIT)
/// The member functions mirror the interface of SingleValueDataBase, but none of them is virtual.
/// Only declarations are given here; the definitions are not part of this section of the header.
template <typename T>
struct SingleValueDataFixed
{
/// Marks the state as usable by the JIT compile* helpers declared at the end of the struct.
static constexpr bool is_compilable = true;
using Self = SingleValueDataFixed;
using ColVecType = ColumnVectorOrDecimal<T>;
/// The stored value; value-initialized until something is set.
T value = T{};
/// We need to remember if at least one value has been passed.
/// This is necessary for AggregateFunctionIf, merging states, JIT (where simple add is used), etc
bool has_value = false;
bool has() const { return has_value; }
/// Result output and (de)serialization of the state.
void insertResultInto(IColumn & to) const;
void write(WriteBuffer & buf, const ISerialization &) const;
void read(ReadBuffer & buf, const ISerialization &, Arena *);
/// Comparison of the stored value with a row of a column or with another state.
bool isEqualTo(const IColumn & column, size_t index) const;
bool isEqualTo(const Self & to) const;
/// Unconditional assignment from a row of a column or from another state.
void set(const IColumn & column, size_t row_num, Arena *);
void set(const Self & to, Arena *);
/// Conditional assignment from a plain value, another state or a row of a column.
/// NOTE(review): the bool result presumably tells whether the stored value was replaced — confirm against the definitions.
bool setIfSmaller(const T & to);
bool setIfGreater(const T & to);
bool setIfSmaller(const Self & to, Arena * arena);
bool setIfGreater(const Self & to, Arena * arena);
bool setIfSmaller(const IColumn & column, size_t row_num, Arena * arena);
bool setIfGreater(const IColumn & column, size_t row_num, Arena * arena);
/// Batch variants over the rows [row_begin, row_end) of a column, with the same signatures as in SingleValueDataBase.
void setSmallest(const IColumn & column, size_t row_begin, size_t row_end, Arena *);
void setGreatest(const IColumn & column, size_t row_begin, size_t row_end, Arena *);
void setSmallestNotNullIf(
const IColumn & column,
const UInt8 * __restrict null_map,
const UInt8 * __restrict if_map,
size_t row_begin,
size_t row_end,
Arena *);
void setGreatestNotNullIf(
const IColumn & column,
const UInt8 * __restrict null_map,
const UInt8 * __restrict if_map,
size_t row_begin,
size_t row_end,
Arena *);
/// Index lookups with the same signatures as in SingleValueDataBase; they return an optional index.
std::optional<size_t> getSmallestIndex(const IColumn & column, size_t row_begin, size_t row_end) const;
std::optional<size_t> getGreatestIndex(const IColumn & column, size_t row_begin, size_t row_end) const;
std::optional<size_t> getSmallestIndexNotNullIf(
const IColumn & column, const UInt8 * __restrict null_map, const UInt8 * __restrict if_map, size_t row_begin, size_t row_end) const;
std::optional<size_t> getGreatestIndexNotNullIf(
const IColumn & column, const UInt8 * __restrict null_map, const UInt8 * __restrict if_map, size_t row_begin, size_t row_end) const;
/// Fixed-size values are stored inline in the state, no arena is needed.
static bool allocatesMemoryInArena() { return false; }
#if USE_EMBEDDED_COMPILER
/// Byte offsets of the two members inside the state.
/// NOTE(review): presumably used by the helpers below to address the members from generated code — confirm in the definitions.
static constexpr size_t has_value_offset = offsetof(Self, has_value);
static constexpr size_t value_offset = offsetof(Self, value);
static bool isCompilable(const IDataType & type);
/// Accessors for `value` and `has_value` given a pointer to the state.
static llvm::Value * getValuePtrFromAggregateDataPtr(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr);
static llvm::Value * getValueFromAggregateDataPtr(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr);
static llvm::Value * getHasValuePtrFromAggregateDataPtr(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr);
static llvm::Value * getHasValueFromAggregateDataPtr(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr);
/// Code generation for creating the state, producing the result and assigning the value.
static void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr);
static llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr);
static void compileSetValueFromNumber(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check);
static void
compileSetValueFromAggregation(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * aggregate_data_src_ptr);
/// Code generation for any / anyLast: adding a value and merging two states.
static void compileAny(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check);
static void compileAnyMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr);
static void compileAnyLast(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check);
static void
compileAnyLastMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr);
/// Code generation for min / max: templated variants selected by `isMin` plus named entry points for each of them.
template <bool isMin>
static void compileMinMax(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check);
template <bool isMin>
static void
compileMinMaxMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr);
static void compileMin(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check);
static void compileMinMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr);
static void compileMax(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check);
static void compileMaxMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr);
#endif
};
/// For every type listed by FOR_SINGLE_VALUE_NUMERIC_TYPES:
///  - declare an explicit instantiation of SingleValueDataFixed (`extern template`), so that
///    including this header does not instantiate the template implicitly;
///  - check at compile time that the instantiation fits into SingleValueDataBase::MAX_STORAGE_SIZE bytes.
#define DISPATCH(TYPE) \
extern template struct SingleValueDataFixed<TYPE>; \
static_assert( \
sizeof(SingleValueDataFixed<TYPE>) <= SingleValueDataBase::MAX_STORAGE_SIZE, "Incorrect size of SingleValueDataFixed struct");
FOR_SINGLE_VALUE_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
/// For numeric values inheriting from SingleValueDataBase
///
/// Polymorphic counterpart of SingleValueDataFixed<T>: it overrides the SingleValueDataBase
/// interface and reserves a raw, suitably aligned buffer (`memory`) that is accessed as a
/// SingleValueDataFixed<T> object through PrivateMemory::get().
/// NOTE(review): only declarations are visible here; presumably the constructor creates the Base
/// object inside `memory` and every method forwards to it - confirm against the definitions.
template <typename T>
struct SingleValueDataNumeric final : public SingleValueDataBase
{
using Self = SingleValueDataNumeric<T>;
using Base = SingleValueDataFixed<T>;
private:
/// 32 bytes for types of 256 bits, + 8 bytes for the virtual table pointer.
static constexpr size_t base_memory_reserved_size = 40;
/// Raw storage with the alignment of Base; get() reinterprets the bytes as a Base object.
struct alignas(alignof(Base)) PrivateMemory
{
char memory[base_memory_reserved_size];
Base & get() { return *reinterpret_cast<Base *>(memory); }
const Base & get() const { return *reinterpret_cast<const Base *>(memory); }
};
/// The reserved buffer must be large enough for any Base instantiation.
static_assert(sizeof(Base) <= base_memory_reserved_size);
PrivateMemory memory;
public:
/// This wrapper is never JIT-compiled.
static constexpr bool is_compilable = false;
SingleValueDataNumeric();
~SingleValueDataNumeric() override;
/// Overrides of the SingleValueDataBase interface.
bool has() const override;
void insertResultInto(IColumn & to) const override;
void write(WriteBuffer & buf, const ISerialization & serialization) const override;
void read(ReadBuffer & buf, const ISerialization & serialization, Arena * arena) override;
bool isEqualTo(const IColumn & column, size_t index) const override;
bool isEqualTo(const SingleValueDataBase & to) const override;
void set(const IColumn & column, size_t row_num, Arena * arena) override;
void set(const SingleValueDataBase & to, Arena * arena) override;
bool setIfSmaller(const SingleValueDataBase & to, Arena * arena) override;
bool setIfGreater(const SingleValueDataBase & to, Arena * arena) override;
bool setIfSmaller(const IColumn & column, size_t row_num, Arena * arena) override;
bool setIfGreater(const IColumn & column, size_t row_num, Arena * arena) override;
/// Batch variants working on the row range [row_begin, row_end) of a column.
void setSmallest(const IColumn & column, size_t row_begin, size_t row_end, Arena * arena) override;
void setGreatest(const IColumn & column, size_t row_begin, size_t row_end, Arena * arena) override;
/// Batch variants that additionally receive a null map and an `if` map.
/// NOTE(review): presumably rows that are NULL or rejected by if_map are skipped - confirm in the definition.
void setSmallestNotNullIf(
const IColumn & column,
const UInt8 * __restrict null_map,
const UInt8 * __restrict if_map,
size_t row_begin,
size_t row_end,
Arena * arena) override;
void setGreatestNotNullIf(
const IColumn & column,
const UInt8 * __restrict null_map,
const UInt8 * __restrict if_map,
size_t row_begin,
size_t row_end,
Arena * arena) override;
/// Index lookups; the optional result allows reporting that no row was selected.
std::optional<size_t> getSmallestIndex(const IColumn & column, size_t row_begin, size_t row_end) const override;
std::optional<size_t> getGreatestIndex(const IColumn & column, size_t row_begin, size_t row_end) const override;
std::optional<size_t> getSmallestIndexNotNullIf(
const IColumn & column,
const UInt8 * __restrict null_map,
const UInt8 * __restrict if_map,
size_t row_begin,
size_t row_end) const override;
std::optional<size_t> getGreatestIndexNotNullIf(
const IColumn & column,
const UInt8 * __restrict null_map,
const UInt8 * __restrict if_map,
size_t row_begin,
size_t row_end) const override;
/// Numeric values live inside the object, so the arena is not used for storage.
static bool allocatesMemoryInArena() { return false; }
};
/// For every type listed by FOR_SINGLE_VALUE_NUMERIC_TYPES: declare an explicit instantiation of
/// SingleValueDataNumeric (`extern template`) and check at compile time that it fits into
/// SingleValueDataBase::MAX_STORAGE_SIZE bytes.
#define DISPATCH(TYPE) \
extern template struct SingleValueDataNumeric<TYPE>; \
static_assert( \
sizeof(SingleValueDataNumeric<TYPE>) <= SingleValueDataBase::MAX_STORAGE_SIZE, "Incorrect size of SingleValueDataNumeric struct");
FOR_SINGLE_VALUE_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
/** For strings. Short strings are stored in the object itself, and long strings are allocated separately.
  * NOTE It could also be suitable for arrays of numbers.
  *
  * The layout is chosen so that the whole object occupies exactly SingleValueDataBase::MAX_STORAGE_SIZE
  * bytes (enforced by the static_assert right after the struct).
  */
struct SingleValueDataString final : public SingleValueDataBase
{
/// String states are never JIT-compiled.
static constexpr bool is_compilable = false;
using Self = SingleValueDataString;
/// 0 size indicates that there is no value. Empty string must have terminating '\0' and, therefore, size of empty string is 1
UInt32 size = 0;
UInt32 capacity = 0; /// power of two or zero
/// NOTE(review): has no default initializer; presumably it is only read once a large value was stored - confirm.
char * large_data; /// Always allocated in an arena
//// TODO: Maybe instead of a virtual class we need to go with a std::variant of the 3 to avoid reserving space for the vtable
/// Size of the inline buffer: what is left of MAX_STORAGE_SIZE after the other fields and the base class.
static constexpr UInt32 MAX_SMALL_STRING_SIZE
= SingleValueDataBase::MAX_STORAGE_SIZE - sizeof(size) - sizeof(capacity) - sizeof(large_data) - sizeof(SingleValueDataBase);
/// Upper bound for the size of a stored string.
static constexpr UInt32 MAX_STRING_SIZE = std::numeric_limits<Int32>::max();
private:
char small_data[MAX_SMALL_STRING_SIZE]; /// Including the terminating zero.
/// Accessors for the current value and helpers that (re)store it; defined out of line.
char * getDataMutable();
const char * getData() const;
StringRef getStringRef() const;
void allocateLargeDataIfNeeded(UInt32 size_to_reserve, Arena * arena);
void changeImpl(StringRef value, Arena * arena);
public:
/// A value is present iff size is non-zero (see the comment on `size`).
bool has() const override { return size != 0; }
void insertResultInto(IColumn & to) const override;
void write(WriteBuffer & buf, const ISerialization & /*serialization*/) const override;
void read(ReadBuffer & buf, const ISerialization & /*serialization*/, Arena * arena) override;
bool isEqualTo(const IColumn & column, size_t row_num) const override;
bool isEqualTo(const SingleValueDataBase &) const override;
void set(const IColumn & column, size_t row_num, Arena * arena) override;
void set(const SingleValueDataBase &, Arena * arena) override;
bool setIfSmaller(const IColumn & column, size_t row_num, Arena * arena) override;
bool setIfSmaller(const SingleValueDataBase &, Arena * arena) override;
bool setIfGreater(const IColumn & column, size_t row_num, Arena * arena) override;
bool setIfGreater(const SingleValueDataBase &, Arena * arena) override;
/// Long strings are placed in the arena.
static bool allocatesMemoryInArena() { return true; }
};
/// The string state must use the storage budget exactly (not just fit into it).
static_assert(sizeof(SingleValueDataString) == SingleValueDataBase::MAX_STORAGE_SIZE, "Incorrect size of SingleValueDataString struct");
/// For any other value types.
/// The value is kept as a Field; a Null field means that no value has been stored yet.
struct SingleValueDataGeneric final : public SingleValueDataBase
{
/// Generic states are never JIT-compiled.
static constexpr bool is_compilable = false;
private:
using Self = SingleValueDataGeneric;
Field value;
public:
/// A value is present iff the stored Field is not Null.
bool has() const override { return !value.isNull(); }
void insertResultInto(IColumn & to) const override;
void write(WriteBuffer & buf, const ISerialization & serialization) const override;
void read(ReadBuffer & buf, const ISerialization & serialization, Arena *) override;
bool isEqualTo(const IColumn & column, size_t row_num) const override;
bool isEqualTo(const SingleValueDataBase & other) const override;
void set(const IColumn & column, size_t row_num, Arena *) override;
void set(const SingleValueDataBase & other, Arena *) override;
bool setIfSmaller(const IColumn & column, size_t row_num, Arena * arena) override;
bool setIfSmaller(const SingleValueDataBase & other, Arena *) override;
bool setIfGreater(const IColumn & column, size_t row_num, Arena * arena) override;
bool setIfGreater(const SingleValueDataBase & other, Arena *) override;
/// Reports that this state does not allocate in the arena.
static bool allocatesMemoryInArena() { return false; }
};
/// The generic state must fit into the common storage budget.
static_assert(sizeof(SingleValueDataGeneric) <= SingleValueDataBase::MAX_STORAGE_SIZE, "Incorrect size of SingleValueDataGeneric struct");
/// Creator for min, max, any, anyLast, anyHeavy, etc...
///
/// Checks that the function has no parameters and the expected number of arguments (one if `unary`,
/// two otherwise), then instantiates AggregateFunctionTemplate with the single-value state that
/// matches the type of the value argument:
///  - SingleValueDataFixed for the types of FOR_SINGLE_VALUE_NUMERIC_TYPES, Date and DateTime;
///  - SingleValueDataString for String;
///  - SingleValueDataGeneric for everything else.
/// The returned object is allocated with `new`.
template <template <typename, bool...> class AggregateFunctionTemplate, bool unary, bool... isMin>
static IAggregateFunction *
createAggregateFunctionSingleValue(const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
    assertNoParameters(name, parameters);

    if constexpr (unary)
        assertUnary(name, argument_types);
    else
        assertBinary(name, argument_types);

    /// The value is the only argument of unary functions and the second argument otherwise.
    const DataTypePtr & value_type = argument_types[unary ? 0 : 1];
    const WhichDataType which(value_type);
    const auto type_index = which.idx;

#define DISPATCH(TYPE) \
    if (type_index == TypeIndex::TYPE) \
        return new AggregateFunctionTemplate<SingleValueDataFixed<TYPE>, isMin...>(argument_types); /// NOLINT
    FOR_SINGLE_VALUE_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH

    /// Date and DateTime reuse the fixed-size state of their underlying field type.
    if (type_index == TypeIndex::Date)
    {
        return new AggregateFunctionTemplate<SingleValueDataFixed<DataTypeDate::FieldType>, isMin...>(argument_types);
    }
    if (type_index == TypeIndex::DateTime)
    {
        return new AggregateFunctionTemplate<SingleValueDataFixed<DataTypeDateTime::FieldType>, isMin...>(argument_types);
    }
    if (type_index == TypeIndex::String)
    {
        return new AggregateFunctionTemplate<SingleValueDataString, isMin...>(argument_types);
    }

    /// Fallback for all remaining types.
    return new AggregateFunctionTemplate<SingleValueDataGeneric, isMin...>(argument_types);
}
/// Helper to allocate enough memory to store any derived class
/// The block only provides raw storage: get() reinterprets it as a SingleValueDataBase, so an object
/// must have been constructed in `memory` beforehand (see generateSingleValueFromTypeIndex below).
struct SingleValueDataBaseMemoryBlock
{
/// At least MAX_STORAGE_SIZE bytes, aligned for every listed state type.
std::aligned_union_t<
SingleValueDataBase::MAX_STORAGE_SIZE,
SingleValueDataNumeric<Decimal256>, /// We check all types in generateSingleValueFromTypeIndex
SingleValueDataString,
SingleValueDataGeneric>
memory;
SingleValueDataBase & get() { return *reinterpret_cast<SingleValueDataBase *>(&memory); }
const SingleValueDataBase & get() const { return *reinterpret_cast<const SingleValueDataBase *>(&memory); }
};
/// The block is expected to be 8-byte aligned.
static_assert(alignof(SingleValueDataBaseMemoryBlock) == 8);
/// For Data classes that want to compose on top of SingleValueDataBase values, like argMax or singleValueOrNull
/// It will build the object based on the type idx on the memory block provided
void generateSingleValueFromTypeIndex(TypeIndex idx, SingleValueDataBaseMemoryBlock & data);
/// NOTE(review): presumably tells whether the state type selected for `idx` stores data in the arena
/// (cf. the allocatesMemoryInArena() members of the state structs) - confirm against the definition.
bool singleValueTypeAllocatesMemoryInArena(TypeIndex idx);
}

View File

@ -39,9 +39,11 @@ void registerAggregateFunctionsQuantileApprox(AggregateFunctionFactory &);
void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory &);
void registerAggregateFunctionWindowFunnel(AggregateFunctionFactory &);
void registerAggregateFunctionRate(AggregateFunctionFactory &);
void registerAggregateFunctionsMin(AggregateFunctionFactory &);
void registerAggregateFunctionsMax(AggregateFunctionFactory &);
void registerAggregateFunctionsMinMax(AggregateFunctionFactory &);
void registerAggregateFunctionsArgMinArgMax(AggregateFunctionFactory &);
void registerAggregateFunctionsAny(AggregateFunctionFactory &);
void registerAggregateFunctionAnyHeavy(AggregateFunctionFactory &);
void registerAggregateFunctionsAnyRespectNulls(AggregateFunctionFactory &);
void registerAggregateFunctionsStatisticsStable(AggregateFunctionFactory &);
void registerAggregateFunctionsStatisticsSecondMoment(AggregateFunctionFactory &);
void registerAggregateFunctionsStatisticsThirdMoment(AggregateFunctionFactory &);
@ -99,7 +101,7 @@ void registerAggregateFunctionCombinatorOrFill(AggregateFunctionCombinatorFactor
void registerAggregateFunctionCombinatorResample(AggregateFunctionCombinatorFactory &);
void registerAggregateFunctionCombinatorDistinct(AggregateFunctionCombinatorFactory &);
void registerAggregateFunctionCombinatorMap(AggregateFunctionCombinatorFactory & factory);
void registerAggregateFunctionCombinatorMinMax(AggregateFunctionCombinatorFactory & factory);
void registerAggregateFunctionCombinatorsArgMinArgMax(AggregateFunctionCombinatorFactory & factory);
void registerWindowFunctions(AggregateFunctionFactory & factory);
@ -138,9 +140,11 @@ void registerAggregateFunctions()
registerAggregateFunctionsSequenceMatch(factory);
registerAggregateFunctionWindowFunnel(factory);
registerAggregateFunctionRate(factory);
registerAggregateFunctionsMin(factory);
registerAggregateFunctionsMax(factory);
registerAggregateFunctionsMinMax(factory);
registerAggregateFunctionsArgMinArgMax(factory);
registerAggregateFunctionsAny(factory);
registerAggregateFunctionAnyHeavy(factory);
registerAggregateFunctionsAnyRespectNulls(factory);
registerAggregateFunctionsStatisticsStable(factory);
registerAggregateFunctionsStatisticsSecondMoment(factory);
registerAggregateFunctionsStatisticsThirdMoment(factory);
@ -203,7 +207,7 @@ void registerAggregateFunctions()
registerAggregateFunctionCombinatorResample(factory);
registerAggregateFunctionCombinatorDistinct(factory);
registerAggregateFunctionCombinatorMap(factory);
registerAggregateFunctionCombinatorMinMax(factory);
registerAggregateFunctionCombinatorsArgMinArgMax(factory);
}
}

View File

@ -0,0 +1,186 @@
#include <Analyzer/Passes/AggregateFunctionOfGroupByKeysPass.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Analyzer/ArrayJoinNode.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/TableNode.h>
#include <Analyzer/UnionNode.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
/** Tries to eliminate min/max/any/anyLast aggregate functions whose argument is built only from
  * GROUP BY keys (and constants combined by deterministic functions): inside one group such an
  * expression has a single value, so the aggregate function can be replaced with its argument.
  *
  * The visitor keeps a stack with one set of GROUP BY keys per query node that is currently being
  * visited. An empty set means "do not optimize inside this query".
  * Everything is a no-op unless the setting optimize_aggregators_of_group_by_keys is enabled.
  */
class EliminateFunctionVisitor : public InDepthQueryTreeVisitorWithContext<EliminateFunctionVisitor>
{
public:
    using Base = InDepthQueryTreeVisitorWithContext<EliminateFunctionVisitor>;
    using Base::Base;

    using GroupByKeysStack = std::vector<QueryTreeNodePtrWithHashSet>;

    /// On entering a query node, pushes the set of its GROUP BY keys onto the stack.
    void enterImpl(QueryTreeNodePtr & node)
    {
        if (!getSettings().optimize_aggregators_of_group_by_keys)
            return;

        /// Collect group by keys.
        auto * query_node = node->as<QueryNode>();
        if (!query_node)
            return;

        /// Without GROUP BY there is nothing to compare with.
        /// Keep aggregator if group by is with totals/cube/rollup.
        const bool keep_aggregators = !query_node->hasGroupBy()
            || query_node->isGroupByWithTotals()
            || query_node->isGroupByWithCube()
            || query_node->isGroupByWithRollup();

        if (keep_aggregators)
        {
            group_by_keys_stack.push_back({});
            return;
        }

        QueryTreeNodePtrWithHashSet group_by_keys;
        for (auto & group_key : query_node->getGroupBy().getNodes())
        {
            /// For grouping sets case collect only keys that are present in every set.
            /// NOTE(review): the intersection starts from the keys collected so far, which is an empty
            /// set when the first element is a grouping set. So with grouping sets the result looks
            /// like it is always empty and the optimization is skipped - confirm whether this is intended.
            if (auto * list = group_key->as<ListNode>())
            {
                QueryTreeNodePtrWithHashSet common_keys_set;
                for (auto & group_elem : list->getNodes())
                {
                    if (group_by_keys.contains(group_elem))
                        common_keys_set.insert(group_elem);
                }
                group_by_keys = std::move(common_keys_set);
            }
            else
            {
                group_by_keys.insert(group_key);
            }
        }

        group_by_keys_stack.push_back(std::move(group_by_keys));
    }

    /// On leaving a function node, replaces it with its argument if the aggregation is redundant.
    /// On leaving a query node, drops the set of keys pushed for it by enterImpl.
    void leaveImpl(QueryTreeNodePtr & node)
    {
        if (!getSettings().optimize_aggregators_of_group_by_keys)
            return;

        const auto node_type = node->getNodeType();
        if (node_type == QueryTreeNodeType::FUNCTION)
        {
            /// A function that is not inside any visited query node has no keys to be compared with.
            /// Calling back() on an empty vector would be undefined behaviour.
            if (group_by_keys_stack.empty())
                return;

            if (aggregationCanBeEliminated(node, group_by_keys_stack.back()))
                node = node->as<FunctionNode>()->getArguments().getNodes()[0];
        }
        else if (node_type == QueryTreeNodeType::QUERY)
        {
            /// enterImpl pushes exactly one set per query node; the check only protects from
            /// pop_back() on an empty vector.
            if (!group_by_keys_stack.empty())
                group_by_keys_stack.pop_back();
        }
    }

    static bool needChildVisit(VisitQueryTreeNodeType & parent [[maybe_unused]], VisitQueryTreeNodeType & child)
    {
        /// Skip ArrayJoin.
        return !child->as<ArrayJoinNode>();
    }

private:
    /// Element of the DFS stack used by aggregationCanBeEliminated.
    struct NodeWithInfo
    {
        QueryTreeNodePtr node;
        /// True if every function on the path from the aggregate function argument
        /// down to this node is deterministic in scope of query.
        bool parents_are_only_deterministic = false;
    };

    /** Returns true if `node` is min/max/any/anyLast whose result type equals the type of its
      * argument and whose argument depends only on `group_by_keys`.
      * Throws LOGICAL_ERROR if such a function does not have exactly one argument.
      */
    static bool aggregationCanBeEliminated(const QueryTreeNodePtr & node, const QueryTreeNodePtrWithHashSet & group_by_keys)
    {
        if (group_by_keys.empty())
            return false;

        auto * function = node->as<FunctionNode>();
        if (!function || !function->isAggregateFunction())
            return false;

        const auto & function_name = function->getFunctionName();
        const bool is_supported_function = function_name == "min"
            || function_name == "max"
            || function_name == "any"
            || function_name == "anyLast";
        if (!is_supported_function)
            return false;

        auto & function_arguments = function->getArguments().getNodes();
        if (function_arguments.size() != 1)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected a single argument of function '{}' but received {}", function_name, function_arguments.size());

        /// The replacement must not change the type of the expression.
        if (!function->getResultType()->equals(*function_arguments[0]->getResultType()))
            return false;

        std::vector<NodeWithInfo> candidates;
        candidates.push_back({ function_arguments[0], true });

        /// Using DFS we traverse function tree and try to find if it uses other keys as function arguments.
        while (!candidates.empty())
        {
            auto [candidate, parents_are_only_deterministic] = candidates.back();
            candidates.pop_back();

            const bool found = group_by_keys.contains(candidate);

            switch (candidate->getNodeType())
            {
                case QueryTreeNodeType::FUNCTION:
                {
                    auto * func = candidate->as<FunctionNode>();
                    auto & arguments = func->getArguments().getNodes();
                    if (arguments.empty())
                        return false;

                    /// A function that is a key itself needs no further checks,
                    /// otherwise all its arguments have to be checked.
                    if (!found)
                    {
                        const bool is_deterministic_function = parents_are_only_deterministic &&
                            func->getFunctionOrThrow()->isDeterministicInScopeOfQuery();
                        for (auto it = arguments.rbegin(); it != arguments.rend(); ++it)
                            candidates.push_back({ *it, is_deterministic_function });
                    }
                    break;
                }
                case QueryTreeNodeType::COLUMN:
                    /// A column that is not a key may differ between rows of one group.
                    if (!found)
                        return false;
                    break;
                case QueryTreeNodeType::CONSTANT:
                    /// A constant is accepted only below deterministic functions.
                    if (!parents_are_only_deterministic)
                        return false;
                    break;
                default:
                    return false;
            }
        }

        return true;
    }

    /// One set of GROUP BY keys per query node that is currently being visited.
    GroupByKeysStack group_by_keys_stack;
};
}
/// Runs EliminateFunctionVisitor over the whole query tree.
void AggregateFunctionOfGroupByKeysPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
    EliminateFunctionVisitor(context).visit(query_tree_node);
}
};

View File

@ -0,0 +1,28 @@
#pragma once
#include <Analyzer/IQueryTreePass.h>
namespace DB
{
/** Query tree pass that removes min/max/any/anyLast aggregate functions applied to GROUP BY keys
  * in SELECT section.
  *
  * Example: SELECT max(column) FROM table GROUP BY column;
  * Result: SELECT column FROM table GROUP BY column;
  */
class AggregateFunctionOfGroupByKeysPass final : public IQueryTreePass
{
public:
    /// Name of the pass.
    String getName() override
    {
        return "AggregateFunctionOfGroupByKeys";
    }

    /// Human-readable description of the pass.
    String getDescription() override { return "Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section."; }

    /// Applies the pass to the query tree in place.
    void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};
}

View File

@ -92,7 +92,7 @@ private:
if (!found)
{
bool is_deterministic_function = parents_are_only_deterministic &&
function->getFunctionOrThrow()->isDeterministicInScopeOfQuery();
func->getFunctionOrThrow()->isDeterministicInScopeOfQuery();
for (auto it = arguments.rbegin(); it != arguments.rend(); ++it)
candidates.push_back({ *it, is_deterministic_function });
}

View File

@ -5,6 +5,7 @@
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/FieldToDataType.h>
#include <Parsers/ParserSelectQuery.h>
#include <Parsers/ParserSelectWithUnionQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
@ -557,6 +558,9 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
}
else if (const auto * ast_literal = expression->as<ASTLiteral>())
{
if (context->getSettingsRef().allow_experimental_variant_type && context->getSettingsRef().use_variant_as_common_type)
result = std::make_shared<ConstantNode>(ast_literal->value, applyVisitor(FieldToDataType<LeastSupertypeOnError::Variant>(), ast_literal->value));
else
result = std::make_shared<ConstantNode>(ast_literal->value);
}
else if (const auto * function = expression->as<ASTFunction>())

View File

@ -46,6 +46,7 @@
#include <Analyzer/Passes/CrossToInnerJoinPass.h>
#include <Analyzer/Passes/ShardNumColumnToFunctionPass.h>
#include <Analyzer/Passes/ConvertQueryToCNFPass.h>
#include <Analyzer/Passes/AggregateFunctionOfGroupByKeysPass.h>
#include <Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.h>
@ -164,7 +165,6 @@ private:
/** ClickHouse query tree pass manager.
*
* TODO: Support setting optimize_aggregators_of_group_by_keys.
* TODO: Support setting optimize_monotonous_functions_in_order_by.
* TODO: Add optimizations based on function semantics. Example: SELECT * FROM test_table WHERE id != id. (id is not nullable column).
*/
@ -264,6 +264,9 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<RewriteArrayExistsToHasPass>());
manager.addPass(std::make_unique<NormalizeCountVariantsPass>());
/// Should run before AggregateFunctionsArithmericOperationsPass.
manager.addPass(std::make_unique<AggregateFunctionOfGroupByKeysPass>());
manager.addPass(std::make_unique<AggregateFunctionsArithmericOperationsPass>());
manager.addPass(std::make_unique<UniqInjectiveFunctionsEliminationPass>());
manager.addPass(std::make_unique<OptimizeGroupByFunctionKeysPass>());

View File

@ -19,7 +19,7 @@ class BackupCoordinationFileInfos
public:
/// plain_backup sets that we're writing a plain backup, which means all duplicates are written as is, and empty files are written as is.
/// (For normal backups only the first file amongst duplicates is actually stored, and empty files are not stored).
BackupCoordinationFileInfos(bool plain_backup_) : plain_backup(plain_backup_) {}
explicit BackupCoordinationFileInfos(bool plain_backup_) : plain_backup(plain_backup_) {}
/// Adds file infos for the specified host.
void addFileInfos(BackupFileInfos && file_infos, const String & host_id);

View File

@ -21,7 +21,7 @@ namespace DB
class BackupCoordinationLocal : public IBackupCoordination
{
public:
BackupCoordinationLocal(bool plain_backup_);
explicit BackupCoordinationLocal(bool plain_backup_);
~BackupCoordinationLocal() override;
void setStage(const String & new_stage, const String & message) override;

View File

@ -927,7 +927,7 @@ void BackupImpl::writeFile(const BackupFileInfo & info, BackupEntryPtr entry)
const auto write_info_to_archive = [&](const auto & file_name)
{
auto out = archive_writer->writeFile(file_name);
auto out = archive_writer->writeFile(file_name, info.size);
auto read_buffer = entry->getReadBuffer(writer->getReadSettings());
if (info.base_size != 0)
read_buffer->seek(info.base_size, SEEK_SET);

View File

@ -52,7 +52,7 @@ private:
struct Task : public AsyncTask
{
Task(PacketReceiver & receiver_) : receiver(receiver_) {}
explicit Task(PacketReceiver & receiver_) : receiver(receiver_) {}
PacketReceiver & receiver;

View File

@ -53,7 +53,7 @@ class TestHint
{
public:
using ErrorVector = std::vector<int>;
TestHint(const String & query_);
explicit TestHint(const String & query_);
const auto & serverErrors() const { return server_errors; }
const auto & clientErrors() const { return client_errors; }

View File

@ -1,10 +1,7 @@
#include <Columns/ColumnAggregateFunction.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/MaskOperations.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromArena.h>
#include <IO/WriteBufferFromString.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <Common/AlignedBuffer.h>
#include <Common/Arena.h>
#include <Common/FieldVisitorToString.h>
@ -14,6 +11,10 @@
#include <Common/assert_cast.h>
#include <Common/iota.h>
#include <Common/typeid_cast.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromArena.h>
#include <IO/WriteBufferFromString.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
namespace DB
@ -518,6 +519,23 @@ void ColumnAggregateFunction::insert(const Field & x)
func->deserialize(data.back(), read_buffer, version, &arena);
}
/// Non-throwing counterpart of insert(): appends the aggregate function state held by `x`.
/// Returns false, leaving the column untouched, if `x` is not an aggregate function state
/// or if the state belongs to a different type than this column (names are compared).
bool ColumnAggregateFunction::tryInsert(const DB::Field & x)
{
    if (x.getType() != Field::Types::AggregateFunctionState)
        return false;

    const auto & state = x.get<const AggregateFunctionStateData &>();
    if (type_string != state.name)
        return false;

    ensureOwnership();
    Arena & arena = createOrGetArena();

    /// Create an empty state first and then fill it from the serialized representation.
    pushBackAndCreateState(data, arena, func.get());
    ReadBufferFromString read_buffer(state.data);
    func->deserialize(data.back(), read_buffer, version, &arena);

    return true;
}
void ColumnAggregateFunction::insertDefault()
{
ensureOwnership();
@ -525,7 +543,7 @@ void ColumnAggregateFunction::insertDefault()
pushBackAndCreateState(data, arena, func.get());
}
StringRef ColumnAggregateFunction::serializeValueIntoArena(size_t n, Arena & arena, const char *& begin, const UInt8 *) const
StringRef ColumnAggregateFunction::serializeValueIntoArena(size_t n, Arena & arena, const char *& begin) const
{
WriteBufferFromArena out(arena, begin);
func->serialize(data[n], out, version);
@ -633,11 +651,6 @@ void ColumnAggregateFunction::getPermutation(PermutationSortDirection /*directio
void ColumnAggregateFunction::updatePermutation(PermutationSortDirection, PermutationSortStability,
size_t, int, Permutation &, EqualRanges&) const {}
void ColumnAggregateFunction::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
void ColumnAggregateFunction::getExtremes(Field & min, Field & max) const
{
/// Place serialized default values into min/max.
@ -673,7 +686,7 @@ ColumnAggregateFunction::MutablePtr ColumnAggregateFunction::createView() const
}
ColumnAggregateFunction::ColumnAggregateFunction(const ColumnAggregateFunction & src_)
: COWHelper<IColumn, ColumnAggregateFunction>(src_),
: COWHelper<IColumnHelper<ColumnAggregateFunction>, ColumnAggregateFunction>(src_),
foreign_arenas(concatArenas(src_.foreign_arenas, src_.my_arena)),
func(src_.func), src(src_.getPtr()), data(src_.data.begin(), src_.data.end())
{

View File

@ -51,13 +51,13 @@ using ConstArenas = std::vector<ConstArenaPtr>;
* specifying which individual values should be destroyed and which ones should not.
* Clearly, this method would have a substantially non-zero price.
*/
class ColumnAggregateFunction final : public COWHelper<IColumn, ColumnAggregateFunction>
class ColumnAggregateFunction final : public COWHelper<IColumnHelper<ColumnAggregateFunction>, ColumnAggregateFunction>
{
public:
using Container = PaddedPODArray<AggregateDataPtr>;
private:
friend class COWHelper<IColumn, ColumnAggregateFunction>;
friend class COWHelper<IColumnHelper<ColumnAggregateFunction>, ColumnAggregateFunction>;
/// Arenas used by function states that are created elsewhere. We own these
/// arenas in the sense of extending their lifetime, but do not modify them.
@ -160,9 +160,11 @@ public:
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
void insertDefault() override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
const char * deserializeAndInsertFromArena(const char * src_arena) override;
@ -201,8 +203,6 @@ public:
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer_stream) override;
int compareAt(size_t, size_t, const IColumn &, int) const override
{
return 0;

View File

@ -8,7 +8,6 @@
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/MaskOperations.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <Common/Exception.h>
#include <Common/Arena.h>
#include <Common/SipHash.h>
@ -205,7 +204,7 @@ void ColumnArray::insertData(const char * pos, size_t length)
}
StringRef ColumnArray::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
StringRef ColumnArray::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
size_t array_size = sizeAt(n);
size_t offset = offsetAt(n);
@ -226,6 +225,19 @@ StringRef ColumnArray::serializeValueIntoArena(size_t n, Arena & arena, char con
}
char * ColumnArray::serializeValueIntoMemory(size_t n, char * memory) const
{
size_t array_size = sizeAt(n);
size_t offset = offsetAt(n);
memcpy(memory, &array_size, sizeof(array_size));
memory += sizeof(array_size);
for (size_t i = 0; i < array_size; ++i)
memory = getData().serializeValueIntoMemory(offset + i, memory);
return memory;
}
const char * ColumnArray::deserializeAndInsertFromArena(const char * pos)
{
size_t array_size = unalignedLoad<size_t>(pos);
@ -305,6 +317,25 @@ void ColumnArray::insert(const Field & x)
getOffsets().push_back(getOffsets().back() + size);
}
/// Non-throwing counterpart of insert(): appends the array held by `x`.
/// Returns false if `x` is not an Array or if the nested column rejects one of the elements;
/// in the latter case the elements inserted so far are removed from the nested column again.
bool ColumnArray::tryInsert(const Field & x)
{
    if (x.getType() != Field::Types::Which::Array)
        return false;

    const Array & array = x.get<const Array &>();
    const size_t size = array.size();

    /// Insert elements one by one until the end of the array or the first rejected element.
    size_t inserted = 0;
    while (inserted < size && getData().tryInsert(array[inserted]))
        ++inserted;

    if (inserted != size)
    {
        /// Roll back the partially inserted array.
        getData().popBack(inserted);
        return false;
    }

    getOffsets().push_back(getOffsets().back() + size);
    return true;
}
void ColumnArray::insertFrom(const IColumn & src_, size_t n)
{
@ -371,19 +402,6 @@ int ColumnArray::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_
return compareAtImpl(n, m, rhs_, nan_direction_hint, &collator);
}
void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
{
return doCompareColumn<ColumnArray>(assert_cast<const ColumnArray &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
bool ColumnArray::hasEqualValues() const
{
return hasEqualValuesImpl<ColumnArray>();
}
struct ColumnArray::ComparatorBase
{
const ColumnArray & parent;
@ -969,22 +987,6 @@ ColumnPtr ColumnArray::compress() const
});
}
double ColumnArray::getRatioOfDefaultRows(double sample_ratio) const
{
return getRatioOfDefaultRowsImpl<ColumnArray>(sample_ratio);
}
UInt64 ColumnArray::getNumberOfDefaultRows() const
{
return getNumberOfDefaultRowsImpl<ColumnArray>();
}
void ColumnArray::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
return getIndicesOfNonDefaultRowsImpl<ColumnArray>(indices, from, limit);
}
ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const
{
if (replicate_offsets.empty())
@ -1279,11 +1281,6 @@ ColumnPtr ColumnArray::replicateTuple(const Offsets & replicate_offsets) const
assert_cast<const ColumnArray &>(*temporary_arrays.front()).getOffsetsPtr());
}
void ColumnArray::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
size_t ColumnArray::getNumberOfDimensions() const
{
const auto * nested_array = checkAndGetColumn<ColumnArray>(*data);

View File

@ -15,10 +15,10 @@ namespace DB
* In memory, it is represented as one column of a nested type, whose size is equal to the sum of the sizes of all arrays,
* and as an array of offsets in it, which allows you to get each element.
*/
class ColumnArray final : public COWHelper<IColumn, ColumnArray>
class ColumnArray final : public COWHelper<IColumnHelper<ColumnArray>, ColumnArray>
{
private:
friend class COWHelper<IColumn, ColumnArray>;
friend class COWHelper<IColumnHelper<ColumnArray>, ColumnArray>;
/** Create an array column with specified values and offsets. */
ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column);
@ -48,7 +48,7 @@ public:
/** Create immutable column using immutable arguments. This arguments may be shared with other columns.
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
*/
using Base = COWHelper<IColumn, ColumnArray>;
using Base = COWHelper<IColumnHelper<ColumnArray>, ColumnArray>;
static Ptr create(const ColumnPtr & nested_column, const ColumnPtr & offsets_column)
{
@ -77,7 +77,8 @@ public:
StringRef getDataAt(size_t n) const override;
bool isDefaultAt(size_t n) const override;
void insertData(const char * pos, size_t length) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
char * serializeValueIntoMemory(size_t, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
@ -85,6 +86,7 @@ public:
void updateHashFast(SipHash & hash) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
void insertFrom(const IColumn & src_, size_t n) override;
void insertDefault() override;
void popBack(size_t n) override;
@ -94,11 +96,7 @@ public:
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
template <typename Type> ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const override;
bool hasEqualValues() const override;
void getPermutation(PermutationSortDirection direction, PermutationSortStability stability,
size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(PermutationSortDirection direction, PermutationSortStability stability,
@ -147,13 +145,6 @@ public:
/// For example, `getDataInRange(0, size())` is the same as `getDataPtr()->clone()`.
MutableColumnPtr getDataInRange(size_t start, size_t length) const;
/// Forwards to scatterImpl<ColumnArray> with the same `num_columns` and `selector`
/// and returns the columns produced by that helper.
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override
{
return scatterImpl<ColumnArray>(num_columns, selector);
}
void gather(ColumnGathererStream & gatherer_stream) override;
ColumnPtr compress() const override;
void forEachSubcolumn(MutableColumnCallback callback) override
@ -177,11 +168,6 @@ public:
return false;
}
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
/// Forwards finalize() to the nested `data` column; nothing else is done here.
void finalize() override { data->finalize(); }
/// Returns whatever the nested `data` column reports from isFinalized().
bool isFinalized() const override { return data->isFinalized(); }

View File

@ -30,7 +30,7 @@ namespace ErrorCodes
*
* Also, in-memory compression allows keeping more data in RAM.
*/
class ColumnCompressed : public COWHelper<IColumn, ColumnCompressed>
class ColumnCompressed : public COWHelper<IColumnHelper<ColumnCompressed>, ColumnCompressed>
{
public:
using Lazy = std::function<ColumnPtr()>;
@ -84,11 +84,13 @@ public:
/// None of the accessors and mutators below do any work of their own:
/// each one ignores its arguments and calls throwMustBeDecompressed().
/// NOTE(review): presumably callers are expected to decompress the column before using these - confirm.
StringRef getDataAt(size_t) const override { throwMustBeDecompressed(); }
bool isDefaultAt(size_t) const override { throwMustBeDecompressed(); }
void insert(const Field &) override { throwMustBeDecompressed(); }
bool tryInsert(const Field &) override { throwMustBeDecompressed(); }
void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); }
void insertData(const char *, size_t) override { throwMustBeDecompressed(); }
void insertDefault() override { throwMustBeDecompressed(); }
void popBack(size_t) override { throwMustBeDecompressed(); }
StringRef serializeValueIntoArena(size_t, Arena &, char const *&, const UInt8 *) const override { throwMustBeDecompressed(); }
StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override { throwMustBeDecompressed(); }
char * serializeValueIntoMemory(size_t, char *) const override { throwMustBeDecompressed(); }
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); }
const char * skipSerializedInArena(const char *) const override { throwMustBeDecompressed(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); }

View File

@ -20,10 +20,10 @@ namespace ErrorCodes
/** ColumnConst contains another column with a single element,
* but looks like a column with an arbitrary number of identical elements.
*/
class ColumnConst final : public COWHelper<IColumn, ColumnConst>
class ColumnConst final : public COWHelper<IColumnHelper<ColumnConst>, ColumnConst>
{
private:
friend class COWHelper<IColumn, ColumnConst>;
friend class COWHelper<IColumnHelper<ColumnConst>, ColumnConst>;
WrappedPtr data;
size_t s;
@ -131,6 +131,15 @@ public:
++s;
}
/// Tries to append one more row for `field`.
/// The field is only probed against an empty clone of the nested column: `data` itself
/// is never modified, and the row counter `s` grows by one only when the probe accepts the field.
/// Returns true if the row was counted, false otherwise.
bool tryInsert(const Field & field) override
{
    auto probe = data->cloneEmpty();
    if (probe->tryInsert(field))
    {
        ++s;
        return true;
    }
    return false;
}
void insertData(const char *, size_t) override
{
++s;
@ -151,11 +160,16 @@ public:
s -= n;
}
StringRef serializeValueIntoArena(size_t, Arena & arena, char const *& begin, const UInt8 *) const override
StringRef serializeValueIntoArena(size_t, Arena & arena, char const *& begin) const override
{
return data->serializeValueIntoArena(0, arena, begin);
}
/// The row index argument is ignored: row 0 of the nested `data` column is serialized
/// into `memory`, and the pointer returned by the nested column is passed back unchanged.
char * serializeValueIntoMemory(size_t, char * memory) const override
{
return data->serializeValueIntoMemory(0, memory);
}
const char * deserializeAndInsertFromArena(const char * pos) override
{
const auto * res = data->deserializeAndInsertFromArena(pos);

View File

@ -42,46 +42,6 @@ int ColumnDecimal<T>::compareAt(size_t n, size_t m, const IColumn & rhs_, int) c
return decimalLess<T>(b, a, other.scale, scale) ? 1 : (decimalLess<T>(a, b, scale, other.scale) ? -1 : 0);
}
/// Forwards to doCompareColumn<ColumnDecimal<T>>.
/// `rhs` is converted with static_cast to `const Self &` (no run-time type check is done here);
/// all other arguments are passed through unchanged.
template <is_decimal T>
void ColumnDecimal<T>::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
{
return this->template doCompareColumn<ColumnDecimal<T>>(static_cast<const Self &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
/// Forwards to hasEqualValuesImpl<ColumnDecimal<T>> and returns its result.
template <is_decimal T>
bool ColumnDecimal<T>::hasEqualValues() const
{
return this->template hasEqualValuesImpl<ColumnDecimal<T>>();
}
/// Serializes row `n` into memory obtained from `arena.allocContinue(size, begin)`.
///
/// Layout of the returned range:
///  - `null_bit == nullptr`: the sizeof(T) bytes of `data[n]`;
///  - `null_bit != nullptr`: one flag byte copied from `*null_bit`, followed by the sizeof(T)
///    bytes of `data[n]` only when that flag byte is zero.
///
/// The returned StringRef points at the start of the allocated range and carries its size.
template <is_decimal T>
StringRef ColumnDecimal<T>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const
{
    constexpr size_t flag_size = sizeof(UInt8);
    constexpr size_t value_size = sizeof(T);

    StringRef out;

    /// No null flag requested: the value is the whole payload.
    if (null_bit == nullptr)
    {
        out.size = value_size;
        char * dst = arena.allocContinue(out.size, begin);
        out.data = dst;
        memcpy(dst, &data[n], value_size);
        return out;
    }

    const bool is_null = *null_bit != 0;
    out.size = is_null ? flag_size : flag_size + value_size;
    char * dst = arena.allocContinue(out.size, begin);
    out.data = dst;

    /// The flag byte is always written first; the value follows only for non-null rows.
    memcpy(dst, null_bit, flag_size);
    if (!is_null)
        memcpy(dst + flag_size, &data[n], value_size);

    return out;
}
template <is_decimal T>
const char * ColumnDecimal<T>::deserializeAndInsertFromArena(const char * pos)
{
@ -334,6 +294,16 @@ MutableColumnPtr ColumnDecimal<T>::cloneResized(size_t size) const
return res;
}
/// Tries to append the value held by `x`.
/// The field is read with Field::tryGet<DecimalField<T>>; when that fails nothing is appended
/// and false is returned, otherwise the extracted value is pushed to `data` and true is returned.
template <is_decimal T>
bool ColumnDecimal<T>::tryInsert(const Field & x)
{
    DecimalField<T> extracted;
    if (x.tryGet<DecimalField<T>>(extracted))
    {
        data.push_back(extracted);
        return true;
    }
    return false;
}
template <is_decimal T>
void ColumnDecimal<T>::insertData(const char * src, size_t /*length*/)
{
@ -460,12 +430,6 @@ ColumnPtr ColumnDecimal<T>::replicate(const IColumn::Offsets & offsets) const
return res;
}
/// Hands this column to the gatherer: all work is done by `gatherer.gather(*this)`.
template <is_decimal T>
void ColumnDecimal<T>::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
template <is_decimal T>
ColumnPtr ColumnDecimal<T>::compress() const
{

View File

@ -1,14 +1,12 @@
#pragma once
#include <cmath>
#include <base/sort.h>
#include <base/TypeName.h>
#include <Core/Field.h>
#include <Core/DecimalFunctions.h>
#include <Core/TypeId.h>
#include <Common/typeid_cast.h>
#include <Columns/ColumnVectorHelper.h>
#include <Columns/ColumnFixedSizeHelper.h>
#include <Columns/IColumn.h>
#include <Columns/IColumnImpl.h>
@ -18,11 +16,11 @@ namespace DB
/// A ColumnVector for Decimals
template <is_decimal T>
class ColumnDecimal final : public COWHelper<ColumnVectorHelper, ColumnDecimal<T>>
class ColumnDecimal final : public COWHelper<IColumnHelper<ColumnDecimal<T>, ColumnFixedSizeHelper>, ColumnDecimal<T>>
{
private:
using Self = ColumnDecimal;
friend class COWHelper<ColumnVectorHelper, Self>;
friend class COWHelper<IColumnHelper<Self, ColumnFixedSizeHelper>, Self>;
public:
using ValueType = T;
@ -62,6 +60,7 @@ public:
void insertDefault() override { data.push_back(T()); }
void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); }
void insert(const Field & x) override { data.push_back(x.get<T>()); }
bool tryInsert(const Field & x) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void popBack(size_t n) override
@ -81,17 +80,12 @@ public:
Float64 getFloat64(size_t n) const final { return DecimalUtils::convertTo<Float64>(data[n], scale); }
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
bool hasEqualValues() const override;
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
@ -118,13 +112,6 @@ public:
ColumnPtr replicate(const IColumn::Offsets & offsets) const override;
void getExtremes(Field & min, Field & max) const override;
/// Forwards to scatterImpl<Self> with the same `num_columns` and `selector`
/// and returns the columns produced by that helper.
MutableColumns scatter(IColumn::ColumnIndex num_columns, const IColumn::Selector & selector) const override
{
return this->template scatterImpl<Self>(num_columns, selector);
}
void gather(ColumnGathererStream & gatherer_stream) override;
bool structureEquals(const IColumn & rhs) const override
{
if (auto rhs_concrete = typeid_cast<const ColumnDecimal<T> *>(&rhs))
@ -132,21 +119,6 @@ public:
return false;
}
/// Forwards to getRatioOfDefaultRowsImpl<Self>, passing `sample_ratio` through unchanged.
double getRatioOfDefaultRows(double sample_ratio) const override
{
return this->template getRatioOfDefaultRowsImpl<Self>(sample_ratio);
}
/// Forwards to getNumberOfDefaultRowsImpl<Self> and returns its result.
UInt64 getNumberOfDefaultRows() const override
{
return this->template getNumberOfDefaultRowsImpl<Self>();
}
/// Forwards to getIndicesOfNonDefaultRowsImpl<Self>; `indices`, `from` and `limit`
/// are passed through unchanged and the helper writes into `indices`.
void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override
{
return this->template getIndicesOfNonDefaultRowsImpl<Self>(indices, from, limit);
}
ColumnPtr compress() const override;
void insertValue(const T value) { data.push_back(value); }

View File

@ -7,7 +7,7 @@
namespace DB
{
/** Allows to access internal array of ColumnVector or ColumnFixedString without cast to concrete type.
/** Allows to access internal array of fixed-size column without cast to concrete type.
* We will inherit ColumnVector and ColumnFixedString from this class instead of IColumn.
* Assumes data layout of ColumnVector, ColumnFixedString and PODArray.
*
@ -22,7 +22,7 @@ namespace DB
* To allow functional tests to work under UBSan we have to separate some base class that will present the memory layout in explicit way,
* and we will do static_cast to this class.
*/
class ColumnVectorHelper : public IColumn
class ColumnFixedSizeHelper : public IColumn
{
public:
template <size_t ELEMENT_SIZE>

View File

@ -2,7 +2,6 @@
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnCompressed.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <IO/WriteHelpers.h>
#include <Common/Arena.h>
#include <Common/HashTable/Hash.h>
@ -63,6 +62,17 @@ void ColumnFixedString::insert(const Field & x)
insertData(s.data(), s.size());
}
bool ColumnFixedString::tryInsert(const Field & x)
{
if (x.getType() != Field::Types::Which::String)
return false;
const String & s = x.get<const String &>();
if (s.size() > n)
return false;
insertData(s.data(), s.size());
return true;
}
void ColumnFixedString::insertFrom(const IColumn & src_, size_t index)
{
const ColumnFixedString & src = assert_cast<const ColumnFixedString &>(src_);
@ -86,30 +96,6 @@ void ColumnFixedString::insertData(const char * pos, size_t length)
memset(chars.data() + old_size + length, 0, n - length);
}
/// Serializes the value at `index` into memory obtained from `arena.allocContinue(size, begin)`.
///
/// Layout of the returned range:
///  - `null_bit == nullptr`: the `n` bytes starting at `chars[n * index]`;
///  - `null_bit != nullptr`: one flag byte copied from `*null_bit`, followed by those `n`
///    bytes only when the flag byte is zero.
///
/// The returned StringRef points at the start of the allocated range and carries its size.
StringRef ColumnFixedString::serializeValueIntoArena(size_t index, Arena & arena, char const *& begin, const UInt8 * null_bit) const
{
    constexpr size_t flag_size = sizeof(UInt8);

    StringRef out;

    /// No null flag requested: the fixed-size value is the whole payload.
    if (null_bit == nullptr)
    {
        out.size = n;
        char * dst = arena.allocContinue(out.size, begin);
        out.data = dst;
        memcpy(dst, &chars[n * index], n);
        return out;
    }

    const bool is_null = *null_bit != 0;
    out.size = is_null ? flag_size : flag_size + n;
    char * dst = arena.allocContinue(out.size, begin);
    out.data = dst;

    /// The flag byte is always written first; the value follows only for non-null rows.
    memcpy(dst, null_bit, flag_size);
    if (!is_null)
        memcpy(dst + flag_size, &chars[n * index], n);

    return out;
}
const char * ColumnFixedString::deserializeAndInsertFromArena(const char * pos)
{
size_t old_size = chars.size();
@ -364,11 +350,6 @@ ColumnPtr ColumnFixedString::replicate(const Offsets & offsets) const
return res;
}
/// Hands this column to the gatherer: all work is done by `gatherer.gather(*this)`.
void ColumnFixedString::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
void ColumnFixedString::getExtremes(Field & min, Field & max) const
{
min = String();

View File

@ -6,7 +6,7 @@
#include <Common/assert_cast.h>
#include <Columns/IColumn.h>
#include <Columns/IColumnImpl.h>
#include <Columns/ColumnVectorHelper.h>
#include <Columns/ColumnFixedSizeHelper.h>
#include <Core/Field.h>
@ -16,10 +16,10 @@ namespace DB
/** A column of values of "fixed-length string" type.
* If you insert a smaller string, it will be padded with zero bytes.
*/
class ColumnFixedString final : public COWHelper<ColumnVectorHelper, ColumnFixedString>
class ColumnFixedString final : public COWHelper<IColumnHelper<ColumnFixedString, ColumnFixedSizeHelper>, ColumnFixedString>
{
public:
friend class COWHelper<ColumnVectorHelper, ColumnFixedString>;
friend class COWHelper<IColumnHelper<ColumnFixedString, ColumnFixedSizeHelper>, ColumnFixedString>;
using Chars = PaddedPODArray<UInt8>;
@ -96,6 +96,8 @@ public:
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
void insertFrom(const IColumn & src_, size_t index) override;
void insertData(const char * pos, size_t length) override;
@ -105,7 +107,7 @@ public:
chars.resize_fill(chars.size() + n);
}
virtual void insertManyDefaults(size_t length) override
void insertManyDefaults(size_t length) override
{
chars.resize_fill(chars.size() + n * length);
}
@ -115,8 +117,6 @@ public:
chars.resize_assume_reserved(chars.size() - n * elems);
}
StringRef serializeValueIntoArena(size_t index, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
@ -134,24 +134,6 @@ public:
return memcmpSmallAllowOverflow15(chars.data() + p1 * n, rhs.chars.data() + p2 * n, n);
}
/// Casts `rhs_` to ColumnFixedString with assert_cast, asserts (chassert) that both columns
/// have the same fixed length `n`, and then forwards every argument to
/// doCompareColumn<ColumnFixedString>.
void compareColumn(
const IColumn & rhs_,
size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes,
PaddedPODArray<Int8> & compare_results,
int direction,
int nan_direction_hint) const override
{
const ColumnFixedString & rhs = assert_cast<const ColumnFixedString &>(rhs_);
chassert(this->n == rhs.n);
return doCompareColumn<ColumnFixedString>(rhs, rhs_row_num, row_indexes, compare_results, direction, nan_direction_hint);
}
/// Forwards to hasEqualValuesImpl<ColumnFixedString> and returns its result.
bool hasEqualValues() const override
{
return hasEqualValuesImpl<ColumnFixedString>();
}
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, Permutation & res) const override;
@ -173,13 +155,6 @@ public:
ColumnPtr replicate(const Offsets & offsets) const override;
/// Forwards to scatterImpl<ColumnFixedString> with the same `num_columns` and `selector`
/// and returns the columns produced by that helper.
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override
{
return scatterImpl<ColumnFixedString>(num_columns, selector);
}
void gather(ColumnGathererStream & gatherer_stream) override;
ColumnPtr compress() const override;
void reserve(size_t size) override
@ -206,21 +181,6 @@ public:
return false;
}
/// Forwards to getRatioOfDefaultRowsImpl<ColumnFixedString>, passing `sample_ratio` through unchanged.
double getRatioOfDefaultRows(double sample_ratio) const override
{
return getRatioOfDefaultRowsImpl<ColumnFixedString>(sample_ratio);
}
/// Forwards to getNumberOfDefaultRowsImpl<ColumnFixedString> and returns its result.
UInt64 getNumberOfDefaultRows() const override
{
return getNumberOfDefaultRowsImpl<ColumnFixedString>();
}
/// Forwards to getIndicesOfNonDefaultRowsImpl<ColumnFixedString>; `indices`, `from` and `limit`
/// are passed through unchanged and the helper writes into `indices`.
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
return getIndicesOfNonDefaultRowsImpl<ColumnFixedString>(indices, from, limit);
}
bool canBeInsideNullable() const override { return true; }
bool isFixedAndContiguous() const override { return true; }

View File

@ -19,10 +19,10 @@ using FunctionBasePtr = std::shared_ptr<const IFunctionBase>;
/** A column containing a lambda expression.
* Contains an expression and captured columns, but not input arguments.
*/
class ColumnFunction final : public COWHelper<IColumn, ColumnFunction>
class ColumnFunction final : public COWHelper<IColumnHelper<ColumnFunction>, ColumnFunction>
{
private:
friend class COWHelper<IColumn, ColumnFunction>;
friend class COWHelper<IColumnHelper<ColumnFunction>, ColumnFunction>;
ColumnFunction(
size_t size,
@ -84,6 +84,11 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName());
}
/// Always reports failure: the field is ignored and nothing is inserted.
/// Unlike the neighbouring insert methods of this class, it returns false instead of throwing.
bool tryInsert(const Field &) override
{
return false;
}
void insertDefault() override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName());
@ -97,7 +102,7 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName());
}
StringRef serializeValueIntoArena(size_t, Arena &, char const *&, const UInt8 *) const override
StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot serialize from {}", getName());
}

View File

@ -2,7 +2,6 @@
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <DataTypes/NumberTraits.h>
#include <Common/HashTable/HashMap.h>
#include <Common/WeakHash.h>
@ -137,7 +136,19 @@ ColumnLowCardinality::ColumnLowCardinality(MutableColumnPtr && column_unique_, M
void ColumnLowCardinality::insert(const Field & x)
{
compactIfSharedDictionary();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsert(x));
idx.insertPosition(getDictionary().uniqueInsert(x));
}
bool ColumnLowCardinality::tryInsert(const Field & x)
{
compactIfSharedDictionary();
size_t index;
if (!dictionary.getColumnUnique().tryUniqueInsert(x, index))
return false;
idx.insertPosition(index);
return true;
}
void ColumnLowCardinality::insertDefault()
@ -163,14 +174,14 @@ void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n)
{
compactIfSharedDictionary();
const auto & nested = *low_cardinality_src->getDictionary().getNestedColumn();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertFrom(nested, position));
idx.insertPosition(getDictionary().uniqueInsertFrom(nested, position));
}
}
void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n)
{
compactIfSharedDictionary();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertFrom(src, n));
idx.insertPosition(getDictionary().uniqueInsertFrom(src, n));
}
void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -197,7 +208,7 @@ void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, si
auto src_nested = low_cardinality_src->getDictionary().getNestedColumn();
auto used_keys = src_nested->index(*idx_map, 0);
auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(*used_keys, 0, used_keys->size());
auto inserted_indexes = getDictionary().uniqueInsertRangeFrom(*used_keys, 0, used_keys->size());
idx.insertPositionsRange(*inserted_indexes->index(*sub_idx, 0), 0, length);
}
}
@ -205,7 +216,7 @@ void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, si
void ColumnLowCardinality::insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length)
{
compactIfSharedDictionary();
auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(src, start, length);
auto inserted_indexes = getDictionary().uniqueInsertRangeFrom(src, start, length);
idx.insertPositionsRange(*inserted_indexes, 0, length);
}
@ -245,27 +256,50 @@ void ColumnLowCardinality::insertRangeFromDictionaryEncodedColumn(const IColumn
{
checkPositionsAreLimited(positions, keys.size());
compactIfSharedDictionary();
auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(keys, 0, keys.size());
auto inserted_indexes = getDictionary().uniqueInsertRangeFrom(keys, 0, keys.size());
idx.insertPositionsRange(*inserted_indexes->index(positions, 0), 0, positions.size());
}
void ColumnLowCardinality::insertData(const char * pos, size_t length)
{
compactIfSharedDictionary();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertData(pos, length));
idx.insertPosition(getDictionary().uniqueInsertData(pos, length));
}
StringRef ColumnLowCardinality::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
StringRef ColumnLowCardinality::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
return getDictionary().serializeValueIntoArena(getIndexes().getUInt(n), arena, begin);
}
/// Reads the dictionary position of row `n` via getIndexes().getUInt(n) and asks the dictionary
/// to serialize that entry into `memory`; the pointer returned by the dictionary is passed back.
char * ColumnLowCardinality::serializeValueIntoMemory(size_t n, char * memory) const
{
return getDictionary().serializeValueIntoMemory(getIndexes().getUInt(n), memory);
}
/// Adds the serialized size of every row to the matching element of `sizes`.
///
/// `is_null` must be nullptr (checked with chassert). An empty column leaves `sizes` untouched.
/// An empty `sizes` is resized with resize_fill to the number of rows; a non-empty `sizes`
/// whose length differs from the number of rows raises LOGICAL_ERROR.
/// The per-entry sizes are collected once from the dictionary and then distributed
/// to the rows by the index.
void ColumnLowCardinality::collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const UInt8 * is_null) const
{
    /// Nullability is handled inside the dictionary, so no external null map is expected.
    chassert(is_null == nullptr);

    if (empty())
        return;

    const size_t row_count = size();

    if (!sizes.empty() && sizes.size() != row_count)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of sizes: {} doesn't match rows_num: {}. It is a bug", sizes.size(), row_count);

    if (sizes.empty())
        sizes.resize_fill(row_count);

    PaddedPODArray<UInt64> sizes_per_dict_entry;
    getDictionary().collectSerializedValueSizes(sizes_per_dict_entry, nullptr);
    idx.collectSerializedValueSizes(sizes, sizes_per_dict_entry);
}
const char * ColumnLowCardinality::deserializeAndInsertFromArena(const char * pos)
{
compactIfSharedDictionary();
const char * new_pos;
idx.insertPosition(dictionary.getColumnUnique().uniqueDeserializeAndInsertFromArena(pos, new_pos));
idx.insertPosition(getDictionary().uniqueDeserializeAndInsertFromArena(pos, new_pos));
return new_pos;
}
@ -296,11 +330,6 @@ void ColumnLowCardinality::updateHashFast(SipHash & hash) const
getDictionary().getNestedColumn()->updateHashFast(hash);
}
/// Hands this column to the gatherer: all work is done by `gatherer.gather(*this)`.
void ColumnLowCardinality::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
MutableColumnPtr ColumnLowCardinality::cloneResized(size_t size) const
{
auto unique_ptr = dictionary.getColumnUniquePtr();
@ -342,15 +371,6 @@ int ColumnLowCardinality::compareAtWithCollation(size_t n, size_t m, const IColu
return compareAtImpl(n, m, rhs, nan_direction_hint, &collator);
}
/// Casts `rhs` to ColumnLowCardinality with assert_cast and forwards every argument
/// to doCompareColumn<ColumnLowCardinality>.
void ColumnLowCardinality::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
{
return doCompareColumn<ColumnLowCardinality>(
assert_cast<const ColumnLowCardinality &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
bool ColumnLowCardinality::hasEqualValues() const
{
if (getDictionary().size() <= 1)
@ -490,7 +510,7 @@ void ColumnLowCardinality::setSharedDictionary(const ColumnPtr & column_unique)
ColumnLowCardinality::MutablePtr ColumnLowCardinality::cutAndCompact(size_t start, size_t length) const
{
auto sub_positions = IColumn::mutate(idx.getPositions()->cut(start, length));
auto new_column_unique = Dictionary::compact(dictionary.getColumnUnique(), sub_positions);
auto new_column_unique = Dictionary::compact(getDictionary(), sub_positions);
return ColumnLowCardinality::create(std::move(new_column_unique), std::move(sub_positions));
}
@ -800,6 +820,20 @@ void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 &
callForType(std::move(update_weak_hash), size_of_type);
}
/// For every row `i` in [0, sizes.size()), adds `dict_sizes[position of row i]` to `sizes[i]`.
/// The positions are read through getPositionsData<> for the index type selected
/// by callForType from `size_of_type`.
/// No bounds are checked here: `sizes` is expected to have one element per stored position,
/// and every position must be a valid index into `dict_sizes`.
void ColumnLowCardinality::Index::collectSerializedValueSizes(
    PaddedPODArray<UInt64> & sizes, const PaddedPODArray<UInt64> & dict_sizes) const
{
    auto accumulate = [&](auto index_type_tag)
    {
        using PositionType = decltype(index_type_tag);
        auto & positions_data = getPositionsData<PositionType>();

        const size_t row_count = sizes.size();
        for (size_t row = 0; row != row_count; ++row)
            sizes[row] += dict_sizes[positions_data[row]];
    };

    callForType(std::move(accumulate), size_of_type);
}
ColumnLowCardinality::Dictionary::Dictionary(MutableColumnPtr && column_unique_, bool is_shared)
: column_unique(std::move(column_unique_)), shared(is_shared)

View File

@ -1,9 +1,10 @@
#pragma once
#include <Columns/ColumnsNumber.h>
#include <Columns/IColumn.h>
#include <Columns/IColumnUnique.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include "ColumnsNumber.h"
#include <Common/typeid_cast.h>
namespace DB
@ -23,9 +24,9 @@ namespace ErrorCodes
*
* @note The indices column always contains the default value (empty StringRef) with the first index.
*/
class ColumnLowCardinality final : public COWHelper<IColumn, ColumnLowCardinality>
class ColumnLowCardinality final : public COWHelper<IColumnHelper<ColumnLowCardinality>, ColumnLowCardinality>
{
friend class COWHelper<IColumn, ColumnLowCardinality>;
friend class COWHelper<IColumnHelper<ColumnLowCardinality>, ColumnLowCardinality>;
ColumnLowCardinality(MutableColumnPtr && column_unique, MutableColumnPtr && indexes, bool is_shared = false);
ColumnLowCardinality(const ColumnLowCardinality & other) = default;
@ -34,7 +35,7 @@ public:
/** Create immutable column using immutable arguments. These arguments may be shared with other columns.
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
*/
using Base = COWHelper<IColumn, ColumnLowCardinality>;
using Base = COWHelper<IColumnHelper<ColumnLowCardinality>, ColumnLowCardinality>;
static Ptr create(const ColumnPtr & column_unique_, const ColumnPtr & indexes_, bool is_shared = false)
{
return ColumnLowCardinality::create(column_unique_->assumeMutable(), indexes_->assumeMutable(), is_shared);
@ -74,6 +75,7 @@ public:
}
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
void insertDefault() override;
void insertFrom(const IColumn & src, size_t n) override;
@ -87,7 +89,10 @@ public:
void popBack(size_t n) override { idx.popBack(n); }
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
char * serializeValueIntoMemory(size_t n, char * memory) const override;
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const UInt8 * is_null) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
@ -124,10 +129,6 @@ public:
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator &) const override;
bool hasEqualValues() const override;
@ -151,8 +152,6 @@ public:
std::vector<MutableColumnPtr> scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer_stream) override;
void getExtremes(Field & min, Field & max) const override
{
return dictionary.getColumnUnique().getNestedColumn()->index(getIndexes(), 0)->getExtremes(min, max); /// TODO: optimize
@ -314,6 +313,8 @@ public:
void updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const;
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const PaddedPODArray<UInt64> & dict_sizes) const;
private:
WrappedPtr positions;
size_t size_of_type = 0;

View File

@ -1,7 +1,5 @@
#include <Columns/ColumnMap.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/IColumnImpl.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <Common/typeid_cast.h>
@ -102,6 +100,15 @@ void ColumnMap::insert(const Field & x)
nested->insert(Array(map.begin(), map.end()));
}
/// Tries to append the map held by `x`.
/// Returns false without touching the column when `x` is not a Map field.
/// Otherwise the map's elements are repacked into an Array and offered to the nested
/// column, whose tryInsert() result is returned as is.
bool ColumnMap::tryInsert(const Field & x)
{
    if (x.getType() == Field::Types::Which::Map)
    {
        const auto & map = x.get<const Map &>();
        return nested->tryInsert(Array(map.begin(), map.end()));
    }
    return false;
}
void ColumnMap::insertDefault()
{
nested->insertDefault();
@ -111,11 +118,16 @@ void ColumnMap::popBack(size_t n)
nested->popBack(n);
}
StringRef ColumnMap::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
StringRef ColumnMap::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
return nested->serializeValueIntoArena(n, arena, begin);
}
/// Forwards to the nested column: row `n` of `nested` is serialized into `memory`
/// and the pointer returned by the nested column is passed back unchanged.
char * ColumnMap::serializeValueIntoMemory(size_t n, char * memory) const
{
return nested->serializeValueIntoMemory(n, memory);
}
const char * ColumnMap::deserializeAndInsertFromArena(const char * pos)
{
return nested->deserializeAndInsertFromArena(pos);
@ -199,19 +211,6 @@ int ColumnMap::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direct
return nested->compareAt(n, m, rhs_map.getNestedColumn(), nan_direction_hint);
}
/// Casts `rhs` to ColumnMap with assert_cast and forwards every argument
/// to doCompareColumn<ColumnMap>.
void ColumnMap::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
{
return doCompareColumn<ColumnMap>(assert_cast<const ColumnMap &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
/// Forwards to hasEqualValuesImpl<ColumnMap> and returns its result.
bool ColumnMap::hasEqualValues() const
{
return hasEqualValuesImpl<ColumnMap>();
}
void ColumnMap::getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const
{
@ -224,11 +223,6 @@ void ColumnMap::updatePermutation(IColumn::PermutationSortDirection direction, I
nested->updatePermutation(direction, stability, limit, nan_direction_hint, res, equal_ranges);
}
/// Hands this column to the gatherer: all work is done by `gatherer.gather(*this)`.
void ColumnMap::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
void ColumnMap::reserve(size_t n)
{
nested->reserve(n);
@ -301,21 +295,6 @@ bool ColumnMap::structureEquals(const IColumn & rhs) const
return false;
}
/// Forwards to getRatioOfDefaultRowsImpl<ColumnMap>, passing `sample_ratio` through unchanged.
double ColumnMap::getRatioOfDefaultRows(double sample_ratio) const
{
return getRatioOfDefaultRowsImpl<ColumnMap>(sample_ratio);
}
/// Forwards to getNumberOfDefaultRowsImpl<ColumnMap> and returns its result.
UInt64 ColumnMap::getNumberOfDefaultRows() const
{
return getNumberOfDefaultRowsImpl<ColumnMap>();
}
/// Forwards to getIndicesOfNonDefaultRowsImpl<ColumnMap>; `indices`, `from` and `limit`
/// are passed through unchanged and the helper writes into `indices`.
void ColumnMap::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
return getIndicesOfNonDefaultRowsImpl<ColumnMap>(indices, from, limit);
}
ColumnPtr ColumnMap::compress() const
{
auto compressed = nested->compress();

View File

@ -10,10 +10,10 @@ namespace DB
/** Column, that stores a nested Array(Tuple(key, value)) column.
*/
class ColumnMap final : public COWHelper<IColumn, ColumnMap>
class ColumnMap final : public COWHelper<IColumnHelper<ColumnMap>, ColumnMap>
{
private:
friend class COWHelper<IColumn, ColumnMap>;
friend class COWHelper<IColumnHelper<ColumnMap>, ColumnMap>;
WrappedPtr nested;
@ -25,7 +25,7 @@ public:
/** Create immutable column using immutable arguments. These arguments may be shared with other columns.
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
*/
using Base = COWHelper<IColumn, ColumnMap>;
using Base = COWHelper<IColumnHelper<ColumnMap>, ColumnMap>;
static Ptr create(const ColumnPtr & keys, const ColumnPtr & values, const ColumnPtr & offsets)
{
@ -56,9 +56,11 @@ public:
StringRef getDataAt(size_t n) const override;
void insertData(const char * pos, size_t length) override;
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
void insertDefault() override;
void popBack(size_t n) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
@ -72,12 +74,7 @@ public:
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer_stream) override;
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
bool hasEqualValues() const override;
void getExtremes(Field & min, Field & max) const override;
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
@ -93,9 +90,6 @@ public:
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
void finalize() override { nested->finalize(); }
bool isFinalized() const override { return nested->isFinalized(); }

View File

@ -2,16 +2,11 @@
#include <Common/SipHash.h>
#include <Common/assert_cast.h>
#include <Common/WeakHash.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnsDateTime.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/ColumnLowCardinality.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#if USE_EMBEDDED_COMPILER
#include <DataTypes/Native.h>
@ -35,7 +30,6 @@ ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumn
{
/// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it.
nested_column = getNestedColumn().convertToFullColumnIfConst();
nested_type = nested_column->getDataType();
if (!getNestedColumn().canBeInsideNullable())
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "{} cannot be inside Nullable column", getNestedColumn().getName());
@ -136,77 +130,35 @@ void ColumnNullable::insertData(const char * pos, size_t length)
}
}
StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
const auto & arr = getNullMapData();
static constexpr auto s = sizeof(arr[0]);
char * pos;
switch (nested_type)
{
case TypeIndex::UInt8:
return static_cast<const ColumnUInt8 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::UInt16:
return static_cast<const ColumnUInt16 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::UInt32:
return static_cast<const ColumnUInt32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::UInt64:
return static_cast<const ColumnUInt64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::UInt128:
return static_cast<const ColumnUInt128 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::UInt256:
return static_cast<const ColumnUInt256 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Int8:
return static_cast<const ColumnInt8 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Int16:
return static_cast<const ColumnInt16 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Int32:
return static_cast<const ColumnInt32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Int64:
return static_cast<const ColumnInt64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Int128:
return static_cast<const ColumnInt128 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Int256:
return static_cast<const ColumnInt256 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Float32:
return static_cast<const ColumnFloat32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Float64:
return static_cast<const ColumnFloat64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Date:
return static_cast<const ColumnDate *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Date32:
return static_cast<const ColumnDate32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::DateTime:
return static_cast<const ColumnDateTime *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::DateTime64:
return static_cast<const ColumnDateTime64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::String:
return static_cast<const ColumnString *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::FixedString:
return static_cast<const ColumnFixedString *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Decimal32:
return static_cast<const ColumnDecimal<Decimal32> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Decimal64:
return static_cast<const ColumnDecimal<Decimal64> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Decimal128:
return static_cast<const ColumnDecimal<Decimal128> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Decimal256:
return static_cast<const ColumnDecimal<Decimal256> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::UUID:
return static_cast<const ColumnUUID *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::IPv4:
return static_cast<const ColumnIPv4 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::IPv6:
return static_cast<const ColumnIPv6 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
default:
pos = arena.allocContinue(s, begin);
auto * pos = arena.allocContinue(s, begin);
memcpy(pos, &arr[n], s);
if (arr[n])
return StringRef(pos, s);
auto nested_ref = getNestedColumn().serializeValueIntoArena(n, arena, begin);
/// serializeValueIntoArena may reallocate memory. Have to use ptr from nested_ref.data and move it back.
return StringRef(nested_ref.data - s, nested_ref.size + s);
}
/// Serializes row `n` into the caller-provided buffer `memory` and returns the
/// position just past the bytes written.
/// Layout: the null-map entry of the row, followed by the nested column's
/// serialization of the row. The nested part is omitted when the row is NULL.
char * ColumnNullable::serializeValueIntoMemory(size_t n, char * memory) const
{
    const auto & arr = getNullMapData();
    static constexpr auto s = sizeof(arr[0]);

    memcpy(memory, &arr[n], s);
    /// Advance by exactly the number of bytes just copied instead of a hard-coded
    /// single byte, so the write size and the advance can never diverge.
    memory += s;

    if (arr[n])
        return memory;

    return getNestedColumn().serializeValueIntoMemory(n, memory);
}
const char * ColumnNullable::deserializeAndInsertFromArena(const char * pos)
@ -256,6 +208,22 @@ void ColumnNullable::insert(const Field & x)
}
}
/// Attempts to append the value `x`.
/// A Null field appends a default value to the nested column and marks the row as NULL.
/// Any other field is offered to the nested column's tryInsert; if that is rejected,
/// nothing is appended to the null map and false is returned.
bool ColumnNullable::tryInsert(const Field & x)
{
    const bool is_null = x.isNull();

    if (is_null)
        getNestedColumn().insertDefault();
    else if (!getNestedColumn().tryInsert(x))
        return false;

    /// The null map gets one entry per accepted row: 1 for NULL, 0 otherwise.
    getNullMapData().push_back(is_null ? 1 : 0);
    return true;
}
void ColumnNullable::insertFrom(const IColumn & src, size_t n)
{
const ColumnNullable & src_concrete = assert_cast<const ColumnNullable &>(src);
@ -402,19 +370,6 @@ int ColumnNullable::compareAtWithCollation(size_t n, size_t m, const IColumn & r
return compareAtImpl(n, m, rhs_, null_direction_hint, &collator);
}
/// Compares this column against row `rhs_row_num` of `rhs`.
/// `rhs` is cast to ColumnNullable with assert_cast and all arguments are forwarded
/// unchanged to doCompareColumn<ColumnNullable>, which fills `compare_results`.
void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
{
return doCompareColumn<ColumnNullable>(assert_cast<const ColumnNullable &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
/// Returns the result of the shared helper hasEqualValuesImpl instantiated for ColumnNullable.
bool ColumnNullable::hasEqualValues() const
{
return hasEqualValuesImpl<ColumnNullable>();
}
void ColumnNullable::getPermutationImpl(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int null_direction_hint, Permutation & res, const Collator * collator) const
{
@ -664,11 +619,6 @@ void ColumnNullable::updatePermutationWithCollation(const Collator & collator, I
updatePermutationImpl(direction, stability, limit, null_direction_hint, res, equal_ranges, &collator);
}
/// Passes this column to `gatherer` by calling gatherer.gather(*this).
void ColumnNullable::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
void ColumnNullable::reserve(size_t n)
{
getNestedColumn().reserve(n);

View File

@ -1,12 +1,10 @@
#pragma once
#include <Columns/IColumn.h>
#include <Columns/IColumnImpl.h>
#include <Columns/ColumnsNumber.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include "Core/TypeId.h"
#include "config.h"
@ -27,10 +25,10 @@ using ConstNullMapPtr = const NullMap *;
/// over a bitmap because columns are usually stored on disk as compressed
/// files. In this regard, using a bitmap instead of a byte map would
/// greatly complicate the implementation with little to no benefits.
class ColumnNullable final : public COWHelper<IColumn, ColumnNullable>
class ColumnNullable final : public COWHelper<IColumnHelper<ColumnNullable>, ColumnNullable>
{
private:
friend class COWHelper<IColumn, ColumnNullable>;
friend class COWHelper<IColumnHelper<ColumnNullable>, ColumnNullable>;
ColumnNullable(MutableColumnPtr && nested_column_, MutableColumnPtr && null_map_);
ColumnNullable(const ColumnNullable &) = default;
@ -39,7 +37,7 @@ public:
/** Create immutable column using immutable arguments. These arguments may be shared with other columns.
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
*/
using Base = COWHelper<IColumn, ColumnNullable>;
using Base = COWHelper<IColumnHelper<ColumnNullable>, ColumnNullable>;
static Ptr create(const ColumnPtr & nested_column_, const ColumnPtr & null_map_)
{
return ColumnNullable::create(nested_column_->assumeMutable(), null_map_->assumeMutable());
@ -63,11 +61,13 @@ public:
StringRef getDataAt(size_t) const override;
/// Will insert null value if pos=nullptr
void insertData(const char * pos, size_t length) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
void insertFrom(const IColumn & src, size_t n) override;
void insertFromNotNullable(const IColumn & src, size_t n);
@ -95,11 +95,7 @@ public:
#endif
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int null_direction_hint, const Collator &) const override;
bool hasEqualValues() const override;
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int null_direction_hint, Permutation & res) const override;
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
@ -123,13 +119,6 @@ public:
// Special function for nullable minmax index
void getExtremesNullLast(Field & min, Field & max) const;
/// Forwards `num_columns` and `selector` to scatterImpl instantiated for ColumnNullable
/// and returns the resulting columns.
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override
{
return scatterImpl<ColumnNullable>(num_columns, selector);
}
void gather(ColumnGathererStream & gatherer_stream) override;
ColumnPtr compress() const override;
void forEachSubcolumn(MutableColumnCallback callback) override
@ -153,21 +142,6 @@ public:
return false;
}
/// Forwards `sample_ratio` to getRatioOfDefaultRowsImpl instantiated for ColumnNullable
/// and returns its result.
double getRatioOfDefaultRows(double sample_ratio) const override
{
return getRatioOfDefaultRowsImpl<ColumnNullable>(sample_ratio);
}
/// Returns the result of getNumberOfDefaultRowsImpl instantiated for ColumnNullable.
UInt64 getNumberOfDefaultRows() const override
{
return getNumberOfDefaultRowsImpl<ColumnNullable>();
}
/// Forwards `indices`, `from` and `limit` to getIndicesOfNonDefaultRowsImpl
/// instantiated for ColumnNullable; `indices` is the output argument.
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
getIndicesOfNonDefaultRowsImpl<ColumnNullable>(indices, from, limit);
}
ColumnPtr createWithOffsets(const Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const override;
bool isNullable() const override { return true; }
@ -214,8 +188,6 @@ public:
private:
WrappedPtr nested_column;
WrappedPtr null_map;
// optimize serializeValueIntoArena
TypeIndex nested_type;
template <bool negative>
void applyNullMapImpl(const NullMap & map);

View File

@ -12,7 +12,6 @@
#include <Interpreters/castColumn.h>
#include <Interpreters/convertFieldToType.h>
#include <Common/HashTable/HashSet.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <numeric>
@ -716,6 +715,15 @@ void ColumnObject::insert(const Field & field)
++num_rows;
}
/// Attempts to append `field`.
/// Only fields of type Object are accepted; they are passed on to insert().
/// Any other field type leaves the column untouched and yields false.
bool ColumnObject::tryInsert(const Field & field)
{
    const bool is_object = field.getType() == Field::Types::Which::Object;

    if (is_object)
        insert(field);

    return is_object;
}
void ColumnObject::insertDefault()
{
for (auto & entry : subcolumns)
@ -843,14 +851,6 @@ void ColumnObject::getPermutation(PermutationSortDirection, PermutationSortStabi
iota(res.data(), res.size(), size_t(0));
}
/// Compares this column against row `rhs_row_num` of `rhs`.
/// `rhs` is cast to ColumnObject with assert_cast and all arguments are forwarded
/// unchanged to doCompareColumn<ColumnObject>, which fills `compare_results`.
void ColumnObject::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
{
return doCompareColumn<ColumnObject>(assert_cast<const ColumnObject &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
void ColumnObject::getExtremes(Field & min, Field & max) const
{
if (num_rows == 0)
@ -865,16 +865,6 @@ void ColumnObject::getExtremes(Field & min, Field & max) const
}
}
/// Forwards `num_columns` and `selector` to scatterImpl instantiated for ColumnObject
/// and returns the resulting columns.
MutableColumns ColumnObject::scatter(ColumnIndex num_columns, const Selector & selector) const
{
return scatterImpl<ColumnObject>(num_columns, selector);
}
/// Passes this column to `gatherer` by calling gatherer.gather(*this).
void ColumnObject::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
const ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & key) const
{
if (const auto * node = subcolumns.findLeaf(key))

View File

@ -48,7 +48,7 @@ FieldInfo getFieldInfo(const Field & field);
* a trie-like structure. ColumnObject is not suitable for writing into tables
* and it should be converted to Tuple with fixed set of subcolumns before that.
*/
class ColumnObject final : public COWHelper<IColumn, ColumnObject>
class ColumnObject final : public COWHelper<IColumnHelper<ColumnObject>, ColumnObject>
{
public:
/** Class that represents one subcolumn.
@ -209,6 +209,7 @@ public:
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
void insert(const Field & field) override;
bool tryInsert(const Field & field) override;
void insertDefault() override;
void insertFrom(const IColumn & src, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@ -228,23 +229,17 @@ public:
/// Order of rows in ColumnObject is undefined.
void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override;
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {}
int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
void getExtremes(Field & min, Field & max) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer) override;
/// All other methods throw exception.
StringRef getDataAt(size_t) const override { throwMustBeConcrete(); }
bool isDefaultAt(size_t) const override { throwMustBeConcrete(); }
void insertData(const char *, size_t) override { throwMustBeConcrete(); }
StringRef serializeValueIntoArena(size_t, Arena &, char const *&, const UInt8 *) const override { throwMustBeConcrete(); }
StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override { throwMustBeConcrete(); }
char * serializeValueIntoMemory(size_t, char *) const override { throwMustBeConcrete(); }
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); }
const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }

View File

@ -1,13 +1,14 @@
#include <Columns/ColumnCompressed.h>
#include <Columns/ColumnSparse.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnsCommon.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <Columns/ColumnTuple.h>
#include <Common/HashTable/Hash.h>
#include <Common/SipHash.h>
#include <Common/WeakHash.h>
#include <Common/iota.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <algorithm>
#include <bit>
@ -152,11 +153,16 @@ void ColumnSparse::insertData(const char * pos, size_t length)
insertSingleValue([&](IColumn & column) { column.insertData(pos, length); });
}
StringRef ColumnSparse::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
StringRef ColumnSparse::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
return values->serializeValueIntoArena(getValueIndex(n), arena, begin);
}
char * ColumnSparse::serializeValueIntoMemory(size_t n, char * memory) const
{
return values->serializeValueIntoMemory(getValueIndex(n), memory);
}
const char * ColumnSparse::deserializeAndInsertFromArena(const char * pos)
{
const char * res = nullptr;
@ -234,6 +240,15 @@ void ColumnSparse::insert(const Field & x)
insertSingleValue([&](IColumn & column) { column.insert(x); });
}
/// Attempts to append `x`.
/// The value is first offered to the `values` column; if it is rejected, false is
/// returned and insertSingleValue is not called. On success insertSingleValue is run
/// with an inserter that does nothing, because the value is already in `values`.
bool ColumnSparse::tryInsert(const Field & x)
{
    const bool accepted = values->tryInsert(x);

    if (accepted)
        insertSingleValue([](IColumn &) {}); /// Value already inserted, use no-op inserter.

    return accepted;
}
void ColumnSparse::insertFrom(const IColumn & src, size_t n)
{
if (const auto * src_sparse = typeid_cast<const ColumnSparse *>(&src))
@ -721,16 +736,6 @@ UInt64 ColumnSparse::getNumberOfDefaultRows() const
return _size - offsets->size();
}
/// Forwards `num_columns` and `selector` to scatterImpl instantiated for ColumnSparse
/// and returns the resulting columns.
MutableColumns ColumnSparse::scatter(ColumnIndex num_columns, const Selector & selector) const
{
return scatterImpl<ColumnSparse>(num_columns, selector);
}
/// Passes this column to `gatherer_stream` by calling gatherer_stream.gather(*this).
void ColumnSparse::gather(ColumnGathererStream & gatherer_stream)
{
gatherer_stream.gather(*this);
}
ColumnPtr ColumnSparse::compress() const
{
auto values_compressed = values->compress();

View File

@ -1,7 +1,6 @@
#pragma once
#include <Columns/IColumn.h>
#include <Columns/IColumnImpl.h>
#include <Columns/ColumnsNumber.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
@ -18,10 +17,10 @@ namespace DB
* values contains also one default value at 0 position to make
* implementation of execution of functions and sorting more convenient.
*/
class ColumnSparse final : public COWHelper<IColumn, ColumnSparse>
class ColumnSparse final : public COWHelper<IColumnHelper<ColumnSparse>, ColumnSparse>
{
private:
friend class COWHelper<IColumn, ColumnSparse>;
friend class COWHelper<IColumnHelper<ColumnSparse>, ColumnSparse>;
explicit ColumnSparse(MutableColumnPtr && values_);
ColumnSparse(MutableColumnPtr && values_, MutableColumnPtr && offsets_, size_t size_);
@ -31,7 +30,7 @@ public:
static constexpr auto DEFAULT_ROWS_SEARCH_SAMPLE_RATIO = 0.1;
static constexpr auto DEFAULT_RATIO_FOR_SPARSE_SERIALIZATION = 0.95;
using Base = COWHelper<IColumn, ColumnSparse>;
using Base = COWHelper<IColumnHelper<ColumnSparse>, ColumnSparse>;
static Ptr create(const ColumnPtr & values_, const ColumnPtr & offsets_, size_t size_)
{
return Base::create(values_->assumeMutable(), offsets_->assumeMutable(), size_);
@ -78,11 +77,13 @@ public:
/// Will insert null value if pos=nullptr
void insertData(const char * pos, size_t length) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char *) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
void insertFrom(const IColumn & src, size_t n) override;
void insertDefault() override;
void insertManyDefaults(size_t length) override;
@ -134,10 +135,6 @@ public:
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer_stream) override;
ColumnPtr compress() const override;
void forEachSubcolumn(MutableColumnCallback callback) override;

View File

@ -4,7 +4,6 @@
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/MaskOperations.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <Common/Arena.h>
#include <Common/HashTable/Hash.h>
#include <Common/WeakHash.h>
@ -27,7 +26,7 @@ namespace ErrorCodes
ColumnString::ColumnString(const ColumnString & src)
: COWHelper<IColumn, ColumnString>(src),
: COWHelper<IColumnHelper<ColumnString>, ColumnString>(src),
offsets(src.offsets.begin(), src.offsets.end()),
chars(src.chars.begin(), src.chars.end())
{
@ -213,34 +212,69 @@ ColumnPtr ColumnString::permute(const Permutation & perm, size_t limit) const
}
StringRef ColumnString::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const
void ColumnString::collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const UInt8 * is_null) const
{
size_t string_size = sizeAt(n);
size_t offset = offsetAt(n);
constexpr size_t null_bit_size = sizeof(UInt8);
StringRef res;
char * pos;
if (null_bit)
if (empty())
return;
size_t rows = size();
if (sizes.empty())
sizes.resize_fill(rows);
else if (sizes.size() != rows)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of sizes: {} doesn't match rows_num: {}. It is a bug", sizes.size(), rows);
if (is_null)
{
res.size = * null_bit ? null_bit_size : null_bit_size + sizeof(string_size) + string_size;
pos = arena.allocContinue(res.size, begin);
res.data = pos;
memcpy(pos, null_bit, null_bit_size);
if (*null_bit) return res;
pos += null_bit_size;
for (size_t i = 0; i < rows; ++i)
{
if (is_null[i])
{
++sizes[i];
}
else
{
res.size = sizeof(string_size) + string_size;
pos = arena.allocContinue(res.size, begin);
res.data = pos;
size_t string_size = sizeAt(i);
sizes[i] += sizeof(string_size) + string_size + 1 /* null byte */;
}
}
}
else
{
for (size_t i = 0; i < rows; ++i)
{
size_t string_size = sizeAt(i);
sizes[i] += sizeof(string_size) + string_size;
}
}
}
/// Serializes row `n` into memory obtained from `arena` via allocContinue.
/// Layout: the value of sizeAt(n) as a size_t, followed by that many bytes taken
/// from `chars` starting at offsetAt(n).
/// Returns a StringRef covering exactly the bytes written by this call.
StringRef ColumnString::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
    const size_t payload_size = sizeAt(n);
    const size_t data_offset = offsetAt(n);
    const size_t total_size = sizeof(payload_size) + payload_size;

    char * dest = arena.allocContinue(total_size, begin);

    memcpy(dest, &payload_size, sizeof(payload_size));
    memcpy(dest + sizeof(payload_size), &chars[data_offset], payload_size);

    return StringRef(dest, total_size);
}
char * ColumnString::serializeValueIntoMemory(size_t n, char * memory) const
{
size_t string_size = sizeAt(n);
size_t offset = offsetAt(n);
memcpy(memory, &string_size, sizeof(string_size));
memory += sizeof(string_size);
memcpy(memory, &chars[offset], string_size);
return memory + string_size;
}
const char * ColumnString::deserializeAndInsertFromArena(const char * pos)
{
const size_t string_size = unalignedLoad<size_t>(pos);
@ -303,20 +337,6 @@ ColumnPtr ColumnString::indexImpl(const PaddedPODArray<Type> & indexes, size_t l
return res;
}
/// Compares this column against row `rhs_row_num` of `rhs`.
/// `rhs` is cast to ColumnString with assert_cast and all arguments are forwarded
/// unchanged to doCompareColumn<ColumnString>, which fills `compare_results`.
void ColumnString::compareColumn(
const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
{
return doCompareColumn<ColumnString>(assert_cast<const ColumnString &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
/// Returns the result of the shared helper hasEqualValuesImpl instantiated for ColumnString.
bool ColumnString::hasEqualValues() const
{
return hasEqualValuesImpl<ColumnString>();
}
struct ColumnString::ComparatorBase
{
const ColumnString & parent;
@ -482,13 +502,6 @@ ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const
return res;
}
/// Passes this column to `gatherer` by calling gatherer.gather(*this).
void ColumnString::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
void ColumnString::reserve(size_t n)
{
offsets.reserve_exact(n);

View File

@ -23,14 +23,14 @@ class Arena;
/** Column for String values.
*/
class ColumnString final : public COWHelper<IColumn, ColumnString>
class ColumnString final : public COWHelper<IColumnHelper<ColumnString>, ColumnString>
{
public:
using Char = UInt8;
using Chars = PaddedPODArray<UInt8>;
private:
friend class COWHelper<IColumn, ColumnString>;
friend class COWHelper<IColumnHelper<ColumnString>, ColumnString>;
/// Maps i'th position to offset to i+1'th element. Last offset maps to the end of all chars (is the size of all chars).
Offsets offsets;
@ -128,6 +128,15 @@ public:
offsets.push_back(new_size);
}
/// Attempts to append `x`.
/// Only fields of type String are accepted; they are passed on to insert().
/// Any other field type leaves the column untouched and yields false.
bool tryInsert(const Field & x) override
{
    const bool is_string = x.getType() == Field::Types::Which::String;

    if (is_string)
        insert(x);

    return is_string;
}
void insertFrom(const IColumn & src_, size_t n) override
{
const ColumnString & src = assert_cast<const ColumnString &>(src_);
@ -170,7 +179,10 @@ public:
offsets.resize_assume_reserved(offsets.size() - n);
}
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const UInt8 * is_null) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
@ -225,12 +237,6 @@ public:
return memcmpSmallAllowOverflow15(chars.data() + offsetAt(n), sizeAt(n) - 1, rhs.chars.data() + rhs.offsetAt(m), rhs.sizeAt(m) - 1);
}
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
bool hasEqualValues() const override;
/// Variant of compareAt for string comparison with respect of collation.
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const override;
@ -249,13 +255,6 @@ public:
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
/// Forwards `num_columns` and `selector` to scatterImpl instantiated for ColumnString
/// and returns the resulting columns.
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override
{
return scatterImpl<ColumnString>(num_columns, selector);
}
void gather(ColumnGathererStream & gatherer_stream) override;
ColumnPtr compress() const override;
void reserve(size_t n) override;
@ -263,7 +262,6 @@ public:
void getExtremes(Field & min, Field & max) const override;
bool canBeInsideNullable() const override { return true; }
bool structureEquals(const IColumn & rhs) const override
@ -271,21 +269,6 @@ public:
return typeid(rhs) == typeid(ColumnString);
}
/// Forwards `sample_ratio` to getRatioOfDefaultRowsImpl instantiated for ColumnString
/// and returns its result.
double getRatioOfDefaultRows(double sample_ratio) const override
{
return getRatioOfDefaultRowsImpl<ColumnString>(sample_ratio);
}
/// Returns the result of getNumberOfDefaultRowsImpl instantiated for ColumnString.
UInt64 getNumberOfDefaultRows() const override
{
return getNumberOfDefaultRowsImpl<ColumnString>();
}
/// Forwards `indices`, `from` and `limit` to getIndicesOfNonDefaultRowsImpl
/// instantiated for ColumnString; `indices` is the output argument.
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
return getIndicesOfNonDefaultRowsImpl<ColumnString>(indices, from, limit);
}
Chars & getChars() { return chars; }
const Chars & getChars() const { return chars; }

View File

@ -3,15 +3,15 @@
#include <Columns/ColumnCompressed.h>
#include <Columns/IColumnImpl.h>
#include <Core/Field.h>
#include <Common/WeakHash.h>
#include <Common/assert_cast.h>
#include <Common/iota.h>
#include <Common/typeid_cast.h>
#include <DataTypes/Serializations/SerializationInfoTuple.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <base/sort.h>
#include <Common/WeakHash.h>
#include <Common/assert_cast.h>
#include <Common/iota.h>
#include <Common/typeid_cast.h>
namespace DB
@ -148,6 +148,31 @@ void ColumnTuple::insert(const Field & x)
columns[i]->insert(tuple[i]);
}
/// Attempts to append the tuple stored in `x`.
/// Returns false without modifying the column when `x` is not a Tuple field or when
/// its arity differs from the number of element columns.
/// Elements are offered to the element columns one by one. If some element column
/// rejects its value, the rows already appended to the preceding element columns
/// are removed again, so that all element columns keep the same size.
bool ColumnTuple::tryInsert(const Field & x)
{
    if (x.getType() != Field::Types::Which::Tuple)
        return false;

    const auto & tuple = x.get<const Tuple &>();

    const size_t tuple_size = columns.size();
    if (tuple.size() != tuple_size)
        return false;

    for (size_t i = 0; i < tuple_size; ++i)
    {
        if (!columns[i]->tryInsert(tuple[i]))
        {
            /// Roll back: columns [0, i) each received exactly one value for this row.
            /// Column i itself rejected the value, so it must not be touched.
            for (size_t j = 0; j != i; ++j)
                columns[j]->popBack(1);

            return false;
        }
    }

    return true;
}
void ColumnTuple::insertFrom(const IColumn & src_, size_t n)
{
const ColumnTuple & src = assert_cast<const ColumnTuple &>(src_);
@ -172,7 +197,7 @@ void ColumnTuple::popBack(size_t n)
column->popBack(n);
}
StringRef ColumnTuple::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
StringRef ColumnTuple::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
StringRef res(begin, 0);
for (const auto & column : columns)
@ -185,6 +210,14 @@ StringRef ColumnTuple::serializeValueIntoArena(size_t n, Arena & arena, char con
return res;
}
char * ColumnTuple::serializeValueIntoMemory(size_t n, char * memory) const
{
for (const auto & column : columns)
memory = column->serializeValueIntoMemory(n, memory);
return memory;
}
const char * ColumnTuple::deserializeAndInsertFromArena(const char * pos)
{
for (auto & column : columns)
@ -326,24 +359,11 @@ int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_dire
return compareAtImpl(n, m, rhs, nan_direction_hint);
}
void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
{
return doCompareColumn<ColumnTuple>(assert_cast<const ColumnTuple &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
int ColumnTuple::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const
{
return compareAtImpl(n, m, rhs, nan_direction_hint, &collator);
}
/// Forwards to the shared hasEqualValuesImpl helper, instantiated for ColumnTuple.
bool ColumnTuple::hasEqualValues() const
{
    const bool all_equal = hasEqualValuesImpl<ColumnTuple>();
    return all_equal;
}
template <bool positive>
struct ColumnTuple::Less
{
@ -432,11 +452,6 @@ void ColumnTuple::updatePermutationWithCollation(const Collator & collator, ICol
updatePermutationImpl(direction, stability, limit, nan_direction_hint, res, equal_ranges, &collator);
}
/// Passes this column to the gatherer stream's gather() method.
void ColumnTuple::gather(ColumnGathererStream & gatherer)
{
    ColumnTuple & self = *this;
    gatherer.gather(self);
}
void ColumnTuple::reserve(size_t n)
{
const size_t tuple_size = columns.size();
@ -567,21 +582,6 @@ ColumnPtr ColumnTuple::compress() const
});
}
/// Forwards to the shared getRatioOfDefaultRowsImpl helper, instantiated for ColumnTuple.
double ColumnTuple::getRatioOfDefaultRows(double sample_ratio) const
{
    const double ratio = getRatioOfDefaultRowsImpl<ColumnTuple>(sample_ratio);
    return ratio;
}
/// Forwards to the shared getNumberOfDefaultRowsImpl helper, instantiated for ColumnTuple.
UInt64 ColumnTuple::getNumberOfDefaultRows() const
{
    const UInt64 default_rows = getNumberOfDefaultRowsImpl<ColumnTuple>();
    return default_rows;
}
/// Forwards to the shared getIndicesOfNonDefaultRowsImpl helper, instantiated for ColumnTuple.
/// `indices`, `from` and `limit` are passed through unchanged.
void ColumnTuple::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
    getIndicesOfNonDefaultRowsImpl<ColumnTuple>(indices, from, limit);
}
void ColumnTuple::finalize()
{
for (auto & column : columns)

View File

@ -12,10 +12,10 @@ namespace DB
* Mixed constant/non-constant columns is prohibited in tuple
* for implementation simplicity.
*/
class ColumnTuple final : public COWHelper<IColumn, ColumnTuple>
class ColumnTuple final : public COWHelper<IColumnHelper<ColumnTuple>, ColumnTuple>
{
private:
friend class COWHelper<IColumn, ColumnTuple>;
friend class COWHelper<IColumnHelper<ColumnTuple>, ColumnTuple>;
using TupleColumns = std::vector<WrappedPtr>;
TupleColumns columns;
@ -30,7 +30,7 @@ public:
/** Create immutable column using immutable arguments. This arguments may be shared with other columns.
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
*/
using Base = COWHelper<IColumn, ColumnTuple>;
using Base = COWHelper<IColumnHelper<ColumnTuple>, ColumnTuple>;
static Ptr create(const Columns & columns);
static Ptr create(const TupleColumns & columns);
static Ptr create(Columns && arg) { return create(arg); }
@ -58,10 +58,12 @@ public:
StringRef getDataAt(size_t n) const override;
void insertData(const char * pos, size_t length) override;
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
void insertFrom(const IColumn & src_, size_t n) override;
void insertDefault() override;
void popBack(size_t n) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
@ -74,13 +76,8 @@ public:
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer_stream) override;
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const override;
bool hasEqualValues() const override;
void getExtremes(Field & min, Field & max) const override;
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
@ -102,9 +99,6 @@ public:
bool structureEquals(const IColumn & rhs) const override;
bool isCollationSupported() const override;
ColumnPtr compress() const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
void finalize() override;
bool isFinalized() const override;

View File

@ -1,7 +1,6 @@
#pragma once
#include <Columns/IColumnUnique.h>
#include <Columns/IColumnImpl.h>
#include <Columns/ReverseIndex.h>
#include <Columns/ColumnVector.h>
@ -56,6 +55,7 @@ public:
void nestedRemoveNullable() override;
size_t uniqueInsert(const Field & x) override;
bool tryUniqueInsert(const Field & x, size_t & index) override;
size_t uniqueInsertFrom(const IColumn & src, size_t n) override;
MutableColumnPtr uniqueInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
IColumnUnique::IndexesWithOverflow uniqueInsertRangeWithOverflow(const IColumn & src, size_t start, size_t length,
@ -79,7 +79,9 @@ public:
Float32 getFloat32(size_t n) const override { return getNestedColumn()->getFloat32(n); }
bool getBool(size_t n) const override { return getNestedColumn()->getBool(n); }
bool isNullAt(size_t n) const override { return is_nullable && n == getNullValueIndex(); }
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const UInt8 * is_null) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash_func) const override
{
@ -346,6 +348,26 @@ size_t ColumnUnique<ColumnType>::uniqueInsert(const Field & x)
return uniqueInsertData(single_value_data.data, single_value_data.size);
}
/// Non-throwing counterpart of uniqueInsert.
/// On success stores the resulting index into `index` and returns true.
/// Returns false when `x` is NULL but the column is not nullable, or when an
/// empty clone of the nested column refuses the value via tryInsert.
template <typename ColumnType>
bool ColumnUnique<ColumnType>::tryUniqueInsert(const Field & x, size_t & index)
{
    if (x.isNull())
    {
        if (is_nullable)
        {
            index = getNullValueIndex();
            return true;
        }
        return false;
    }

    /// Materialize the field in a temporary one-row column to obtain its raw data.
    auto scratch_column = column_holder->cloneEmpty();
    if (scratch_column->tryInsert(x))
    {
        auto raw_value = scratch_column->getDataAt(0);
        index = uniqueInsertData(raw_value.data, raw_value.size);
        return true;
    }

    return false;
}
template <typename ColumnType>
size_t ColumnUnique<ColumnType>::uniqueInsertFrom(const IColumn & src, size_t n)
{
@ -373,7 +395,21 @@ size_t ColumnUnique<ColumnType>::uniqueInsertData(const char * pos, size_t lengt
}
template <typename ColumnType>
StringRef ColumnUnique<ColumnType>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
void ColumnUnique<ColumnType>::collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const UInt8 * is_null) const
{
/// nullable is handled internally.
chassert(is_null == nullptr);
if (IColumn::empty())
return;
if (is_nullable)
column_holder->collectSerializedValueSizes(sizes, assert_cast<const ColumnUInt8 &>(*nested_null_mask).getData().data());
else
column_holder->collectSerializedValueSizes(sizes, nullptr);
}
template <typename ColumnType>
StringRef ColumnUnique<ColumnType>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
if (is_nullable)
{
@ -396,6 +432,22 @@ StringRef ColumnUnique<ColumnType>::serializeValueIntoArena(size_t n, Arena & ar
return column_holder->serializeValueIntoArena(n, arena, begin);
}
/// Serializes value `n` into `memory` and returns the pointer just past the written bytes.
/// For a nullable column a one-byte flag is written first (1 if `n` is the null value
/// index, 0 otherwise); nothing else is written for the null value.
template <typename ColumnType>
char * ColumnUnique<ColumnType>::serializeValueIntoMemory(size_t n, char * memory) const
{
    if (!is_nullable)
        return column_holder->serializeValueIntoMemory(n, memory);

    const bool is_null_value = (n == getNullValueIndex());

    UInt8 flag = is_null_value ? 1 : 0;
    unalignedStore<UInt8>(memory, flag);
    char * payload = memory + 1;

    if (is_null_value)
        return payload;

    return column_holder->serializeValueIntoMemory(n, payload);
}
template <typename ColumnType>
size_t ColumnUnique<ColumnType>::uniqueDeserializeAndInsertFromArena(const char * pos, const char *& new_pos)
{

View File

@ -426,11 +426,30 @@ void ColumnVariant::insertData(const char *, size_t)
}
void ColumnVariant::insert(const Field & x)
{
if (!tryInsert(x))
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert field {} into column {}", toString(x), getName());
}
bool ColumnVariant::tryInsert(const DB::Field & x)
{
if (x.isNull())
{
insertDefault();
else
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert field {} to column {}", toString(x), getName());
return true;
}
for (size_t i = 0; i != variants.size(); ++i)
{
if (variants[i]->tryInsert(x))
{
getLocalDiscriminators().push_back(i);
getOffsets().push_back(variants[i]->size() - 1);
return true;
}
}
return false;
}
void ColumnVariant::insertFrom(const IColumn & src_, size_t n)
@ -624,7 +643,7 @@ void ColumnVariant::popBack(size_t n)
offsets->popBack(n);
}
StringRef ColumnVariant::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
StringRef ColumnVariant::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
/// During any serialization/deserialization we should always use global discriminators.
Discriminator global_discr = globalDiscriminatorAt(n);
@ -805,8 +824,14 @@ ColumnPtr ColumnVariant::permute(const Permutation & perm, size_t limit) const
{
/// If we have only NULLs, permutation will take no effect, just return resized column.
if (hasOnlyNulls())
{
if (limit)
return cloneResized(limit);
/// If no limit, we can just return current immutable column.
return this->getPtr();
}
/// Optimization when we have only one non empty variant and no NULLs.
/// In this case local_discriminators column is filled with identical values and offsets column
/// filled with sequential numbers. In this case we can just apply permutation to this
@ -1060,11 +1085,6 @@ MutableColumns ColumnVariant::scatter(ColumnIndex num_columns, const Selector &
return result;
}
void ColumnVariant::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
bool ColumnVariant::hasEqualValues() const
{
if (local_discriminators->empty() || hasOnlyNulls())
@ -1073,16 +1093,70 @@ bool ColumnVariant::hasEqualValues() const
return local_discriminators->hasEqualValues() && variants[localDiscriminatorAt(0)]->hasEqualValues();
}
void ColumnVariant::getPermutation(IColumn::PermutationSortDirection, IColumn::PermutationSortStability, size_t, int, IColumn::Permutation & res) const
int ColumnVariant::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
{
size_t s = local_discriminators->size();
res.resize(s);
for (size_t i = 0; i < s; ++i)
res[i] = i;
const auto & rhs_variant = assert_cast<const ColumnVariant &>(rhs);
Discriminator left_discr = globalDiscriminatorAt(n);
Discriminator right_discr = rhs_variant.globalDiscriminatorAt(m);
/// Check if we have NULLs and return result based on nan_direction_hint.
if (left_discr == NULL_DISCRIMINATOR && right_discr == NULL_DISCRIMINATOR)
return 0;
else if (left_discr == NULL_DISCRIMINATOR)
return nan_direction_hint;
else if (right_discr == NULL_DISCRIMINATOR)
return -nan_direction_hint;
/// If rows have different discriminators, row with least discriminator is considered the least.
if (left_discr != right_discr)
return left_discr < right_discr ? -1 : 1;
/// If rows have the same discriminators, compare actual values from corresponding variants.
return getVariantByGlobalDiscriminator(left_discr).compareAt(offsetAt(n), rhs_variant.offsetAt(m), rhs_variant.getVariantByGlobalDiscriminator(right_discr), nan_direction_hint);
}
void ColumnVariant::updatePermutation(IColumn::PermutationSortDirection, IColumn::PermutationSortStability, size_t, int, IColumn::Permutation &, DB::EqualRanges &) const
/// State and three-way comparison shared by the ColumnVariant comparator aliases.
struct ColumnVariant::ComparatorBase
{
    const ColumnVariant & parent;
    int nan_direction_hint;

    ComparatorBase(const ColumnVariant & parent_, int nan_direction_hint_)
        : parent(parent_)
        , nan_direction_hint(nan_direction_hint_)
    {
    }

    /// Compares rows `lhs` and `rhs` of `parent` against each other using compareAt.
    ALWAYS_INLINE int compare(size_t lhs, size_t rhs) const
    {
        return parent.compareAt(lhs, rhs, parent, nan_direction_hint);
    }
};
/// Selects the comparator that matches the requested sort direction and stability
/// and delegates to getPermutationImpl with the default sort routines.
/// If neither direction/stability combination below matches, nothing is done.
void ColumnVariant::getPermutation(PermutationSortDirection direction, PermutationSortStability stability, size_t limit, int nan_direction_hint, Permutation & res) const
{
    using Direction = IColumn::PermutationSortDirection;
    using Stability = IColumn::PermutationSortStability;

    const bool unstable = (stability == Stability::Unstable);
    const bool stable = (stability == Stability::Stable);

    if (direction == Direction::Ascending)
    {
        if (unstable)
            getPermutationImpl(limit, res, ComparatorAscendingUnstable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort());
        else if (stable)
            getPermutationImpl(limit, res, ComparatorAscendingStable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort());
    }
    else if (direction == Direction::Descending)
    {
        if (unstable)
            getPermutationImpl(limit, res, ComparatorDescendingUnstable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort());
        else if (stable)
            getPermutationImpl(limit, res, ComparatorDescendingStable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort());
    }
}
/// Selects the ordering comparator that matches the requested sort direction and stability
/// and delegates to updatePermutationImpl together with an equality comparator.
/// If neither direction/stability combination below matches, nothing is done.
void ColumnVariant::updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res, DB::EqualRanges & equal_ranges) const
{
    using Direction = IColumn::PermutationSortDirection;
    using Stability = IColumn::PermutationSortStability;

    auto equality = ComparatorEqual(*this, nan_direction_hint);

    const bool unstable = (stability == Stability::Unstable);
    const bool stable = (stability == Stability::Stable);

    if (direction == Direction::Ascending)
    {
        if (unstable)
            updatePermutationImpl(limit, res, equal_ranges, ComparatorAscendingUnstable(*this, nan_direction_hint), equality, DefaultSort(), DefaultPartialSort());
        else if (stable)
            updatePermutationImpl(limit, res, equal_ranges, ComparatorAscendingStable(*this, nan_direction_hint), equality, DefaultSort(), DefaultPartialSort());
    }
    else if (direction == Direction::Descending)
    {
        if (unstable)
            updatePermutationImpl(limit, res, equal_ranges, ComparatorDescendingUnstable(*this, nan_direction_hint), equality, DefaultSort(), DefaultPartialSort());
        else if (stable)
            updatePermutationImpl(limit, res, equal_ranges, ComparatorDescendingStable(*this, nan_direction_hint), equality, DefaultSort(), DefaultPartialSort());
    }
}
void ColumnVariant::reserve(size_t n)
@ -1216,7 +1290,14 @@ UInt64 ColumnVariant::getNumberOfDefaultRows() const
void ColumnVariant::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
return getIndicesOfNonDefaultRowsImpl<ColumnVariant>(indices, from, limit);
size_t to = limit && from + limit < size() ? from + limit : size();
indices.reserve(indices.size() + to - from);
for (size_t i = from; i < to; ++i)
{
if (!isDefaultAt(i))
indices.push_back(i);
}
}
void ColumnVariant::finalize()

View File

@ -7,11 +7,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
/**
* Column for storing Variant(...) type values.
* Variant type represents a union of other data types.
@ -59,7 +54,7 @@ namespace ErrorCodes
* 1 2
*
*/
class ColumnVariant final : public COWHelper<IColumn, ColumnVariant>
class ColumnVariant final : public COWHelper<IColumnHelper<ColumnVariant>, ColumnVariant>
{
public:
using Discriminator = UInt8;
@ -70,8 +65,16 @@ public:
static constexpr UInt8 NULL_DISCRIMINATOR = std::numeric_limits<Discriminator>::max(); /// 255
static constexpr size_t MAX_NESTED_COLUMNS = std::numeric_limits<Discriminator>::max(); /// 255
struct ComparatorBase;
using ComparatorAscendingUnstable = ComparatorAscendingUnstableImpl<ComparatorBase>;
using ComparatorAscendingStable = ComparatorAscendingStableImpl<ComparatorBase>;
using ComparatorDescendingUnstable = ComparatorDescendingUnstableImpl<ComparatorBase>;
using ComparatorDescendingStable = ComparatorDescendingStableImpl<ComparatorBase>;
using ComparatorEqual = ComparatorEqualImpl<ComparatorBase>;
private:
friend class COWHelper<IColumn, ColumnVariant>;
friend class COWHelper<IColumnHelper<ColumnVariant>, ColumnVariant>;
using NestedColumns = std::vector<WrappedPtr>;
@ -100,7 +103,7 @@ public:
/** Create immutable column using immutable arguments. This arguments may be shared with other variants.
* Use IColumn::mutate in order to make mutable column and mutate shared nested variants.
*/
using Base = COWHelper<IColumn, ColumnVariant>;
using Base = COWHelper<IColumnHelper<ColumnVariant>, ColumnVariant>;
static Ptr create(const Columns & variants_) { return create(variants_, {}); }
static Ptr create(const Columns & variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
static Ptr create(const ColumnPtr & local_discriminators_, const Columns & variants_) { return create(local_discriminators_, variants_, {}); }
@ -174,6 +177,7 @@ public:
StringRef getDataAt(size_t n) const override;
void insertData(const char * pos, size_t length) override;
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
void insertIntoVariant(const Field & x, Discriminator global_discr);
void insertFrom(const IColumn & src_, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@ -181,7 +185,7 @@ public:
void insertDefault() override;
void insertManyDefaults(size_t length) override;
void popBack(size_t n) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
@ -195,23 +199,12 @@ public:
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer_stream) override;
/// Variant type is not comparable.
int compareAt(size_t, size_t, const IColumn &, int) const override
{
return 0;
}
void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> &, int, int) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method compareColumn is not supported for ColumnVariant");
}
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
bool hasEqualValues() const override;
void getExtremes(Field & min, Field & max) const override;
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override;

View File

@ -20,6 +20,7 @@
#include <Common/TargetSpecific.h>
#include <Common/WeakHash.h>
#include <Common/assert_cast.h>
#include <Common/findExtreme.h>
#include <Common/iota.h>
#include <bit>
@ -51,31 +52,6 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
}
/// Serializes element `n` into memory obtained from `arena.allocContinue(..., begin)`.
/// Without `null_bit` only the sizeof(T) bytes of the value are written.
/// With `null_bit` the byte it points to is written first, and the value follows
/// only when that byte is zero.
template <typename T>
StringRef ColumnVector<T>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const
{
    constexpr size_t null_bit_size = sizeof(UInt8);

    StringRef res;
    char * value_pos;

    if (!null_bit)
    {
        res.size = sizeof(T);
        value_pos = arena.allocContinue(res.size, begin);
        res.data = value_pos;
    }
    else
    {
        res.size = *null_bit ? null_bit_size : null_bit_size + sizeof(T);
        char * flag_pos = arena.allocContinue(res.size, begin);
        res.data = flag_pos;
        memcpy(flag_pos, null_bit, null_bit_size);

        /// A set null byte means there is no value payload to store.
        if (*null_bit)
            return res;

        value_pos = flag_pos + null_bit_size;
    }

    unalignedStore<T>(value_pos, data[n]);
    return res;
}
template <typename T>
const char * ColumnVector<T>::deserializeAndInsertFromArena(const char * pos)
{
@ -248,6 +224,26 @@ void ColumnVector<T>::getPermutation(IColumn::PermutationSortDirection direction
iota(res.data(), data_size, IColumn::Permutation::value_type(0));
if constexpr (has_find_extreme_implementation<T> && !std::is_floating_point_v<T>)
{
/// Disabled for:
/// * floating point: We don't deal with nan_direction_hint
/// * stability::Stable: We might return any value, not the first
if ((limit == 1) && (stability == IColumn::PermutationSortStability::Unstable))
{
std::optional<size_t> index;
if (direction == IColumn::PermutationSortDirection::Ascending)
index = findExtremeMinIndex(data.data(), 0, data.size());
else
index = findExtremeMaxIndex(data.data(), 0, data.size());
if (index)
{
res.data()[0] = *index;
return;
}
}
}
if constexpr (is_arithmetic_v<T> && !is_big_int_v<T>)
{
if (!limit)
@ -862,12 +858,6 @@ ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets & offsets) const
return res;
}
/// Passes this column to the gatherer stream's gather() method.
template <typename T>
void ColumnVector<T>::gather(ColumnGathererStream & gatherer)
{
    auto & self = *this;
    gatherer.gather(self);
}
template <typename T>
void ColumnVector<T>::getExtremes(Field & min, Field & max) const
{

View File

@ -1,16 +1,15 @@
#pragma once
#include <cmath>
#include <Columns/ColumnVectorHelper.h>
#include <Columns/ColumnFixedSizeHelper.h>
#include <Columns/IColumn.h>
#include <Columns/IColumnImpl.h>
#include <Common/TargetSpecific.h>
#include <Common/assert_cast.h>
#include <Core/CompareHelper.h>
#include <Core/Field.h>
#include <Core/TypeId.h>
#include <base/TypeName.h>
#include <base/unaligned.h>
#include <Common/TargetSpecific.h>
#include <Common/assert_cast.h>
#include "config.h"
@ -30,13 +29,13 @@ namespace ErrorCodes
/** A template for columns that use a simple array to store.
*/
template <typename T>
class ColumnVector final : public COWHelper<ColumnVectorHelper, ColumnVector<T>>
class ColumnVector final : public COWHelper<IColumnHelper<ColumnVector<T>, ColumnFixedSizeHelper>, ColumnVector<T>>
{
static_assert(!is_decimal<T>);
private:
using Self = ColumnVector;
friend class COWHelper<ColumnVectorHelper, Self>;
friend class COWHelper<IColumnHelper<Self, ColumnFixedSizeHelper>, Self>;
struct less;
struct less_stable;
@ -101,8 +100,6 @@ public:
data.resize_assume_reserved(data.size() - n);
}
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
@ -158,19 +155,6 @@ public:
#endif
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override
{
return this->template doCompareColumn<Self>(assert_cast<const Self &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
/// Forwards to the shared hasEqualValuesImpl helper, instantiated for Self.
bool hasEqualValues() const override
{
    const bool all_equal = this->template hasEqualValuesImpl<Self>();
    return all_equal;
}
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
@ -240,6 +224,14 @@ public:
data.push_back(static_cast<T>(x.get<T>()));
}
/// Non-throwing insert: appends the value held by `x` converted to T.
/// Returns false without modifying `data` if `x.tryGet<NearestFieldType<T>>` fails.
bool tryInsert(const DB::Field & x) override
{
    using FieldValue = NearestFieldType<T>;

    FieldValue extracted;
    if (x.tryGet<FieldValue>(extracted))
    {
        data.push_back(static_cast<T>(extracted));
        return true;
    }

    return false;
}
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
@ -257,13 +249,6 @@ public:
void getExtremes(Field & min, Field & max) const override;
/// Forwards to the shared scatterImpl helper, instantiated for Self.
MutableColumns scatter(IColumn::ColumnIndex num_columns, const IColumn::Selector & selector) const override
{
    MutableColumns parts = this->template scatterImpl<Self>(num_columns, selector);
    return parts;
}
void gather(ColumnGathererStream & gatherer_stream) override;
bool canBeInsideNullable() const override { return true; }
bool isFixedAndContiguous() const override { return true; }
size_t sizeOfValueIfFixed() const override { return sizeof(T); }
@ -285,21 +270,6 @@ public:
return typeid(rhs) == typeid(ColumnVector<T>);
}
/// Forwards to the shared getRatioOfDefaultRowsImpl helper, instantiated for Self.
double getRatioOfDefaultRows(double sample_ratio) const override
{
    const double ratio = this->template getRatioOfDefaultRowsImpl<Self>(sample_ratio);
    return ratio;
}
/// Forwards to the shared getNumberOfDefaultRowsImpl helper, instantiated for Self.
UInt64 getNumberOfDefaultRows() const override
{
    const UInt64 default_rows = this->template getNumberOfDefaultRowsImpl<Self>();
    return default_rows;
}
/// Forwards to the shared getIndicesOfNonDefaultRowsImpl helper, instantiated for Self.
/// `indices`, `from` and `limit` are passed through unchanged.
void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override
{
    this->template getIndicesOfNonDefaultRowsImpl<Self>(indices, from, limit);
}
ColumnPtr createWithOffsets(const IColumn::Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const override;
ColumnPtr compress() const override;

View File

@ -1,12 +1,27 @@
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/IColumnDummy.h>
#include <Columns/ColumnAggregateFunction.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnFunction.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnObject.h>
#include <Columns/ColumnSparse.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnVariant.h>
#include <Columns/ColumnVector.h>
#include <Core/Field.h>
#include <DataTypes/Serializations/SerializationInfo.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
namespace DB
{
@ -38,8 +53,12 @@ void IColumn::insertFrom(const IColumn & src, size_t n)
ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const
{
if (offsets.size() + shift != size())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Incompatible sizes of offsets ({}), shift ({}) and size of column {}", offsets.size(), shift, size());
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Incompatible sizes of offsets ({}), shift ({}) and size of column {}",
offsets.size(),
shift,
size());
auto res = cloneEmpty();
res->reserve(total_rows);
@ -94,4 +113,355 @@ bool isColumnConst(const IColumn & column)
return checkColumn<ColumnConst>(column);
}
/// Splits the rows of this column into `num_columns` new columns: row i is appended
/// to the output column with index selector[i].
/// Throws LOGICAL_ERROR if the selector size differs from the number of rows.
template <typename Derived, typename Parent>
MutableColumns IColumnHelper<Derived, Parent>::scatter(IColumn::ColumnIndex num_columns, const IColumn::Selector & selector) const
{
    const auto & self = static_cast<const Derived &>(*this);
    const size_t total_rows = self.size();

    if (selector.size() != total_rows)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of selector: {} doesn't match size of column: {}", selector.size(), total_rows);

    MutableColumns parts(num_columns);
    for (auto & part : parts)
        part = self.cloneEmpty();

    /// Pre-size every part for an even split with a 10% margin. 1.1 is just a guess; an n-sigma rule would be better.
    const size_t expected_part_size = static_cast<size_t>(total_rows * 1.1 / num_columns);
    if (expected_part_size > 1)
    {
        for (auto & part : parts)
            part->reserve(expected_part_size);
    }

    for (size_t row = 0; row < total_rows; ++row)
    {
        auto & destination = static_cast<Derived &>(*parts[selector[row]]);
        destination.insertFrom(*this, row);
    }

    return parts;
}
/// Passes this column, cast to its concrete Derived type, to the gatherer stream's gather() method.
template <typename Derived, typename Parent>
void IColumnHelper<Derived, Parent>::gather(ColumnGathererStream & gatherer)
{
    auto & self = static_cast<Derived &>(*this);
    gatherer.gather(self);
}
template <typename Derived, bool reversed>
void compareImpl(
const Derived & lhs,
const Derived & rhs,
size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes [[maybe_unused]],
PaddedPODArray<Int8> & compare_results,
int nan_direction_hint)
{
size_t num_rows = lhs.size();
if (compare_results.empty())
compare_results.resize(num_rows);
else if (compare_results.size() != num_rows)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Size of compare_results: {} doesn't match rows_num: {}",
compare_results.size(),
num_rows);
for (size_t row = 0; row < num_rows; ++row)
{
int res = lhs.compareAt(row, rhs_row_num, rhs, nan_direction_hint);
assert(res == 1 || res == -1 || res == 0);
compare_results[row] = static_cast<Int8>(res);
if constexpr (reversed)
compare_results[row] = -compare_results[row];
}
}
template <typename Derived, bool reversed>
void compareWithIndexImpl(
const Derived & lhs,
const Derived & rhs,
size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes [[maybe_unused]],
PaddedPODArray<Int8> & compare_results,
int nan_direction_hint)
{
size_t num_rows = lhs.size();
if (compare_results.empty())
compare_results.resize(num_rows);
else if (compare_results.size() != num_rows)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Size of compare_results: {} doesn't match rows_num: {}",
compare_results.size(),
num_rows);
UInt64 * next_index = row_indexes->data();
for (auto row : *row_indexes)
{
int res = lhs.compareAt(row, rhs_row_num, rhs, nan_direction_hint);
assert(res == 1 || res == -1 || res == 0);
compare_results[row] = static_cast<Int8>(res);
if constexpr (reversed)
compare_results[row] = -compare_results[row];
if (compare_results[row] == 0)
{
*next_index = row;
++next_index;
}
}
size_t equal_row_indexes_size = next_index - row_indexes->data();
row_indexes->resize(equal_row_indexes_size);
}
/// Dispatches to compareImpl / compareWithIndexImpl:
/// the indexed variant is used when `row_indexes` is non-null, and the
/// `reversed` template flag is set when `direction` is negative.
template <typename Derived, typename Parent>
void IColumnHelper<Derived, Parent>::compareColumn(
    const IColumn & rhs_base,
    size_t rhs_row_num,
    PaddedPODArray<UInt64> * row_indexes,
    PaddedPODArray<Int8> & compare_results,
    int direction,
    int nan_direction_hint) const
{
    const auto & lhs = static_cast<const Derived &>(*this);
    const auto & rhs = static_cast<const Derived &>(rhs_base);

    const bool is_reversed = direction < 0;

    if (row_indexes)
    {
        if (is_reversed)
            compareWithIndexImpl<Derived, true>(lhs, rhs, rhs_row_num, row_indexes, compare_results, nan_direction_hint);
        else
            compareWithIndexImpl<Derived, false>(lhs, rhs, rhs_row_num, row_indexes, compare_results, nan_direction_hint);
    }
    else
    {
        if (is_reversed)
            compareImpl<Derived, true>(lhs, rhs, rhs_row_num, row_indexes, compare_results, nan_direction_hint);
        else
            compareImpl<Derived, false>(lhs, rhs, rhs_row_num, row_indexes, compare_results, nan_direction_hint);
    }
}
/// Returns true when every row compares equal to row 0 (using compareAt with a hint of 0).
/// A column with zero or one row yields true.
template <typename Derived, typename Parent>
bool IColumnHelper<Derived, Parent>::hasEqualValues() const
{
    const auto & self = static_cast<const Derived &>(*this);
    const size_t total_rows = self.size();

    /// Advance past the leading run of rows equal to the first one.
    size_t row = 1;
    while (row < total_rows && self.compareAt(row, 0, self, 0) == 0)
        ++row;

    return row >= total_rows;
}
/// Estimates the share of rows for which isDefaultAt() is true.
/// `sample_ratio` must be in (0.0; 1.0], otherwise a LOGICAL_ERROR is thrown.
/// All rows are inspected when the sample covers the whole column or the column has at most
/// `max_number_of_rows_for_full_search` rows; otherwise rows are picked at roughly even intervals
/// until about `size * sample_ratio` of them have been checked.
/// Returns 0.0 when no row was checked.
template <typename Derived, typename Parent>
double IColumnHelper<Derived, Parent>::getRatioOfDefaultRows(double sample_ratio) const
{
    if (sample_ratio <= 0.0 || sample_ratio > 1.0)
        throw Exception(ErrorCodes::LOGICAL_ERROR,
            "Value of 'sample_ratio' must be in interval (0.0; 1.0], but got: {}", sample_ratio);

    static constexpr auto max_number_of_rows_for_full_search = 1000;

    const auto & column = static_cast<const Derived &>(*this);
    const size_t total_rows = column.size();
    const size_t sample_target = std::min(static_cast<size_t>(total_rows * sample_ratio), total_rows);

    size_t checked = 0;
    size_t defaults = 0;

    const bool scan_everything = sample_target == total_rows || total_rows <= max_number_of_rows_for_full_search;

    if (scan_everything)
    {
        for (size_t row = 0; row < total_rows; ++row)
            defaults += column.isDefaultAt(row);
        checked = total_rows;
    }
    else if (sample_target != 0)
    {
        for (size_t row = 0; row < total_rows; ++row)
        {
            /// Skip the row while the number of checked rows is ahead of the proportional quota.
            if (checked * total_rows > row * sample_target)
                continue;

            defaults += column.isDefaultAt(row);
            ++checked;
        }
    }

    return checked == 0 ? 0.0 : static_cast<double>(defaults) / checked;
}
/// Counts the rows for which isDefaultAt() returns true, scanning the whole column.
template <typename Derived, typename Parent>
UInt64 IColumnHelper<Derived, Parent>::getNumberOfDefaultRows() const
{
    const auto & column = static_cast<const Derived &>(*this);

    UInt64 defaults = 0;
    for (size_t row = 0, total = column.size(); row < total; ++row)
        defaults += column.isDefaultAt(row);

    return defaults;
}
/// Appends to `indices` the positions of rows in [from, to) for which isDefaultAt() is false.
/// `to` is `from + limit` when `limit` is non-zero and that stays below the column size,
/// otherwise it is the column size.
template <typename Derived, typename Parent>
void IColumnHelper<Derived, Parent>::getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const
{
    const auto & column = static_cast<const Derived &>(*this);
    const size_t total = column.size();

    size_t to = total;
    if (limit != 0 && from + limit < total)
        to = from + limit;

    /// Reserve for the worst case where no row in the range is default.
    indices.reserve_exact(indices.size() + to - from);

    for (size_t row = from; row < to; ++row)
    {
        if (column.isDefaultAt(row))
            continue;
        indices.push_back(row);
    }
}
/// Serializes row `n` into `arena`, optionally prefixed with a one-byte null flag.
/// - `is_null == nullptr`: forwards to serializeValueIntoArena, no flag is written.
/// - `is_null[n]` set: only a single byte with value 1 is written.
/// - otherwise: a byte with value 0 followed by the serialized value (byteSizeAt(n) bytes reserved).
template <typename Derived, typename Parent>
StringRef
IColumnHelper<Derived, Parent>::serializeValueIntoArenaWithNull(size_t n, Arena & arena, char const *& begin, const UInt8 * is_null) const
{
    const auto & column = static_cast<const Derived &>(*this);

    if (!is_null)
        return column.serializeValueIntoArena(n, arena, begin);

    if (is_null[n])
    {
        char * null_marker = arena.allocContinue(1, begin);
        *null_marker = 1;
        return {null_marker, 1};
    }

    const size_t total_size = column.byteSizeAt(n) + 1 /* null byte */;
    char * buffer = arena.allocContinue(total_size, begin);
    *buffer = 0;
    column.serializeValueIntoMemory(n, buffer + 1);
    return {buffer, total_size};
}
/// Serializes row `n` into `arena`.
/// For columns derived from ColumnFixedSizeHelper, byteSizeAt(n) bytes are allocated with
/// allocContinue and filled by serializeValueIntoMemory; all other columns fall back to
/// IColumn::serializeValueIntoArena.
template <typename Derived, typename Parent>
StringRef IColumnHelper<Derived, Parent>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
    if constexpr (std::is_base_of_v<ColumnFixedSizeHelper, Derived>)
    {
        const auto & column = static_cast<const Derived &>(*this);
        const size_t value_size = column.byteSizeAt(n);
        char * buffer = arena.allocContinue(value_size, begin);
        column.serializeValueIntoMemory(n, buffer);
        return {buffer, value_size};
    }
    else
    {
        return IColumn::serializeValueIntoArena(n, arena, begin);
    }
}
/// Serializes row `n` into `memory`, optionally prefixed with a one-byte null flag.
/// When `is_null` is given, `is_null[n]` is written first; for a null row nothing else follows.
/// Returns the pointer just past the written bytes.
template <typename Derived, typename Parent>
char * IColumnHelper<Derived, Parent>::serializeValueIntoMemoryWithNull(size_t n, char * memory, const UInt8 * is_null) const
{
    const auto & column = static_cast<const Derived &>(*this);

    if (is_null != nullptr)
    {
        const UInt8 null_flag = is_null[n];
        *memory = null_flag;
        ++memory;

        /// Null rows consist of the flag byte only.
        if (null_flag)
            return memory;
    }

    return column.serializeValueIntoMemory(n, memory);
}
/// Copies the raw bytes of row `n` (as returned by getDataAt) into `memory` for columns derived
/// from ColumnFixedSizeHelper and returns the pointer past the copied bytes.
/// Other columns fall back to IColumn::serializeValueIntoMemory.
template <typename Derived, typename Parent>
char * IColumnHelper<Derived, Parent>::serializeValueIntoMemory(size_t n, char * memory) const
{
    if constexpr (std::is_base_of_v<ColumnFixedSizeHelper, Derived>)
    {
        const auto value = static_cast<const Derived &>(*this).getDataAt(n);
        memcpy(memory, value.data, value.size);
        return memory + value.size;
    }
    else
    {
        return IColumn::serializeValueIntoMemory(n, memory);
    }
}
/// Adds the serialized size of every row to the matching element of `sizes`.
/// Only columns derived from ColumnFixedSizeHelper are handled here (every row contributes
/// byteSizeAt(0) bytes); other columns are forwarded to IColumn::collectSerializedValueSizes.
/// An empty `sizes` is resized to the number of rows via resize_fill; any other size mismatch
/// is a LOGICAL_ERROR.
/// With `is_null` given, a null row adds 1 byte and a non-null row adds the element size plus 1.
template <typename Derived, typename Parent>
void IColumnHelper<Derived, Parent>::collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const UInt8 * is_null) const
{
    if constexpr (!std::is_base_of_v<ColumnFixedSizeHelper, Derived>)
    {
        IColumn::collectSerializedValueSizes(sizes, is_null);
    }
    else
    {
        const auto & column = static_cast<const Derived &>(*this);
        const size_t rows = column.size();

        if (sizes.empty())
            sizes.resize_fill(rows);
        else if (sizes.size() != rows)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of sizes: {} doesn't match rows_num: {}. It is a bug", sizes.size(), rows);

        if (rows == 0)
            return;

        const size_t element_size = column.byteSizeAt(0);

        if (!is_null)
        {
            for (size_t i = 0; i < rows; ++i)
                sizes[i] += element_size;
            return;
        }

        for (size_t i = 0; i < rows; ++i)
            sizes[i] += is_null[i] ? static_cast<size_t>(1) : element_size + 1 /* null byte */;
    }
}
/// Explicit instantiation definitions of IColumnHelper for each (column type, parent) pair listed below.
/// ColumnVector<...>, ColumnDecimal<...> and ColumnFixedString use ColumnFixedSizeHelper as the parent,
/// all remaining column types use IColumn directly.
/// NOTE(review): presumably this list is what makes the member definitions above available to
/// other translation units; a column type missing here would then fail at link time - confirm.
template class IColumnHelper<ColumnVector<UInt8>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<UInt16>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<UInt32>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<UInt64>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<UInt128>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<UInt256>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<Int8>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<Int16>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<Int32>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<Int64>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<Int128>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<Int256>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<Float32>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<Float64>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<UUID>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<IPv4>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<IPv6>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnDecimal<Decimal32>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnDecimal<Decimal64>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnDecimal<Decimal128>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnDecimal<Decimal256>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnDecimal<DateTime64>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnFixedString, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnString, IColumn>;
template class IColumnHelper<ColumnLowCardinality, IColumn>;
template class IColumnHelper<ColumnNullable, IColumn>;
template class IColumnHelper<ColumnConst, IColumn>;
template class IColumnHelper<ColumnArray, IColumn>;
template class IColumnHelper<ColumnTuple, IColumn>;
template class IColumnHelper<ColumnMap, IColumn>;
template class IColumnHelper<ColumnSparse, IColumn>;
template class IColumnHelper<ColumnObject, IColumn>;
template class IColumnHelper<ColumnAggregateFunction, IColumn>;
template class IColumnHelper<ColumnFunction, IColumn>;
template class IColumnHelper<ColumnCompressed, IColumn>;
template class IColumnHelper<ColumnVariant, IColumn>;
template class IColumnHelper<IColumnDummy, IColumn>;
}

View File

@ -166,6 +166,10 @@ public:
/// Is used to transform raw strings to Blocks (for example, inside input format parsers)
virtual void insert(const Field & x) = 0;
/// Appends new value at the end of the column if it has appropriate type and
/// returns true if insert is successful and false otherwise.
virtual bool tryInsert(const Field & x) = 0;
/// Appends n-th element from other column with the same type.
/// Is used in merge-sort and merges. It could be implemented in inherited classes more optimally than default implementation.
virtual void insertFrom(const IColumn & src, size_t n);
@ -219,7 +223,38 @@ public:
* For example, to obtain unambiguous representation of Array of strings, strings data should be interleaved with their sizes.
* Parameter begin should be used with Arena::allocContinue.
*/
virtual StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const = 0;
/// Default implementation: always throws NOT_IMPLEMENTED, naming the column via getName().
/// Column types that support serialization into an Arena are expected to override it.
virtual StringRef serializeValueIntoArena(size_t /* n */, Arena & /* arena */, char const *& /* begin */) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeValueIntoArena is not supported for {}", getName());
}
/// Same as above, but serializes into already allocated contiguous memory.
/// Returns a pointer to the end of the serialized data.
/// Default implementation: always throws NOT_IMPLEMENTED, naming the column via getName().
virtual char * serializeValueIntoMemory(size_t /* n */, char * /* memory */) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeValueIntoMemory is not supported for {}", getName());
}
/// Nullable variant, to avoid calling a virtual method inside ColumnNullable.
/// `is_null` is the optional per-row null flag array.
/// Default implementation: always throws NOT_IMPLEMENTED, naming the column via getName().
virtual StringRef
serializeValueIntoArenaWithNull(size_t /* n */, Arena & /* arena */, char const *& /* begin */, const UInt8 * /* is_null */) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeValueIntoArenaWithNull is not supported for {}", getName());
}
/// Variant of serializeValueIntoMemory that additionally takes the optional per-row null flag array `is_null`.
/// Default implementation: always throws NOT_IMPLEMENTED, naming the column via getName().
virtual char * serializeValueIntoMemoryWithNull(size_t /* n */, char * /* memory */, const UInt8 * /* is_null */) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeValueIntoMemoryWithNull is not supported for {}", getName());
}
/// Calculates the serialized size of every value in the column and adds it to the matching element of `sizes`.
/// If `is_null` is not nullptr, the null byte is also taken into account.
/// This is currently used to facilitate the allocation of memory for an entire continuous row
/// in a single step. For more details, refer to the HashMethodSerialized implementation.
/// Default implementation: always throws NOT_IMPLEMENTED, naming the column via getName().
virtual void collectSerializedValueSizes(PaddedPODArray<UInt64> & /* sizes */, const UInt8 * /* is_null */) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method collectSerializedValueSizes is not supported for {}", getName());
}
/// Deserializes a value that was serialized using IColumn::serializeValueIntoArena method.
/// Returns pointer to the position after the read data.
@ -570,43 +605,18 @@ public:
[[nodiscard]] String dumpStructure() const;
protected:
/// Template is to devirtualize calls to insertFrom method.
/// In derived classes (that use final keyword), implement scatter method as call to scatterImpl.
template <typename Derived>
std::vector<MutablePtr> scatterImpl(ColumnIndex num_columns, const Selector & selector) const;
template <typename Derived, bool reversed, bool use_indexes>
void compareImpl(const Derived & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes,
PaddedPODArray<Int8> & compare_results,
int nan_direction_hint) const;
template <typename Derived>
void doCompareColumn(const Derived & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes,
PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const;
template <typename Derived>
bool hasEqualValuesImpl() const;
/// Template is to devirtualize calls to 'isDefaultAt' method.
template <typename Derived>
double getRatioOfDefaultRowsImpl(double sample_ratio) const;
template <typename Derived>
UInt64 getNumberOfDefaultRowsImpl() const;
template <typename Derived>
void getIndicesOfNonDefaultRowsImpl(Offsets & indices, size_t from, size_t limit) const;
template <typename Compare, typename Sort, typename PartialSort>
void getPermutationImpl(size_t limit, Permutation & res, Compare compare,
Sort full_sort, PartialSort partial_sort) const;
void getPermutationImpl(size_t limit, Permutation & res, Compare compare, Sort full_sort, PartialSort partial_sort) const;
template <typename Compare, typename Equals, typename Sort, typename PartialSort>
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Compare compare, Equals equals,
Sort full_sort, PartialSort partial_sort) const;
void updatePermutationImpl(
size_t limit,
Permutation & res,
EqualRanges & equal_ranges,
Compare compare,
Equals equals,
Sort full_sort,
PartialSort partial_sort) const;
};
using ColumnPtr = IColumn::Ptr;
@ -623,7 +633,7 @@ struct IsMutableColumns;
template <typename Arg, typename ... Args>
struct IsMutableColumns<Arg, Args ...>
{
static const bool value = std::is_assignable<MutableColumnPtr &&, Arg>::value && IsMutableColumns<Args ...>::value;
static const bool value = std::is_assignable_v<MutableColumnPtr &&, Arg> && IsMutableColumns<Args ...>::value;
};
template <>
@ -663,4 +673,47 @@ bool isColumnNullable(const IColumn & column);
/// True if column is ColumnNullable or ColumnLowCardinality with nullable nested column.
bool isColumnNullableOrLowCardinalityNullable(const IColumn & column);
/// CRTP helper: implements a set of IColumn virtual methods once, in terms of the final column
/// type `Derived`, in order to devirtualize some calls of IColumn in final descendants.
/// `typename Parent` is needed because some columns don't inherit IColumn directly.
/// See ColumnFixedSizeHelper for example.
template <typename Derived, typename Parent = IColumn>
class IColumnHelper : public Parent
{
/// NOTE: no access specifier is written, so every override below has the default (private) access
/// of `class`; they are overrides of virtual methods declared in the base.

/// Devirtualize insertFrom.
MutableColumns scatter(IColumn::ColumnIndex num_columns, const IColumn::Selector & selector) const override;
/// Devirtualize insertFrom and insertRangeFrom.
void gather(ColumnGathererStream & gatherer) override;
/// Devirtualize compareAt.
/// `rhs_base` is treated as a `Derived` column; `row_indexes` may be nullptr.
void compareColumn(
const IColumn & rhs_base,
size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes,
PaddedPODArray<Int8> & compare_results,
int direction,
int nan_direction_hint) const override;
/// Devirtualize compareAt.
bool hasEqualValues() const override;
/// Devirtualize isDefaultAt.
double getRatioOfDefaultRows(double sample_ratio) const override;
/// Devirtualize isDefaultAt.
UInt64 getNumberOfDefaultRows() const override;
/// Devirtualize isDefaultAt.
void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override;
/// Devirtualize byteSizeAt.
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const UInt8 * is_null) const override;
/// Move common implementations into the same translation unit to ensure they are properly inlined.
/// `is_null`, where present, is the optional per-row null flag array (nullptr means "no null flags").
char * serializeValueIntoMemoryWithNull(size_t n, char * memory, const UInt8 * is_null) const override;
StringRef serializeValueIntoArenaWithNull(size_t n, Arena & arena, char const *& begin, const UInt8 * is_null) const override;
char * serializeValueIntoMemory(size_t n, char * memory) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
};
}

View File

@ -35,7 +35,7 @@ bool IColumnDummy::isDefaultAt(size_t) const
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "isDefaultAt is not implemented for {}", getName());
}
StringRef IColumnDummy::serializeValueIntoArena(size_t /*n*/, Arena & arena, char const *& begin, const UInt8 *) const
StringRef IColumnDummy::serializeValueIntoArena(size_t /*n*/, Arena & arena, char const *& begin) const
{
/// Has to put one useless byte into Arena, because serialization into zero number of bytes is ambiguous.
char * res = arena.allocContinue(1, begin);

View File

@ -11,7 +11,7 @@ class Arena;
/** Base class for columns-constants that contain a value that is not in the `Field`.
* Not a full-fledged column and is used in a special way.
*/
class IColumnDummy : public IColumn
class IColumnDummy : public IColumnHelper<IColumnDummy>
{
public:
IColumnDummy() : s(0) {}
@ -36,6 +36,7 @@ public:
Field operator[](size_t) const override;
void get(size_t, Field &) const override;
void insert(const Field &) override;
bool tryInsert(const Field &) override { return false; }
bool isDefaultAt(size_t) const override;
StringRef getDataAt(size_t) const override
@ -48,8 +49,10 @@ public:
++s;
}
StringRef serializeValueIntoArena(size_t /*n*/, Arena & arena, char const *& begin, const UInt8 *) const override;
StringRef serializeValueIntoArena(size_t /*n*/, Arena & arena, char const *& begin) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t /*n*/, SipHash & /*hash*/) const override

Some files were not shown because too many files have changed in this diff Show More